From c886f976e0ba8bc6d55c8be8f0f1241ac5b80ebc Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Tue, 20 Aug 2019 13:56:03 +0100 Subject: Opentracing doc update (#5776) Update opentracing docs to use the unified 'trace' method --- synapse/logging/opentracing.py | 67 +++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'synapse/logging') diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d2c209c471..6b706e1892 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -43,6 +43,9 @@ OpenTracing to be easily disabled in Synapse and thereby have OpenTracing as an optional dependency. This does however limit the number of modifiable spans at any point in the code to one. From here out references to `opentracing` in the code snippets refer to the Synapses module. +Most methods provided in the module have a direct correlation to those provided +by opentracing. Refer to docs there for a more in-depth documentation on some of +the args and methods. Tracing ------- @@ -68,52 +71,62 @@ set a tag on the current active span. Tracing functions ----------------- -Functions can be easily traced using decorators. There is a decorator for -'normal' function and for functions which are actually deferreds. The name of +Functions can be easily traced using decorators. The name of the function becomes the operation name for the span. .. code-block:: python - from synapse.logging.opentracing import trace, trace_deferred + from synapse.logging.opentracing import trace - # Start a span using 'normal_function' as the operation name + # Start a span using 'interesting_function' as the operation name @trace - def normal_function(*args, **kwargs): + def interesting_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful - # Start a span using 'deferred_function' as the operation name - @trace_deferred - @defer.inlineCallbacks - def deferred_function(*args, **kwargs): - # We start - yield we_wait - # we finish - return something_usual_and_useful Operation names can be explicitly set for functions by using -``trace_using_operation_name`` and -``trace_deferred_using_operation_name`` +``trace_using_operation_name`` .. code-block:: python - from synapse.logging.opentracing import ( - trace_using_operation_name, - trace_deferred_using_operation_name - ) + from synapse.logging.opentracing import trace_using_operation_name @trace_using_operation_name("A *much* better operation name") - def normal_function(*args, **kwargs): + def interesting_badly_named_function(*args, **kwargs): # Does all kinds of cool and expected things return something_usual_and_useful - @trace_deferred_using_operation_name("Another exciting operation name!") - @defer.inlineCallbacks - def deferred_function(*args, **kwargs): - # We start - yield we_wait - # we finish - return something_usual_and_useful +Setting Tags +------------ + +To set a tag on the active span do + +.. code-block:: python + + from synapse.logging.opentracing import set_tag + + set_tag(tag_name, tag_value) + +There's a convenient decorator to tag all the args of the method. It uses +inspection in order to use the formal parameter names prefixed with 'ARG_' as +tag names. It uses kwarg names as tag names without the prefix. + +.. code-block:: python + + from synapse.logging.opentracing import tag_args + + @tag_args + def set_fates(clotho, lachesis, atropos, father="Zues", mother="Themis"): + pass + + set_fates("the story", "the end", "the act") + # This will have the following tags + # - ARG_clotho: "the story" + # - ARG_lachesis: "the end" + # - ARG_atropos: "the act" + # - father: "Zues" + # - mother: "Themis" Contexts and carriers --------------------- -- cgit 1.5.1 From 812ed6b0d5b2c682d8032fc83e3041a9da93f670 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 22 Aug 2019 18:08:07 +0100 Subject: Opentracing across workers (#5771) Propagate opentracing contexts across workers Also includes some Convenience modifications to opentracing for servlets, notably: - Add boolean to skip the whitelisting check on inject extract methods. - useful when injecting into carriers locally. Otherwise we'd always have to include our own servername and whitelist our servername - start_active_span_from_request instead of header - Add boolean to decide whether to extract context from a request to a servlet --- changelog.d/5771.feature | 1 + synapse/federation/transport/server.py | 43 ++++++---- synapse/http/servlet.py | 2 +- synapse/logging/opentracing.py | 144 ++++++++++++++++++--------------- synapse/replication/http/_base.py | 16 +++- 5 files changed, 123 insertions(+), 83 deletions(-) create mode 100644 changelog.d/5771.feature (limited to 'synapse/logging') diff --git a/changelog.d/5771.feature b/changelog.d/5771.feature new file mode 100644 index 0000000000..f2f4de1fdd --- /dev/null +++ b/changelog.d/5771.feature @@ -0,0 +1 @@ +Make Opentracing work in worker mode. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index a17148fc3c..dc53b4b170 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -38,7 +38,12 @@ from synapse.http.servlet import ( parse_string_from_args, ) from synapse.logging.context import run_in_background -from synapse.logging.opentracing import start_active_span_from_context, tags +from synapse.logging.opentracing import ( + start_active_span, + start_active_span_from_request, + tags, + whitelisted_homeserver, +) from synapse.types import ThirdPartyInstanceID, get_domain_from_id from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.versionstring import get_version_string @@ -288,20 +293,28 @@ class BaseFederationServlet(object): logger.warn("authenticate_request failed: %s", e) raise - # Start an opentracing span - with start_active_span_from_context( - request.requestHeaders, - "incoming-federation-request", - tags={ - "request_id": request.get_request_id(), - tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, - tags.HTTP_METHOD: request.get_method(), - tags.HTTP_URL: request.get_redacted_uri(), - tags.PEER_HOST_IPV6: request.getClientIP(), - "authenticated_entity": origin, - "servlet_name": request.request_metrics.name, - }, - ): + request_tags = { + "request_id": request.get_request_id(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), + "authenticated_entity": origin, + "servlet_name": request.request_metrics.name, + } + + # Only accept the span context if the origin is authenticated + # and whitelisted + if origin and whitelisted_homeserver(origin): + scope = start_active_span_from_request( + request, "incoming-federation-request", tags=request_tags + ) + else: + scope = start_active_span( + "incoming-federation-request", tags=request_tags + ) + + with scope: if origin: with ratelimiter.ratelimit(origin) as d: await d diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index fd07bf7b8e..c186b31f59 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -300,7 +300,7 @@ class RestServlet(object): http_server.register_paths( method, patterns, - trace_servlet(servlet_classname, method_handler), + trace_servlet(servlet_classname)(method_handler), servlet_classname, ) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 6b706e1892..4abea4474b 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -174,10 +174,48 @@ from twisted.internet import defer from synapse.config import ConfigError +# Helper class + + +class _DummyTagNames(object): + """wrapper of opentracings tags. We need to have them if we + want to reference them without opentracing around. Clearly they + should never actually show up in a trace. `set_tags` overwrites + these with the correct ones.""" + + INVALID_TAG = "invalid-tag" + COMPONENT = INVALID_TAG + DATABASE_INSTANCE = INVALID_TAG + DATABASE_STATEMENT = INVALID_TAG + DATABASE_TYPE = INVALID_TAG + DATABASE_USER = INVALID_TAG + ERROR = INVALID_TAG + HTTP_METHOD = INVALID_TAG + HTTP_STATUS_CODE = INVALID_TAG + HTTP_URL = INVALID_TAG + MESSAGE_BUS_DESTINATION = INVALID_TAG + PEER_ADDRESS = INVALID_TAG + PEER_HOSTNAME = INVALID_TAG + PEER_HOST_IPV4 = INVALID_TAG + PEER_HOST_IPV6 = INVALID_TAG + PEER_PORT = INVALID_TAG + PEER_SERVICE = INVALID_TAG + SAMPLING_PRIORITY = INVALID_TAG + SERVICE = INVALID_TAG + SPAN_KIND = INVALID_TAG + SPAN_KIND_CONSUMER = INVALID_TAG + SPAN_KIND_PRODUCER = INVALID_TAG + SPAN_KIND_RPC_CLIENT = INVALID_TAG + SPAN_KIND_RPC_SERVER = INVALID_TAG + + try: import opentracing + + tags = opentracing.tags except ImportError: opentracing = None + tags = _DummyTagNames try: from jaeger_client import Config as JaegerConfig from synapse.logging.scopecontextmanager import LogContextScopeManager @@ -252,10 +290,6 @@ def init_tracer(config): scope_manager=LogContextScopeManager(config), ).initialize_tracer() - # Set up tags to be opentracing's tags - global tags - tags = opentracing.tags - # Whitelisting @@ -334,8 +368,8 @@ def start_active_span_follows_from(operation_name, contexts): return scope -def start_active_span_from_context( - headers, +def start_active_span_from_request( + request, operation_name, references=None, tags=None, @@ -344,9 +378,9 @@ def start_active_span_from_context( finish_on_close=True, ): """ - Extracts a span context from Twisted Headers. + Extracts a span context from a Twisted Request. args: - headers (twisted.web.http_headers.Headers) + headers (twisted.web.http.Request) For the other args see opentracing.tracer @@ -360,7 +394,9 @@ def start_active_span_from_context( if opentracing is None: return _noop_context_manager() - header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()} + header_dict = { + k.decode(): v[0].decode() for k, v in request.requestHeaders.getAllRawHeaders() + } context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict) return opentracing.tracer.start_active_span( @@ -448,7 +484,7 @@ def set_operation_name(operation_name): @only_if_tracing -def inject_active_span_twisted_headers(headers, destination): +def inject_active_span_twisted_headers(headers, destination, check_destination=True): """ Injects a span context into twisted headers in-place @@ -467,7 +503,7 @@ def inject_active_span_twisted_headers(headers, destination): https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py """ - if not whitelisted_homeserver(destination): + if check_destination and not whitelisted_homeserver(destination): return span = opentracing.tracer.active_span @@ -479,7 +515,7 @@ def inject_active_span_twisted_headers(headers, destination): @only_if_tracing -def inject_active_span_byte_dict(headers, destination): +def inject_active_span_byte_dict(headers, destination, check_destination=True): """ Injects a span context into a dict where the headers are encoded as byte strings @@ -511,7 +547,7 @@ def inject_active_span_byte_dict(headers, destination): @only_if_tracing -def inject_active_span_text_map(carrier, destination=None): +def inject_active_span_text_map(carrier, destination, check_destination=True): """ Injects a span context into a dict @@ -532,7 +568,7 @@ def inject_active_span_text_map(carrier, destination=None): https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py """ - if destination and not whitelisted_homeserver(destination): + if check_destination and not whitelisted_homeserver(destination): return opentracing.tracer.inject( @@ -689,65 +725,43 @@ def tag_args(func): return _tag_args_inner -def trace_servlet(servlet_name, func): +def trace_servlet(servlet_name, extract_context=False): """Decorator which traces a serlet. It starts a span with some servlet specific - tags such as the servlet_name and request information""" - if not opentracing: - return func + tags such as the servlet_name and request information - @wraps(func) - @defer.inlineCallbacks - def _trace_servlet_inner(request, *args, **kwargs): - with start_active_span( - "incoming-client-request", - tags={ + Args: + servlet_name (str): The name to be used for the span's operation_name + extract_context (bool): Whether to attempt to extract the opentracing + context from the request the servlet is handling. + + """ + + def _trace_servlet_inner_1(func): + if not opentracing: + return func + + @wraps(func) + @defer.inlineCallbacks + def _trace_servlet_inner(request, *args, **kwargs): + request_tags = { "request_id": request.get_request_id(), tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, tags.HTTP_METHOD: request.get_method(), tags.HTTP_URL: request.get_redacted_uri(), tags.PEER_HOST_IPV6: request.getClientIP(), - "servlet_name": servlet_name, - }, - ): - result = yield defer.maybeDeferred(func, request, *args, **kwargs) - return result - - return _trace_servlet_inner - - -# Helper class - + } -class _DummyTagNames(object): - """wrapper of opentracings tags. We need to have them if we - want to reference them without opentracing around. Clearly they - should never actually show up in a trace. `set_tags` overwrites - these with the correct ones.""" + if extract_context: + scope = start_active_span_from_request( + request, servlet_name, tags=request_tags + ) + else: + scope = start_active_span(servlet_name, tags=request_tags) - INVALID_TAG = "invalid-tag" - COMPONENT = INVALID_TAG - DATABASE_INSTANCE = INVALID_TAG - DATABASE_STATEMENT = INVALID_TAG - DATABASE_TYPE = INVALID_TAG - DATABASE_USER = INVALID_TAG - ERROR = INVALID_TAG - HTTP_METHOD = INVALID_TAG - HTTP_STATUS_CODE = INVALID_TAG - HTTP_URL = INVALID_TAG - MESSAGE_BUS_DESTINATION = INVALID_TAG - PEER_ADDRESS = INVALID_TAG - PEER_HOSTNAME = INVALID_TAG - PEER_HOST_IPV4 = INVALID_TAG - PEER_HOST_IPV6 = INVALID_TAG - PEER_PORT = INVALID_TAG - PEER_SERVICE = INVALID_TAG - SAMPLING_PRIORITY = INVALID_TAG - SERVICE = INVALID_TAG - SPAN_KIND = INVALID_TAG - SPAN_KIND_CONSUMER = INVALID_TAG - SPAN_KIND_PRODUCER = INVALID_TAG - SPAN_KIND_RPC_CLIENT = INVALID_TAG - SPAN_KIND_RPC_SERVER = INVALID_TAG + with scope: + result = yield defer.maybeDeferred(func, request, *args, **kwargs) + return result + return _trace_servlet_inner -tags = _DummyTagNames + return _trace_servlet_inner_1 diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 2e0594e581..c4be9273f6 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -22,6 +22,7 @@ from six.moves import urllib from twisted.internet import defer +import synapse.logging.opentracing as opentracing from synapse.api.errors import ( CodeMessageException, HttpResponseException, @@ -165,8 +166,12 @@ class ReplicationEndpoint(object): # have a good idea that the request has either succeeded or failed on # the master, and so whether we should clean up or not. while True: + headers = {} + opentracing.inject_active_span_byte_dict( + headers, None, check_destination=False + ) try: - result = yield request_func(uri, data) + result = yield request_func(uri, data, headers=headers) break except CodeMessageException as e: if e.code != 504 or not cls.RETRY_ON_TIMEOUT: @@ -205,7 +210,14 @@ class ReplicationEndpoint(object): args = "/".join("(?P<%s>[^/]+)" % (arg,) for arg in url_args) pattern = re.compile("^/_synapse/replication/%s/%s$" % (self.NAME, args)) - http_server.register_paths(method, [pattern], handler, self.__class__.__name__) + http_server.register_paths( + method, + [pattern], + opentracing.trace_servlet(self.__class__.__name__, extract_context=True)( + handler + ), + self.__class__.__name__, + ) def _cached_handler(self, request, txn_id, **kwargs): """Called on new incoming requests when caching is enabled. Checks -- cgit 1.5.1 From 8767b63a821eb8612e2ab830534fd6f40eb1aaaa Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 22 Aug 2019 18:21:10 +0100 Subject: Propagate opentracing contexts through EDUs (#5852) Propagate opentracing contexts through EDUs Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/5852.feature | 1 + docs/opentracing.rst | 27 +++- synapse/federation/federation_server.py | 15 +- synapse/federation/sender/transaction_manager.py | 170 ++++++++++++--------- synapse/federation/units.py | 3 + synapse/handlers/devicemessage.py | 27 +++- synapse/logging/opentracing.py | 26 ++++ synapse/storage/devices.py | 39 ++++- .../schema/delta/56/add_spans_to_device_lists.sql | 20 +++ 9 files changed, 234 insertions(+), 94 deletions(-) create mode 100644 changelog.d/5852.feature create mode 100644 synapse/storage/schema/delta/56/add_spans_to_device_lists.sql (limited to 'synapse/logging') diff --git a/changelog.d/5852.feature b/changelog.d/5852.feature new file mode 100644 index 0000000000..4a0fc6c542 --- /dev/null +++ b/changelog.d/5852.feature @@ -0,0 +1 @@ +Pass opentracing contexts between servers when transmitting EDUs. diff --git a/docs/opentracing.rst b/docs/opentracing.rst index b91a2208a8..6e98ab56ba 100644 --- a/docs/opentracing.rst +++ b/docs/opentracing.rst @@ -32,7 +32,7 @@ It is up to the remote server to decide what it does with the spans it creates. This is called the sampling policy and it can be configured through Jaeger's settings. -For OpenTracing concepts see +For OpenTracing concepts see https://opentracing.io/docs/overview/what-is-tracing/. For more information about Jaeger's implementation see @@ -79,7 +79,7 @@ Homeserver whitelisting The homeserver whitelist is configured using regular expressions. A list of regular expressions can be given and their union will be compared when propagating any -spans contexts to another homeserver. +spans contexts to another homeserver. Though it's mostly safe to send and receive span contexts to and from untrusted users since span contexts are usually opaque ids it can lead to @@ -92,6 +92,29 @@ two problems, namely: but that doesn't prevent another server sending you baggage which will be logged to OpenTracing's logs. +========== +EDU FORMAT +========== + +EDUs can contain tracing data in their content. This is not specced but +it could be of interest for other homeservers. + +EDU format (if you're using jaeger): + +.. code-block:: json + + { + "edu_type": "type", + "content": { + "org.matrix.opentracing_context": { + "uber-trace-id": "fe57cf3e65083289" + } + } + } + +Though you don't have to use jaeger you must inject the span context into +`org.matrix.opentracing_context` using the opentracing `Format.TEXT_MAP` inject method. + ================== Configuring Jaeger ================== diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9286ca3202..05fd49f3c1 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -43,7 +43,7 @@ from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction from synapse.http.endpoint import parse_server_name from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import log_kv, trace +from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace from synapse.logging.utils import log_function from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -811,12 +811,13 @@ class FederationHandlerRegistry(object): if not handler: logger.warn("No handler registered for EDU type %s", edu_type) - try: - yield handler(origin, content) - except SynapseError as e: - logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception: - logger.exception("Failed to handle edu %r", edu_type) + with start_active_span_from_edu(content, "handle_edu"): + try: + yield handler(origin, content) + except SynapseError as e: + logger.info("Failed to handle edu %r: %r", edu_type, e) + except Exception: + logger.exception("Failed to handle edu %r", edu_type) def on_query(self, query_type, args): handler = self.query_handlers.get(query_type) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 52706302f2..62ca6a3e87 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -14,11 +14,19 @@ # limitations under the License. import logging +from canonicaljson import json + from twisted.internet import defer from synapse.api.errors import HttpResponseException from synapse.federation.persistence import TransactionActions from synapse.federation.units import Transaction +from synapse.logging.opentracing import ( + extract_text_map, + set_tag, + start_active_span_follows_from, + tags, +) from synapse.util.metrics import measure_func logger = logging.getLogger(__name__) @@ -44,93 +52,109 @@ class TransactionManager(object): @defer.inlineCallbacks def send_new_transaction(self, destination, pending_pdus, pending_edus): - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[1]) - pdus = [x[0] for x in pending_pdus] - edus = pending_edus + # Make a transaction-sending opentracing span. This span follows on from + # all the edus in that transaction. This needs to be done since there is + # no active span here, so if the edus were not received by the remote the + # span would have no causality and it would be forgotten. + # The span_contexts is a generator so that it won't be evaluated if + # opentracing is disabled. (Yay speed!) - success = True + span_contexts = ( + extract_text_map(json.loads(edu.get_context())) for edu in pending_edus + ) - logger.debug("TX [%s] _attempt_new_transaction", destination) + with start_active_span_follows_from("send_transaction", span_contexts): - txn_id = str(self._next_txn_id) + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[1]) + pdus = [x[0] for x in pending_pdus] + edus = pending_edus - logger.debug( - "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", - destination, - txn_id, - len(pdus), - len(edus), - ) + success = True - transaction = Transaction.create_new( - origin_server_ts=int(self.clock.time_msec()), - transaction_id=txn_id, - origin=self._server_name, - destination=destination, - pdus=pdus, - edus=edus, - ) + logger.debug("TX [%s] _attempt_new_transaction", destination) - self._next_txn_id += 1 + txn_id = str(self._next_txn_id) - logger.info( - "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", - destination, - txn_id, - transaction.transaction_id, - len(pdus), - len(edus), - ) + logger.debug( + "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", + destination, + txn_id, + len(pdus), + len(edus), + ) - # Actually send the transaction - - # FIXME (erikj): This is a bit of a hack to make the Pdu age - # keys work - def json_data_cb(): - data = transaction.get_dict() - now = int(self.clock.time_msec()) - if "pdus" in data: - for p in data["pdus"]: - if "age_ts" in p: - unsigned = p.setdefault("unsigned", {}) - unsigned["age"] = now - int(p["age_ts"]) - del p["age_ts"] - return data - - try: - response = yield self._transport_layer.send_transaction( - transaction, json_data_cb + transaction = Transaction.create_new( + origin_server_ts=int(self.clock.time_msec()), + transaction_id=txn_id, + origin=self._server_name, + destination=destination, + pdus=pdus, + edus=edus, ) - code = 200 - except HttpResponseException as e: - code = e.code - response = e.response - if e.code in (401, 404, 429) or 500 <= e.code: - logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) - raise e + self._next_txn_id += 1 - logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) + logger.info( + "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", + destination, + txn_id, + transaction.transaction_id, + len(pdus), + len(edus), + ) - if code == 200: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: + # Actually send the transaction + + # FIXME (erikj): This is a bit of a hack to make the Pdu age + # keys work + def json_data_cb(): + data = transaction.get_dict() + now = int(self.clock.time_msec()) + if "pdus" in data: + for p in data["pdus"]: + if "age_ts" in p: + unsigned = p.setdefault("unsigned", {}) + unsigned["age"] = now - int(p["age_ts"]) + del p["age_ts"] + return data + + try: + response = yield self._transport_layer.send_transaction( + transaction, json_data_cb + ) + code = 200 + except HttpResponseException as e: + code = e.code + response = e.response + + if e.code in (401, 404, 429) or 500 <= e.code: + logger.info( + "TX [%s] {%s} got %d response", destination, txn_id, code + ) + raise e + + logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) + + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, + txn_id, + e_id, + r, + ) + else: + for p in pdus: logger.warn( - "TX [%s] {%s} Remote returned error for %s: %s", + "TX [%s] {%s} Failed to send event %s", destination, txn_id, - e_id, - r, + p.event_id, ) - else: - for p in pdus: - logger.warn( - "TX [%s] {%s} Failed to send event %s", - destination, - txn_id, - p.event_id, - ) - success = False + success = False - return success + set_tag(tags.ERROR, not success) + return success diff --git a/synapse/federation/units.py b/synapse/federation/units.py index 14aad8f09d..aa84621206 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -38,6 +38,9 @@ class Edu(JsonEncodedObject): internal_keys = ["origin", "destination"] + def get_context(self): + return getattr(self, "content", {}).get("org.matrix.opentracing_context", "{}") + class Transaction(JsonEncodedObject): """ A transaction is a list of Pdus and Edus to be sent to a remote home diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index e1ebb6346c..c7d56779b8 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -15,9 +15,17 @@ import logging +from canonicaljson import json + from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.logging.opentracing import ( + get_active_span_text_map, + set_tag, + start_active_span, + whitelisted_homeserver, +) from synapse.types import UserID, get_domain_from_id from synapse.util.stringutils import random_string @@ -100,14 +108,21 @@ class DeviceMessageHandler(object): message_id = random_string(16) + context = get_active_span_text_map() + remote_edu_contents = {} for destination, messages in remote_messages.items(): - remote_edu_contents[destination] = { - "messages": messages, - "sender": sender_user_id, - "type": message_type, - "message_id": message_id, - } + with start_active_span("to_device_for_user"): + set_tag("destination", destination) + remote_edu_contents[destination] = { + "messages": messages, + "sender": sender_user_id, + "type": message_type, + "message_id": message_id, + "org.matrix.opentracing_context": json.dumps(context) + if whitelisted_homeserver(destination) + else None, + } stream_id = yield self.store.add_messages_to_device_inbox( local_messages, remote_edu_contents diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 4abea4474b..dd296027a1 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -149,6 +149,9 @@ unchartered waters will require the enforcement of the whitelist. ``logging/opentracing.py`` has a ``whitelisted_homeserver`` method which takes in a destination and compares it to the whitelist. +Most injection methods take a 'destination' arg. The context will only be injected +if the destination matches the whitelist or the destination is None. + ======= Gotchas ======= @@ -576,6 +579,29 @@ def inject_active_span_text_map(carrier, destination, check_destination=True): ) +def get_active_span_text_map(destination=None): + """ + Gets a span context as a dict. This can be used instead of manually + injecting a span into an empty carrier. + + Args: + destination (str): the name of the remote server. + + Returns: + dict: the active span's context if opentracing is enabled, otherwise empty. + """ + + if not opentracing or (destination and not whitelisted_homeserver(destination)): + return {} + + carrier = {} + opentracing.tracer.inject( + opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier + ) + + return carrier + + def active_span_context_as_string(): """ Returns: diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 8f72d92895..e11881161d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -21,6 +21,11 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.logging.opentracing import ( + get_active_span_text_map, + trace, + whitelisted_homeserver, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore @@ -73,6 +78,7 @@ class DeviceWorkerStore(SQLBaseStore): return {d["device_id"]: d for d in devices} + @trace @defer.inlineCallbacks def get_devices_by_remote(self, destination, from_stream_id, limit): """Get stream of updates to send to remote servers @@ -127,8 +133,15 @@ class DeviceWorkerStore(SQLBaseStore): # (user_id, device_id) entries into a map, with the value being # the max stream_id across each set of duplicate entries # - # maps (user_id, device_id) -> stream_id + # maps (user_id, device_id) -> (stream_id, opentracing_context) # as long as their stream_id does not match that of the last row + # + # opentracing_context contains the opentracing metadata for the request + # that created the poke + # + # The most recent request's opentracing_context is used as the + # context which created the Edu. + query_map = {} for update in updates: if stream_id_cutoff is not None and update[2] >= stream_id_cutoff: @@ -136,7 +149,14 @@ class DeviceWorkerStore(SQLBaseStore): break key = (update[0], update[1]) - query_map[key] = max(query_map.get(key, 0), update[2]) + + update_context = update[3] + update_stream_id = update[2] + + previous_update_stream_id, _ = query_map.get(key, (0, None)) + + if update_stream_id > previous_update_stream_id: + query_map[key] = (update_stream_id, update_context) # If we didn't find any updates with a stream_id lower than the cutoff, it # means that there are more than limit updates all of which have the same @@ -171,7 +191,7 @@ class DeviceWorkerStore(SQLBaseStore): List: List of device updates """ sql = """ - SELECT user_id, device_id, stream_id FROM device_lists_outbound_pokes + SELECT user_id, device_id, stream_id, opentracing_context FROM device_lists_outbound_pokes WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? ORDER BY stream_id LIMIT ? @@ -187,8 +207,9 @@ class DeviceWorkerStore(SQLBaseStore): Args: destination (str): The host the device updates are intended for from_stream_id (int): The minimum stream_id to filter updates by, exclusive - query_map (Dict[(str, str): int]): Dictionary mapping - user_id/device_id to update stream_id + query_map (Dict[(str, str): (int, str|None)]): Dictionary mapping + user_id/device_id to update stream_id and the relevent json-encoded + opentracing context Returns: List[Dict]: List of objects representing an device update EDU @@ -210,12 +231,13 @@ class DeviceWorkerStore(SQLBaseStore): destination, user_id, from_stream_id ) for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] + stream_id, opentracing_context = query_map[(user_id, device_id)] result = { "user_id": user_id, "device_id": device_id, "prev_id": [prev_id] if prev_id else [], "stream_id": stream_id, + "org.matrix.opentracing_context": opentracing_context, } prev_id = stream_id @@ -814,6 +836,8 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): ], ) + context = get_active_span_text_map() + self._simple_insert_many_txn( txn, table="device_lists_outbound_pokes", @@ -825,6 +849,9 @@ class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): "device_id": device_id, "sent": False, "ts": now, + "opentracing_context": json.dumps(context) + if whitelisted_homeserver(destination) + else None, } for destination in hosts for device_id in device_ids diff --git a/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql new file mode 100644 index 0000000000..41807eb1e7 --- /dev/null +++ b/synapse/storage/schema/delta/56/add_spans_to_device_lists.sql @@ -0,0 +1,20 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Opentracing context data for inclusion in the device_list_update EDUs, as a + * json-encoded dictionary. NULL if opentracing is disabled (or not enabled for this destination). + */ +ALTER TABLE device_lists_outbound_pokes ADD opentracing_context TEXT; -- cgit 1.5.1 From 7dc398586c2156a456d9526ac0e42c1fec9f8143 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 28 Aug 2019 21:18:53 +1000 Subject: Implement a structured logging output system. (#5680) --- .buildkite/docker-compose.py35.pg95.yaml | 1 + .buildkite/docker-compose.py37.pg11.yaml | 1 + .buildkite/docker-compose.py37.pg95.yaml | 1 + .buildkite/pipeline.yml | 10 +- .gitignore | 5 +- changelog.d/5680.misc | 1 + docs/structured_logging.md | 83 +++++++ synapse/app/_base.py | 12 +- synapse/app/admin_cmd.py | 4 +- synapse/app/appservice.py | 4 +- synapse/app/client_reader.py | 4 +- synapse/app/event_creator.py | 4 +- synapse/app/federation_reader.py | 4 +- synapse/app/federation_sender.py | 4 +- synapse/app/frontend_proxy.py | 4 +- synapse/app/homeserver.py | 4 +- synapse/app/media_repository.py | 4 +- synapse/app/pusher.py | 4 +- synapse/app/synchrotron.py | 4 +- synapse/app/user_dir.py | 4 +- synapse/config/logger.py | 103 +++++---- synapse/handlers/federation.py | 5 +- synapse/logging/_structured.py | 374 +++++++++++++++++++++++++++++++ synapse/logging/_terse_json.py | 278 +++++++++++++++++++++++ synapse/logging/context.py | 14 +- synapse/python_dependencies.py | 6 +- tests/logging/__init__.py | 0 tests/logging/test_structured.py | 197 ++++++++++++++++ tests/logging/test_terse_json.py | 234 +++++++++++++++++++ tests/server.py | 27 ++- tox.ini | 10 + 31 files changed, 1328 insertions(+), 82 deletions(-) create mode 100644 changelog.d/5680.misc create mode 100644 docs/structured_logging.md create mode 100644 synapse/logging/_structured.py create mode 100644 synapse/logging/_terse_json.py create mode 100644 tests/logging/__init__.py create mode 100644 tests/logging/test_structured.py create mode 100644 tests/logging/test_terse_json.py (limited to 'synapse/logging') diff --git a/.buildkite/docker-compose.py35.pg95.yaml b/.buildkite/docker-compose.py35.pg95.yaml index 2f14387fbc..aaea33006b 100644 --- a/.buildkite/docker-compose.py35.pg95.yaml +++ b/.buildkite/docker-compose.py35.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.5 diff --git a/.buildkite/docker-compose.py37.pg11.yaml b/.buildkite/docker-compose.py37.pg11.yaml index f3eec05ceb..1b32675e78 100644 --- a/.buildkite/docker-compose.py37.pg11.yaml +++ b/.buildkite/docker-compose.py37.pg11.yaml @@ -6,6 +6,7 @@ services: image: postgres:11 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 diff --git a/.buildkite/docker-compose.py37.pg95.yaml b/.buildkite/docker-compose.py37.pg95.yaml index 2a41db8eba..7679f6508d 100644 --- a/.buildkite/docker-compose.py37.pg95.yaml +++ b/.buildkite/docker-compose.py37.pg95.yaml @@ -6,6 +6,7 @@ services: image: postgres:9.5 environment: POSTGRES_PASSWORD: postgres + command: -c fsync=off testenv: image: python:3.7 diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index b75269a155..d9327227ed 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -45,8 +45,15 @@ steps: - docker#v3.0.1: image: "python:3.6" - - wait + - command: + - "python -m pip install tox" + - "tox -e mypy" + label: ":mypy: mypy" + plugins: + - docker#v3.0.1: + image: "python:3.5" + - wait - command: - "apt-get update && apt-get install -y python3.5 python3.5-dev python3-pip libxml2-dev libxslt-dev zlib1g-dev" @@ -55,6 +62,7 @@ steps: label: ":python: 3.5 / SQLite / Old Deps" env: TRIAL_FLAGS: "-j 2" + LANG: "C.UTF-8" plugins: - docker#v3.0.1: image: "ubuntu:xenial" # We use xenail to get an old sqlite and python diff --git a/.gitignore b/.gitignore index f6168a8819..e53d4908d5 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ _trial_temp*/ /*.signing.key /env/ /homeserver*.yaml +/logs /media_store/ /uploads @@ -29,8 +30,9 @@ _trial_temp*/ /.vscode/ # build products -/.coverage* !/.coveragerc +/.coverage* +/.mypy_cache/ /.tox /build/ /coverage.* @@ -38,4 +40,3 @@ _trial_temp*/ /docs/build/ /htmlcov /pip-wheel-metadata/ - diff --git a/changelog.d/5680.misc b/changelog.d/5680.misc new file mode 100644 index 0000000000..46a403a188 --- /dev/null +++ b/changelog.d/5680.misc @@ -0,0 +1 @@ +Lay the groundwork for structured logging output. diff --git a/docs/structured_logging.md b/docs/structured_logging.md new file mode 100644 index 0000000000..decec9b8fa --- /dev/null +++ b/docs/structured_logging.md @@ -0,0 +1,83 @@ +# Structured Logging + +A structured logging system can be useful when your logs are destined for a machine to parse and process. By maintaining its machine-readable characteristics, it enables more efficient searching and aggregations when consumed by software such as the "ELK stack". + +Synapse's structured logging system is configured via the file that Synapse's `log_config` config option points to. The file must be YAML and contain `structured: true`. It must contain a list of "drains" (places where logs go to). + +A structured logging configuration looks similar to the following: + +```yaml +structured: true + +loggers: + synapse: + level: INFO + synapse.storage.SQL: + level: WARNING + +drains: + console: + type: console + location: stdout + file: + type: file_json + location: homeserver.log +``` + +The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will have two logging drains (to the console and to a file, stored as JSON). + +## Drain Types + +Drain types can be specified by the `type` key. + +### `console` + +Outputs human-readable logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `console_json` + +Outputs machine-readable JSON logs to the console. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `console_json_terse` + +Outputs machine-readable JSON logs to the console, separated by newlines. This +format is not designed to be read and re-formatted into human-readable text, but +is optimal for a logging aggregation system. + +Arguments: + +- `location`: Either `stdout` or `stderr`. + +### `file` + +Outputs human-readable logs to a file. + +Arguments: + +- `location`: An absolute path to the file to log to. + +### `file_json` + +Outputs machine-readable logs to a file. + +Arguments: + +- `location`: An absolute path to the file to log to. + +### `network_json_terse` + +Delivers machine-readable JSON logs to a log aggregator over TCP. This is +compatible with LogStash's TCP input with the codec set to `json_lines`. + +Arguments: + +- `host`: Hostname or IP address of the log aggregator. +- `port`: Numerical port to contact on the host. \ No newline at end of file diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 69dcf3523f..c30fdeee9a 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -36,18 +36,20 @@ from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) +# list of tuples of function, args list, kwargs dict _sighup_callbacks = [] -def register_sighup(func): +def register_sighup(func, *args, **kwargs): """ Register a function to be called when a SIGHUP occurs. Args: func (function): Function to be called when sent a SIGHUP signal. - Will be called with a single argument, the homeserver. + Will be called with a single default argument, the homeserver. + *args, **kwargs: args and kwargs to be passed to the target function. """ - _sighup_callbacks.append(func) + _sighup_callbacks.append((func, args, kwargs)) def start_worker_reactor(appname, config, run_command=reactor.run): @@ -248,8 +250,8 @@ def start(hs, listeners=None): # we're not using systemd. sdnotify(b"RELOADING=1") - for i in _sighup_callbacks: - i(hs) + for i, args, kwargs in _sighup_callbacks: + i(hs, *args, **kwargs) sdnotify(b"READY=1") diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 1fd52a5526..04751a6a5e 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -227,8 +227,6 @@ def start(config_options): config.start_pushers = False config.send_federation = False - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -241,6 +239,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() # We use task.react as the basic run command as it correctly handles tearing diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 54bb114dec..767b87d2db 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -141,8 +141,6 @@ def start(config_options): assert config.worker_app == "synapse.app.appservice" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -167,6 +165,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ps, config.worker_listeners diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 721bb5b119..86193d35a8 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -179,8 +179,6 @@ def start(config_options): assert config.worker_app == "synapse.app.client_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -193,6 +191,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 473c8895d0..c67fe69a50 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -175,8 +175,6 @@ def start(config_options): assert config.worker_replication_http_port is not None - setup_logging(config, use_worker_options=True) - # This should only be done on the user directory worker or the master config.update_user_directory = False @@ -192,6 +190,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 5255d9e8cc..1ef027a88c 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -160,8 +160,6 @@ def start(config_options): assert config.worker_app == "synapse.app.federation_reader" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -174,6 +172,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index c5a2880e69..04fbb407af 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -171,8 +171,6 @@ def start(config_options): assert config.worker_app == "synapse.app.federation_sender" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -197,6 +195,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e2822ca848..611d285421 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -232,8 +232,6 @@ def start(config_options): assert config.worker_main_http_uri is not None - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -246,6 +244,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 8233905844..04f1ed14f3 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -341,8 +341,6 @@ def setup(config_options): # generating config files and shouldn't try to continue. sys.exit(0) - synapse.config.logger.setup_logging(config, use_worker_options=False) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -356,6 +354,8 @@ def setup(config_options): database_engine=database_engine, ) + synapse.config.logger.setup_logging(hs, config, use_worker_options=False) + logger.info("Preparing database: %s...", config.database_config["name"]) try: diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 3a168577c7..2ac783ffa3 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -155,8 +155,6 @@ def start(config_options): "Please add ``enable_media_repo: false`` to the main config\n" ) - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -169,6 +167,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 692ffa2f04..d84732ee3c 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -184,8 +184,6 @@ def start(config_options): assert config.worker_app == "synapse.app.pusher" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts if config.start_pushers: @@ -210,6 +208,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ps, config, use_worker_options=True) + ps.setup() def start(): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index a1c3b162f7..473026fce5 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -435,8 +435,6 @@ def start(config_options): assert config.worker_app == "synapse.app.synchrotron" - setup_logging(config, use_worker_options=True) - synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -450,6 +448,8 @@ def start(config_options): application_service_handler=SynchrotronApplicationService(), ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index cb29a1afab..e01afb39f2 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -197,8 +197,6 @@ def start(config_options): assert config.worker_app == "synapse.app.user_dir" - setup_logging(config, use_worker_options=True) - events.USE_FROZEN_DICTS = config.use_frozen_dicts database_engine = create_engine(config.database_config) @@ -223,6 +221,8 @@ def start(config_options): database_engine=database_engine, ) + setup_logging(ss, config, use_worker_options=True) + ss.setup() reactor.addSystemEventTrigger( "before", "startup", _base.start, ss, config.worker_listeners diff --git a/synapse/config/logger.py b/synapse/config/logger.py index d321d00b80..981df5a10c 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -25,6 +25,10 @@ from twisted.logger import STDLibLogObserver, globalLogBeginner import synapse from synapse.app import _base as appbase +from synapse.logging._structured import ( + reload_structured_logging, + setup_structured_logging, +) from synapse.logging.context import LoggingContextFilter from synapse.util.versionstring import get_version_string @@ -119,21 +123,10 @@ class LoggingConfig(Config): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def setup_logging(config, use_worker_options=False): - """ Set up python logging - - Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): - configuration data - - use_worker_options (bool): True to use the 'worker_log_config' option - instead of 'log_config'. - - register_sighup (func | None): Function to call to register a - sighup handler. +def _setup_stdlib_logging(config, log_config): + """ + Set up Python stdlib logging. """ - log_config = config.worker_log_config if use_worker_options else config.log_config - if log_config is None: log_format = ( "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" @@ -151,35 +144,10 @@ def setup_logging(config, use_worker_options=False): handler.addFilter(LoggingContextFilter(request="")) logger.addHandler(handler) else: + logging.config.dictConfig(log_config) - def load_log_config(): - with open(log_config, "r") as f: - logging.config.dictConfig(yaml.safe_load(f)) - - def sighup(*args): - # it might be better to use a file watcher or something for this. - load_log_config() - logging.info("Reloaded log config from %s due to SIGHUP", log_config) - - load_log_config() - appbase.register_sighup(sighup) - - # make sure that the first thing we log is a thing we can grep backwards - # for - logging.warn("***** STARTING SERVER *****") - logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) - logging.info("Server hostname: %s", config.server_name) - - # It's critical to point twisted's internal logging somewhere, otherwise it - # stacks up and leaks kup to 64K object; - # see: https://twistedmatrix.com/trac/ticket/8164 - # - # Routing to the python logging framework could be a performance problem if - # the handlers blocked for a long time as python.logging is a blocking API - # see https://twistedmatrix.com/documents/current/core/howto/logger.html - # filed as https://github.com/matrix-org/synapse/issues/1727 - # - # However this may not be too much of a problem if we are just writing to a file. + # Route Twisted's native logging through to the standard library logging + # system. observer = STDLibLogObserver() def _log(event): @@ -201,3 +169,54 @@ def setup_logging(config, use_worker_options=False): ) if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") + + +def _reload_stdlib_logging(*args, log_config=None): + logger = logging.getLogger("") + + if not log_config: + logger.warn("Reloaded a blank config?") + + logging.config.dictConfig(log_config) + + +def setup_logging(hs, config, use_worker_options=False): + """ + Set up the logging subsystem. + + Args: + config (LoggingConfig | synapse.config.workers.WorkerConfig): + configuration data + + use_worker_options (bool): True to use the 'worker_log_config' option + instead of 'log_config'. + """ + log_config = config.worker_log_config if use_worker_options else config.log_config + + def read_config(*args, callback=None): + if log_config is None: + return None + + with open(log_config, "rb") as f: + log_config_body = yaml.safe_load(f.read()) + + if callback: + callback(log_config=log_config_body) + logging.info("Reloaded log config from %s due to SIGHUP", log_config) + + return log_config_body + + log_config_body = read_config() + + if log_config_body and log_config_body.get("structured") is True: + setup_structured_logging(hs, config, log_config_body) + appbase.register_sighup(read_config, callback=reload_structured_logging) + else: + _setup_stdlib_logging(config, log_config_body) + appbase.register_sighup(read_config, callback=_reload_stdlib_logging) + + # make sure that the first thing we log is a thing we can grep backwards + # for + logging.warn("***** STARTING SERVER *****") + logging.warn("Server %s version %s", sys.argv[0], get_version_string(synapse)) + logging.info("Server hostname: %s", config.server_name) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c86903b98b..94306c94a9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -326,8 +326,9 @@ class FederationHandler(BaseHandler): ours = yield self.store.get_state_groups_ids(room_id, seen) # state_maps is a list of mappings from (type, state_key) to event_id - # type: list[dict[tuple[str, str], str]] - state_maps = list(ours.values()) + state_maps = list( + ours.values() + ) # type: list[dict[tuple[str, str], str]] # we don't need this any more, let's delete it. del ours diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py new file mode 100644 index 0000000000..0367d6dfc4 --- /dev/null +++ b/synapse/logging/_structured.py @@ -0,0 +1,374 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os.path +import sys +import typing +import warnings + +import attr +from constantly import NamedConstant, Names, ValueConstant, Values +from zope.interface import implementer + +from twisted.logger import ( + FileLogObserver, + FilteringLogObserver, + ILogObserver, + LogBeginner, + Logger, + LogLevel, + LogLevelFilterPredicate, + LogPublisher, + eventAsText, + globalLogBeginner, + jsonFileLogObserver, +) + +from synapse.config._base import ConfigError +from synapse.logging._terse_json import ( + TerseJSONToConsoleLogObserver, + TerseJSONToTCPLogObserver, +) +from synapse.logging.context import LoggingContext + + +def stdlib_log_level_to_twisted(level: str) -> LogLevel: + """ + Convert a stdlib log level to Twisted's log level. + """ + lvl = level.lower().replace("warning", "warn") + return LogLevel.levelWithName(lvl) + + +@attr.s +@implementer(ILogObserver) +class LogContextObserver(object): + """ + An ILogObserver which adds Synapse-specific log context information. + + Attributes: + observer (ILogObserver): The target parent observer. + """ + + observer = attr.ib() + + def __call__(self, event: dict) -> None: + """ + Consume a log event and emit it to the parent observer after filtering + and adding log context information. + + Args: + event (dict) + """ + # Filter out some useless events that Twisted outputs + if "log_text" in event: + if event["log_text"].startswith("DNSDatagramProtocol starting on "): + return + + if event["log_text"].startswith("(UDP Port "): + return + + if event["log_text"].startswith("Timing out client") or event[ + "log_format" + ].startswith("Timing out client"): + return + + context = LoggingContext.current_context() + + # Copy the context information to the log event. + if context is not None: + context.copy_to_twisted_log_entry(event) + else: + # If there's no logging context, not even the root one, we might be + # starting up or it might be from non-Synapse code. Log it as if it + # came from the root logger. + event["request"] = None + event["scope"] = None + + self.observer(event) + + +class PythonStdlibToTwistedLogger(logging.Handler): + """ + Transform a Python stdlib log message into a Twisted one. + """ + + def __init__(self, observer, *args, **kwargs): + """ + Args: + observer (ILogObserver): A Twisted logging observer. + *args, **kwargs: Args/kwargs to be passed to logging.Handler. + """ + self.observer = observer + super().__init__(*args, **kwargs) + + def emit(self, record: logging.LogRecord) -> None: + """ + Emit a record to Twisted's observer. + + Args: + record (logging.LogRecord) + """ + + self.observer( + { + "log_time": record.created, + "log_text": record.getMessage(), + "log_format": "{log_text}", + "log_namespace": record.name, + "log_level": stdlib_log_level_to_twisted(record.levelname), + } + ) + + +def SynapseFileLogObserver(outFile: typing.io.TextIO) -> FileLogObserver: + """ + A log observer that formats events like the traditional log formatter and + sends them to `outFile`. + + Args: + outFile (file object): The file object to write to. + """ + + def formatEvent(_event: dict) -> str: + event = dict(_event) + event["log_level"] = event["log_level"].name.upper() + event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + ( + event.get("log_format", "{log_text}") or "{log_text}" + ) + return eventAsText(event, includeSystem=False) + "\n" + + return FileLogObserver(outFile, formatEvent) + + +class DrainType(Names): + CONSOLE = NamedConstant() + CONSOLE_JSON = NamedConstant() + CONSOLE_JSON_TERSE = NamedConstant() + FILE = NamedConstant() + FILE_JSON = NamedConstant() + NETWORK_JSON_TERSE = NamedConstant() + + +class OutputPipeType(Values): + stdout = ValueConstant(sys.__stdout__) + stderr = ValueConstant(sys.__stderr__) + + +@attr.s +class DrainConfiguration(object): + name = attr.ib() + type = attr.ib() + location = attr.ib() + options = attr.ib(default=None) + + +@attr.s +class NetworkJSONTerseOptions(object): + maximum_buffer = attr.ib(type=int) + + +DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} + + +def parse_drain_configs( + drains: dict +) -> typing.Generator[DrainConfiguration, None, None]: + """ + Parse the drain configurations. + + Args: + drains (dict): A list of drain configurations. + + Yields: + DrainConfiguration instances. + + Raises: + ConfigError: If any of the drain configuration items are invalid. + """ + for name, config in drains.items(): + if "type" not in config: + raise ConfigError("Logging drains require a 'type' key.") + + try: + logging_type = DrainType.lookupByName(config["type"].upper()) + except ValueError: + raise ConfigError( + "%s is not a known logging drain type." % (config["type"],) + ) + + if logging_type in [ + DrainType.CONSOLE, + DrainType.CONSOLE_JSON, + DrainType.CONSOLE_JSON_TERSE, + ]: + location = config.get("location") + if location is None or location not in ["stdout", "stderr"]: + raise ConfigError( + ( + "The %s drain needs the 'location' key set to " + "either 'stdout' or 'stderr'." + ) + % (logging_type,) + ) + + pipe = OutputPipeType.lookupByName(location).value + + yield DrainConfiguration(name=name, type=logging_type, location=pipe) + + elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]: + if "location" not in config: + raise ConfigError( + "The %s drain needs the 'location' key set." % (logging_type,) + ) + + location = config.get("location") + if os.path.abspath(location) != location: + raise ConfigError( + "File paths need to be absolute, '%s' is a relative path" + % (location,) + ) + yield DrainConfiguration(name=name, type=logging_type, location=location) + + elif logging_type in [DrainType.NETWORK_JSON_TERSE]: + host = config.get("host") + port = config.get("port") + maximum_buffer = config.get("maximum_buffer", 1000) + yield DrainConfiguration( + name=name, + type=logging_type, + location=(host, port), + options=NetworkJSONTerseOptions(maximum_buffer=maximum_buffer), + ) + + else: + raise ConfigError( + "The %s drain type is currently not implemented." + % (config["type"].upper(),) + ) + + +def setup_structured_logging( + hs, + config, + log_config: dict, + logBeginner: LogBeginner = globalLogBeginner, + redirect_stdlib_logging: bool = True, +) -> LogPublisher: + """ + Set up Twisted's structured logging system. + + Args: + hs: The homeserver to use. + config (HomeserverConfig): The configuration of the Synapse homeserver. + log_config (dict): The log configuration to use. + """ + if config.no_redirect_stdio: + raise ConfigError( + "no_redirect_stdio cannot be defined using structured logging." + ) + + logger = Logger() + + if "drains" not in log_config: + raise ConfigError("The logging configuration requires a list of drains.") + + observers = [] + + for observer in parse_drain_configs(log_config["drains"]): + # Pipe drains + if observer.type == DrainType.CONSOLE: + logger.debug( + "Starting up the {name} console logger drain", name=observer.name + ) + observers.append(SynapseFileLogObserver(observer.location)) + elif observer.type == DrainType.CONSOLE_JSON: + logger.debug( + "Starting up the {name} JSON console logger drain", name=observer.name + ) + observers.append(jsonFileLogObserver(observer.location)) + elif observer.type == DrainType.CONSOLE_JSON_TERSE: + logger.debug( + "Starting up the {name} terse JSON console logger drain", + name=observer.name, + ) + observers.append( + TerseJSONToConsoleLogObserver(observer.location, metadata={}) + ) + + # File drains + elif observer.type == DrainType.FILE: + logger.debug("Starting up the {name} file logger drain", name=observer.name) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(SynapseFileLogObserver(log_file)) + elif observer.type == DrainType.FILE_JSON: + logger.debug( + "Starting up the {name} JSON file logger drain", name=observer.name + ) + log_file = open(observer.location, "at", buffering=1, encoding="utf8") + observers.append(jsonFileLogObserver(log_file)) + + elif observer.type == DrainType.NETWORK_JSON_TERSE: + metadata = {"server_name": hs.config.server_name} + log_observer = TerseJSONToTCPLogObserver( + hs=hs, + host=observer.location[0], + port=observer.location[1], + metadata=metadata, + maximum_buffer=observer.options.maximum_buffer, + ) + log_observer.start() + observers.append(log_observer) + else: + # We should never get here, but, just in case, throw an error. + raise ConfigError("%s drain type cannot be configured" % (observer.type,)) + + publisher = LogPublisher(*observers) + log_filter = LogLevelFilterPredicate() + + for namespace, namespace_config in log_config.get( + "loggers", DEFAULT_LOGGERS + ).items(): + # Set the log level for twisted.logger.Logger namespaces + log_filter.setLogLevelForNamespace( + namespace, + stdlib_log_level_to_twisted(namespace_config.get("level", "INFO")), + ) + + # Also set the log levels for the stdlib logger namespaces, to prevent + # them getting to PythonStdlibToTwistedLogger and having to be formatted + if "level" in namespace_config: + logging.getLogger(namespace).setLevel(namespace_config.get("level")) + + f = FilteringLogObserver(publisher, [log_filter]) + lco = LogContextObserver(f) + + if redirect_stdlib_logging: + stuff_into_twisted = PythonStdlibToTwistedLogger(lco) + stdliblogger = logging.getLogger() + stdliblogger.addHandler(stuff_into_twisted) + + # Always redirect standard I/O, otherwise other logging outputs might miss + # it. + logBeginner.beginLoggingTo([lco], redirectStandardIO=True) + + return publisher + + +def reload_structured_logging(*args, log_config=None) -> None: + warnings.warn( + "Currently the structured logging system can not be reloaded, doing nothing" + ) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py new file mode 100644 index 0000000000..7f1e8f23fe --- /dev/null +++ b/synapse/logging/_terse_json.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Log formatters that output terse JSON. +""" + +import sys +from collections import deque +from ipaddress import IPv4Address, IPv6Address, ip_address +from math import floor +from typing.io import TextIO + +import attr +from simplejson import dumps + +from twisted.application.internet import ClientService +from twisted.internet.endpoints import ( + HostnameEndpoint, + TCP4ClientEndpoint, + TCP6ClientEndpoint, +) +from twisted.internet.protocol import Factory, Protocol +from twisted.logger import FileLogObserver, Logger +from twisted.python.failure import Failure + + +def flatten_event(event: dict, metadata: dict, include_time: bool = False): + """ + Flatten a Twisted logging event to an dictionary capable of being sent + as a log event to a logging aggregation system. + + The format is vastly simplified and is not designed to be a "human readable + string" in the sense that traditional logs are. Instead, the structure is + optimised for searchability and filtering, with human-understandable log + keys. + + Args: + event (dict): The Twisted logging event we are flattening. + metadata (dict): Additional data to include with each log message. This + can be information like the server name. Since the target log + consumer does not know who we are other than by host IP, this + allows us to forward through static information. + include_time (bool): Should we include the `time` key? If False, the + event time is stripped from the event. + """ + new_event = {} + + # If it's a failure, make the new event's log_failure be the traceback text. + if "log_failure" in event: + new_event["log_failure"] = event["log_failure"].getTraceback() + + # If it's a warning, copy over a string representation of the warning. + if "warning" in event: + new_event["warning"] = str(event["warning"]) + + # Stdlib logging events have "log_text" as their human-readable portion, + # Twisted ones have "log_format". For now, include the log_format, so that + # context only given in the log format (e.g. what is being logged) is + # available. + if "log_text" in event: + new_event["log"] = event["log_text"] + else: + new_event["log"] = event["log_format"] + + # We want to include the timestamp when forwarding over the network, but + # exclude it when we are writing to stdout. This is because the log ingester + # (e.g. logstash, fluentd) can add its own timestamp. + if include_time: + new_event["time"] = round(event["log_time"], 2) + + # Convert the log level to a textual representation. + new_event["level"] = event["log_level"].name.upper() + + # Ignore these keys, and do not transfer them over to the new log object. + # They are either useless (isError), transferred manually above (log_time, + # log_level, etc), or contain Python objects which are not useful for output + # (log_logger, log_source). + keys_to_delete = [ + "isError", + "log_failure", + "log_format", + "log_level", + "log_logger", + "log_source", + "log_system", + "log_time", + "log_text", + "observer", + "warning", + ] + + # If it's from the Twisted legacy logger (twisted.python.log), it adds some + # more keys we want to purge. + if event.get("log_namespace") == "log_legacy": + keys_to_delete.extend(["message", "system", "time"]) + + # Rather than modify the dictionary in place, construct a new one with only + # the content we want. The original event should be considered 'frozen'. + for key in event.keys(): + + if key in keys_to_delete: + continue + + if isinstance(event[key], (str, int, bool, float)) or event[key] is None: + # If it's a plain type, include it as is. + new_event[key] = event[key] + else: + # If it's not one of those basic types, write out a string + # representation. This should probably be a warning in development, + # so that we are sure we are only outputting useful data. + new_event[key] = str(event[key]) + + # Add the metadata information to the event (e.g. the server_name). + new_event.update(metadata) + + return new_event + + +def TerseJSONToConsoleLogObserver(outFile: TextIO, metadata: dict) -> FileLogObserver: + """ + A log observer that formats events to a flattened JSON representation. + + Args: + outFile: The file object to write to. + metadata: Metadata to be added to each log object. + """ + + def formatEvent(_event: dict) -> str: + flattened = flatten_event(_event, metadata) + return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" + + return FileLogObserver(outFile, formatEvent) + + +@attr.s +class TerseJSONToTCPLogObserver(object): + """ + An IObserver that writes JSON logs to a TCP target. + + Args: + hs (HomeServer): The Homeserver that is being logged for. + host: The host of the logging target. + port: The logging target's port. + metadata: Metadata to be added to each log entry. + """ + + hs = attr.ib() + host = attr.ib(type=str) + port = attr.ib(type=int) + metadata = attr.ib(type=dict) + maximum_buffer = attr.ib(type=int) + _buffer = attr.ib(default=attr.Factory(deque), type=deque) + _writer = attr.ib(default=None) + _logger = attr.ib(default=attr.Factory(Logger)) + + def start(self) -> None: + + # Connect without DNS lookups if it's a direct IP. + try: + ip = ip_address(self.host) + if isinstance(ip, IPv4Address): + endpoint = TCP4ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + elif isinstance(ip, IPv6Address): + endpoint = TCP6ClientEndpoint( + self.hs.get_reactor(), self.host, self.port + ) + except ValueError: + endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port) + + factory = Factory.forProtocol(Protocol) + self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor()) + self._service.startService() + + def _write_loop(self) -> None: + """ + Implement the write loop. + """ + if self._writer: + return + + self._writer = self._service.whenConnected() + + @self._writer.addBoth + def writer(r): + if isinstance(r, Failure): + r.printTraceback(file=sys.__stderr__) + self._writer = None + self.hs.get_reactor().callLater(1, self._write_loop) + return + + try: + for event in self._buffer: + r.transport.write( + dumps(event, ensure_ascii=False, separators=(",", ":")).encode( + "utf8" + ) + ) + r.transport.write(b"\n") + self._buffer.clear() + except Exception as e: + sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) + + self._writer = False + self.hs.get_reactor().callLater(1, self._write_loop) + + def _handle_pressure(self) -> None: + """ + Handle backpressure by shedding events. + + The buffer will, in this order, until the buffer is below the maximum: + - Shed DEBUG events + - Shed INFO events + - Shed the middle 50% of the events. + """ + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out DEBUGs + self._buffer = deque( + filter(lambda event: event["level"] != "DEBUG", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Strip out INFOs + self._buffer = deque( + filter(lambda event: event["level"] != "INFO", self._buffer) + ) + + if len(self._buffer) <= self.maximum_buffer: + return + + # Cut the middle entries out + buffer_split = floor(self.maximum_buffer / 2) + + old_buffer = self._buffer + self._buffer = deque() + + for i in range(buffer_split): + self._buffer.append(old_buffer.popleft()) + + end_buffer = [] + for i in range(buffer_split): + end_buffer.append(old_buffer.pop()) + + self._buffer.extend(reversed(end_buffer)) + + def __call__(self, event: dict) -> None: + flattened = flatten_event(event, self.metadata, include_time=True) + self._buffer.append(flattened) + + # Handle backpressure, if it exists. + try: + self._handle_pressure() + except Exception: + # If handling backpressure fails,clear the buffer and log the + # exception. + self._buffer.clear() + self._logger.failure("Failed clearing backpressure") + + # Try and write immediately. + self._write_loop() diff --git a/synapse/logging/context.py b/synapse/logging/context.py index b456c31f70..63379bfb93 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -25,6 +25,7 @@ See doc/log_contexts.rst for details on how this works. import logging import threading import types +from typing import Any, List from twisted.internet import defer, threads @@ -194,7 +195,7 @@ class LoggingContext(object): class Sentinel(object): """Sentinel to represent the root context""" - __slots__ = [] + __slots__ = [] # type: List[Any] def __str__(self): return "sentinel" @@ -202,6 +203,10 @@ class LoggingContext(object): def copy_to(self, record): pass + def copy_to_twisted_log_entry(self, record): + record["request"] = None + record["scope"] = None + def start(self): pass @@ -330,6 +335,13 @@ class LoggingContext(object): # we also track the current scope: record.scope = self.scope + def copy_to_twisted_log_entry(self, record): + """ + Copy logging fields from this context to a Twisted log record. + """ + record["request"] = self.request + record["scope"] = self.scope + def start(self): if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index c6465c0386..ec0ac547c1 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -47,9 +47,9 @@ REQUIREMENTS = [ "idna>=2.5", # validating SSL certs for IP addresses requires service_identity 18.1. "service_identity>=18.1.0", - # our logcontext handling relies on the ability to cancel inlineCallbacks - # (https://twistedmatrix.com/trac/ticket/4632) which landed in Twisted 18.7. - "Twisted>=18.7.0", + # Twisted 18.9 introduces some logger improvements that the structured + # logger utilises + "Twisted>=18.9.0", "treq>=15.1", # Twisted has required pyopenssl 16.0 since about Twisted 16.6. "pyopenssl>=16.0.0", diff --git a/tests/logging/__init__.py b/tests/logging/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py new file mode 100644 index 0000000000..a786de0233 --- /dev/null +++ b/tests/logging/test_structured.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import os.path +import shutil +import sys +import textwrap + +from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile + +from synapse.config.logger import setup_logging +from synapse.logging._structured import setup_structured_logging +from synapse.logging.context import LoggingContext + +from tests.unittest import DEBUG, HomeserverTestCase + + +class FakeBeginner(object): + def beginLoggingTo(self, observers, **kwargs): + self.observers = observers + + +class StructuredLoggingTestCase(HomeserverTestCase): + """ + Tests for Synapse's structured logging support. + """ + + def test_output_to_json_round_trip(self): + """ + Synapse logs can be outputted to JSON and then read back again. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + json_log_file = os.path.abspath(os.path.join(temp_dir, "out.json")) + + log_config = { + "drains": {"jsonfile": {"type": "file_json", "location": json_log_file}} + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(json_log_file, "r") as f: + logged_events = list(eventsFromJSONLogFile(f)) + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertEqual( + eventAsText(logged_events[0], includeTimestamp=False), + "[tests.logging.test_structured#info] Hello there, wally!", + ) + + def test_output_to_text(self): + """ + Synapse logs can be outputted to text. + """ + temp_dir = self.mktemp() + os.mkdir(temp_dir) + self.addCleanup(shutil.rmtree, temp_dir) + + log_file = os.path.abspath(os.path.join(temp_dir, "out.log")) + + log_config = {"drains": {"file": {"type": "file", "location": log_file}}} + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Read the log file and check it has the event we sent + with open(log_file, "r") as f: + logged_events = f.read().strip().split("\n") + self.assertEqual(len(logged_events), 1) + + # The event pulled from the file should render fine + self.assertTrue( + logged_events[0].endswith( + " - tests.logging.test_structured - INFO - None - Hello there, wally!" + ) + ) + + def test_collects_logcontext(self): + """ + Test that log outputs have the attached logging context. + """ + log_config = {"drains": {}} + + # Begin the logger with our config + beginner = FakeBeginner() + publisher = setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + logs = [] + + publisher.addObserver(logs.append) + + # Make a logger and send an event + logger = Logger( + namespace="tests.logging.test_structured", observer=beginner.observers[0] + ) + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + self.assertEqual(len(logs), 1) + self.assertEqual(logs[0]["request"], "somereq") + + +class StructuredLoggingConfigurationFileTestCase(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + + tempdir = self.mktemp() + os.mkdir(tempdir) + log_config_file = os.path.abspath(os.path.join(tempdir, "log.config.yaml")) + self.homeserver_log = os.path.abspath(os.path.join(tempdir, "homeserver.log")) + + config = self.default_config() + config["log_config"] = log_config_file + + with open(log_config_file, "w") as f: + f.write( + textwrap.dedent( + """\ + structured: true + + drains: + file: + type: file_json + location: %s + """ + % (self.homeserver_log,) + ) + ) + + self.addCleanup(self._sys_cleanup) + + return self.setup_test_homeserver(config=config) + + def _sys_cleanup(self): + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + # Do not remove! We need the logging system to be set other than WARNING. + @DEBUG + def test_log_output(self): + """ + When a structured logging config is given, Synapse will use it. + """ + setup_logging(self.hs, self.hs.config) + + # Make a logger and send an event + logger = Logger(namespace="tests.logging.test_structured") + + with LoggingContext("testcontext", request="somereq"): + logger.info("Hello there, {name}!", name="steve") + + with open(self.homeserver_log, "r") as f: + logged_events = [ + eventAsText(x, includeTimestamp=False) for x in eventsFromJSONLogFile(f) + ] + + logs = "\n".join(logged_events) + self.assertTrue("***** STARTING SERVER *****" in logs) + self.assertTrue("Hello there, steve!" in logs) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py new file mode 100644 index 0000000000..514282591d --- /dev/null +++ b/tests/logging/test_terse_json.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from collections import Counter + +from twisted.logger import Logger + +from synapse.logging._structured import setup_structured_logging + +from tests.server import connect_client +from tests.unittest import HomeserverTestCase + +from .test_structured import FakeBeginner + + +class TerseJSONTCPTestCase(HomeserverTestCase): + def test_log_output(self): + """ + The Terse JSON outputter delivers simplified structured logs over TCP. + """ + log_config = { + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + } + } + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, self.hs.config, log_config, logBeginner=beginner + ) + + logger = Logger( + namespace="tests.logging.test_terse_json", observer=beginner.observers[0] + ) + logger.info("Hello there, {name}!", name="wally") + + # Trigger the connection + self.pump() + + _, server = connect_client(self.reactor, 0) + + # Trigger data being sent + self.pump() + + # One log message, with a single trailing newline + logs = server.data.decode("utf8").splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(server.data.count(b"\n"), 1) + + log = json.loads(logs[0]) + + # The terse logger should give us these keys. + expected_log_keys = [ + "log", + "time", + "level", + "log_namespace", + "request", + "scope", + "server_name", + "name", + ] + self.assertEqual(set(log.keys()), set(expected_log_keys)) + + # It contains the data we expect. + self.assertEqual(log["name"], "wally") + + def test_log_backpressure_debug(self): + """ + When backpressure is hit, DEBUG logs will be shed. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 7): + logger.info("test message %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + _, server = connect_client(self.reactor, 0) + self.pump() + + # Only the 7 infos made it through, the debugs were elided + logs = server.data.splitlines() + self.assertEqual(len(logs), 7) + + def test_log_backpressure_info(self): + """ + When backpressure is hit, DEBUG and INFO logs will be shed. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 10): + logger.warn("test warn %s" % (i,)) + + # Send a bunch of info messages + for i in range(0, 3): + logger.info("test message %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) + self.pump() + + # The 10 warnings made it through, the debugs and infos were elided + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) + self.assertEqual(len(logs), 10) + + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + + def test_log_backpressure_cut_middle(self): + """ + When backpressure is hit, and no more DEBUG and INFOs cannot be culled, + it will cut the middle messages out. + """ + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": 8000, + "maximum_buffer": 10, + } + }, + } + + # Begin the logger with our config + beginner = FakeBeginner() + setup_structured_logging( + self.hs, + self.hs.config, + log_config, + logBeginner=beginner, + redirect_stdlib_logging=False, + ) + + logger = Logger( + namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] + ) + + # Send a bunch of useful messages + for i in range(0, 20): + logger.warn("test warn", num=i) + + # Allow the reconnection + client, server = connect_client(self.reactor, 0) + self.pump() + + # The first five and last five warnings made it through, the debugs and + # infos were elided + logs = list(map(json.loads, server.data.decode("utf8").splitlines())) + self.assertEqual(len(logs), 10) + self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) + self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs]) diff --git a/tests/server.py b/tests/server.py index e573c4e4c5..c8269619b1 100644 --- a/tests/server.py +++ b/tests/server.py @@ -11,9 +11,13 @@ from twisted.internet import address, threads, udp from twisted.internet._resolver import SimpleResolverComplexifier from twisted.internet.defer import Deferred, fail, succeed from twisted.internet.error import DNSLookupError -from twisted.internet.interfaces import IReactorPluggableNameResolver, IResolverSimple +from twisted.internet.interfaces import ( + IReactorPluggableNameResolver, + IReactorTCP, + IResolverSimple, +) from twisted.python.failure import Failure -from twisted.test.proto_helpers import MemoryReactorClock +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http import unquote from twisted.web.http_headers import Headers @@ -465,3 +469,22 @@ class FakeTransport(object): self.buffer = self.buffer[len(to_write) :] if self.buffer and self.autoflush: self._reactor.callLater(0.0, self.flush) + + +def connect_client(reactor: IReactorTCP, client_id: int) -> AccumulatingProtocol: + """ + Connect a client to a fake TCP transport. + + Args: + reactor + factory: The connecting factory to build. + """ + factory = reactor.tcpClients[client_id][2] + client = factory.buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, reactor)) + client.makeConnection(FakeTransport(server, reactor)) + + reactor.tcpClients.pop(client_id) + + return client, server diff --git a/tox.ini b/tox.ini index 09b4b8fc3c..f9a3b7e49a 100644 --- a/tox.ini +++ b/tox.ini @@ -146,3 +146,13 @@ commands = coverage combine coverage xml codecov -X gcov + +[testenv:mypy] +basepython = python3.5 +deps = + {[base]deps} + mypy +extras = all +commands = mypy --ignore-missing-imports \ + synapse/logging/_structured.py \ + synapse/logging/_terse_json.py \ No newline at end of file -- cgit 1.5.1