From ed53bf314fee25d79d349beae409caf81a2d677f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 28 May 2021 16:14:08 +0100 Subject: Set opentracing priority before setting other tags (#10092) ... because tags on spans which aren't being sampled get thrown away. --- synapse/logging/opentracing.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'synapse/logging/opentracing.py') diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index fba2fa3904..428831dad6 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -265,6 +265,12 @@ class SynapseTags: # Whether the sync response has new data to be returned to the client. SYNC_RESULT = "sync.new_data" + # incoming HTTP request ID (as written in the logs) + REQUEST_ID = "request_id" + + # HTTP request tag (used to distinguish full vs incremental syncs, etc) + REQUEST_TAG = "request_tag" + # Block everything by default # A regex which matches the server_names to expose traces for. @@ -824,7 +830,7 @@ def trace_servlet(request: "SynapseRequest", extract_context: bool = False): return request_tags = { - "request_id": request.get_request_id(), + SynapseTags.REQUEST_ID: request.get_request_id(), tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, tags.HTTP_METHOD: request.get_method(), tags.HTTP_URL: request.get_redacted_uri(), @@ -833,9 +839,9 @@ def trace_servlet(request: "SynapseRequest", extract_context: bool = False): request_name = request.request_metrics.name if extract_context: - scope = start_active_span_from_request(request, request_name, tags=request_tags) + scope = start_active_span_from_request(request, request_name) else: - scope = start_active_span(request_name, tags=request_tags) + scope = start_active_span(request_name) with scope: try: @@ -845,4 +851,11 @@ def trace_servlet(request: "SynapseRequest", extract_context: bool = False): # with JsonResource). scope.span.set_operation_name(request.request_metrics.name) - scope.span.set_tag("request_tag", request.request_metrics.start_context.tag) + # set the tags *after* the servlet completes, in case it decided to + # prioritise the span (tags will get dropped on unprioritised spans) + request_tags[ + SynapseTags.REQUEST_TAG + ] = request.request_metrics.start_context.tag + + for k, v in request_tags.items(): + scope.span.set_tag(k, v) -- cgit 1.5.1 From 10e6d2abce644d5b6d6b59516061562f54382b94 Mon Sep 17 00:00:00 2001 From: Brad Murray Date: Tue, 1 Jun 2021 03:40:26 -0400 Subject: Fix opentracing inject to use the SpanContext, not the Span (#10074) Signed-off-by: Brad Murray brad@beeper.com --- changelog.d/10074.misc | 1 + synapse/logging/opentracing.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/10074.misc (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/10074.misc b/changelog.d/10074.misc new file mode 100644 index 0000000000..8dbe2cd2bc --- /dev/null +++ b/changelog.d/10074.misc @@ -0,0 +1 @@ +Update opentracing to inject the right context into the carrier. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 428831dad6..f64845b80c 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -594,7 +594,7 @@ def inject_active_span_twisted_headers(headers, destination, check_destination=T span = opentracing.tracer.active_span carrier = {} # type: Dict[str, str] - opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + opentracing.tracer.inject(span.context, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): headers.addRawHeaders(key, value) @@ -631,7 +631,7 @@ def inject_active_span_byte_dict(headers, destination, check_destination=True): span = opentracing.tracer.active_span carrier = {} # type: Dict[str, str] - opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + opentracing.tracer.inject(span.context, opentracing.Format.HTTP_HEADERS, carrier) for key, value in carrier.items(): headers[key.encode()] = [value.encode()] @@ -665,7 +665,7 @@ def inject_active_span_text_map(carrier, destination, check_destination=True): return opentracing.tracer.inject( - opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier + opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier ) @@ -687,7 +687,7 @@ def get_active_span_text_map(destination=None): carrier = {} # type: Dict[str, str] opentracing.tracer.inject( - opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier + opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier ) return carrier @@ -702,7 +702,7 @@ def active_span_context_as_string(): carrier = {} # type: Dict[str, str] if opentracing: opentracing.tracer.inject( - opentracing.tracer.active_span, opentracing.Format.TEXT_MAP, carrier + opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier ) return json_encoder.encode(carrier) -- cgit 1.5.1 From 9eea4646be5eef1e2b24e3b0bb0fc94999c2250c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 3 Jun 2021 16:31:56 +0100 Subject: Add OpenTracing for database activity. (#10113) This adds quite a lot of OpenTracing decoration for database activity. Specifically it adds tracing at four different levels: * emit a span for each "interaction" - ie, the top level database function that we tend to call "transaction", but isn't really, because it can end up as multiple transactions. * emit a span while we hold a database connection open * emit a span for each database transaction - actual actual transaction. * emit a span for each database query. I'm aware this might be quite a lot of overhead, but even just running it on a local Synapse it looks really interesting, and I hope the overhead can be offset just by turning down the sampling frequency and finding other ways of tracing requests of interest (eg, the `force_tracing_for_users` setting). --- changelog.d/10113.feature | 1 + synapse/logging/opentracing.py | 6 +++ synapse/storage/database.py | 86 ++++++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 33 deletions(-) create mode 100644 changelog.d/10113.feature (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/10113.feature b/changelog.d/10113.feature new file mode 100644 index 0000000000..2658ab8918 --- /dev/null +++ b/changelog.d/10113.feature @@ -0,0 +1 @@ +Report OpenTracing spans for database activity. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index f64845b80c..68f0c00151 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -271,6 +271,12 @@ class SynapseTags: # HTTP request tag (used to distinguish full vs incremental syncs, etc) REQUEST_TAG = "request_tag" + # Text description of a database transaction + DB_TXN_DESC = "db.txn_desc" + + # Uniqueish ID of a database transaction + DB_TXN_ID = "db.txn_id" + # Block everything by default # A regex which matches the server_names to expose traces for. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index a761ad603b..974703d13a 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -40,6 +40,7 @@ from twisted.enterprise import adbapi from synapse.api.errors import StoreError from synapse.config.database import DatabaseConnectionConfig +from synapse.logging import opentracing from synapse.logging.context import ( LoggingContext, current_context, @@ -313,7 +314,14 @@ class LoggingTransaction: start = time.time() try: - return func(sql, *args) + with opentracing.start_active_span( + "db.query", + tags={ + opentracing.tags.DATABASE_TYPE: "sql", + opentracing.tags.DATABASE_STATEMENT: sql, + }, + ): + return func(sql, *args) except Exception as e: sql_logger.debug("[SQL FAIL] {%s} %s", self.name, e) raise @@ -525,9 +533,16 @@ class DatabasePool: exception_callbacks=exception_callbacks, ) try: - r = func(cursor, *args, **kwargs) - conn.commit() - return r + with opentracing.start_active_span( + "db.txn", + tags={ + opentracing.SynapseTags.DB_TXN_DESC: desc, + opentracing.SynapseTags.DB_TXN_ID: name, + }, + ): + r = func(cursor, *args, **kwargs) + conn.commit() + return r except self.engine.module.OperationalError as e: # This can happen if the database disappears mid # transaction. @@ -653,16 +668,17 @@ class DatabasePool: logger.warning("Starting db txn '%s' from sentinel context", desc) try: - result = await self.runWithConnection( - self.new_transaction, - desc, - after_callbacks, - exception_callbacks, - func, - *args, - db_autocommit=db_autocommit, - **kwargs, - ) + with opentracing.start_active_span(f"db.{desc}"): + result = await self.runWithConnection( + self.new_transaction, + desc, + after_callbacks, + exception_callbacks, + func, + *args, + db_autocommit=db_autocommit, + **kwargs, + ) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -718,25 +734,29 @@ class DatabasePool: with LoggingContext( str(curr_context), parent_context=parent_context ) as context: - sched_duration_sec = monotonic_time() - start_time - sql_scheduling_timer.observe(sched_duration_sec) - context.add_database_scheduled(sched_duration_sec) - - if self.engine.is_connection_closed(conn): - logger.debug("Reconnecting closed database connection") - conn.reconnect() - - try: - if db_autocommit: - self.engine.attempt_to_set_autocommit(conn, True) - - db_conn = LoggingDatabaseConnection( - conn, self.engine, "runWithConnection" - ) - return func(db_conn, *args, **kwargs) - finally: - if db_autocommit: - self.engine.attempt_to_set_autocommit(conn, False) + with opentracing.start_active_span( + operation_name="db.connection", + ): + sched_duration_sec = monotonic_time() - start_time + sql_scheduling_timer.observe(sched_duration_sec) + context.add_database_scheduled(sched_duration_sec) + + if self.engine.is_connection_closed(conn): + logger.debug("Reconnecting closed database connection") + conn.reconnect() + opentracing.log_kv({"message": "reconnected"}) + + try: + if db_autocommit: + self.engine.attempt_to_set_autocommit(conn, True) + + db_conn = LoggingDatabaseConnection( + conn, self.engine, "runWithConnection" + ) + return func(db_conn, *args, **kwargs) + finally: + if db_autocommit: + self.engine.attempt_to_set_autocommit(conn, False) return await make_deferred_yieldable( self._db_pool.runWithConnection(inner_func, *args, **kwargs) -- cgit 1.5.1 From d8be7d493d7a91a55ee37a7931157d4557a508fb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 4 Jun 2021 09:25:33 +0100 Subject: Enable Prometheus metrics for the jaeger client library (#10112) --- changelog.d/10112.misc | 1 + mypy.ini | 2 +- synapse/logging/opentracing.py | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10112.misc (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/10112.misc b/changelog.d/10112.misc new file mode 100644 index 0000000000..40af09760c --- /dev/null +++ b/changelog.d/10112.misc @@ -0,0 +1 @@ +Enable Prometheus metrics for the jaeger client library. diff --git a/mypy.ini b/mypy.ini index 062872020e..8ba1b96311 100644 --- a/mypy.ini +++ b/mypy.ini @@ -130,7 +130,7 @@ ignore_missing_imports = True [mypy-canonicaljson] ignore_missing_imports = True -[mypy-jaeger_client] +[mypy-jaeger_client.*] ignore_missing_imports = True [mypy-jsonschema] diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 68f0c00151..26c8ffe780 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -362,10 +362,13 @@ def init_tracer(hs: "HomeServer"): set_homeserver_whitelist(hs.config.opentracer_whitelist) + from jaeger_client.metrics.prometheus import PrometheusMetricsFactory + config = JaegerConfig( config=hs.config.jaeger_config, service_name="{} {}".format(hs.config.server_name, hs.get_instance_name()), scope_manager=LogContextScopeManager(hs.config), + metrics_factory=PrometheusMetricsFactory(), ) # If we have the rust jaeger reporter available let's use that. -- cgit 1.5.1 From b2557cbf42d39fbd8a497a2f859cb45f84539da9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 7 Jun 2021 17:57:49 +0100 Subject: opentracing: use a consistent name for background processes (#10135) ... otherwise we tend to get a namespace clash between the bg process and the functions that it calls. --- changelog.d/10135.misc | 1 + synapse/logging/opentracing.py | 1 + synapse/metrics/background_process_metrics.py | 5 +++-- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10135.misc (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/10135.misc b/changelog.d/10135.misc new file mode 100644 index 0000000000..17819cbbcc --- /dev/null +++ b/changelog.d/10135.misc @@ -0,0 +1 @@ +OpenTracing: use a consistent name for background processes. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 26c8ffe780..dd9377340e 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -337,6 +337,7 @@ def ensure_active_span(message, ret=None): @contextlib.contextmanager def noop_context_manager(*args, **kwargs): """Does exactly what it says on the tin""" + # TODO: replace with contextlib.nullcontext once we drop support for Python 3.6 yield diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 0d6d643d35..de96ca0821 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -204,11 +204,12 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar with BackgroundProcessLoggingContext(desc, count) as context: try: - ctx = noop_context_manager() if bg_start_span: ctx = start_active_span( - desc, tags={SynapseTags.REQUEST_ID: str(context)} + f"bgproc.{desc}", tags={SynapseTags.REQUEST_ID: str(context)} ) + else: + ctx = noop_context_manager() with ctx: return await maybe_awaitable(func(*args, **kwargs)) except Exception: -- cgit 1.5.1