summary refs log tree commit diff
path: root/synapse/logging/opentracing.py
diff options
context:
space:
mode:
authorEric Eastwood <erice@element.io>2022-09-09 11:30:06 -0500
committerGitHub <noreply@github.com>2022-09-09 11:30:06 -0500
commitf694bb71b7ea7841a5b5db3d884dfda5a3f78023 (patch)
tree7dddd224184bd102ed9da0a9553cc4ed82e18e33 /synapse/logging/opentracing.py
parentUse an upsert for `receipts_graph`. (#13752) (diff)
downloadsynapse-f694bb71b7ea7841a5b5db3d884dfda5a3f78023.tar.xz
Strip number suffix from instance name to consolidate services that traces are spread over (#13729)
The problem with many services is that it makes it hard to find which service has the trace you want, see https://github.com/jaegertracing/jaeger-ui/issues/985

Previously, we split traces out into services based on their instance name like `matrix.org client_reader-1`, etc but there are many worker instances of the same `client_reader` so there is a lot to click through.

With this PR, all of the traces are just collected under the worker type like `client_reader`, `event_persister` 😇

Note: A Synapse worker instance name is an opaque string with the number convention only being our own thing for the `matrix.org` deployment. But seems pretty sensible to group things this way.
Diffstat (limited to 'synapse/logging/opentracing.py')
-rw-r--r--synapse/logging/opentracing.py13
1 files changed, 12 insertions, 1 deletions
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py

index 482316a1ff..adf3f54770 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py
@@ -203,6 +203,9 @@ if TYPE_CHECKING: # Helper class +# Matches the number suffix in an instance name like "matrix.org client_reader-8" +STRIP_INSTANCE_NUMBER_SUFFIX_REGEX = re.compile(r"[_-]?\d+$") + class _DummyTagNames: """wrapper of opentracings tags. We need to have them if we @@ -441,9 +444,17 @@ def init_tracer(hs: "HomeServer") -> None: from jaeger_client.metrics.prometheus import PrometheusMetricsFactory + # Instance names are opaque strings but by stripping off the number suffix, + # we can get something that looks like a "worker type", e.g. + # "client_reader-1" -> "client_reader" so we don't spread the traces across + # so many services. + instance_name_by_type = re.sub( + STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name() + ) + config = JaegerConfig( config=hs.config.tracing.jaeger_config, - service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", + service_name=f"{hs.config.server.server_name} {instance_name_by_type}", scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), )