summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--docker/conf/homeserver.yaml20
-rw-r--r--synapse/handlers/federation.py29
-rw-r--r--synapse/handlers/federation_event.py5
-rw-r--r--synapse/handlers/relations.py10
-rw-r--r--synapse/logging/tracing.py4
-rw-r--r--synapse/storage/databases/main/relations.py5
6 files changed, 61 insertions, 12 deletions
diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml
index f10f78a48c..d35be76509 100644
--- a/docker/conf/homeserver.yaml
+++ b/docker/conf/homeserver.yaml
@@ -184,3 +184,23 @@ trusted_key_servers:
 
 password_config:
    enabled: true
+
+
+# Tracing configuration (exports spans to a local Jaeger instance for debugging)
+tracing:
+  enabled: true
+  sample_rate: 1
+  homeserver_whitelist:
+    - ".*"
+  jaeger_exporter_config:
+    agent_host_name: host.docker.internal
+    agent_port: 6831
+    # Split UDP packets (UDP_PACKET_MAX_LENGTH is set to 65k in OpenTelemetry)
+    udp_split_oversized_batches: true
+    # If you define a collector, it will communicate directly to the collector,
+    # bypassing the agent
+    #
+    # It does not seem like the agent can keep up with the massive UDP load
+    # (1065 spans in one trace) so let's just use the HTTP collector endpoint
+    # instead which seems to work.
+    collector_endpoint: "http://host.docker.internal:14268/api/traces?format=jaeger.thrift"
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index f9000d326a..79e792395f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -61,7 +61,13 @@ from synapse.events.validator import EventValidator
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import nested_logging_context
-from synapse.logging.tracing import SynapseTags, set_attribute, tag_args, trace
+from synapse.logging.tracing import (
+    SynapseTags,
+    set_attribute,
+    start_active_span,
+    tag_args,
+    trace,
+)
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.module_api import NOT_SPAM
 from synapse.replication.http.federation import (
@@ -349,10 +355,14 @@ class FederationHandler:
         # attempting to paginate before backfill reached the visible history.
 
         extremities_to_request: List[str] = []
-        for bp in sorted_backfill_points:
+        for i, bp in enumerate(sorted_backfill_points):
             if len(extremities_to_request) >= 5:
                 break
 
+            set_attribute(
+                SynapseTags.RESULT_PREFIX + "backfill_point" + str(i), str(bp)
+            )
+
             # For regular backwards extremities, we don't have the extremity events
             # themselves, so we need to actually check the events that reference them -
             # their "successor" events.
@@ -409,14 +419,15 @@ class FederationHandler:
             str(len(extremities_to_request)),
         )
 
-        # Now we need to decide which hosts to hit first.
-        # First we try hosts that are already in the room.
-        # TODO: HEURISTIC ALERT.
-        likely_domains = (
-            await self._storage_controllers.state.get_current_hosts_in_room_ordered(
-                room_id
+        with start_active_span("getting likely_domains"):
+            # Now we need to decide which hosts to hit first.
+            # First we try hosts that are already in the room.
+            # TODO: HEURISTIC ALERT.
+            likely_domains = (
+                await self._storage_controllers.state.get_current_hosts_in_room_ordered(
+                    room_id
+                )
             )
-        )
 
         async def try_backfill(domains: Collection[str]) -> bool:
             # TODO: Should we try multiple of these at a time?
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 8357f6f84e..378b863c5f 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1249,6 +1249,11 @@ class FederationEventHandler:
                 str(len(failed_to_fetch)),
             )
 
+        set_attribute(
+            SynapseTags.RESULT_PREFIX + f"failed_to_fetch ({len(failed_to_fetch)})",
+            str(failed_to_fetch),
+        )
+
         if remote_event.is_state() and remote_event.rejected_reason is None:
             state_map[
                 (remote_event.type, remote_event.state_key)
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 2c990aab0a..64d373e9d7 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -20,7 +20,7 @@ import attr
 from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
-from synapse.logging.tracing import trace
+from synapse.logging.tracing import SynapseTags, set_attribute, trace
 from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, StreamToken, UserID
@@ -172,6 +172,7 @@ class RelationsHandler:
 
         return return_value
 
+    @trace
     async def get_relations_for_event(
         self,
         event_id: str,
@@ -259,6 +260,7 @@ class RelationsHandler:
                     e.msg,
                 )
 
+    @trace
     async def get_annotations_for_event(
         self,
         event_id: str,
@@ -304,6 +306,7 @@ class RelationsHandler:
 
         return filtered_results
 
+    @trace
     async def _get_threads_for_events(
         self,
         events_by_id: Dict[str, EventBase],
@@ -465,6 +468,11 @@ class RelationsHandler:
             # The event should get bundled aggregations.
             events_by_id[event.event_id] = event
 
+        set_attribute(
+            SynapseTags.FUNC_ARG_PREFIX + f"event_ids ({len(events_by_id)})",
+            str(events_by_id.keys()),
+        )
+
         # event ID -> bundled aggregation in non-serialized form.
         results: Dict[str, BundledAggregations] = {}
 
diff --git a/synapse/logging/tracing.py b/synapse/logging/tracing.py
index d455854467..c92769e1a0 100644
--- a/synapse/logging/tracing.py
+++ b/synapse/logging/tracing.py
@@ -291,7 +291,8 @@ class SynapseTags:
 
     # Used to tag function arguments
     #
-    # Tag a named arg. The name of the argument should be appended to this prefix.
+    # Tag a named arg. The name of the argument should be appended to this
+    # prefix.
     FUNC_ARG_PREFIX = "ARG."
     # Tag extra variadic number of positional arguments (`def foo(first, second, *extras)`)
     FUNC_ARGS = "args"
@@ -928,7 +929,6 @@ def trace(func: Callable[P, R]) -> Callable[P, R]:
 
     See the module's doc string for usage examples.
     """
-
     return trace_with_opname(func.__name__)(func)
 
 
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index ca431002c8..8a1c206d95 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -32,6 +32,7 @@ import attr
 from synapse.api.constants import MAIN_TIMELINE, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
+from synapse.logging.tracing import trace
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -503,6 +504,10 @@ class RelationsWorkerStore(SQLBaseStore):
     def get_applicable_edit(self, event_id: str) -> Optional[EventBase]:
         raise NotImplementedError()
 
+    # TODO: What's the proper way to fix this so we can stack @trace on top of
+    # @cachedList
+    #
+    # @trace
     @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids")
     async def get_applicable_edits(
         self, event_ids: Collection[str]