diff options
author | Patrick Cloke <clokep@users.noreply.github.com> | 2021-04-08 08:01:14 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-08 08:01:14 -0400 |
commit | 48d44ab1425ffac721b7d407823c2315cda1929a (patch) | |
tree | bb8c741b7bf163c911ed57880a5e84547c260f9b /synapse | |
parent | Don't report anything from GaugeBucketCollector metrics until data is present... (diff) | |
download | synapse-48d44ab1425ffac721b7d407823c2315cda1929a.tar.xz |
Record more information into structured logs. (#9654)
Records additional request information into the structured logs, e.g. the requester, IP address, etc.
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/http/site.py | 112 | ||||
-rw-r--r-- | synapse/logging/context.py | 70 | ||||
-rw-r--r-- | synapse/metrics/background_process_metrics.py | 18 | ||||
-rw-r--r-- | synapse/replication/tcp/protocol.py | 5 |
4 files changed, 162 insertions, 43 deletions
diff --git a/synapse/http/site.py b/synapse/http/site.py index 47754aff43..c0c873ce32 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,7 +14,7 @@ import contextlib import logging import time -from typing import Optional, Type, Union +from typing import Optional, Tuple, Type, Union import attr from zope.interface import implementer @@ -26,7 +26,11 @@ from twisted.web.server import Request, Site from synapse.config.server import ListenerConfig from synapse.http import get_request_user_agent, redact_uri from synapse.http.request_metrics import RequestMetrics, requests_counter -from synapse.logging.context import LoggingContext, PreserveLoggingContext +from synapse.logging.context import ( + ContextRequest, + LoggingContext, + PreserveLoggingContext, +) from synapse.types import Requester logger = logging.getLogger(__name__) @@ -63,7 +67,7 @@ class SynapseRequest(Request): # The requester, if authenticated. For federation requests this is the # server name, for client requests this is the Requester object. - self.requester = None # type: Optional[Union[Requester, str]] + self._requester = None # type: Optional[Union[Requester, str]] # we can't yet create the logcontext, as we don't know the method. self.logcontext = None # type: Optional[LoggingContext] @@ -93,6 +97,31 @@ class SynapseRequest(Request): self.site.site_tag, ) + @property + def requester(self) -> Optional[Union[Requester, str]]: + return self._requester + + @requester.setter + def requester(self, value: Union[Requester, str]) -> None: + # Store the requester, and update some properties based on it. + + # This should only be called once. + assert self._requester is None + + self._requester = value + + # A logging context should exist by now (and have a ContextRequest). + assert self.logcontext is not None + assert self.logcontext.request is not None + + ( + requester, + authenticated_entity, + ) = self.get_authenticated_entity() + self.logcontext.request.requester = requester + # If there's no authenticated entity, it was the requester. + self.logcontext.request.authenticated_entity = authenticated_entity or requester + def get_request_id(self): return "%s-%i" % (self.get_method(), self.request_seq) @@ -126,13 +155,60 @@ class SynapseRequest(Request): return self.method.decode("ascii") return method + def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]: + """ + Get the "authenticated" entity of the request, which might be the user + performing the action, or a user being puppeted by a server admin. + + Returns: + A tuple: + The first item is a string representing the user making the request. + + The second item is a string or None representing the user who + authenticated when making this request. See + Requester.authenticated_entity. + """ + # Convert the requester into a string that we can log + if isinstance(self._requester, str): + return self._requester, None + elif isinstance(self._requester, Requester): + requester = self._requester.user.to_string() + authenticated_entity = self._requester.authenticated_entity + + # If this is a request where the target user doesn't match the user who + # authenticated (e.g. and admin is puppetting a user) then we return both. + if self._requester.user.to_string() != authenticated_entity: + return requester, authenticated_entity + + return requester, None + elif self._requester is not None: + # This shouldn't happen, but we log it so we don't lose information + # and can see that we're doing something wrong. + return repr(self._requester), None # type: ignore[unreachable] + + return None, None + def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. # create a LogContext for this request request_id = self.get_request_id() - self.logcontext = LoggingContext(request_id, request=request_id) + self.logcontext = LoggingContext( + request_id, + request=ContextRequest( + request_id=request_id, + ip_address=self.getClientIP(), + site_tag=self.site.site_tag, + # The requester is going to be unknown at this point. + requester=None, + authenticated_entity=None, + method=self.get_method(), + url=self.get_redacted_uri(), + protocol=self.clientproto.decode("ascii", errors="replace"), + user_agent=get_request_user_agent(self), + ), + ) # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) @@ -277,25 +353,6 @@ class SynapseRequest(Request): # to the client (nb may be negative) response_send_time = self.finish_time - self._processing_finished_time - # Convert the requester into a string that we can log - authenticated_entity = None - if isinstance(self.requester, str): - authenticated_entity = self.requester - elif isinstance(self.requester, Requester): - authenticated_entity = self.requester.authenticated_entity - - # If this is a request where the target user doesn't match the user who - # authenticated (e.g. and admin is puppetting a user) then we log both. - if self.requester.user.to_string() != authenticated_entity: - authenticated_entity = "{},{}".format( - authenticated_entity, - self.requester.user.to_string(), - ) - elif self.requester is not None: - # This shouldn't happen, but we log it so we don't lose information - # and can see that we're doing something wrong. - authenticated_entity = repr(self.requester) # type: ignore[unreachable] - user_agent = get_request_user_agent(self, "-") code = str(self.code) @@ -305,6 +362,13 @@ class SynapseRequest(Request): code += "!" log_level = logging.INFO if self._should_log_request() else logging.DEBUG + + # If this is a request where the target user doesn't match the user who + # authenticated (e.g. and admin is puppetting a user) then we log both. + requester, authenticated_entity = self.get_authenticated_entity() + if authenticated_entity: + requester = "{}.{}".format(authenticated_entity, requester) + self.site.access_logger.log( log_level, "%s - %s - {%s}" @@ -312,7 +376,7 @@ class SynapseRequest(Request): ' %sB %s "%s %s %s" "%s" [%d dbevts]', self.getClientIP(), self.site.site_tag, - authenticated_entity, + requester, processing_time, response_send_time, usage.ru_utime, diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 03cf3c2b8e..e78343f554 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -22,7 +22,6 @@ them. See doc/log_contexts.rst for details on how this works. """ - import inspect import logging import threading @@ -30,6 +29,7 @@ import types import warnings from typing import TYPE_CHECKING, Optional, Tuple, TypeVar, Union +import attr from typing_extensions import Literal from twisted.internet import defer, threads @@ -181,6 +181,29 @@ class ContextResourceUsage: return res +@attr.s(slots=True) +class ContextRequest: + """ + A bundle of attributes from the SynapseRequest object. + + This exists to: + + * Avoid a cycle between LoggingContext and SynapseRequest. + * Be a single variable that can be passed from parent LoggingContexts to + their children. + """ + + request_id = attr.ib(type=str) + ip_address = attr.ib(type=str) + site_tag = attr.ib(type=str) + requester = attr.ib(type=Optional[str]) + authenticated_entity = attr.ib(type=Optional[str]) + method = attr.ib(type=str) + url = attr.ib(type=str) + protocol = attr.ib(type=str) + user_agent = attr.ib(type=str) + + LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"] @@ -256,7 +279,7 @@ class LoggingContext: self, name: Optional[str] = None, parent_context: "Optional[LoggingContext]" = None, - request: Optional[str] = None, + request: Optional[ContextRequest] = None, ) -> None: self.previous_context = current_context() self.name = name @@ -281,7 +304,11 @@ class LoggingContext: self.parent_context = parent_context if self.parent_context is not None: - self.parent_context.copy_to(self) + # we track the current request_id + self.request = self.parent_context.request + + # we also track the current scope: + self.scope = self.parent_context.scope if request is not None: # the request param overrides the request from the parent context @@ -289,7 +316,7 @@ class LoggingContext: def __str__(self) -> str: if self.request: - return str(self.request) + return self.request.request_id return "%s@%x" % (self.name, id(self)) @classmethod @@ -556,8 +583,23 @@ class LoggingContextFilter(logging.Filter): # we end up in a death spiral of infinite loops, so let's check, for # robustness' sake. if context is not None: - # Logging is interested in the request. - record.request = context.request # type: ignore + # Logging is interested in the request ID. Note that for backwards + # compatibility this is stored as the "request" on the record. + record.request = str(context) # type: ignore + + # Add some data from the HTTP request. + request = context.request + if request is None: + return True + + record.ip_address = request.ip_address # type: ignore + record.site_tag = request.site_tag # type: ignore + record.requester = request.requester # type: ignore + record.authenticated_entity = request.authenticated_entity # type: ignore + record.method = request.method # type: ignore + record.url = request.url # type: ignore + record.protocol = request.protocol # type: ignore + record.user_agent = request.user_agent # type: ignore return True @@ -630,8 +672,8 @@ def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSe def nested_logging_context(suffix: str) -> LoggingContext: """Creates a new logging context as a child of another. - The nested logging context will have a 'request' made up of the parent context's - request, plus the given suffix. + The nested logging context will have a 'name' made up of the parent context's + name, plus the given suffix. CPU/db usage stats will be added to the parent context's on exit. @@ -641,7 +683,7 @@ def nested_logging_context(suffix: str) -> LoggingContext: # ... do stuff Args: - suffix: suffix to add to the parent context's 'request'. + suffix: suffix to add to the parent context's 'name'. Returns: LoggingContext: new logging context. @@ -653,11 +695,17 @@ def nested_logging_context(suffix: str) -> LoggingContext: ) parent_context = None prefix = "" + request = None else: assert isinstance(curr_context, LoggingContext) parent_context = curr_context - prefix = str(parent_context.request) - return LoggingContext(parent_context=parent_context, request=prefix + "-" + suffix) + prefix = str(parent_context.name) + request = parent_context.request + return LoggingContext( + prefix + "-" + suffix, + parent_context=parent_context, + request=request, + ) def preserve_fn(f): diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index b56986d8e7..e8a9096c03 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -16,7 +16,7 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set +from typing import TYPE_CHECKING, Dict, Optional, Set, Union from prometheus_client.core import REGISTRY, Counter, Gauge @@ -199,11 +199,11 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext(desc, "%s-%i" % (desc, count)) as context: + with BackgroundProcessLoggingContext(desc, count) as context: try: ctx = noop_context_manager() if bg_start_span: - ctx = start_active_span(desc, tags={"request_id": context.request}) + ctx = start_active_span(desc, tags={"request_id": str(context)}) with ctx: return await maybe_awaitable(func(*args, **kwargs)) except Exception: @@ -242,13 +242,19 @@ class BackgroundProcessLoggingContext(LoggingContext): processes. """ - __slots__ = ["_proc"] + __slots__ = ["_id", "_proc"] - def __init__(self, name: str, request: Optional[str] = None): - super().__init__(name, request=request) + def __init__(self, name: str, id: Optional[Union[int, str]] = None): + super().__init__(name) + self._id = id self._proc = _BackgroundProcess(name, self) + def __str__(self) -> str: + if self._id is not None: + return "%s-%s" % (self.name, self._id) + return "%s@%x" % (self.name, id(self)) + def start(self, rusage: "Optional[resource._RUsage]"): """Log context has started running (again).""" diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index e829add257..d10d574246 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -184,8 +184,9 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. - ctx_name = "replication-conn-%s" % self.conn_id - self._logging_context = BackgroundProcessLoggingContext(ctx_name, ctx_name) + self._logging_context = BackgroundProcessLoggingContext( + "replication-conn", self.conn_id + ) def connectionMade(self): logger.info("[%s] Connection established", self.id()) |