diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py
index e69de29bb2..b28b7b2ef7 100644
--- a/synapse/logging/__init__.py
+++ b/synapse/logging/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# These are imported to allow for nicer logging configuration files.
+from synapse.logging._remote import RemoteHandler
+from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
+
+__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"]
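+
+# For example, a dictConfig-style log config can reference these classes
+# directly (a sketch; the "remote" and "tersejson" names are illustrative):
+#
+#   formatters: {tersejson: {class: synapse.logging.TerseJsonFormatter}}
+#   handlers: {remote: {class: synapse.logging.RemoteHandler,
+#                       host: "127.0.0.1", port: 9000, formatter: tersejson}}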
diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
new file mode 100644
index 0000000000..fb937b3f28
--- /dev/null
+++ b/synapse/logging/_remote.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import sys
+import traceback
+from collections import deque
+from ipaddress import IPv4Address, IPv6Address, ip_address
+from math import floor
+from typing import Callable, Optional
+
+import attr
+from typing_extensions import Deque
+from zope.interface import implementer
+
+from twisted.application.internet import ClientService
+from twisted.internet.defer import CancelledError, Deferred
+from twisted.internet.endpoints import (
+ HostnameEndpoint,
+ TCP4ClientEndpoint,
+ TCP6ClientEndpoint,
+)
+from twisted.internet.interfaces import IPushProducer, ITransport
+from twisted.internet.protocol import Factory, Protocol
+from twisted.python.failure import Failure
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s
+@implementer(IPushProducer)
+class LogProducer:
+ """
+ An IPushProducer that writes logs from its buffer to its transport when it
+ is resumed.
+
+ Args:
+ buffer: Log buffer to read logs from.
+ transport: Transport to write to.
+ format: A callable to format the log record to a string.
+ """
+
+ transport = attr.ib(type=ITransport)
+ _format = attr.ib(type=Callable[[logging.LogRecord], str])
+ _buffer = attr.ib(type=deque)
+ _paused = attr.ib(default=False, type=bool, init=False)
+
+ def pauseProducing(self):
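+ # Called by the transport when its write buffer is full; records keep
+ # accumulating in the handler's buffer until production is resumed.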
+ self._paused = True
+
+ def stopProducing(self):
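+ # Called when production should stop for good (e.g. the connection was
+ # lost): pause and detach from the shared buffer.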
+ self._paused = True
+ self._buffer = deque()
+
+ def resumeProducing(self):
+ # Clear the paused flag; this is a no-op if we were already producing.
+ self._paused = False
+
+ # Loop until paused.
+ while self._paused is False and (self._buffer and self.transport.connected):
+ try:
+ # Request the next record and format it.
+ record = self._buffer.popleft()
+ msg = self._format(record)
+
+ # Send it as a new line over the transport.
+ self.transport.write(msg.encode("utf8"))
+ self.transport.write(b"\n")
+ except Exception:
+ # Something has gone wrong writing to the transport -- log it
+ # and break out of the while.
+ traceback.print_exc(file=sys.__stderr__)
+ break
+
+
+class RemoteHandler(logging.Handler):
+ """
+ A logging handler that writes logs to a TCP target.
+
+ Args:
+ host: The host of the logging target.
+ port: The logging target's port.
+ maximum_buffer: The maximum buffer size.
+ """
+
+ def __init__(
+ self,
+ host: str,
+ port: int,
+ maximum_buffer: int = 1000,
+ level=logging.NOTSET,
+ _reactor=None,
+ ):
+ super().__init__(level=level)
+ self.host = host
+ self.port = port
+ self.maximum_buffer = maximum_buffer
+
+ self._buffer = deque() # type: Deque[logging.LogRecord]
+ self._connection_waiter = None # type: Optional[Deferred]
+ self._producer = None # type: Optional[LogProducer]
+
+ # Connect without DNS lookups if it's a direct IP.
+ if _reactor is None:
+ from twisted.internet import reactor
+
+ _reactor = reactor
+
+ try:
+ ip = ip_address(self.host)
+ if isinstance(ip, IPv4Address):
+ endpoint = TCP4ClientEndpoint(_reactor, self.host, self.port)
+ elif isinstance(ip, IPv6Address):
+ endpoint = TCP6ClientEndpoint(_reactor, self.host, self.port)
+ else:
+ raise ValueError("Unknown IP address provided: %s" % (self.host,))
+ except ValueError:
+ endpoint = HostnameEndpoint(_reactor, self.host, self.port)
+
+ factory = Factory.forProtocol(Protocol)
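+ # ClientService maintains the connection for us, retrying with backoff
+ # if it cannot be established or is lost.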
+ self._service = ClientService(endpoint, factory, clock=_reactor)
+ self._service.startService()
+ self._stopping = False
+ self._connect()
+
+ def close(self):
+ self._stopping = True
+ self._service.stopService()
+
+ def _connect(self) -> None:
+ """
+ Triggers an attempt to connect then write to the remote if not already writing.
+ """
+ # Do not attempt to open multiple connections.
+ if self._connection_waiter:
+ return
+
+ self._connection_waiter = self._service.whenConnected(failAfterFailures=1)
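+ # failAfterFailures=1 makes the Deferred errback after the first failed
+ # attempt (rather than waiting for a retry to succeed), so the error is
+ # surfaced and fail() below can schedule a fresh attempt.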
+
+ def fail(failure: Failure) -> None:
+ # If the Deferred was cancelled (e.g. during shutdown) do not try to
+ # reconnect (this will cause an infinite loop of errors).
+ if failure.check(CancelledError) and self._stopping:
+ return
+
+ # For a different error, print the traceback and re-connect.
+ failure.printTraceback(file=sys.__stderr__)
+ self._connection_waiter = None
+ self._connect()
+
+ def writer(result: Protocol) -> None:
+ # We have a connection. If we already have a producer, and its
+ # transport is the same, just trigger a resumeProducing.
+ if self._producer and result.transport is self._producer.transport:
+ self._producer.resumeProducing()
+ self._connection_waiter = None
+ return
+
+ # If the producer is still producing, stop it.
+ if self._producer:
+ self._producer.stopProducing()
+
+ # Make a new producer and start it.
+ self._producer = LogProducer(
+ buffer=self._buffer, transport=result.transport, format=self.format,
+ )
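+ # Register as a streaming (push) producer: the transport will call
+ # pauseProducing/resumeProducing as its own buffer fills and drains.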
+ result.transport.registerProducer(self._producer, True)
+ self._producer.resumeProducing()
+ self._connection_waiter = None
+
+ self._connection_waiter.addCallbacks(writer, fail)
+
+ def _handle_pressure(self) -> None:
+ """
+ Handle backpressure by shedding records.
+
+ Records are shed, in this order, until the buffer is below the maximum size:
+ - Shed DEBUG records.
+ - Shed INFO records.
+ - Shed the middle 50% of the records.
+ """
+ if len(self._buffer) <= self.maximum_buffer:
+ return
+
+ # Strip out DEBUGs
+ self._buffer = deque(
+ filter(lambda record: record.levelno > logging.DEBUG, self._buffer)
+ )
+
+ if len(self._buffer) <= self.maximum_buffer:
+ return
+
+ # Strip out INFOs
+ self._buffer = deque(
+ filter(lambda record: record.levelno > logging.INFO, self._buffer)
+ )
+
+ if len(self._buffer) <= self.maximum_buffer:
+ return
+
+ # Cut the middle entries out
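+ # (keeping the oldest and newest maximum_buffer/2 records).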
+ buffer_split = floor(self.maximum_buffer / 2)
+
+ old_buffer = self._buffer
+ self._buffer = deque()
+
+ for i in range(buffer_split):
+ self._buffer.append(old_buffer.popleft())
+
+ end_buffer = []
+ for i in range(buffer_split):
+ end_buffer.append(old_buffer.pop())
+
+ self._buffer.extend(reversed(end_buffer))
+
+ def emit(self, record: logging.LogRecord) -> None:
+ self._buffer.append(record)
+
+ # Handle backpressure by shedding records if the buffer is too large.
+ try:
+ self._handle_pressure()
+ except Exception:
+ # If handling backpressure fails, clear the buffer and log the
+ # exception.
+ self._buffer.clear()
+ logger.warning("Failed handling backpressure", exc_info=True)
+
+ # Try and write immediately.
+ self._connect()
diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py
index 144506c8f2..14d9c104c2 100644
--- a/synapse/logging/_structured.py
+++ b/synapse/logging/_structured.py
@@ -12,146 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
-import logging
import os.path
-import sys
-import typing
-import warnings
-from typing import List
-
-import attr
-from constantly import NamedConstant, Names, ValueConstant, Values
-from zope.interface import implementer
+from typing import Any, Dict, Generator, Optional, Tuple
-from twisted.logger import (
- FileLogObserver,
- FilteringLogObserver,
- ILogObserver,
- LogBeginner,
- Logger,
- LogLevel,
- LogLevelFilterPredicate,
- LogPublisher,
- eventAsText,
- jsonFileLogObserver,
-)
+from constantly import NamedConstant, Names
from synapse.config._base import ConfigError
-from synapse.logging._terse_json import (
- TerseJSONToConsoleLogObserver,
- TerseJSONToTCPLogObserver,
-)
-from synapse.logging.context import current_context
-
-
-def stdlib_log_level_to_twisted(level: str) -> LogLevel:
- """
- Convert a stdlib log level to Twisted's log level.
- """
- lvl = level.lower().replace("warning", "warn")
- return LogLevel.levelWithName(lvl)
-
-
-@attr.s
-@implementer(ILogObserver)
-class LogContextObserver:
- """
- An ILogObserver which adds Synapse-specific log context information.
-
- Attributes:
- observer (ILogObserver): The target parent observer.
- """
-
- observer = attr.ib()
-
- def __call__(self, event: dict) -> None:
- """
- Consume a log event and emit it to the parent observer after filtering
- and adding log context information.
-
- Args:
- event (dict)
- """
- # Filter out some useless events that Twisted outputs
- if "log_text" in event:
- if event["log_text"].startswith("DNSDatagramProtocol starting on "):
- return
-
- if event["log_text"].startswith("(UDP Port "):
- return
-
- if event["log_text"].startswith("Timing out client") or event[
- "log_format"
- ].startswith("Timing out client"):
- return
-
- context = current_context()
-
- # Copy the context information to the log event.
- if context is not None:
- context.copy_to_twisted_log_entry(event)
- else:
- # If there's no logging context, not even the root one, we might be
- # starting up or it might be from non-Synapse code. Log it as if it
- # came from the root logger.
- event["request"] = None
- event["scope"] = None
-
- self.observer(event)
-
-
-class PythonStdlibToTwistedLogger(logging.Handler):
- """
- Transform a Python stdlib log message into a Twisted one.
- """
-
- def __init__(self, observer, *args, **kwargs):
- """
- Args:
- observer (ILogObserver): A Twisted logging observer.
- *args, **kwargs: Args/kwargs to be passed to logging.Handler.
- """
- self.observer = observer
- super().__init__(*args, **kwargs)
-
- def emit(self, record: logging.LogRecord) -> None:
- """
- Emit a record to Twisted's observer.
-
- Args:
- record (logging.LogRecord)
- """
-
- self.observer(
- {
- "log_time": record.created,
- "log_text": record.getMessage(),
- "log_format": "{log_text}",
- "log_namespace": record.name,
- "log_level": stdlib_log_level_to_twisted(record.levelname),
- }
- )
-
-
-def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver:
- """
- A log observer that formats events like the traditional log formatter and
- sends them to `outFile`.
-
- Args:
- outFile (file object): The file object to write to.
- """
-
- def formatEvent(_event: dict) -> str:
- event = dict(_event)
- event["log_level"] = event["log_level"].name.upper()
- event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + (
- event.get("log_format", "{log_text}") or "{log_text}"
- )
- return eventAsText(event, includeSystem=False) + "\n"
-
- return FileLogObserver(outFile, formatEvent)
class DrainType(Names):
@@ -163,30 +29,12 @@ class DrainType(Names):
NETWORK_JSON_TERSE = NamedConstant()
-class OutputPipeType(Values):
- stdout = ValueConstant(sys.__stdout__)
- stderr = ValueConstant(sys.__stderr__)
-
-
-@attr.s
-class DrainConfiguration:
- name = attr.ib()
- type = attr.ib()
- location = attr.ib()
- options = attr.ib(default=None)
-
-
-@attr.s
-class NetworkJSONTerseOptions:
- maximum_buffer = attr.ib(type=int)
-
-
-DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}}
+DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}}
def parse_drain_configs(
drains: dict,
-) -> typing.Generator[DrainConfiguration, None, None]:
+) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
"""
Parse the drain configurations.
@@ -194,11 +42,12 @@ def parse_drain_configs(
drains (dict): A list of drain configurations.
Yields:
- DrainConfiguration instances.
+ Tuples of (handler name, dict of handler configuration).
Raises:
ConfigError: If any of the drain configuration items are invalid.
"""
+
for name, config in drains.items():
if "type" not in config:
raise ConfigError("Logging drains require a 'type' key.")
@@ -210,6 +59,18 @@ def parse_drain_configs(
"%s is not a known logging drain type." % (config["type"],)
)
+ # Either use the default formatter or the tersejson one.
+ if logging_type in (DrainType.CONSOLE_JSON, DrainType.FILE_JSON):
+ formatter = "json" # type: Optional[str]
+ elif logging_type in (
+ DrainType.CONSOLE_JSON_TERSE,
+ DrainType.NETWORK_JSON_TERSE,
+ ):
+ formatter = "tersejson"
+ else:
+ # A formatter of None implies using the default formatter.
+ formatter = None
+
if logging_type in [
DrainType.CONSOLE,
DrainType.CONSOLE_JSON,
@@ -225,9 +86,11 @@ def parse_drain_configs(
% (logging_type,)
)
- pipe = OutputPipeType.lookupByName(location).value
-
- yield DrainConfiguration(name=name, type=logging_type, location=pipe)
+ yield name, {
+ "class": "logging.StreamHandler",
+ "formatter": formatter,
+ "stream": "ext://sys." + location,
+ }
elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]:
if "location" not in config:
@@ -241,18 +104,25 @@ def parse_drain_configs(
"File paths need to be absolute, '%s' is a relative path"
% (location,)
)
- yield DrainConfiguration(name=name, type=logging_type, location=location)
+
+ yield name, {
+ "class": "logging.FileHandler",
+ "formatter": formatter,
+ "filename": location,
+ }
elif logging_type in [DrainType.NETWORK_JSON_TERSE]:
host = config.get("host")
port = config.get("port")
maximum_buffer = config.get("maximum_buffer", 1000)
- yield DrainConfiguration(
- name=name,
- type=logging_type,
- location=(host, port),
- options=NetworkJSONTerseOptions(maximum_buffer=maximum_buffer),
- )
+
+ yield name, {
+ "class": "synapse.logging.RemoteHandler",
+ "formatter": formatter,
+ "host": host,
+ "port": port,
+ "maximum_buffer": maximum_buffer,
+ }
else:
raise ConfigError(
@@ -261,126 +131,29 @@ def parse_drain_configs(
)
-class StoppableLogPublisher(LogPublisher):
- """
- A log publisher that can tell its observers to shut down any external
- communications.
+def setup_structured_logging(log_config: dict) -> dict:
"""
-
- def stop(self):
- for obs in self._observers:
- if hasattr(obs, "stop"):
- obs.stop()
-
-
-def setup_structured_logging(
- hs,
- config,
- log_config: dict,
- logBeginner: LogBeginner,
- redirect_stdlib_logging: bool = True,
-) -> LogPublisher:
+ Convert a legacy structured logging configuration (from Synapse < v1.23.0)
+ to one compatible with the new standard library handlers.
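+
+ For example (a sketch; the drain name is illustrative), a drain of
+ ``{"console": {"type": "console", "location": "stdout"}}`` becomes a
+ ``logging.config.dictConfig`` handler entry using ``logging.StreamHandler``
+ and is attached to the root logger.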
"""
- Set up Twisted's structured logging system.
-
- Args:
- hs: The homeserver to use.
- config (HomeserverConfig): The configuration of the Synapse homeserver.
- log_config (dict): The log configuration to use.
- """
- if config.no_redirect_stdio:
- raise ConfigError(
- "no_redirect_stdio cannot be defined using structured logging."
- )
-
- logger = Logger()
-
if "drains" not in log_config:
raise ConfigError("The logging configuration requires a list of drains.")
- observers = [] # type: List[ILogObserver]
-
- for observer in parse_drain_configs(log_config["drains"]):
- # Pipe drains
- if observer.type == DrainType.CONSOLE:
- logger.debug(
- "Starting up the {name} console logger drain", name=observer.name
- )
- observers.append(SynapseFileLogObserver(observer.location))
- elif observer.type == DrainType.CONSOLE_JSON:
- logger.debug(
- "Starting up the {name} JSON console logger drain", name=observer.name
- )
- observers.append(jsonFileLogObserver(observer.location))
- elif observer.type == DrainType.CONSOLE_JSON_TERSE:
- logger.debug(
- "Starting up the {name} terse JSON console logger drain",
- name=observer.name,
- )
- observers.append(
- TerseJSONToConsoleLogObserver(observer.location, metadata={})
- )
-
- # File drains
- elif observer.type == DrainType.FILE:
- logger.debug("Starting up the {name} file logger drain", name=observer.name)
- log_file = open(observer.location, "at", buffering=1, encoding="utf8")
- observers.append(SynapseFileLogObserver(log_file))
- elif observer.type == DrainType.FILE_JSON:
- logger.debug(
- "Starting up the {name} JSON file logger drain", name=observer.name
- )
- log_file = open(observer.location, "at", buffering=1, encoding="utf8")
- observers.append(jsonFileLogObserver(log_file))
-
- elif observer.type == DrainType.NETWORK_JSON_TERSE:
- metadata = {"server_name": hs.config.server_name}
- log_observer = TerseJSONToTCPLogObserver(
- hs=hs,
- host=observer.location[0],
- port=observer.location[1],
- metadata=metadata,
- maximum_buffer=observer.options.maximum_buffer,
- )
- log_observer.start()
- observers.append(log_observer)
- else:
- # We should never get here, but, just in case, throw an error.
- raise ConfigError("%s drain type cannot be configured" % (observer.type,))
-
- publisher = StoppableLogPublisher(*observers)
- log_filter = LogLevelFilterPredicate()
-
- for namespace, namespace_config in log_config.get(
- "loggers", DEFAULT_LOGGERS
- ).items():
- # Set the log level for twisted.logger.Logger namespaces
- log_filter.setLogLevelForNamespace(
- namespace,
- stdlib_log_level_to_twisted(namespace_config.get("level", "INFO")),
- )
-
- # Also set the log levels for the stdlib logger namespaces, to prevent
- # them getting to PythonStdlibToTwistedLogger and having to be formatted
- if "level" in namespace_config:
- logging.getLogger(namespace).setLevel(namespace_config.get("level"))
-
- f = FilteringLogObserver(publisher, [log_filter])
- lco = LogContextObserver(f)
-
- if redirect_stdlib_logging:
- stuff_into_twisted = PythonStdlibToTwistedLogger(lco)
- stdliblogger = logging.getLogger()
- stdliblogger.addHandler(stuff_into_twisted)
-
- # Always redirect standard I/O, otherwise other logging outputs might miss
- # it.
- logBeginner.beginLoggingTo([lco], redirectStandardIO=True)
+ new_config = {
+ "version": 1,
+ "formatters": {
+ "json": {"class": "synapse.logging.JsonFormatter"},
+ "tersejson": {"class": "synapse.logging.TerseJsonFormatter"},
+ },
+ "handlers": {},
+ "loggers": log_config.get("loggers", DEFAULT_LOGGERS),
+ "root": {"handlers": []},
+ }
- return publisher
+ for handler_name, handler in parse_drain_configs(log_config["drains"]):
+ new_config["handlers"][handler_name] = handler
+ # Add each handler to the root logger.
+ new_config["root"]["handlers"].append(handler_name)
-def reload_structured_logging(*args, log_config=None) -> None:
- warnings.warn(
- "Currently the structured logging system can not be reloaded, doing nothing"
- )
+ return new_config
diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py
index 1b8916cfa2..2fbf5549a1 100644
--- a/synapse/logging/_terse_json.py
+++ b/synapse/logging/_terse_json.py
@@ -16,314 +16,65 @@
"""
Log formatters that output terse JSON.
"""
-
import json
-import sys
-import traceback
-from collections import deque
-from ipaddress import IPv4Address, IPv6Address, ip_address
-from math import floor
-from typing import IO, Optional
-
-import attr
-from zope.interface import implementer
-
-from twisted.application.internet import ClientService
-from twisted.internet.defer import Deferred
-from twisted.internet.endpoints import (
- HostnameEndpoint,
- TCP4ClientEndpoint,
- TCP6ClientEndpoint,
-)
-from twisted.internet.interfaces import IPushProducer, ITransport
-from twisted.internet.protocol import Factory, Protocol
-from twisted.logger import FileLogObserver, ILogObserver, Logger
+import logging
_encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":"))
-
-def flatten_event(event: dict, metadata: dict, include_time: bool = False):
- """
- Flatten a Twisted logging event to an dictionary capable of being sent
- as a log event to a logging aggregation system.
-
- The format is vastly simplified and is not designed to be a "human readable
- string" in the sense that traditional logs are. Instead, the structure is
- optimised for searchability and filtering, with human-understandable log
- keys.
-
- Args:
- event (dict): The Twisted logging event we are flattening.
- metadata (dict): Additional data to include with each log message. This
- can be information like the server name. Since the target log
- consumer does not know who we are other than by host IP, this
- allows us to forward through static information.
- include_time (bool): Should we include the `time` key? If False, the
- event time is stripped from the event.
- """
- new_event = {}
-
- # If it's a failure, make the new event's log_failure be the traceback text.
- if "log_failure" in event:
- new_event["log_failure"] = event["log_failure"].getTraceback()
-
- # If it's a warning, copy over a string representation of the warning.
- if "warning" in event:
- new_event["warning"] = str(event["warning"])
-
- # Stdlib logging events have "log_text" as their human-readable portion,
- # Twisted ones have "log_format". For now, include the log_format, so that
- # context only given in the log format (e.g. what is being logged) is
- # available.
- if "log_text" in event:
- new_event["log"] = event["log_text"]
- else:
- new_event["log"] = event["log_format"]
-
- # We want to include the timestamp when forwarding over the network, but
- # exclude it when we are writing to stdout. This is because the log ingester
- # (e.g. logstash, fluentd) can add its own timestamp.
- if include_time:
- new_event["time"] = round(event["log_time"], 2)
-
- # Convert the log level to a textual representation.
- new_event["level"] = event["log_level"].name.upper()
-
- # Ignore these keys, and do not transfer them over to the new log object.
- # They are either useless (isError), transferred manually above (log_time,
- # log_level, etc), or contain Python objects which are not useful for output
- # (log_logger, log_source).
- keys_to_delete = [
- "isError",
- "log_failure",
- "log_format",
- "log_level",
- "log_logger",
- "log_source",
- "log_system",
- "log_time",
- "log_text",
- "observer",
- "warning",
- ]
-
- # If it's from the Twisted legacy logger (twisted.python.log), it adds some
- # more keys we want to purge.
- if event.get("log_namespace") == "log_legacy":
- keys_to_delete.extend(["message", "system", "time"])
-
- # Rather than modify the dictionary in place, construct a new one with only
- # the content we want. The original event should be considered 'frozen'.
- for key in event.keys():
-
- if key in keys_to_delete:
- continue
-
- if isinstance(event[key], (str, int, bool, float)) or event[key] is None:
- # If it's a plain type, include it as is.
- new_event[key] = event[key]
- else:
- # If it's not one of those basic types, write out a string
- # representation. This should probably be a warning in development,
- # so that we are sure we are only outputting useful data.
- new_event[key] = str(event[key])
-
- # Add the metadata information to the event (e.g. the server_name).
- new_event.update(metadata)
-
- return new_event
-
-
-def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver:
- """
- A log observer that formats events to a flattened JSON representation.
-
- Args:
- outFile: The file object to write to.
- metadata: Metadata to be added to each log object.
- """
-
- def formatEvent(_event: dict) -> str:
- flattened = flatten_event(_event, metadata)
- return _encoder.encode(flattened) + "\n"
-
- return FileLogObserver(outFile, formatEvent)
-
-
-@attr.s
-@implementer(IPushProducer)
-class LogProducer:
- """
- An IPushProducer that writes logs from its buffer to its transport when it
- is resumed.
-
- Args:
- buffer: Log buffer to read logs from.
- transport: Transport to write to.
- """
-
- transport = attr.ib(type=ITransport)
- _buffer = attr.ib(type=deque)
- _paused = attr.ib(default=False, type=bool, init=False)
-
- def pauseProducing(self):
- self._paused = True
-
- def stopProducing(self):
- self._paused = True
- self._buffer = deque()
-
- def resumeProducing(self):
- self._paused = False
-
- while self._paused is False and (self._buffer and self.transport.connected):
- try:
- event = self._buffer.popleft()
- self.transport.write(_encoder.encode(event).encode("utf8"))
- self.transport.write(b"\n")
- except Exception:
- # Something has gone wrong writing to the transport -- log it
- # and break out of the while.
- traceback.print_exc(file=sys.__stderr__)
- break
-
-
-@attr.s
-@implementer(ILogObserver)
-class TerseJSONToTCPLogObserver:
- """
- An IObserver that writes JSON logs to a TCP target.
-
- Args:
- hs (HomeServer): The homeserver that is being logged for.
- host: The host of the logging target.
- port: The logging target's port.
- metadata: Metadata to be added to each log entry.
- """
-
- hs = attr.ib()
- host = attr.ib(type=str)
- port = attr.ib(type=int)
- metadata = attr.ib(type=dict)
- maximum_buffer = attr.ib(type=int)
- _buffer = attr.ib(default=attr.Factory(deque), type=deque)
- _connection_waiter = attr.ib(default=None, type=Optional[Deferred])
- _logger = attr.ib(default=attr.Factory(Logger))
- _producer = attr.ib(default=None, type=Optional[LogProducer])
-
- def start(self) -> None:
-
- # Connect without DNS lookups if it's a direct IP.
- try:
- ip = ip_address(self.host)
- if isinstance(ip, IPv4Address):
- endpoint = TCP4ClientEndpoint(
- self.hs.get_reactor(), self.host, self.port
- )
- elif isinstance(ip, IPv6Address):
- endpoint = TCP6ClientEndpoint(
- self.hs.get_reactor(), self.host, self.port
- )
- except ValueError:
- endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port)
-
- factory = Factory.forProtocol(Protocol)
- self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor())
- self._service.startService()
- self._connect()
-
- def stop(self):
- self._service.stopService()
-
- def _connect(self) -> None:
- """
- Triggers an attempt to connect then write to the remote if not already writing.
- """
- if self._connection_waiter:
- return
-
- self._connection_waiter = self._service.whenConnected(failAfterFailures=1)
-
- @self._connection_waiter.addErrback
- def fail(r):
- r.printTraceback(file=sys.__stderr__)
- self._connection_waiter = None
- self._connect()
-
- @self._connection_waiter.addCallback
- def writer(r):
- # We have a connection. If we already have a producer, and its
- # transport is the same, just trigger a resumeProducing.
- if self._producer and r.transport is self._producer.transport:
- self._producer.resumeProducing()
- self._connection_waiter = None
- return
-
- # If the producer is still producing, stop it.
- if self._producer:
- self._producer.stopProducing()
-
- # Make a new producer and start it.
- self._producer = LogProducer(buffer=self._buffer, transport=r.transport)
- r.transport.registerProducer(self._producer, True)
- self._producer.resumeProducing()
- self._connection_waiter = None
-
- def _handle_pressure(self) -> None:
- """
- Handle backpressure by shedding events.
-
- The buffer will, in this order, until the buffer is below the maximum:
- - Shed DEBUG events
- - Shed INFO events
- - Shed the middle 50% of the events.
- """
- if len(self._buffer) <= self.maximum_buffer:
- return
-
- # Strip out DEBUGs
- self._buffer = deque(
- filter(lambda event: event["level"] != "DEBUG", self._buffer)
- )
-
- if len(self._buffer) <= self.maximum_buffer:
- return
-
- # Strip out INFOs
- self._buffer = deque(
- filter(lambda event: event["level"] != "INFO", self._buffer)
- )
-
- if len(self._buffer) <= self.maximum_buffer:
- return
-
- # Cut the middle entries out
- buffer_split = floor(self.maximum_buffer / 2)
-
- old_buffer = self._buffer
- self._buffer = deque()
-
- for i in range(buffer_split):
- self._buffer.append(old_buffer.popleft())
-
- end_buffer = []
- for i in range(buffer_split):
- end_buffer.append(old_buffer.pop())
-
- self._buffer.extend(reversed(end_buffer))
-
- def __call__(self, event: dict) -> None:
- flattened = flatten_event(event, self.metadata, include_time=True)
- self._buffer.append(flattened)
-
- # Handle backpressure, if it exists.
- try:
- self._handle_pressure()
- except Exception:
- # If handling backpressure fails,clear the buffer and log the
- # exception.
- self._buffer.clear()
- self._logger.failure("Failed clearing backpressure")
-
- # Try and write immediately.
- self._connect()
+# The properties of a standard LogRecord.
+_LOG_RECORD_ATTRIBUTES = {
+ "args",
+ "asctime",
+ "created",
+ "exc_info",
+ # exc_text isn't a public attribute, but is used to cache the result of formatException.
+ "exc_text",
+ "filename",
+ "funcName",
+ "levelname",
+ "levelno",
+ "lineno",
+ "message",
+ "module",
+ "msecs",
+ "msg",
+ "name",
+ "pathname",
+ "process",
+ "processName",
+ "relativeCreated",
+ "stack_info",
+ "thread",
+ "threadName",
+}
+
+
+class JsonFormatter(logging.Formatter):
+ def format(self, record: logging.LogRecord) -> str:
+ event = {
+ "log": record.getMessage(),
+ "namespace": record.name,
+ "level": record.levelname,
+ }
+
+ return self._format(record, event)
+
+ def _format(self, record: logging.LogRecord, event: dict) -> str:
+ # Add any extra attributes to the event.
+ for key, value in record.__dict__.items():
+ if key not in _LOG_RECORD_ATTRIBUTES:
+ event[key] = value
+
+ return _encoder.encode(event)
+
+
+class TerseJsonFormatter(JsonFormatter):
+ def format(self, record: logging.LogRecord) -> str:
+ event = {
+ "log": record.getMessage(),
+ "namespace": record.name,
+ "level": record.levelname,
+ "time": round(record.created, 2),
+ }
+
+ return self._format(record, event)
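+
+
+# A formatted terse-JSON line looks roughly like this (a sketch; the
+# "server_name" key assumes a filter such as MetadataFilter added it):
+#
+#   {"log":"Hello","namespace":"synapse.app","level":"INFO","time":1607000000.0,"server_name":"example.com"}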
diff --git a/synapse/logging/filter.py b/synapse/logging/filter.py
new file mode 100644
index 0000000000..1baf8dd679
--- /dev/null
+++ b/synapse/logging/filter.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from typing_extensions import Literal
+
+
+class MetadataFilter(logging.Filter):
+ """Logging filter that adds constant values to each record.
+
+ Args:
+ metadata: Key-value pairs to add to each record.
+ """
+
+ def __init__(self, metadata: dict):
+ super().__init__()
+ self._metadata = metadata
+
+ def filter(self, record: logging.LogRecord) -> Literal[True]:
+ for key, value in self._metadata.items():
+ setattr(record, key, value)
+ return True
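+
+
+# Example usage (a sketch; the metadata keys are up to the caller):
+#
+#   handler.addFilter(MetadataFilter({"server_name": "example.com"}))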
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index e58850faff..ab586c318c 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -317,7 +317,7 @@ def ensure_active_span(message, ret=None):
@contextlib.contextmanager
-def _noop_context_manager(*args, **kwargs):
+def noop_context_manager(*args, **kwargs):
"""Does exactly what it says on the tin"""
yield
@@ -413,7 +413,7 @@ def start_active_span(
"""
if opentracing is None:
- return _noop_context_manager()
+ return noop_context_manager()
return opentracing.tracer.start_active_span(
operation_name,
@@ -428,7 +428,7 @@ def start_active_span(
def start_active_span_follows_from(operation_name, contexts):
if opentracing is None:
- return _noop_context_manager()
+ return noop_context_manager()
references = [opentracing.follows_from(context) for context in contexts]
scope = start_active_span(operation_name, references=references)
@@ -459,7 +459,7 @@ def start_active_span_from_request(
# Also, twisted uses byte arrays while opentracing expects strings.
if opentracing is None:
- return _noop_context_manager()
+ return noop_context_manager()
header_dict = {
k.decode(): v[0].decode() for k, v in request.requestHeaders.getAllRawHeaders()
@@ -497,7 +497,7 @@ def start_active_span_from_edu(
"""
if opentracing is None:
- return _noop_context_manager()
+ return noop_context_manager()
carrier = json_decoder.decode(edu_content.get("context", "{}")).get(
"opentracing", {}
|