From d24346f53055eae7fb8e9038ef35fa843790742b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 21 Oct 2022 16:03:44 +0100 Subject: Fix logging error on SIGHUP (#14258) --- synapse/app/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app/_base.py') diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 000912e86e..a683ebf4cb 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -558,7 +558,7 @@ def reload_cache_config(config: HomeServerConfig) -> None: logger.warning(f) else: logger.debug( - "New cache config. Was:\n %s\nNow:\n", + "New cache config. Was:\n %s\nNow:\n %s", previous_cache_config.__dict__, config.caches.__dict__, ) -- cgit 1.5.1 From b5ab2c428a1c5edd634ff084019811e5f6b963d8 Mon Sep 17 00:00:00 2001 From: Tuomas Ojamies Date: Tue, 15 Nov 2022 13:55:00 +0100 Subject: Support using SSL on worker endpoints. (#14128) * Fix missing SSL support in worker endpoints. * Add changelog * SSL for Replication endpoint * Remove unit test change * Refactor listener creation to reduce duplicated code * Fix the logger message * Update synapse/app/_base.py Co-authored-by: Patrick Cloke * Update synapse/app/_base.py Co-authored-by: Patrick Cloke * Update synapse/app/_base.py Co-authored-by: Patrick Cloke * Add config documentation for new TLS option Co-authored-by: Tuomas Ojamies Co-authored-by: Patrick Cloke Co-authored-by: Olivier Wilkinson (reivilibre) --- changelog.d/14128.misc | 1 + docs/usage/configuration/config_documentation.md | 20 +++++++++ synapse/app/_base.py | 53 +++++++++++++++++++++++- synapse/app/generic_worker.py | 28 ++++--------- synapse/app/homeserver.py | 34 ++------------- synapse/config/workers.py | 7 ++++ synapse/replication/http/_base.py | 10 ++++- 7 files changed, 100 insertions(+), 53 deletions(-) create mode 100644 changelog.d/14128.misc (limited to 'synapse/app/_base.py') diff --git a/changelog.d/14128.misc b/changelog.d/14128.misc new file mode 100644 index 0000000000..29168ef955 --- /dev/null +++ b/changelog.d/14128.misc @@ -0,0 +1 @@ +Add TLS support for generic worker endpoints. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 9a6bd08d01..f5937dd902 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3893,6 +3893,26 @@ Example configuration: worker_replication_http_port: 9093 ``` --- +### `worker_replication_http_tls` + +Whether TLS should be used for talking to the HTTP replication port on the main +Synapse process. +The main Synapse process defines this with the `tls` option on its [listener](#listeners) that +has the `replication` resource enabled. + +**Please note:** by default, it is not safe to expose replication ports to the +public Internet, even with TLS enabled. +See [`worker_replication_secret`](#worker_replication_secret). + +Defaults to `false`. + +*Added in Synapse 1.72.0.* + +Example configuration: +```yaml +worker_replication_http_tls: true +``` +--- ### `worker_listeners` A worker can handle HTTP requests. To do so, a `worker_listeners` option diff --git a/synapse/app/_base.py b/synapse/app/_base.py index a683ebf4cb..8f5b1a20f5 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -47,6 +47,7 @@ from twisted.internet.tcp import Port from twisted.logger import LoggingFile, LogLevel from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.threadpool import ThreadPool +from twisted.web.resource import Resource import synapse.util.caches from synapse.api.constants import MAX_PDU_SIZE @@ -55,12 +56,13 @@ from synapse.app.phone_stats_home import start_phone_stats_home from synapse.config import ConfigError from synapse.config._base import format_config_error from synapse.config.homeserver import HomeServerConfig -from synapse.config.server import ManholeConfig +from synapse.config.server import ListenerConfig, ManholeConfig from synapse.crypto import context_factory from synapse.events.presence_router import load_legacy_presence_router from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.handlers.auth import load_legacy_password_auth_providers +from synapse.http.site import SynapseSite from synapse.logging.context import PreserveLoggingContext from synapse.logging.opentracing import init_tracer from synapse.metrics import install_gc_manager, register_threadpool @@ -357,6 +359,55 @@ def listen_tcp( return r # type: ignore[return-value] +def listen_http( + listener_config: ListenerConfig, + root_resource: Resource, + version_string: str, + max_request_body_size: int, + context_factory: IOpenSSLContextFactory, + reactor: IReactorSSL = reactor, +) -> List[Port]: + port = listener_config.port + bind_addresses = listener_config.bind_addresses + tls = listener_config.tls + + assert listener_config.http_options is not None + + site_tag = listener_config.http_options.tag + if site_tag is None: + site_tag = str(port) + + site = SynapseSite( + "synapse.access.%s.%s" % ("https" if tls else "http", site_tag), + site_tag, + listener_config, + root_resource, + version_string, + max_request_body_size=max_request_body_size, + reactor=reactor, + ) + if tls: + # refresh_certificate should have been called before this. + assert context_factory is not None + ports = listen_ssl( + bind_addresses, + port, + site, + context_factory, + reactor=reactor, + ) + logger.info("Synapse now listening on TCP port %d (TLS)", port) + else: + ports = listen_tcp( + bind_addresses, + port, + site, + reactor=reactor, + ) + logger.info("Synapse now listening on TCP port %d", port) + return ports + + def listen_ssl( bind_addresses: Collection[str], port: int, diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 51446b49cd..1d9aef45c2 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -44,7 +44,7 @@ from synapse.config.server import ListenerConfig from synapse.federation.transport.server import TransportLayerServer from synapse.http.server import JsonResource, OptionsResource from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.http.site import SynapseRequest, SynapseSite +from synapse.http.site import SynapseRequest from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource @@ -288,15 +288,9 @@ class GenericWorkerServer(HomeServer): DATASTORE_CLASS = GenericWorkerSlavedStore # type: ignore def _listen_http(self, listener_config: ListenerConfig) -> None: - port = listener_config.port - bind_addresses = listener_config.bind_addresses assert listener_config.http_options is not None - site_tag = listener_config.http_options.tag - if site_tag is None: - site_tag = str(port) - # We always include a health resource. resources: Dict[str, Resource] = {"/health": HealthResource()} @@ -395,23 +389,15 @@ class GenericWorkerServer(HomeServer): root_resource = create_resource_tree(resources, OptionsResource()) - _base.listen_tcp( - bind_addresses, - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - self.version_string, - max_request_body_size=max_request_body_size(self.config), - reactor=self.get_reactor(), - ), + _base.listen_http( + listener_config, + root_resource, + self.version_string, + max_request_body_size(self.config), + self.tls_server_context_factory, reactor=self.get_reactor(), ) - logger.info("Synapse worker now listening on port %d", port) - def start_listening(self) -> None: for listener in self.config.worker.worker_listeners: if listener.type == "http": diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index de3f08876f..4f4fee4782 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -37,8 +37,7 @@ from synapse.api.urls import ( from synapse.app import _base from synapse.app._base import ( handle_startup_exception, - listen_ssl, - listen_tcp, + listen_http, max_request_body_size, redirect_stdio_to_logs, register_start, @@ -53,7 +52,6 @@ from synapse.http.server import ( RootOptionsRedirectResource, StaticResource, ) -from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource @@ -83,8 +81,6 @@ class SynapseHomeServer(HomeServer): self, config: HomeServerConfig, listener_config: ListenerConfig ) -> Iterable[Port]: port = listener_config.port - bind_addresses = listener_config.bind_addresses - tls = listener_config.tls # Must exist since this is an HTTP listener. assert listener_config.http_options is not None site_tag = listener_config.http_options.tag @@ -140,37 +136,15 @@ class SynapseHomeServer(HomeServer): else: root_resource = OptionsResource() - site = SynapseSite( - "synapse.access.%s.%s" % ("https" if tls else "http", site_tag), - site_tag, + ports = listen_http( listener_config, create_resource_tree(resources, root_resource), self.version_string, - max_request_body_size=max_request_body_size(self.config), + max_request_body_size(self.config), + self.tls_server_context_factory, reactor=self.get_reactor(), ) - if tls: - # refresh_certificate should have been called before this. - assert self.tls_server_context_factory is not None - ports = listen_ssl( - bind_addresses, - port, - site, - self.tls_server_context_factory, - reactor=self.get_reactor(), - ) - logger.info("Synapse now listening on TCP port %d (TLS)", port) - - else: - ports = listen_tcp( - bind_addresses, - port, - site, - reactor=self.get_reactor(), - ) - logger.info("Synapse now listening on TCP port %d", port) - return ports def _configure_named_resource( diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 0fb725dd8f..88b3168cbc 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -67,6 +67,7 @@ class InstanceLocationConfig: host: str port: int + tls: bool = False @attr.s @@ -149,6 +150,12 @@ class WorkerConfig(Config): # The port on the main synapse for HTTP replication endpoint self.worker_replication_http_port = config.get("worker_replication_http_port") + # The tls mode on the main synapse for HTTP replication endpoint. + # For backward compatibility this defaults to False. + self.worker_replication_http_tls = config.get( + "worker_replication_http_tls", False + ) + # The shared secret used for authentication when connecting to the main synapse. self.worker_replication_secret = config.get("worker_replication_secret", None) diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index acb0bd18f7..5e661f8c73 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -184,8 +184,10 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): client = hs.get_simple_http_client() local_instance_name = hs.get_instance_name() + # The value of these option should match the replication listener settings master_host = hs.config.worker.worker_replication_host master_port = hs.config.worker.worker_replication_http_port + master_tls = hs.config.worker.worker_replication_http_tls instance_map = hs.config.worker.instance_map @@ -205,9 +207,11 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): if instance_name == "master": host = master_host port = master_port + tls = master_tls elif instance_name in instance_map: host = instance_map[instance_name].host port = instance_map[instance_name].port + tls = instance_map[instance_name].tls else: raise Exception( "Instance %r not in 'instance_map' config" % (instance_name,) @@ -238,7 +242,11 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta): "Unknown METHOD on %s replication endpoint" % (cls.NAME,) ) - uri = "http://%s:%s/_synapse/replication/%s/%s" % ( + # Here the protocol is hard coded to be http by default or https in case the replication + # port is set to have tls true. + scheme = "https" if tls else "http" + uri = "%s://%s:%s/_synapse/replication/%s/%s" % ( + scheme, host, port, cls.NAME, -- cgit 1.5.1 From 258b5285b6b486526dffef9431c2ab063913f42b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 15 Nov 2022 16:36:43 +0000 Subject: Fix typechecking errors introduced in #14128 (#14455) * Fix typechecking errors introduced in #14128 * Changelog * Correct annotations so that context_factory works if you don't use TLS --- changelog.d/14455.misc | 1 + synapse/app/_base.py | 4 ++-- synapse/server.py | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14455.misc (limited to 'synapse/app/_base.py') diff --git a/changelog.d/14455.misc b/changelog.d/14455.misc new file mode 100644 index 0000000000..29168ef955 --- /dev/null +++ b/changelog.d/14455.misc @@ -0,0 +1 @@ +Add TLS support for generic worker endpoints. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 8f5b1a20f5..41d2732ef9 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -364,8 +364,8 @@ def listen_http( root_resource: Resource, version_string: str, max_request_body_size: int, - context_factory: IOpenSSLContextFactory, - reactor: IReactorSSL = reactor, + context_factory: Optional[IOpenSSLContextFactory], + reactor: ISynapseReactor = reactor, ) -> List[Port]: port = listener_config.port bind_addresses = listener_config.bind_addresses diff --git a/synapse/server.py b/synapse/server.py index c4e025af22..f0a60d0056 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -221,8 +221,6 @@ class HomeServer(metaclass=abc.ABCMeta): # instantiated during setup() for future return by get_datastores() DATASTORE_CLASS = abc.abstractproperty() - tls_server_context_factory: Optional[IOpenSSLContextFactory] - def __init__( self, hostname: str, @@ -258,6 +256,9 @@ class HomeServer(metaclass=abc.ABCMeta): self._module_web_resources: Dict[str, Resource] = {} self._module_web_resources_consumed = False + # This attribute is set by the free function `refresh_certificate`. + self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None + def register_module_web_resource(self, path: str, resource: Resource) -> None: """Allows a module to register a web resource to be served at the given path. -- cgit 1.5.1 From 9af2be192a759c22d189b72cc0a7580cd9de8a37 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 24 Nov 2022 09:09:17 +0000 Subject: Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0. (#14538) --- changelog.d/14538.removal | 1 + docs/upgrade.md | 22 ++ docs/usage/configuration/config_documentation.md | 25 -- synapse/app/_base.py | 16 +- synapse/app/generic_worker.py | 1 - synapse/app/homeserver.py | 1 - synapse/config/metrics.py | 2 - synapse/metrics/__init__.py | 7 +- synapse/metrics/_legacy_exposition.py | 288 ----------------------- synapse/metrics/_twisted_exposition.py | 38 +++ tests/storage/test_event_metrics.py | 7 +- 11 files changed, 70 insertions(+), 338 deletions(-) create mode 100644 changelog.d/14538.removal delete mode 100644 synapse/metrics/_legacy_exposition.py create mode 100644 synapse/metrics/_twisted_exposition.py (limited to 'synapse/app/_base.py') diff --git a/changelog.d/14538.removal b/changelog.d/14538.removal new file mode 100644 index 0000000000..d2035ce82a --- /dev/null +++ b/changelog.d/14538.removal @@ -0,0 +1 @@ +Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index 2aa353e496..4fe9e4f02e 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,28 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.73.0 + +## Legacy Prometheus metric names have now been removed + +Synapse v1.69.0 included the deprecation of legacy Prometheus metric names +and offered an option to disable them. +Synapse v1.71.0 disabled legacy Prometheus metric names by default. + +This version, v1.73.0, removes those legacy Prometheus metric names entirely. +This also means that the `enable_legacy_metrics` configuration option has been +removed; it will no longer be possible to re-enable the legacy metric names. + +If you use metrics and have not yet updated your Grafana dashboard(s), +Prometheus console(s) or alerting rule(s), please consider doing so when upgrading +to this version. +Note that the included Grafana dashboard was updated in v1.72.0 to correct some +metric names which were missed when legacy metrics were disabled by default. + +See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names) +for more context. + + # Upgrading to v1.72.0 ## Dropping support for PostgreSQL 10 diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f5937dd902..fae2771fad 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2437,31 +2437,6 @@ Example configuration: enable_metrics: true ``` --- -### `enable_legacy_metrics` - -Set to `true` to publish both legacy and non-legacy Prometheus metric names, -or to `false` to only publish non-legacy Prometheus metric names. -Defaults to `false`. Has no effect if `enable_metrics` is `false`. -**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.** - -Legacy metric names include: -- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; -- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard. - -These legacy metric names are unconventional and not compliant with OpenMetrics standards. -They are included for backwards compatibility. - -Example configuration: -```yaml -enable_legacy_metrics: false -``` - -See https://github.com/matrix-org/synapse/issues/11106 for context. - -*Since v1.67.0.* - -**Will be removed in v1.73.0.** ---- ### `sentry` Use this option to enable sentry integration. Provide the DSN assigned to you by sentry diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 41d2732ef9..a5aa2185a2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -266,26 +266,18 @@ def register_start( reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics( - bind_addresses: Iterable[str], port: int, enable_legacy_metric_names: bool -) -> None: +def listen_metrics(bind_addresses: Iterable[str], port: int) -> None: """ Start Prometheus metrics server. """ from prometheus_client import start_http_server as start_http_server_prometheus - from synapse.metrics import ( - RegistryProxy, - start_http_server as start_http_server_legacy, - ) + from synapse.metrics import RegistryProxy for host in bind_addresses: logger.info("Starting metrics listener on %s:%d", host, port) - if enable_legacy_metric_names: - start_http_server_legacy(port, addr=host, registry=RegistryProxy) - else: - _set_prometheus_client_use_created_metrics(False) - start_http_server_prometheus(port, addr=host, registry=RegistryProxy) + _set_prometheus_client_use_created_metrics(False) + start_http_server_prometheus(port, addr=host, registry=RegistryProxy) def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 74909b7d4a..46dc731696 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -320,7 +320,6 @@ class GenericWorkerServer(HomeServer): _base.listen_metrics( listener.bind_addresses, listener.port, - enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, ) else: logger.warning("Unsupported listener type: %s", listener.type) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 4f4fee4782..b9be558c7e 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -265,7 +265,6 @@ class SynapseHomeServer(HomeServer): _base.listen_metrics( listener.bind_addresses, listener.port, - enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics, ) else: # this shouldn't happen, as the listener type should have been checked diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index 6034a0346e..8c1c9bd12d 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -43,8 +43,6 @@ class MetricsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_metrics = config.get("enable_metrics", False) - self.enable_legacy_metrics = config.get("enable_legacy_metrics", False) - self.report_stats = config.get("report_stats", None) self.report_stats_endpoint = config.get( "report_stats_endpoint", "https://matrix.org/report-usage-stats/push" diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index c3d3daf877..b01372565d 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -47,11 +47,7 @@ from twisted.python.threadpool import ThreadPool # This module is imported for its side effects; flake8 needn't warn that it's unused. import synapse.metrics._reactor_metrics # noqa: F401 from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager -from synapse.metrics._legacy_exposition import ( - MetricsResource, - generate_latest, - start_http_server, -) +from synapse.metrics._twisted_exposition import MetricsResource, generate_latest from synapse.metrics._types import Collector from synapse.util import SYNAPSE_VERSION @@ -474,7 +470,6 @@ __all__ = [ "Collector", "MetricsResource", "generate_latest", - "start_http_server", "LaterGauge", "InFlightGauge", "GaugeBucketCollector", diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py deleted file mode 100644 index 1459f9d224..0000000000 --- a/synapse/metrics/_legacy_exposition.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2015-2019 Prometheus Python Client Developers -# Copyright 2019 Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code is based off `prometheus_client/exposition.py` from version 0.7.1. - -Due to the renaming of metrics in prometheus_client 0.4.0, this customised -vendoring of the code will emit both the old versions that Synapse dashboards -expect, and the newer "best practice" version of the up-to-date official client. -""" -import logging -import math -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer -from socketserver import ThreadingMixIn -from typing import Any, Dict, List, Type, Union -from urllib.parse import parse_qs, urlparse - -from prometheus_client import REGISTRY, CollectorRegistry -from prometheus_client.core import Sample - -from twisted.web.resource import Resource -from twisted.web.server import Request - -logger = logging.getLogger(__name__) -CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" - - -def floatToGoString(d: Union[int, float]) -> str: - d = float(d) - if d == math.inf: - return "+Inf" - elif d == -math.inf: - return "-Inf" - elif math.isnan(d): - return "NaN" - else: - s = repr(d) - dot = s.find(".") - # Go switches to exponents sooner than Python. - # We only need to care about positive values for le/quantile. - if d > 0 and dot > 6: - mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.") - return f"{mantissa}e+0{dot - 1}" - return s - - -def sample_line(line: Sample, name: str) -> str: - if line.labels: - labelstr = "{{{0}}}".format( - ",".join( - [ - '{}="{}"'.format( - k, - v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), - ) - for k, v in sorted(line.labels.items()) - ] - ) - ) - else: - labelstr = "" - timestamp = "" - if line.timestamp is not None: - # Convert to milliseconds. - timestamp = f" {int(float(line.timestamp) * 1000):d}" - return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) - - -# Mapping from new metric names to legacy metric names. -# We translate these back to their old names when exposing them through our -# legacy vendored exporter. -# Only this legacy exposition module applies these name changes. -LEGACY_METRIC_NAMES = { - "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits", - "synapse_util_caches_cache_size": "synapse_util_caches_cache:size", - "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size", - "synapse_util_caches_cache": "synapse_util_caches_cache:total", - "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size", - "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", - "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", - "synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total", - "synapse_federation_client_sent_pdu_destinations": "synapse_federation_client_sent_pdu_destinations:total", - "synapse_federation_client_sent_pdu_destinations_count": "synapse_federation_client_sent_pdu_destinations:count", - "synapse_admin_mau_current": "synapse_admin_mau:current", - "synapse_admin_mau_max": "synapse_admin_mau:max", - "synapse_admin_mau_registered_reserved_users": "synapse_admin_mau:registered_reserved_users", -} - - -def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: - """ - Generate metrics in legacy format. Modern metrics are generated directly - by prometheus-client. - """ - - output = [] - - for metric in registry.collect(): - if not metric.samples: - # No samples, don't bother. - continue - - # Translate to legacy metric name if it has one. - mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name) - mnewname = metric.name - mtype = metric.type - - # OpenMetrics -> Prometheus - if mtype == "counter": - mnewname = mnewname + "_total" - elif mtype == "info": - mtype = "gauge" - mnewname = mnewname + "_info" - elif mtype == "stateset": - mtype = "gauge" - elif mtype == "gaugehistogram": - mtype = "histogram" - elif mtype == "unknown": - mtype = "untyped" - - # Output in the old format for compatibility. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mname} {mtype}\n") - - om_samples: Dict[str, List[str]] = {} - for s in metric.samples: - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == mname + suffix: - # OpenMetrics specific sample, put in a gauge at the end. - # (these come from gaugehistograms which don't get renamed, - # so no need to faff with mnewname) - om_samples.setdefault(suffix, []).append(sample_line(s, s.name)) - break - else: - newname = s.name.replace(mnewname, mname) - if ":" in newname and newname.endswith("_total"): - newname = newname[: -len("_total")] - output.append(sample_line(s, newname)) - - for suffix, lines in sorted(om_samples.items()): - if emit_help: - output.append( - "# HELP {}{} {}\n".format( - mname, - suffix, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mname}{suffix} gauge\n") - output.extend(lines) - - # Get rid of the weird colon things while we're at it - if mtype == "counter": - mnewname = mnewname.replace(":total", "") - mnewname = mnewname.replace(":", "_") - - if mname == mnewname: - continue - - # Also output in the new format, if it's different. - if emit_help: - output.append( - "# HELP {} {}\n".format( - mnewname, - metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), - ) - ) - output.append(f"# TYPE {mnewname} {mtype}\n") - - for s in metric.samples: - # Get rid of the OpenMetrics specific samples (we should already have - # dealt with them above anyway.) - for suffix in ["_created", "_gsum", "_gcount"]: - if s.name == mname + suffix: - break - else: - sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name) - output.append( - sample_line(s, sample_name.replace(":total", "").replace(":", "_")) - ) - - return "".join(output).encode("utf-8") - - -class MetricsHandler(BaseHTTPRequestHandler): - """HTTP handler that gives metrics from ``REGISTRY``.""" - - registry = REGISTRY - - def do_GET(self) -> None: - registry = self.registry - params = parse_qs(urlparse(self.path).query) - - if "help" in params: - emit_help = True - else: - emit_help = False - - try: - output = generate_latest(registry, emit_help=emit_help) - except Exception: - self.send_error(500, "error generating metric output") - raise - try: - self.send_response(200) - self.send_header("Content-Type", CONTENT_TYPE_LATEST) - self.send_header("Content-Length", str(len(output))) - self.end_headers() - self.wfile.write(output) - except BrokenPipeError as e: - logger.warning( - "BrokenPipeError when serving metrics (%s). Did Prometheus restart?", e - ) - - def log_message(self, format: str, *args: Any) -> None: - """Log nothing.""" - - @classmethod - def factory(cls, registry: CollectorRegistry) -> Type: - """Returns a dynamic MetricsHandler class tied - to the passed registry. - """ - # This implementation relies on MetricsHandler.registry - # (defined above and defaulted to REGISTRY). - - # As we have unicode_literals, we need to create a str() - # object for type(). - cls_name = str(cls.__name__) - MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) - return MyMetricsHandler - - -class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): - """Thread per request HTTP server.""" - - # Make worker threads "fire and forget". Beginning with Python 3.7 this - # prevents a memory leak because ``ThreadingMixIn`` starts to gather all - # non-daemon threads in a list in order to join on them at server close. - # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the - # same as Python 3.7's ``ThreadingHTTPServer``. - daemon_threads = True - - -def start_http_server( - port: int, addr: str = "", registry: CollectorRegistry = REGISTRY -) -> None: - """Starts an HTTP server for prometheus metrics as a daemon thread""" - CustomMetricsHandler = MetricsHandler.factory(registry) - httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) - t = threading.Thread(target=httpd.serve_forever) - t.daemon = True - t.start() - - -class MetricsResource(Resource): - """ - Twisted ``Resource`` that serves prometheus metrics. - """ - - isLeaf = True - - def __init__(self, registry: CollectorRegistry = REGISTRY): - self.registry = registry - - def render_GET(self, request: Request) -> bytes: - request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) - response = generate_latest(self.registry) - request.setHeader(b"Content-Length", str(len(response))) - return response diff --git a/synapse/metrics/_twisted_exposition.py b/synapse/metrics/_twisted_exposition.py new file mode 100644 index 0000000000..0abcd14953 --- /dev/null +++ b/synapse/metrics/_twisted_exposition.py @@ -0,0 +1,38 @@ +# Copyright 2015-2019 Prometheus Python Client Developers +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from prometheus_client import REGISTRY, CollectorRegistry, generate_latest + +from twisted.web.resource import Resource +from twisted.web.server import Request + +CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" + + +class MetricsResource(Resource): + """ + Twisted ``Resource`` that serves prometheus metrics. + """ + + isLeaf = True + + def __init__(self, registry: CollectorRegistry = REGISTRY): + self.registry = registry + + def render_GET(self, request: Request) -> bytes: + request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) + response = generate_latest(self.registry) + request.setHeader(b"Content-Length", str(len(response))) + return response diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index 088fbb247b..6f1135eef4 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from prometheus_client import generate_latest -from synapse.metrics import REGISTRY, generate_latest +from synapse.metrics import REGISTRY from synapse.types import UserID, create_requester from tests.unittest import HomeserverTestCase @@ -53,8 +54,8 @@ class ExtremStatisticsTestCase(HomeserverTestCase): items = list( filter( - lambda x: b"synapse_forward_extremities_" in x, - generate_latest(REGISTRY, emit_help=False).split(b"\n"), + lambda x: b"synapse_forward_extremities_" in x and b"# HELP" not in x, + generate_latest(REGISTRY).split(b"\n"), ) ) -- cgit 1.5.1