diff options
author | reivilibre <oliverw@matrix.org> | 2022-08-24 11:35:54 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-24 11:35:54 +0000 |
commit | be4250c7a888e314e361df42042bfa344ab65d55 (patch) | |
tree | cb3cb0b0566d0b7522a2a3cc452c44553cf72683 /synapse/metrics/_legacy_exposition.py | |
parent | Add GitHub automation for new issues (#13610) (diff) | |
download | synapse-be4250c7a888e314e361df42042bfa344ab65d55.tar.xz |
Add experimental configuration option to allow disabling legacy Prometheus metric names. (#13540)
Co-authored-by: David Robertson <davidr@element.io>
Diffstat (limited to 'synapse/metrics/_legacy_exposition.py')
-rw-r--r-- | synapse/metrics/_legacy_exposition.py | 284 |
1 files changed, 284 insertions, 0 deletions
diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py new file mode 100644 index 0000000000..ff640a49af --- /dev/null +++ b/synapse/metrics/_legacy_exposition.py @@ -0,0 +1,284 @@ +# Copyright 2015-2019 Prometheus Python Client Developers +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code is based off `prometheus_client/exposition.py` from version 0.7.1. + +Due to the renaming of metrics in prometheus_client 0.4.0, this customised +vendoring of the code will emit both the old versions that Synapse dashboards +expect, and the newer "best practice" version of the up-to-date official client. +""" + +import math +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from typing import Any, Dict, List, Type, Union +from urllib.parse import parse_qs, urlparse + +from prometheus_client import REGISTRY, CollectorRegistry +from prometheus_client.core import Sample + +from twisted.web.resource import Resource +from twisted.web.server import Request + +from synapse.util import caches + +CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8" + + +def floatToGoString(d: Union[int, float]) -> str: + d = float(d) + if d == math.inf: + return "+Inf" + elif d == -math.inf: + return "-Inf" + elif math.isnan(d): + return "NaN" + else: + s = repr(d) + dot = s.find(".") + # Go switches to exponents sooner than Python. + # We only need to care about positive values for le/quantile. + if d > 0 and dot > 6: + mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.") + return f"{mantissa}e+0{dot - 1}" + return s + + +def sample_line(line: Sample, name: str) -> str: + if line.labels: + labelstr = "{{{0}}}".format( + ",".join( + [ + '{}="{}"'.format( + k, + v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""), + ) + for k, v in sorted(line.labels.items()) + ] + ) + ) + else: + labelstr = "" + timestamp = "" + if line.timestamp is not None: + # Convert to milliseconds. + timestamp = f" {int(float(line.timestamp) * 1000):d}" + return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp) + + +# Mapping from new metric names to legacy metric names. +# We translate these back to their old names when exposing them through our +# legacy vendored exporter. +# Only this legacy exposition module applies these name changes. +LEGACY_METRIC_NAMES = { + "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits", + "synapse_util_caches_cache_size": "synapse_util_caches_cache:size", + "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size", + "synapse_util_caches_cache_total": "synapse_util_caches_cache:total", + "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size", + "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits", + "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size", + "synapse_util_caches_response_cache_total": "synapse_util_caches_response_cache:total", +} + + +def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes: + """ + Generate metrics in legacy format. Modern metrics are generated directly + by prometheus-client. + """ + + # Trigger the cache metrics to be rescraped, which updates the common + # metrics but do not produce metrics themselves + for collector in caches.collectors_by_name.values(): + collector.collect() + + output = [] + + for metric in registry.collect(): + if not metric.samples: + # No samples, don't bother. + continue + + # Translate to legacy metric name if it has one. + mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name) + mnewname = metric.name + mtype = metric.type + + # OpenMetrics -> Prometheus + if mtype == "counter": + mnewname = mnewname + "_total" + elif mtype == "info": + mtype = "gauge" + mnewname = mnewname + "_info" + elif mtype == "stateset": + mtype = "gauge" + elif mtype == "gaugehistogram": + mtype = "histogram" + elif mtype == "unknown": + mtype = "untyped" + + # Output in the old format for compatibility. + if emit_help: + output.append( + "# HELP {} {}\n".format( + mname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mname} {mtype}\n") + + om_samples: Dict[str, List[str]] = {} + for s in metric.samples: + for suffix in ["_created", "_gsum", "_gcount"]: + if s.name == mname + suffix: + # OpenMetrics specific sample, put in a gauge at the end. + # (these come from gaugehistograms which don't get renamed, + # so no need to faff with mnewname) + om_samples.setdefault(suffix, []).append(sample_line(s, s.name)) + break + else: + newname = s.name.replace(mnewname, mname) + if ":" in newname and newname.endswith("_total"): + newname = newname[: -len("_total")] + output.append(sample_line(s, newname)) + + for suffix, lines in sorted(om_samples.items()): + if emit_help: + output.append( + "# HELP {}{} {}\n".format( + mname, + suffix, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mname}{suffix} gauge\n") + output.extend(lines) + + # Get rid of the weird colon things while we're at it + if mtype == "counter": + mnewname = mnewname.replace(":total", "") + mnewname = mnewname.replace(":", "_") + + if mname == mnewname: + continue + + # Also output in the new format, if it's different. + if emit_help: + output.append( + "# HELP {} {}\n".format( + mnewname, + metric.documentation.replace("\\", r"\\").replace("\n", r"\n"), + ) + ) + output.append(f"# TYPE {mnewname} {mtype}\n") + + for s in metric.samples: + # Get rid of the OpenMetrics specific samples (we should already have + # dealt with them above anyway.) + for suffix in ["_created", "_gsum", "_gcount"]: + if s.name == mname + suffix: + break + else: + sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name) + output.append( + sample_line(s, sample_name.replace(":total", "").replace(":", "_")) + ) + + return "".join(output).encode("utf-8") + + +class MetricsHandler(BaseHTTPRequestHandler): + """HTTP handler that gives metrics from ``REGISTRY``.""" + + registry = REGISTRY + + def do_GET(self) -> None: + registry = self.registry + params = parse_qs(urlparse(self.path).query) + + if "help" in params: + emit_help = True + else: + emit_help = False + + try: + output = generate_latest(registry, emit_help=emit_help) + except Exception: + self.send_error(500, "error generating metric output") + raise + self.send_response(200) + self.send_header("Content-Type", CONTENT_TYPE_LATEST) + self.send_header("Content-Length", str(len(output))) + self.end_headers() + self.wfile.write(output) + + def log_message(self, format: str, *args: Any) -> None: + """Log nothing.""" + + @classmethod + def factory(cls, registry: CollectorRegistry) -> Type: + """Returns a dynamic MetricsHandler class tied + to the passed registry. + """ + # This implementation relies on MetricsHandler.registry + # (defined above and defaulted to REGISTRY). + + # As we have unicode_literals, we need to create a str() + # object for type(). + cls_name = str(cls.__name__) + MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry}) + return MyMetricsHandler + + +class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer): + """Thread per request HTTP server.""" + + # Make worker threads "fire and forget". Beginning with Python 3.7 this + # prevents a memory leak because ``ThreadingMixIn`` starts to gather all + # non-daemon threads in a list in order to join on them at server close. + # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the + # same as Python 3.7's ``ThreadingHTTPServer``. + daemon_threads = True + + +def start_http_server( + port: int, addr: str = "", registry: CollectorRegistry = REGISTRY +) -> None: + """Starts an HTTP server for prometheus metrics as a daemon thread""" + CustomMetricsHandler = MetricsHandler.factory(registry) + httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler) + t = threading.Thread(target=httpd.serve_forever) + t.daemon = True + t.start() + + +class MetricsResource(Resource): + """ + Twisted ``Resource`` that serves prometheus metrics. + """ + + isLeaf = True + + def __init__(self, registry: CollectorRegistry = REGISTRY): + self.registry = registry + + def render_GET(self, request: Request) -> bytes: + request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii")) + response = generate_latest(self.registry) + request.setHeader(b"Content-Length", str(len(response))) + return response |