diff options
author | Richard van der Hoff <1389908+richvdh@users.noreply.github.com> | 2022-01-17 12:14:40 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-17 12:14:40 +0000 |
commit | 6a78ede56932b486c0ca3cc7c6edd84a2f373366 (patch) | |
tree | dcfe2e7e2ba0acf0f7e23a741b5b4b8333e12f8e /synapse/metrics/_reactor_metrics.py | |
parent | Make pagination of rooms in admin api stable (#11737) (diff) | |
download | synapse-6a78ede56932b486c0ca3cc7c6edd84a2f373366.tar.xz |
Improve `reactor_tick_time` metric (#11724)
The existing implementation of the `python_twisted_reactor_tick_time` metric is pretty useless, because it *only* measures the time taken to execute timed calls and callbacks from threads. That neglects everything that happens off the back of I/O, which is obviously quite a lot for us. To improve this, I've hooked into a different place in the reactor - in particular, where it calls `epoll`. That call is the only place it should wait for something to happen - the rest of the loop *should* be quick. I've also removed `python_twisted_reactor_pending_calls`, because I don't believe anyone ever looks at it, and it's a nuisance to populate.
Diffstat (limited to 'synapse/metrics/_reactor_metrics.py')
-rw-r--r-- | synapse/metrics/_reactor_metrics.py | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/synapse/metrics/_reactor_metrics.py b/synapse/metrics/_reactor_metrics.py new file mode 100644 index 0000000000..ce0688621c --- /dev/null +++ b/synapse/metrics/_reactor_metrics.py @@ -0,0 +1,83 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import select +import time +from typing import Any, Iterable, List, Tuple + +from prometheus_client import Histogram, Metric +from prometheus_client.core import REGISTRY, GaugeMetricFamily + +from twisted.internet import reactor + +# +# Twisted reactor metrics +# + +tick_time = Histogram( + "python_twisted_reactor_tick_time", + "Tick time of the Twisted reactor (sec)", + buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5], +) + + +class EpollWrapper: + """a wrapper for an epoll object which records the time between polls""" + + def __init__(self, poller: "select.epoll"): + self.last_polled = time.time() + self._poller = poller + + def poll(self, *args, **kwargs) -> List[Tuple[int, int]]: # type: ignore[no-untyped-def] + # record the time since poll() was last called. This gives a good proxy for + # how long it takes to run everything in the reactor - ie, how long anything + # waiting for the next tick will have to wait. + tick_time.observe(time.time() - self.last_polled) + + ret = self._poller.poll(*args, **kwargs) + + self.last_polled = time.time() + return ret + + def __getattr__(self, item: str) -> Any: + return getattr(self._poller, item) + + +class ReactorLastSeenMetric: + def __init__(self, epoll_wrapper: EpollWrapper): + self._epoll_wrapper = epoll_wrapper + + def collect(self) -> Iterable[Metric]: + cm = GaugeMetricFamily( + "python_twisted_reactor_last_seen", + "Seconds since the Twisted reactor was last seen", + ) + cm.add_metric([], time.time() - self._epoll_wrapper.last_polled) + yield cm + + +try: + # if the reactor has a `_poller` attribute, which is an `epoll` object + # (ie, it's an EPollReactor), we wrap the `epoll` with a thing that will + # measure the time between ticks + from select import epoll + + poller = reactor._poller # type: ignore[attr-defined] +except (AttributeError, ImportError): + pass +else: + if isinstance(poller, epoll): + poller = EpollWrapper(poller) + reactor._poller = poller # type: ignore[attr-defined] + REGISTRY.register(ReactorLastSeenMetric(poller)) |