summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2021-05-06 15:54:07 +0100
committer GitHub <noreply@github.com> 2021-05-06 15:54:07 +0100
commit 8771b1337da9faa3b60cf0ec0a128a7de856f19e (patch)
tree 48b4e8cae88de74ed6f43e79b1fc5a4ec0af55fe /synapse
parent Revert "Leave out optional keys from /sync (#9919)" (#9940) (diff)
download synapse-8771b1337da9faa3b60cf0ec0a128a7de856f19e.tar.xz
Export jemalloc stats to prometheus when used (#9882)
Diffstat (limited to 'synapse')
-rw-r--r-- synapse/app/_base.py 2
-rw-r--r-- synapse/metrics/__init__.py 1
-rw-r--r-- synapse/metrics/jemalloc.py 196
3 files changed, 199 insertions, 0 deletions
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 638e01c1b2..59918d789e 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -37,6 +37,7 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.crypto import context_factory
 from synapse.logging.context import PreserveLoggingContext
 from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.metrics.jemalloc import setup_jemalloc_stats
 from synapse.util.async_helpers import Linearizer
 from synapse.util.daemonize import daemonize_process
 from synapse.util.rlimit import change_resource_limit
@@ -115,6 +116,7 @@ def start_reactor(
 
     def run():
         logger.info("Running")
+        setup_jemalloc_stats()
         change_resource_limit(soft_file_limit)
         if gc_thresholds:
             gc.set_threshold(*gc_thresholds)
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index e671da26d5..fef2846669 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -629,6 +629,7 @@ try:
 except AttributeError:
     pass
 
+
 __all__ = [
     "MetricsResource",
     "generate_latest",
diff --git a/synapse/metrics/jemalloc.py b/synapse/metrics/jemalloc.py
new file mode 100644
index 0000000000..29ab6c0229
--- /dev/null
+++ b/synapse/metrics/jemalloc.py
@@ -0,0 +1,196 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ctypes
+import logging
+import os
+import re
+from typing import Optional
+
+from synapse.metrics import REGISTRY, GaugeMetricFamily
+
+logger = logging.getLogger(__name__)
+
+
+def _setup_jemalloc_stats():
+    """Checks to see if jemalloc is loaded, and hooks up a collector to record
+    statistics exposed by jemalloc.
+
+    Returns early, after a debug log, if `/proc/self/maps` does not exist or
+    if no mapped path containing "libjemalloc" is found in it. Otherwise the
+    mapped library is opened with `ctypes.CDLL` and a `JemallocCollector` is
+    registered with `REGISTRY`.
+
+    Errors raised while reading `/proc/self/maps` or while loading the library
+    are not caught here and propagate to the caller.
+    """
+
+    # Try to find the loaded jemalloc shared library, if any. We need to
+    # introspect into what is loaded, rather than loading whatever is on the
+    # path, as if we load a *different* jemalloc version things will seg fault.
+
+    # We look in `/proc/self/maps`, which only exists on linux.
+    if not os.path.exists("/proc/self/maps"):
+        logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
+        return
+
+    # We're looking for a path at the end of the line that includes
+    # "libjemalloc".
+    regex = re.compile(r"/\S+/libjemalloc.*$")
+
+    jemalloc_path = None
+    # Every line is scanned (there is no early exit), so if more than one line
+    # matches, the path from the last matching line is the one that is kept.
+    with open("/proc/self/maps") as f:
+        for line in f:
+            match = regex.search(line.strip())
+            if match:
+                jemalloc_path = match.group()
+
+    if not jemalloc_path:
+        # No loaded jemalloc was found.
+        logger.debug("jemalloc not found")
+        return
+
+    logger.debug("Found jemalloc at %s", jemalloc_path)
+
+    # Open the library via the exact path found in the process's memory map.
+    jemalloc = ctypes.CDLL(jemalloc_path)
+
+    def _mallctl(
+        name: str, read: bool = True, write: Optional[int] = None
+    ) -> Optional[int]:
+        """Wrapper around `mallctl` for reading and writing integers to
+        jemalloc.
+
+        Values are passed to and from jemalloc as `size_t` (`ctypes.c_size_t`).
+
+        Args:
+            name: The name of the option to read from/write to. It is encoded
+                as ASCII before being passed to `mallctl`.
+            read: Whether to try and read the value.
+            write: The value to write, if given.
+
+        Returns:
+            The value read if `read` is True, otherwise None.
+
+        Raises:
+            An exception if `mallctl` returns a non-zero error code.
+        """
+
+        # When not reading, all three stay None; ctypes passes None as a NULL
+        # pointer, which per the interface below means "don't return the old
+        # value".
+        input_var = None
+        input_var_ref = None
+        input_len_ref = None
+        if read:
+            # Output buffer for the value, plus its size in bytes.
+            input_var = ctypes.c_size_t(0)
+            input_len = ctypes.c_size_t(ctypes.sizeof(input_var))
+
+            input_var_ref = ctypes.byref(input_var)
+            input_len_ref = ctypes.byref(input_len)
+
+        # With nothing to write, newp stays NULL and newlen stays 0.
+        write_var_ref = None
+        write_len = ctypes.c_size_t(0)
+        if write is not None:
+            write_var = ctypes.c_size_t(write)
+            write_len = ctypes.c_size_t(ctypes.sizeof(write_var))
+
+            write_var_ref = ctypes.byref(write_var)
+
+        # The interface is:
+        #
+        #   int mallctl(
+        #       const char *name,
+        #       void *oldp,
+        #       size_t *oldlenp,
+        #       void *newp,
+        #       size_t newlen
+        #   )
+        #
+        # Where oldp/oldlenp is a buffer where the old value will be written to
+        # (if not null), and newp/newlen is the buffer with the new value to set
+        # (if not null). Note that they're all references *except* newlen.
+        result = jemalloc.mallctl(
+            name.encode("ascii"),
+            input_var_ref,
+            input_len_ref,
+            write_var_ref,
+            write_len,
+        )
+
+        # Any non-zero return code is treated as a failure.
+        if result != 0:
+            raise Exception("Failed to call mallctl")
+
+        # Nothing was read (`read` was False), so there is no value to return.
+        if input_var is None:
+            return None
+
+        return input_var.value
+
+    def _jemalloc_refresh_stats() -> None:
+        """Request that jemalloc updates its internal statistics. This needs to
+        be called before querying for stats, otherwise it will return stale
+        values.
+
+        This is done by writing `1` to the "epoch" option without reading it
+        back. A failure is logged at warning level and otherwise ignored.
+        """
+        try:
+            _mallctl("epoch", read=False, write=1)
+        except Exception as e:
+            logger.warning("Failed to reload jemalloc stats: %s", e)
+
+    class JemallocCollector:
+        """Metrics for internal jemalloc stats."""
+
+        def collect(self):
+            """Refresh jemalloc's stats and yield them as a single gauge family.
+
+            The family is named `jemalloc_stats_app_memory_bytes` and has one
+            sample per stat, labelled by `type`. A stat that cannot be read is
+            logged at warning level and left out of the family.
+            """
+            _jemalloc_refresh_stats()
+
+            g = GaugeMetricFamily(
+                "jemalloc_stats_app_memory_bytes",
+                "The stats reported by jemalloc",
+                labels=["type"],
+            )
+
+            # Read the relevant global stats from jemalloc. Note that these may
+            # not be accurate if python is configured to use its internal small
+            # object allocator (which is on by default, disable by setting the
+            # env `PYTHONMALLOC=malloc`).
+            #
+            # See the jemalloc manpage for details about what each value means,
+            # roughly:
+            #   - allocated ─ Total number of bytes allocated by the app
+            #   - active ─ Total number of bytes in active pages allocated by
+            #     the application, this is bigger than `allocated`.
+            #   - resident ─ Maximum number of bytes in physically resident data
+            #     pages mapped by the allocator, comprising all pages dedicated
+            #     to allocator metadata, pages backing active allocations, and
+            #     unused dirty pages. This is bigger than `active`.
+            #   - mapped ─ Total number of bytes in active extents mapped by the
+            #     allocator.
+            #   - metadata ─ Total number of bytes dedicated to jemalloc
+            #     metadata.
+            for t in (
+                "allocated",
+                "active",
+                "resident",
+                "mapped",
+                "metadata",
+            ):
+                try:
+                    value = _mallctl(f"stats.{t}")
+                except Exception as e:
+                    # There was an error fetching the value, skip.
+                    logger.warning("Failed to read jemalloc stats.%s: %s", t, e)
+                    continue
+
+                # One sample per stat, with the stat name as the `type` label.
+                g.add_metric([t], value=value)
+
+            yield g
+
+    REGISTRY.register(JemallocCollector())
+
+    logger.debug("Added jemalloc stats")
+
+
+def setup_jemalloc_stats():
+    """Try to setup jemalloc stats, if jemalloc is loaded.
+
+    Best-effort wrapper around `_setup_jemalloc_stats`: any `Exception` it
+    raises is logged at info level and swallowed rather than propagated.
+    """
+
+    try:
+        _setup_jemalloc_stats()
+    except Exception as e:
+        # This should only happen if we find the loaded jemalloc library, but
+        # fail to load it somehow (e.g. we somehow picked the wrong version).
+        logger.info("Failed to setup collector to record jemalloc stats: %s", e)