diff --git a/changelog.d/9882.misc b/changelog.d/9882.misc
new file mode 100644
index 0000000000..facfa31f38
--- /dev/null
+++ b/changelog.d/9882.misc
@@ -0,0 +1 @@
+Export jemalloc stats to Prometheus if it is being used.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 638e01c1b2..59918d789e 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -37,6 +37,7 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.crypto import context_factory
from synapse.logging.context import PreserveLoggingContext
from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.metrics.jemalloc import setup_jemalloc_stats
from synapse.util.async_helpers import Linearizer
from synapse.util.daemonize import daemonize_process
from synapse.util.rlimit import change_resource_limit
@@ -115,6 +116,7 @@ def start_reactor(
def run():
logger.info("Running")
+ setup_jemalloc_stats()
change_resource_limit(soft_file_limit)
if gc_thresholds:
gc.set_threshold(*gc_thresholds)
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index e671da26d5..fef2846669 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -629,6 +629,7 @@ try:
except AttributeError:
pass
+
__all__ = [
"MetricsResource",
"generate_latest",
diff --git a/synapse/metrics/jemalloc.py b/synapse/metrics/jemalloc.py
new file mode 100644
index 0000000000..29ab6c0229
--- /dev/null
+++ b/synapse/metrics/jemalloc.py
@@ -0,0 +1,196 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ctypes
+import logging
+import os
+import re
+from typing import Optional
+
+from synapse.metrics import REGISTRY, GaugeMetricFamily
+
+logger = logging.getLogger(__name__)
+
+
+def _setup_jemalloc_stats():
+ """Checks to see if jemalloc is loaded, and hooks up a collector to record
+ statistics exposed by jemalloc.
+ """
+
+ # Try to find the loaded jemalloc shared library, if any. We need to
+ # introspect into what is loaded, rather than loading whatever is on the
+ # path, as if we load a *different* jemalloc version things will seg fault.
+
+ # We look in `/proc/self/maps`, which only exists on linux.
+ if not os.path.exists("/proc/self/maps"):
+ logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
+ return
+
+ # We're looking for a path at the end of the line that includes
+ # "libjemalloc".
+ regex = re.compile(r"/\S+/libjemalloc.*$")
+
+ jemalloc_path = None
+ with open("/proc/self/maps") as f:
+ for line in f:
+ match = regex.search(line.strip())
+ if match:
+ jemalloc_path = match.group()
+
+ if not jemalloc_path:
+ # No loaded jemalloc was found.
+ logger.debug("jemalloc not found")
+ return
+
+ logger.debug("Found jemalloc at %s", jemalloc_path)
+
+ jemalloc = ctypes.CDLL(jemalloc_path)
+
+ def _mallctl(
+ name: str, read: bool = True, write: Optional[int] = None
+ ) -> Optional[int]:
+ """Wrapper around `mallctl` for reading and writing integers to
+ jemalloc.
+
+ Args:
+ name: The name of the option to read from/write to.
+ read: Whether to try and read the value.
+ write: The value to write, if given.
+
+ Returns:
+ The value read if `read` is True, otherwise None.
+
+ Raises:
+ An exception if `mallctl` returns a non-zero error code.
+ """
+
+ input_var = None
+ input_var_ref = None
+ input_len_ref = None
+ if read:
+ input_var = ctypes.c_size_t(0)
+ input_len = ctypes.c_size_t(ctypes.sizeof(input_var))
+
+ input_var_ref = ctypes.byref(input_var)
+ input_len_ref = ctypes.byref(input_len)
+
+ write_var_ref = None
+ write_len = ctypes.c_size_t(0)
+ if write is not None:
+ write_var = ctypes.c_size_t(write)
+ write_len = ctypes.c_size_t(ctypes.sizeof(write_var))
+
+ write_var_ref = ctypes.byref(write_var)
+
+ # The interface is:
+ #
+ # int mallctl(
+ # const char *name,
+ # void *oldp,
+ # size_t *oldlenp,
+ # void *newp,
+ # size_t newlen
+ # )
+ #
+ # Where oldp/oldlenp is a buffer where the old value will be written to
+ # (if not null), and newp/newlen is the buffer with the new value to set
+ # (if not null). Note that they're all references *except* newlen.
+ result = jemalloc.mallctl(
+ name.encode("ascii"),
+ input_var_ref,
+ input_len_ref,
+ write_var_ref,
+ write_len,
+ )
+
+ if result != 0:
+ raise Exception("Failed to call mallctl")
+
+ if input_var is None:
+ return None
+
+ return input_var.value
+
+ def _jemalloc_refresh_stats() -> None:
+ """Request that jemalloc updates its internal statistics. This needs to
+ be called before querying for stats, otherwise it will return stale
+ values.
+ """
+ try:
+ _mallctl("epoch", read=False, write=1)
+ except Exception as e:
+ logger.warning("Failed to reload jemalloc stats: %s", e)
+
+ class JemallocCollector:
+ """Metrics for internal jemalloc stats."""
+
+ def collect(self):
+ _jemalloc_refresh_stats()
+
+ g = GaugeMetricFamily(
+ "jemalloc_stats_app_memory_bytes",
+ "The stats reported by jemalloc",
+ labels=["type"],
+ )
+
+ # Read the relevant global stats from jemalloc. Note that these may
+ # not be accurate if python is configured to use its internal small
+ # object allocator (which is on by default, disable by setting the
+ # env `PYTHONMALLOC=malloc`).
+ #
+ # See the jemalloc manpage for details about what each value means,
+ # roughly:
+ # - allocated ─ Total number of bytes allocated by the app
+ # - active ─ Total number of bytes in active pages allocated by
+ # the application, this is bigger than `allocated`.
+ # - resident ─ Maximum number of bytes in physically resident data
+ # pages mapped by the allocator, comprising all pages dedicated
+ # to allocator metadata, pages backing active allocations, and
+ # unused dirty pages. This is bigger than `active`.
+ # - mapped ─ Total number of bytes in active extents mapped by the
+ # allocator.
+ # - metadata ─ Total number of bytes dedicated to jemalloc
+ # metadata.
+ for t in (
+ "allocated",
+ "active",
+ "resident",
+ "mapped",
+ "metadata",
+ ):
+ try:
+ value = _mallctl(f"stats.{t}")
+ except Exception as e:
+ # There was an error fetching the value, skip.
+ logger.warning("Failed to read jemalloc stats.%s: %s", t, e)
+ continue
+
+ g.add_metric([t], value=value)
+
+ yield g
+
+ REGISTRY.register(JemallocCollector())
+
+ logger.debug("Added jemalloc stats")
+
+
+def setup_jemalloc_stats():
+ """Try to setup jemalloc stats, if jemalloc is loaded."""
+
+ try:
+ _setup_jemalloc_stats()
+ except Exception as e:
+ # This should only happen if we find the loaded jemalloc library, but
+ # fail to load it somehow (e.g. we somehow picked the wrong version).
+ logger.info("Failed to setup collector to record jemalloc stats: %s", e)
|