summary refs log tree commit diff
path: root/synapse/metrics/__init__.py
blob: 6564b03eee59a2fd1062a3e11bee427485aee1ba (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Because otherwise 'resource' collides with synapse.metrics.resource
from __future__ import absolute_import

import logging
from resource import getrusage, getpagesize, RUSAGE_SELF
import os
import stat

from .metric import (
    CounterMetric, CallbackMetric, DistributionMetric, CacheMetric
)


logger = logging.getLogger(__name__)


# We keep all the available metrics in a single top-level dict, shared by the
# entire process. We don't currently support per-HomeServer instances of
# metrics, because in practice any one Python VM will host only one HomeServer
# anyway. This makes much of the implementation neater.
all_metrics = {}


class Metrics(object):
    """ A namespaced, mutable window onto the process-wide all_metrics dict.

    Every metric registered through an instance is stored in all_metrics under
    "<name_prefix>_<name>", so callers get sensibly namespaced metric names
    without having to build them by hand."""

    def __init__(self, name):
        # Joined (with an underscore) onto the front of every registered name
        self.name_prefix = name

    def _register(self, metric_class, name, *args, **kwargs):
        """ Constructs a metric_class instance under the prefixed name, stores
        it in all_metrics under that same name and returns it. Any further
        arguments are passed straight through to the metric_class constructor.
        """
        key = "%s_%s" % (self.name_prefix, name)
        all_metrics[key] = metric = metric_class(key, *args, **kwargs)
        return metric

    def register_counter(self, *args, **kwargs):
        """ Registers and returns a new CounterMetric; the first positional
        argument is the (unprefixed) metric name. """
        return self._register(CounterMetric, *args, **kwargs)

    def register_callback(self, *args, **kwargs):
        """ Registers and returns a new CallbackMetric; the first positional
        argument is the (unprefixed) metric name. """
        return self._register(CallbackMetric, *args, **kwargs)

    def register_distribution(self, *args, **kwargs):
        """ Registers and returns a new DistributionMetric; the first
        positional argument is the (unprefixed) metric name. """
        return self._register(DistributionMetric, *args, **kwargs)

    def register_cache(self, *args, **kwargs):
        """ Registers and returns a new CacheMetric; the first positional
        argument is the (unprefixed) metric name. """
        return self._register(CacheMetric, *args, **kwargs)


def get_metrics_for(pkg_name):
    """ Builds a Metrics instance whose name prefix is derived from pkg_name,
    for registering metrics namespaced under that package. """

    # Prometheus doesn't allow "." in metric names, so a dotted "package.name"
    # is turned into "package_name"
    prefix = "_".join(pkg_name.split("."))
    return Metrics(prefix)


def render_all():
    """ Renders every metric in all_metrics, in sorted name order, into one
    newline-separated string (newline-terminated whenever there is any
    output).

    A metric whose render() raises is replaced by a "# FAILED to render" line
    and the exception is logged, so the remaining metrics are still rendered.
    """
    # TODO(paul): Internal hack
    update_resource_metrics()

    lines = []
    for name in sorted(all_metrics):
        try:
            lines.extend(all_metrics[name].render())
        except Exception:
            lines.append("# FAILED to render %s" % name)
            logger.exception("Failed to render %s metric", name)

    # An empty last element makes the join below end with a newline
    lines.append("")
    return "\n".join(lines)


# Now register some standard process-wide state metrics, to give indications of
# process resource usage

# Size of a system memory page, in bytes
PAGE_SIZE = getpagesize()

# Most recent getrusage() snapshot; stays None until update_resource_metrics()
# has been called for the first time
rusage = None


def update_resource_metrics():
    """ Replaces the module-level rusage snapshot with a fresh reading of this
    process's own resource usage. """
    global rusage
    rusage = getrusage(RUSAGE_SELF)

resource_metrics = get_metrics_for("process.resource")

# getrusage() reports CPU times as float seconds; export them as msecs
resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000)
resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000)

# Linux reports ru_maxrss in kilobytes (see getrusage(2)), not in pages, so
# scale by 1024 to export bytes.
# NOTE(review): other platforms may use a different unit - confirm if needed
resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024)

# Maps the file-type bits of st_mode (as extracted by stat.S_IFMT) to the type
# name reported for descriptors of that type
TYPES = {
    stat.S_IFSOCK: "SOCK",
    stat.S_IFLNK: "LNK",
    stat.S_IFREG: "REG",
    stat.S_IFBLK: "BLK",
    stat.S_IFDIR: "DIR",
    stat.S_IFCHR: "CHR",
    stat.S_IFIFO: "FIFO",
}


def _process_fds():
    """ Counts this process's open file descriptors, grouped by file type.

    Returns a dict keyed by 1-tuples holding a type name - one of the values
    of TYPES, or "other" - mapping to the number of open descriptors of that
    type. Every key is always present, even when its count is zero.

    The descriptors are discovered by listing /proc/self/fd. If that directory
    can't be listed (not every OS provides it), all counts are left at zero
    rather than raising."""
    counts = {(k,): 0 for k in TYPES.values()}
    counts[("other",)] = 0

    try:
        fds = os.listdir("/proc/self/fd")
    except OSError:
        # Not every OS will have a /proc/self/fd directory
        return counts

    for fd in fds:
        try:
            # os.stat() follows the /proc symlink, so this describes the file
            # the descriptor refers to
            s = os.stat("/proc/self/fd/%s" % (fd))
        except OSError:
            # the dirh itself used by listdir() is usually missing by now
            continue

        t = TYPES.get(stat.S_IFMT(s.st_mode), "other")
        counts[(t,)] += 1

    return counts

# Register _process_fds as the callback metric "process_fds", declaring a
# single label, "type"
get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"])