diff options
Diffstat (limited to 'contrib/prometheus')
-rw-r--r-- | contrib/prometheus/README | 37 | ||||
-rw-r--r-- | contrib/prometheus/consoles/synapse.html | 395 | ||||
-rw-r--r-- | contrib/prometheus/synapse-v1.rules | 21 | ||||
-rw-r--r-- | contrib/prometheus/synapse-v2.rules | 60 |
4 files changed, 513 insertions, 0 deletions
diff --git a/contrib/prometheus/README b/contrib/prometheus/README new file mode 100644 index 0000000000..7b733172e6 --- /dev/null +++ b/contrib/prometheus/README @@ -0,0 +1,37 @@ +This directory contains some sample monitoring config for using the +'Prometheus' monitoring server against synapse. + +To use it, first install prometheus by following the instructions at + + http://prometheus.io/ + +### for Prometheus v1 +Add a new job to the main prometheus.conf file: + + job: { + name: "synapse" + + target_group: { + target: "http://SERVER.LOCATION.HERE:PORT/_synapse/metrics" + } + } + +### for Prometheus v2 +Add a new job to the main prometheus.yml file: + + - job_name: "synapse" + metrics_path: "/_synapse/metrics" + # when endpoint uses https: + scheme: "https" + + static_configs: + - targets: ['SERVER.LOCATION:PORT'] + +To use `synapse-v2.rules` add + + rule_files: + - "/PATH/TO/synapse-v2.rules" + +Metrics are disabled by default when running synapse; they must be enabled +with the 'enable-metrics' option, either in the synapse config file or as a +command-line option. diff --git a/contrib/prometheus/consoles/synapse.html b/contrib/prometheus/consoles/synapse.html new file mode 100644 index 0000000000..69aa87f85e --- /dev/null +++ b/contrib/prometheus/consoles/synapse.html @@ -0,0 +1,395 @@ +{{ template "head" . }} + +{{ template "prom_content_head" . 
}} +<h1>System Resources</h1> + +<h3>CPU</h3> +<div id="process_resource_utime"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#process_resource_utime"), + expr: "rate(process_cpu_seconds_total[2m]) * 100", + name: "[[job]]", + min: 0, + max: 100, + renderer: "line", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "%", + yTitle: "CPU Usage" +}) +</script> + +<h3>Memory</h3> +<div id="process_resource_maxrss"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#process_resource_maxrss"), + expr: "process_psutil_rss:max", + name: "Maxrss", + min: 0, + renderer: "line", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "bytes", + yTitle: "Usage" +}) +</script> + +<h3>File descriptors</h3> +<div id="process_fds"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#process_fds"), + expr: "process_open_fds{job='synapse'}", + name: "FDs", + min: 0, + renderer: "line", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "", + yTitle: "Descriptors" +}) +</script> + +<h1>Reactor</h1> + +<h3>Total reactor time</h3> +<div id="reactor_total_time"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#reactor_total_time"), + expr: "rate(python_twisted_reactor_tick_time:total[2m]) / 1000", + name: "time", + max: 1, + min: 0, + renderer: "area", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/s", + yTitle: "Usage" +}) +</script> + +<h3>Average reactor tick time</h3> +<div id="reactor_average_time"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#reactor_average_time"), + expr: 
"rate(python_twisted_reactor_tick_time:total[2m]) / rate(python_twisted_reactor_tick_time:count[2m]) / 1000", + name: "time", + min: 0, + renderer: "line", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s", + yTitle: "Time" +}) +</script> + +<h3>Pending calls per tick</h3> +<div id="reactor_pending_calls"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#reactor_pending_calls"), + expr: "rate(python_twisted_reactor_pending_calls:total[30s])/rate(python_twisted_reactor_pending_calls:count[30s])", + name: "calls", + min: 0, + renderer: "line", + height: 150, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yTitle: "Pending Calls" +}) +</script> + +<h1>Storage</h1> + +<h3>Queries</h3> +<div id="synapse_storage_query_time"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_storage_query_time"), + expr: "rate(synapse_storage_query_time:count[2m])", + name: "[[verb]]", + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "queries/s", + yTitle: "Queries" +}) +</script> + +<h3>Transactions</h3> +<div id="synapse_storage_transaction_time"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_storage_transaction_time"), + expr: "rate(synapse_storage_transaction_time:count[2m])", + name: "[[desc]]", + min: 0, + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "txn/s", + yTitle: "Transactions" +}) +</script> + +<h3>Transaction execution time</h3> +<div id="synapse_storage_transactions_time_msec"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_storage_transactions_time_msec"), + expr: 
"rate(synapse_storage_transaction_time:total[2m]) / 1000", + name: "[[desc]]", + min: 0, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/s", + yTitle: "Usage" +}) +</script> + +<h3>Database scheduling latency</h3> +<div id="synapse_storage_schedule_time"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_storage_schedule_time"), + expr: "rate(synapse_storage_schedule_time:total[2m]) / 1000", + name: "Total latency", + min: 0, + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/s", + yTitle: "Usage" +}) +</script> + +<h3>Cache hit ratio</h3> +<div id="synapse_cache_ratio"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_cache_ratio"), + expr: "rate(synapse_util_caches_cache:total[2m]) * 100", + name: "[[name]]", + min: 0, + max: 100, + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "%", + yTitle: "Percentage" +}) +</script> + +<h3>Cache size</h3> +<div id="synapse_cache_size"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_cache_size"), + expr: "synapse_util_caches_cache:size", + name: "[[name]]", + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "", + yTitle: "Items" +}) +</script> + +<h1>Requests</h1> + +<h3>Requests by Servlet</h3> +<div id="synapse_http_server_request_count_servlet"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_request_count_servlet"), + expr: "rate(synapse_http_server_request_count:servlet[2m])", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "req/s", + 
yTitle: "Requests" +}) +</script> +<h4> (without <tt>EventStreamRestServlet</tt> or <tt>SyncRestServlet</tt>)</h4> +<div id="synapse_http_server_request_count_servlet_minus_events"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_request_count_servlet_minus_events"), + expr: "rate(synapse_http_server_request_count:servlet{servlet!=\"EventStreamRestServlet\", servlet!=\"SyncRestServlet\"}[2m])", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "req/s", + yTitle: "Requests" +}) +</script> + +<h3>Average response times</h3> +<div id="synapse_http_server_response_time_avg"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_response_time_avg"), + expr: "rate(synapse_http_server_response_time_seconds[2m]) / rate(synapse_http_server_response_count[2m]) / 1000", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/req", + yTitle: "Response time" +}) +</script> + +<h3>All responses by code</h3> +<div id="synapse_http_server_responses"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_responses"), + expr: "rate(synapse_http_server_responses[2m])", + name: "[[method]] / [[code]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "req/s", + yTitle: "Requests" +}) +</script> + +<h3>Error responses by code</h3> +<div id="synapse_http_server_responses_err"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_responses_err"), + expr: "rate(synapse_http_server_responses{code=~\"[45]..\"}[2m])", + name: "[[method]] / [[code]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + 
yUnits: "req/s", + yTitle: "Requests" +}) +</script> + + +<h3>CPU Usage</h3> +<div id="synapse_http_server_response_ru_utime"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_response_ru_utime"), + expr: "rate(synapse_http_server_response_ru_utime_seconds[2m])", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/s", + yTitle: "CPU Usage" +}) +</script> + + +<h3>DB Usage</h3> +<div id="synapse_http_server_response_db_txn_duration"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_response_db_txn_duration"), + expr: "rate(synapse_http_server_response_db_txn_duration_seconds[2m])", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/s", + yTitle: "DB Usage" +}) +</script> + + +<h3>Average event send times</h3> +<div id="synapse_http_server_send_time_avg"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_http_server_send_time_avg"), + expr: "rate(synapse_http_server_response_time_seconds{servlet='RoomSendEventRestServlet'}[2m]) / rate(synapse_http_server_response_count{servlet='RoomSendEventRestServlet'}[2m]) / 1000", + name: "[[servlet]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "s/req", + yTitle: "Response time" +}) +</script> + +<h1>Federation</h1> + +<h3>Sent Messages</h3> +<div id="synapse_federation_client_sent"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_federation_client_sent"), + expr: "rate(synapse_federation_client_sent[2m])", + name: "[[type]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "req/s", + yTitle: "Requests" +}) +</script> + +<h3>Received 
Messages</h3> +<div id="synapse_federation_server_received"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_federation_server_received"), + expr: "rate(synapse_federation_server_received[2m])", + name: "[[type]]", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "req/s", + yTitle: "Requests" +}) +</script> + +<h3>Pending</h3> +<div id="synapse_federation_transaction_queue_pending"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_federation_transaction_queue_pending"), + expr: "synapse_federation_transaction_queue_pending", + name: "[[type]]", + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "", + yTitle: "Units" +}) +</script> + +<h1>Clients</h1> + +<h3>Notifiers</h3> +<div id="synapse_notifier_listeners"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_notifier_listeners"), + expr: "synapse_notifier_listeners", + name: "listeners", + min: 0, + yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix, + yUnits: "", + yTitle: "Listeners" +}) +</script> + +<h3>Notified Events</h3> +<div id="synapse_notifier_notified_events"></div> +<script> +new PromConsole.Graph({ + node: document.querySelector("#synapse_notifier_notified_events"), + expr: "rate(synapse_notifier_notified_events[2m])", + name: "events", + yAxisFormatter: PromConsole.NumberFormatter.humanize, + yHoverFormatter: PromConsole.NumberFormatter.humanize, + yUnits: "events/s", + yTitle: "Event rate" +}) +</script> + +{{ template "prom_content_tail" . 
}} + +{{ template "tail" }} diff --git a/contrib/prometheus/synapse-v1.rules b/contrib/prometheus/synapse-v1.rules new file mode 100644 index 0000000000..4c900ba537 --- /dev/null +++ b/contrib/prometheus/synapse-v1.rules @@ -0,0 +1,21 @@ +synapse_federation_transaction_queue_pendingEdus:total = sum(synapse_federation_transaction_queue_pendingEdus or absent(synapse_federation_transaction_queue_pendingEdus)*0) +synapse_federation_transaction_queue_pendingPdus:total = sum(synapse_federation_transaction_queue_pendingPdus or absent(synapse_federation_transaction_queue_pendingPdus)*0) + +synapse_http_server_request_count:method{servlet=""} = sum(synapse_http_server_request_count) by (method) +synapse_http_server_request_count:servlet{method=""} = sum(synapse_http_server_request_count) by (servlet) + +synapse_http_server_request_count:total{servlet=""} = sum(synapse_http_server_request_count:by_method) by (servlet) + +synapse_cache:hit_ratio_5m = rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m]) +synapse_cache:hit_ratio_30s = rate(synapse_util_caches_cache:hits[30s]) / rate(synapse_util_caches_cache:total[30s]) + +synapse_federation_client_sent{type="EDU"} = synapse_federation_client_sent_edus + 0 +synapse_federation_client_sent{type="PDU"} = synapse_federation_client_sent_pdu_destinations:count + 0 +synapse_federation_client_sent{type="Query"} = sum(synapse_federation_client_sent_queries) by (job) + +synapse_federation_server_received{type="EDU"} = synapse_federation_server_received_edus + 0 +synapse_federation_server_received{type="PDU"} = synapse_federation_server_received_pdus + 0 +synapse_federation_server_received{type="Query"} = sum(synapse_federation_server_received_queries) by (job) + +synapse_federation_transaction_queue_pending{type="EDU"} = synapse_federation_transaction_queue_pending_edus + 0 +synapse_federation_transaction_queue_pending{type="PDU"} = synapse_federation_transaction_queue_pending_pdus + 0 diff --git 
a/contrib/prometheus/synapse-v2.rules b/contrib/prometheus/synapse-v2.rules new file mode 100644 index 0000000000..6ccca2daaf --- /dev/null +++ b/contrib/prometheus/synapse-v2.rules @@ -0,0 +1,60 @@ +groups: +- name: synapse + rules: + - record: "synapse_federation_transaction_queue_pendingEdus:total" + expr: "sum(synapse_federation_transaction_queue_pendingEdus or absent(synapse_federation_transaction_queue_pendingEdus)*0)" + - record: "synapse_federation_transaction_queue_pendingPdus:total" + expr: "sum(synapse_federation_transaction_queue_pendingPdus or absent(synapse_federation_transaction_queue_pendingPdus)*0)" + - record: 'synapse_http_server_request_count:method' + labels: + servlet: "" + expr: "sum(synapse_http_server_request_count) by (method)" + - record: 'synapse_http_server_request_count:servlet' + labels: + method: "" + expr: 'sum(synapse_http_server_request_count) by (servlet)' + + - record: 'synapse_http_server_request_count:total' + labels: + servlet: "" + expr: 'sum(synapse_http_server_request_count:by_method) by (servlet)' + + - record: 'synapse_cache:hit_ratio_5m' + expr: 'rate(synapse_util_caches_cache:hits[5m]) / rate(synapse_util_caches_cache:total[5m])' + - record: 'synapse_cache:hit_ratio_30s' + expr: 'rate(synapse_util_caches_cache:hits[30s]) / rate(synapse_util_caches_cache:total[30s])' + + - record: 'synapse_federation_client_sent' + labels: + type: "EDU" + expr: 'synapse_federation_client_sent_edus + 0' + - record: 'synapse_federation_client_sent' + labels: + type: "PDU" + expr: 'synapse_federation_client_sent_pdu_destinations:count + 0' + - record: 'synapse_federation_client_sent' + labels: + type: "Query" + expr: 'sum(synapse_federation_client_sent_queries) by (job)' + + - record: 'synapse_federation_server_received' + labels: + type: "EDU" + expr: 'synapse_federation_server_received_edus + 0' + - record: 'synapse_federation_server_received' + labels: + type: "PDU" + expr: 'synapse_federation_server_received_pdus + 0' + - record: 
'synapse_federation_server_received' + labels: + type: "Query" + expr: 'sum(synapse_federation_server_received_queries) by (job)' + + - record: 'synapse_federation_transaction_queue_pending' + labels: + type: "EDU" + expr: 'synapse_federation_transaction_queue_pending_edus + 0' + - record: 'synapse_federation_transaction_queue_pending' + labels: + type: "PDU" + expr: 'synapse_federation_transaction_queue_pending_pdus + 0' |