summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/6318.misc1
-rw-r--r--synapse/app/homeserver.py174
-rw-r--r--synapse/python_dependencies.py1
-rw-r--r--synapse/server.py2
-rw-r--r--tests/test_phone_home.py51
5 files changed, 146 insertions, 83 deletions
diff --git a/changelog.d/6318.misc b/changelog.d/6318.misc
new file mode 100644
index 0000000000..63527ccef4
--- /dev/null
+++ b/changelog.d/6318.misc
@@ -0,0 +1 @@
+Remove the dependency on psutil and replace functionality with the stdlib `resource` module.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 8d28076d92..00a7f8330e 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -19,12 +19,13 @@ from __future__ import print_function
 
 import gc
 import logging
+import math
 import os
+import resource
 import sys
 
 from six import iteritems
 
-import psutil
 from prometheus_client import Gauge
 
 from twisted.application import service
@@ -471,6 +472,87 @@ class SynapseService(service.Service):
         return self._port.stopListening()
 
 
+# Contains the list of processes we will be monitoring
+# currently either 0 or 1
+_stats_process = []
+
+
+@defer.inlineCallbacks
+def phone_stats_home(hs, stats, stats_process=_stats_process):
+    logger.info("Gathering stats for reporting")
+    now = int(hs.get_clock().time())
+    uptime = int(now - hs.start_time)
+    if uptime < 0:
+        uptime = 0
+
+    stats["homeserver"] = hs.config.server_name
+    stats["server_context"] = hs.config.server_context
+    stats["timestamp"] = now
+    stats["uptime_seconds"] = uptime
+    version = sys.version_info
+    stats["python_version"] = "{}.{}.{}".format(
+        version.major, version.minor, version.micro
+    )
+    stats["total_users"] = yield hs.get_datastore().count_all_users()
+
+    total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users()
+    stats["total_nonbridged_users"] = total_nonbridged_users
+
+    daily_user_type_results = yield hs.get_datastore().count_daily_user_type()
+    for name, count in iteritems(daily_user_type_results):
+        stats["daily_user_type_" + name] = count
+
+    room_count = yield hs.get_datastore().get_room_count()
+    stats["total_room_count"] = room_count
+
+    stats["daily_active_users"] = yield hs.get_datastore().count_daily_users()
+    stats["monthly_active_users"] = yield hs.get_datastore().count_monthly_users()
+    stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms()
+    stats["daily_messages"] = yield hs.get_datastore().count_daily_messages()
+
+    r30_results = yield hs.get_datastore().count_r30_users()
+    for name, count in iteritems(r30_results):
+        stats["r30_users_" + name] = count
+
+    daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
+    stats["daily_sent_messages"] = daily_sent_messages
+    stats["cache_factor"] = CACHE_SIZE_FACTOR
+    stats["event_cache_size"] = hs.config.event_cache_size
+
+    #
+    # Performance statistics
+    #
+    old = stats_process[0]
+    new = (now, resource.getrusage(resource.RUSAGE_SELF))
+    stats_process[0] = new
+
+    # Get RSS in bytes
+    stats["memory_rss"] = new[1].ru_maxrss
+
+    # Get CPU time in % of a single core, not % of all cores
+    used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
+        old[1].ru_utime + old[1].ru_stime
+    )
+    if used_cpu_time == 0 or new[0] == old[0]:
+        stats["cpu_average"] = 0
+    else:
+        stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)
+
+    #
+    # Database version
+    #
+
+    stats["database_engine"] = hs.get_datastore().database_engine_name
+    stats["database_server_version"] = hs.get_datastore().get_server_version()
+    logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats))
+    try:
+        yield hs.get_proxied_http_client().put_json(
+            hs.config.report_stats_endpoint, stats
+        )
+    except Exception as e:
+        logger.warning("Error reporting stats: %s", e)
+
+
 def run(hs):
     PROFILE_SYNAPSE = False
     if PROFILE_SYNAPSE:
@@ -497,91 +579,19 @@ def run(hs):
         reactor.run = profile(reactor.run)
 
     clock = hs.get_clock()
-    start_time = clock.time()
 
     stats = {}
 
-    # Contains the list of processes we will be monitoring
-    # currently either 0 or 1
-    stats_process = []
+    def performance_stats_init():
+        _stats_process.clear()
+        _stats_process.append(
+            (int(hs.get_clock().time(), resource.getrusage(resource.RUSAGE_SELF)))
+        )
 
     def start_phone_stats_home():
-        return run_as_background_process("phone_stats_home", phone_stats_home)
-
-    @defer.inlineCallbacks
-    def phone_stats_home():
-        logger.info("Gathering stats for reporting")
-        now = int(hs.get_clock().time())
-        uptime = int(now - start_time)
-        if uptime < 0:
-            uptime = 0
-
-        stats["homeserver"] = hs.config.server_name
-        stats["server_context"] = hs.config.server_context
-        stats["timestamp"] = now
-        stats["uptime_seconds"] = uptime
-        version = sys.version_info
-        stats["python_version"] = "{}.{}.{}".format(
-            version.major, version.minor, version.micro
-        )
-        stats["total_users"] = yield hs.get_datastore().count_all_users()
-
-        total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users()
-        stats["total_nonbridged_users"] = total_nonbridged_users
-
-        daily_user_type_results = yield hs.get_datastore().count_daily_user_type()
-        for name, count in iteritems(daily_user_type_results):
-            stats["daily_user_type_" + name] = count
-
-        room_count = yield hs.get_datastore().get_room_count()
-        stats["total_room_count"] = room_count
-
-        stats["daily_active_users"] = yield hs.get_datastore().count_daily_users()
-        stats["monthly_active_users"] = yield hs.get_datastore().count_monthly_users()
-        stats[
-            "daily_active_rooms"
-        ] = yield hs.get_datastore().count_daily_active_rooms()
-        stats["daily_messages"] = yield hs.get_datastore().count_daily_messages()
-
-        r30_results = yield hs.get_datastore().count_r30_users()
-        for name, count in iteritems(r30_results):
-            stats["r30_users_" + name] = count
-
-        daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
-        stats["daily_sent_messages"] = daily_sent_messages
-        stats["cache_factor"] = CACHE_SIZE_FACTOR
-        stats["event_cache_size"] = hs.config.event_cache_size
-
-        if len(stats_process) > 0:
-            stats["memory_rss"] = 0
-            stats["cpu_average"] = 0
-            for process in stats_process:
-                stats["memory_rss"] += process.memory_info().rss
-                stats["cpu_average"] += int(process.cpu_percent(interval=None))
-
-        stats["database_engine"] = hs.get_datastore().database_engine_name
-        stats["database_server_version"] = hs.get_datastore().get_server_version()
-        logger.info(
-            "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)
+        return run_as_background_process(
+            "phone_stats_home", phone_stats_home, hs, stats
         )
-        try:
-            yield hs.get_proxied_http_client().put_json(
-                hs.config.report_stats_endpoint, stats
-            )
-        except Exception as e:
-            logger.warning("Error reporting stats: %s", e)
-
-    def performance_stats_init():
-        try:
-            process = psutil.Process()
-            # Ensure we can fetch both, and make the initial request for cpu_percent
-            # so the next request will use this as the initial point.
-            process.memory_info().rss
-            process.cpu_percent(interval=None)
-            logger.info("report_stats can use psutil")
-            stats_process.append(process)
-        except (AttributeError):
-            logger.warning("Unable to read memory/cpu stats. Disabling reporting.")
 
     def generate_user_daily_visit_stats():
         return run_as_background_process(
@@ -626,7 +636,7 @@ def run(hs):
 
     if hs.config.report_stats:
         logger.info("Scheduling stats reporting for 3 hour intervals")
-        clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000)
+        clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)
 
         # We need to defer this init for the cases that we daemonize
         # otherwise the process ID we get is that of the non-daemon process
@@ -634,7 +644,7 @@ def run(hs):
 
         # We wait 5 minutes to send the first set of stats as the server can
         # be quite busy the first few minutes
-        clock.call_later(5 * 60, start_phone_stats_home)
+        clock.call_later(5 * 60, start_phone_stats_home, hs, stats)
 
     _base.start_reactor(
         "synapse-homeserver",
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index aa7da1c543..5871feaafd 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -61,7 +61,6 @@ REQUIREMENTS = [
     "bcrypt>=3.1.0",
     "pillow>=4.3.0",
     "sortedcontainers>=1.4.4",
-    "psutil>=2.0.0",
     "pymacaroons>=0.13.0",
     "msgpack>=0.5.2",
     "phonenumbers>=8.2.0",
diff --git a/synapse/server.py b/synapse/server.py
index f8aeebcff8..90c3b072e8 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -221,6 +221,7 @@ class HomeServer(object):
         self.hostname = hostname
         self._building = {}
         self._listening_services = []
+        self.start_time = None
 
         self.clock = Clock(reactor)
         self.distributor = Distributor()
@@ -240,6 +241,7 @@ class HomeServer(object):
             datastore = self.DATASTORE_CLASS(conn, self)
             self.datastores = DataStores(datastore, conn, self)
             conn.commit()
+        self.start_time = int(self.get_clock().time())
         logger.info("Finished setting up.")
 
     def setup_master(self):
diff --git a/tests/test_phone_home.py b/tests/test_phone_home.py
new file mode 100644
index 0000000000..7657bddea5
--- /dev/null
+++ b/tests/test_phone_home.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import resource
+
+import mock
+
+from synapse.app.homeserver import phone_stats_home
+
+from tests.unittest import HomeserverTestCase
+
+
+class PhoneHomeStatsTestCase(HomeserverTestCase):
+    def test_performance_frozen_clock(self):
+        """
+        If time doesn't move, don't error out.
+        """
+        past_stats = [
+            (self.hs.get_clock().time(), resource.getrusage(resource.RUSAGE_SELF))
+        ]
+        stats = {}
+        self.get_success(phone_stats_home(self.hs, stats, past_stats))
+        self.assertEqual(stats["cpu_average"], 0)
+
+    def test_performance_100(self):
+        """
+        1 second of usage over 1 second is 100% CPU usage.
+        """
+        real_res = resource.getrusage(resource.RUSAGE_SELF)
+        old_resource = mock.Mock(spec=real_res)
+        old_resource.ru_utime = real_res.ru_utime - 1
+        old_resource.ru_stime = real_res.ru_stime
+        old_resource.ru_maxrss = real_res.ru_maxrss
+
+        past_stats = [(self.hs.get_clock().time(), old_resource)]
+        stats = {}
+        self.reactor.advance(1)
+        self.get_success(phone_stats_home(self.hs, stats, past_stats))
+        self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5)