diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index fb476ddaf5..f6f7b2bf42 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -28,6 +28,7 @@ from twisted.protocols.tls import TLSMemoryBIOFactory
import synapse
from synapse.app import check_bind_error
+from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.config.server import ListenerConfig
from synapse.crypto import context_factory
from synapse.logging.context import PreserveLoggingContext
@@ -271,9 +272,19 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
hs.get_datastore().db_pool.start_profiling()
hs.get_pusherpool().start()
+ # Log when we start the shutdown process.
+ hs.get_reactor().addSystemEventTrigger(
+ "before", "shutdown", logger.info, "Shutting down..."
+ )
+
setup_sentry(hs)
setup_sdnotify(hs)
+ # If background tasks are running on the main process, start collecting the
+ # phone home stats.
+ if hs.config.run_background_tasks:
+ start_phone_stats_home(hs)
+
# We now freeze all allocated objects in the hopes that (almost)
# everything currently allocated are things that will be used for the
# rest of time. Doing so means less work each GC (hopefully).
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index 7d309b1bb0..b4bd4d8e7a 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -89,7 +89,7 @@ async def export_data_command(hs, args):
user_id = args.user_id
directory = args.output_directory
- res = await hs.get_handlers().admin_handler.export_user_data(
+ res = await hs.get_admin_handler().export_user_data(
user_id, FileExfiltrationWriter(user_id, directory=directory)
)
print(res)
@@ -208,6 +208,7 @@ def start(config_options):
# Explicitly disable background processes
config.update_user_directory = False
+ config.run_background_tasks = False
config.start_pushers = False
config.send_federation = False
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index c38413c893..d53181deb1 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -127,12 +127,16 @@ from synapse.rest.health import HealthResource
from synapse.rest.key.v2 import KeyApiV2Resource
from synapse.server import HomeServer, cache_in_self
from synapse.storage.databases.main.censor_events import CensorEventsStore
+from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
from synapse.storage.databases.main.media_repository import MediaRepositoryStore
+from synapse.storage.databases.main.metrics import ServerMetricsStore
from synapse.storage.databases.main.monthly_active_users import (
MonthlyActiveUsersWorkerStore,
)
from synapse.storage.databases.main.presence import UserPresenceState
from synapse.storage.databases.main.search import SearchWorkerStore
+from synapse.storage.databases.main.stats import StatsStore
+from synapse.storage.databases.main.transactions import TransactionWorkerStore
from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
from synapse.storage.databases.main.user_directory import UserDirectoryStore
from synapse.types import ReadReceipt
@@ -454,6 +458,7 @@ class GenericWorkerSlavedStore(
# FIXME(#3714): We need to add UserDirectoryStore as we write directly
# rather than going via the correct worker.
UserDirectoryStore,
+ StatsStore,
UIAuthWorkerStore,
SlavedDeviceInboxStore,
SlavedDeviceStore,
@@ -463,6 +468,7 @@ class GenericWorkerSlavedStore(
SlavedAccountDataStore,
SlavedPusherStore,
CensorEventsStore,
+ ClientIpWorkerStore,
SlavedEventStore,
SlavedKeyStore,
RoomStore,
@@ -476,7 +482,9 @@ class GenericWorkerSlavedStore(
SlavedFilteringStore,
MonthlyActiveUsersWorkerStore,
MediaRepositoryStore,
+ ServerMetricsStore,
SearchWorkerStore,
+ TransactionWorkerStore,
BaseSlavedStore,
):
pass
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index dff739e106..2b5465417f 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -17,14 +17,10 @@
import gc
import logging
-import math
import os
-import resource
import sys
from typing import Iterable
-from prometheus_client import Gauge
-
from twisted.application import service
from twisted.internet import defer, reactor
from twisted.python.failure import Failure
@@ -60,8 +56,6 @@ from synapse.http.server import (
from synapse.http.site import SynapseSite
from synapse.logging.context import LoggingContext
from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
-from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.module_api import ModuleApi
from synapse.python_dependencies import check_requirements
from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
@@ -111,7 +105,7 @@ class SynapseHomeServer(HomeServer):
additional_resources = listener_config.http_options.additional_resources
logger.debug("Configuring additional resources: %r", additional_resources)
- module_api = ModuleApi(self, self.get_auth_handler())
+ module_api = self.get_module_api()
for path, resmodule in additional_resources.items():
handler_cls, config = load_module(resmodule)
handler = handler_cls(config, module_api)
@@ -334,20 +328,6 @@ class SynapseHomeServer(HomeServer):
logger.warning("Unrecognized listener type: %s", listener.type)
-# Gauges to expose monthly active user control metrics
-current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU")
-current_mau_by_service_gauge = Gauge(
- "synapse_admin_mau_current_mau_by_service",
- "Current MAU by service",
- ["app_service"],
-)
-max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit")
-registered_reserved_users_mau_gauge = Gauge(
- "synapse_admin_mau:registered_reserved_users",
- "Registered users with reserved threepids",
-)
-
-
def setup(config_options):
"""
Args:
@@ -389,8 +369,6 @@ def setup(config_options):
except UpgradeDatabaseException as e:
quit_with_error("Failed to upgrade database: %s" % (e,))
- hs.setup_master()
-
async def do_acme() -> bool:
"""
Reprovision an ACME certificate, if it's required.
@@ -486,92 +464,6 @@ class SynapseService(service.Service):
return self._port.stopListening()
-# Contains the list of processes we will be monitoring
-# currently either 0 or 1
-_stats_process = []
-
-
-async def phone_stats_home(hs, stats, stats_process=_stats_process):
- logger.info("Gathering stats for reporting")
- now = int(hs.get_clock().time())
- uptime = int(now - hs.start_time)
- if uptime < 0:
- uptime = 0
-
- #
- # Performance statistics. Keep this early in the function to maintain reliability of `test_performance_100` test.
- #
- old = stats_process[0]
- new = (now, resource.getrusage(resource.RUSAGE_SELF))
- stats_process[0] = new
-
- # Get RSS in bytes
- stats["memory_rss"] = new[1].ru_maxrss
-
- # Get CPU time in % of a single core, not % of all cores
- used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
- old[1].ru_utime + old[1].ru_stime
- )
- if used_cpu_time == 0 or new[0] == old[0]:
- stats["cpu_average"] = 0
- else:
- stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)
-
- #
- # General statistics
- #
-
- stats["homeserver"] = hs.config.server_name
- stats["server_context"] = hs.config.server_context
- stats["timestamp"] = now
- stats["uptime_seconds"] = uptime
- version = sys.version_info
- stats["python_version"] = "{}.{}.{}".format(
- version.major, version.minor, version.micro
- )
- stats["total_users"] = await hs.get_datastore().count_all_users()
-
- total_nonbridged_users = await hs.get_datastore().count_nonbridged_users()
- stats["total_nonbridged_users"] = total_nonbridged_users
-
- daily_user_type_results = await hs.get_datastore().count_daily_user_type()
- for name, count in daily_user_type_results.items():
- stats["daily_user_type_" + name] = count
-
- room_count = await hs.get_datastore().get_room_count()
- stats["total_room_count"] = room_count
-
- stats["daily_active_users"] = await hs.get_datastore().count_daily_users()
- stats["monthly_active_users"] = await hs.get_datastore().count_monthly_users()
- stats["daily_active_rooms"] = await hs.get_datastore().count_daily_active_rooms()
- stats["daily_messages"] = await hs.get_datastore().count_daily_messages()
-
- r30_results = await hs.get_datastore().count_r30_users()
- for name, count in r30_results.items():
- stats["r30_users_" + name] = count
-
- daily_sent_messages = await hs.get_datastore().count_daily_sent_messages()
- stats["daily_sent_messages"] = daily_sent_messages
- stats["cache_factor"] = hs.config.caches.global_factor
- stats["event_cache_size"] = hs.config.caches.event_cache_size
-
- #
- # Database version
- #
-
- # This only reports info about the *main* database.
- stats["database_engine"] = hs.get_datastore().db_pool.engine.module.__name__
- stats["database_server_version"] = hs.get_datastore().db_pool.engine.server_version
-
- logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats))
- try:
- await hs.get_proxied_http_client().put_json(
- hs.config.report_stats_endpoint, stats
- )
- except Exception as e:
- logger.warning("Error reporting stats: %s", e)
-
-
def run(hs):
PROFILE_SYNAPSE = False
if PROFILE_SYNAPSE:
@@ -597,81 +489,6 @@ def run(hs):
ThreadPool._worker = profile(ThreadPool._worker)
reactor.run = profile(reactor.run)
- clock = hs.get_clock()
-
- stats = {}
-
- def performance_stats_init():
- _stats_process.clear()
- _stats_process.append(
- (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
- )
-
- def start_phone_stats_home():
- return run_as_background_process(
- "phone_stats_home", phone_stats_home, hs, stats
- )
-
- def generate_user_daily_visit_stats():
- return run_as_background_process(
- "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits
- )
-
- # Rather than update on per session basis, batch up the requests.
- # If you increase the loop period, the accuracy of user_daily_visits
- # table will decrease
- clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)
-
- # monthly active user limiting functionality
- def reap_monthly_active_users():
- return run_as_background_process(
- "reap_monthly_active_users", hs.get_datastore().reap_monthly_active_users
- )
-
- clock.looping_call(reap_monthly_active_users, 1000 * 60 * 60)
- reap_monthly_active_users()
-
- async def generate_monthly_active_users():
- current_mau_count = 0
- current_mau_count_by_service = {}
- reserved_users = ()
- store = hs.get_datastore()
- if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
- current_mau_count = await store.get_monthly_active_count()
- current_mau_count_by_service = (
- await store.get_monthly_active_count_by_service()
- )
- reserved_users = await store.get_registered_reserved_users()
- current_mau_gauge.set(float(current_mau_count))
-
- for app_service, count in current_mau_count_by_service.items():
- current_mau_by_service_gauge.labels(app_service).set(float(count))
-
- registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
- max_mau_gauge.set(float(hs.config.max_mau_value))
-
- def start_generate_monthly_active_users():
- return run_as_background_process(
- "generate_monthly_active_users", generate_monthly_active_users
- )
-
- start_generate_monthly_active_users()
- if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
- clock.looping_call(start_generate_monthly_active_users, 5 * 60 * 1000)
- # End of monthly active user settings
-
- if hs.config.report_stats:
- logger.info("Scheduling stats reporting for 3 hour intervals")
- clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000)
-
- # We need to defer this init for the cases that we daemonize
- # otherwise the process ID we get is that of the non-daemon process
- clock.call_later(0, performance_stats_init)
-
- # We wait 5 minutes to send the first set of stats as the server can
- # be quite busy the first few minutes
- clock.call_later(5 * 60, start_phone_stats_home)
-
_base.start_reactor(
"synapse-homeserver",
soft_file_limit=hs.config.soft_file_limit,
diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py
new file mode 100644
index 0000000000..c38cf8231f
--- /dev/null
+++ b/synapse/app/phone_stats_home.py
@@ -0,0 +1,190 @@
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import math
+import resource
+import sys
+
+from prometheus_client import Gauge
+
+from synapse.metrics.background_process_metrics import wrap_as_background_process
+
+logger = logging.getLogger("synapse.app.homeserver")
+
+# Contains the list of processes we will be monitoring;
+# currently there is either 0 or 1 entry.
+_stats_process = []
+
+# Gauges to expose monthly active user control metrics
+current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU")
+current_mau_by_service_gauge = Gauge(
+ "synapse_admin_mau_current_mau_by_service",
+ "Current MAU by service",
+ ["app_service"],
+)
+max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit")
+registered_reserved_users_mau_gauge = Gauge(
+ "synapse_admin_mau:registered_reserved_users",
+ "Registered users with reserved threepids",
+)
+
+
+@wrap_as_background_process("phone_stats_home")
+async def phone_stats_home(hs, stats, stats_process=_stats_process):
+ logger.info("Gathering stats for reporting")
+ now = int(hs.get_clock().time())
+ uptime = int(now - hs.start_time)
+ if uptime < 0:
+ uptime = 0
+
+ #
+ # Performance statistics. Keep this early in the function to maintain reliability of the `test_performance_100` test.
+ #
+ old = stats_process[0]
+ new = (now, resource.getrusage(resource.RUSAGE_SELF))
+ stats_process[0] = new
+
+ # Get RSS in bytes
+ stats["memory_rss"] = new[1].ru_maxrss
+
+ # Get CPU time in % of a single core, not % of all cores
+ used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
+ old[1].ru_utime + old[1].ru_stime
+ )
+ if used_cpu_time == 0 or new[0] == old[0]:
+ stats["cpu_average"] = 0
+ else:
+ stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)
+
+ #
+ # General statistics
+ #
+
+ stats["homeserver"] = hs.config.server_name
+ stats["server_context"] = hs.config.server_context
+ stats["timestamp"] = now
+ stats["uptime_seconds"] = uptime
+ version = sys.version_info
+ stats["python_version"] = "{}.{}.{}".format(
+ version.major, version.minor, version.micro
+ )
+ stats["total_users"] = await hs.get_datastore().count_all_users()
+
+ total_nonbridged_users = await hs.get_datastore().count_nonbridged_users()
+ stats["total_nonbridged_users"] = total_nonbridged_users
+
+ daily_user_type_results = await hs.get_datastore().count_daily_user_type()
+ for name, count in daily_user_type_results.items():
+ stats["daily_user_type_" + name] = count
+
+ room_count = await hs.get_datastore().get_room_count()
+ stats["total_room_count"] = room_count
+
+ stats["daily_active_users"] = await hs.get_datastore().count_daily_users()
+ stats["monthly_active_users"] = await hs.get_datastore().count_monthly_users()
+ stats["daily_active_rooms"] = await hs.get_datastore().count_daily_active_rooms()
+ stats["daily_messages"] = await hs.get_datastore().count_daily_messages()
+
+ r30_results = await hs.get_datastore().count_r30_users()
+ for name, count in r30_results.items():
+ stats["r30_users_" + name] = count
+
+ daily_sent_messages = await hs.get_datastore().count_daily_sent_messages()
+ stats["daily_sent_messages"] = daily_sent_messages
+ stats["cache_factor"] = hs.config.caches.global_factor
+ stats["event_cache_size"] = hs.config.caches.event_cache_size
+
+ #
+ # Database version
+ #
+
+ # This only reports info about the *main* database.
+ stats["database_engine"] = hs.get_datastore().db_pool.engine.module.__name__
+ stats["database_server_version"] = hs.get_datastore().db_pool.engine.server_version
+
+ #
+ # Logging configuration
+ #
+ synapse_logger = logging.getLogger("synapse")
+ log_level = synapse_logger.getEffectiveLevel()
+ stats["log_level"] = logging.getLevelName(log_level)
+
+ logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats))
+ try:
+ await hs.get_proxied_http_client().put_json(
+ hs.config.report_stats_endpoint, stats
+ )
+ except Exception as e:
+ logger.warning("Error reporting stats: %s", e)
+
+
+def start_phone_stats_home(hs):
+ """
+ Start the background tasks which report phone home stats.
+ """
+ clock = hs.get_clock()
+
+ stats = {}
+
+ def performance_stats_init():
+ _stats_process.clear()
+ _stats_process.append(
+ (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
+ )
+
+ # Rather than update on per session basis, batch up the requests.
+ # If you increase the loop period, the accuracy of user_daily_visits
+ # table will decrease
+ clock.looping_call(hs.get_datastore().generate_user_daily_visits, 5 * 60 * 1000)
+
+ # monthly active user limiting functionality
+ clock.looping_call(hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60)
+ hs.get_datastore().reap_monthly_active_users()
+
+ @wrap_as_background_process("generate_monthly_active_users")
+ async def generate_monthly_active_users():
+ current_mau_count = 0
+ current_mau_count_by_service = {}
+ reserved_users = ()
+ store = hs.get_datastore()
+ if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
+ current_mau_count = await store.get_monthly_active_count()
+ current_mau_count_by_service = (
+ await store.get_monthly_active_count_by_service()
+ )
+ reserved_users = await store.get_registered_reserved_users()
+ current_mau_gauge.set(float(current_mau_count))
+
+ for app_service, count in current_mau_count_by_service.items():
+ current_mau_by_service_gauge.labels(app_service).set(float(count))
+
+ registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
+ max_mau_gauge.set(float(hs.config.max_mau_value))
+
+ if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
+ generate_monthly_active_users()
+ clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
+ # End of monthly active user settings
+
+ if hs.config.report_stats:
+ logger.info("Scheduling stats reporting for 3 hour intervals")
+ clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)
+
+ # We need to defer this init for the cases that we daemonize
+ # otherwise the process ID we get is that of the non-daemon process
+ clock.call_later(0, performance_stats_init)
+
+ # We wait 5 minutes to send the first set of stats as the server can
+ # be quite busy the first few minutes
+ clock.call_later(5 * 60, phone_stats_home, hs, stats)
|