From f299441cc67f31dcd47b8fdfda4a218bee9df9ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2020 18:26:36 +0100 Subject: Add ability to shard the federation sender (#7798) --- synapse/config/federation.py | 129 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 synapse/config/federation.py (limited to 'synapse/config/federation.py') diff --git a/synapse/config/federation.py b/synapse/config/federation.py new file mode 100644 index 0000000000..7782ab4c9d --- /dev/null +++ b/synapse/config/federation.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hashlib import sha256 +from typing import List, Optional + +import attr +from netaddr import IPSet + +from ._base import Config, ConfigError + + +@attr.s +class ShardedFederationSendingConfig: + """Algorithm for choosing which federation sender instance is responsible + for which destionation host. + """ + + instances = attr.ib(type=List[str]) + + def should_send_to(self, instance_name: str, destination: str) -> bool: + """Whether this instance is responsible for sending transcations for + the given host. + """ + + # If multiple federation senders are not defined we always return true. + if not self.instances or len(self.instances) == 1: + return True + + # We shard by taking the hash, modulo it by the number of federation + # senders and then checking whether this instance matches the instance + # at that index. + # + # (Technically this introduces some bias and is not entirely uniform, but + # since the hash is so large the bias is ridiculously small). + dest_hash = sha256(destination.encode("utf8")).digest() + dest_int = int.from_bytes(dest_hash, byteorder="little") + remainder = dest_int % (len(self.instances)) + return self.instances[remainder] == instance_name + + +class FederationConfig(Config): + section = "federation" + + def read_config(self, config, **kwargs): + # Whether to send federation traffic out in this process. 
This only + # applies to some federation traffic, and so shouldn't be used to + # "disable" federation + self.send_federation = config.get("send_federation", True) + + federation_sender_instances = config.get("federation_sender_instances") or [] + self.federation_shard_config = ShardedFederationSendingConfig( + federation_sender_instances + ) + + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None # type: Optional[dict] + federation_domain_whitelist = config.get("federation_domain_whitelist", None) + + if federation_domain_whitelist is not None: + # turn the whitelist into a hash for speed of lookup + self.federation_domain_whitelist = {} + + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + + self.federation_ip_range_blacklist = config.get( + "federation_ip_range_blacklist", [] + ) + + # Attempt to create an IPSet from the given ranges + try: + self.federation_ip_range_blacklist = IPSet( + self.federation_ip_range_blacklist + ) + + # Always blacklist 0.0.0.0, :: + self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) + except Exception as e: + raise ConfigError( + "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e + ) + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + #federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + + # Prevent federation requests from being sent to the following + # blacklist IP address CIDR ranges. If this option is not specified, or + # specified with an empty list, no ip range blacklist will be enforced. + # + # As of Synapse v1.4.0 this option also affects any outbound requests to identity + # servers provided by user input. + # + # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly + # listed here, since they correspond to unroutable addresses.) + # + federation_ip_range_blacklist: + - '127.0.0.0/8' + - '10.0.0.0/8' + - '172.16.0.0/12' + - '192.168.0.0/16' + - '100.64.0.0/10' + - '169.254.0.0/16' + - '::1/128' + - 'fe80::/64' + - 'fc00::/7' + """ -- cgit 1.5.1 From 649a7ead5c4bd2d8b7c486ac1a68ce4e41d49767 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 Jul 2020 14:06:28 +0100 Subject: Add ability to run multiple pusher instances (#7855) This reuses the same scheme as federation sender sharding --- changelog.d/7855.feature | 1 + synapse/config/_base.py | 38 +++- synapse/config/_base.pyi | 5 + synapse/config/federation.py | 37 +--- synapse/config/push.py | 5 +- synapse/federation/sender/__init__.py | 16 +- synapse/federation/sender/per_destination_queue.py | 2 +- synapse/push/pusherpool.py | 78 +++++---- tests/replication/test_pusher_shard.py | 193 +++++++++++++++++++++ 9 files changed, 293 insertions(+), 82 deletions(-) create mode 100644 changelog.d/7855.feature create mode 100644 tests/replication/test_pusher_shard.py (limited to 'synapse/config/federation.py') diff --git a/changelog.d/7855.feature b/changelog.d/7855.feature new file mode 100644 index 0000000000..2b6a9f0e71 --- /dev/null +++ b/changelog.d/7855.feature @@ -0,0 +1 @@ +Add experimental support for running multiple pusher workers. 
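
For reference, the assignment scheme introduced in the commit above (and generalised into `ShardedWorkerHandlingConfig` by this commit) can be reproduced as a small standalone sketch. It is not part of the patch itself; the instance and destination names below are purely illustrative:

```python
from hashlib import sha256
from typing import List


def pick_instance(instances: List[str], key: str) -> str:
    """Return the instance responsible for `key` (e.g. a destination host).

    Mirrors the hash-modulo scheme used by ShardedWorkerHandlingConfig:
    hash the key, interpret the digest as an integer, and index into the
    instance list.
    """
    digest = sha256(key.encode("utf8")).digest()
    return instances[int.from_bytes(digest, byteorder="little") % len(instances)]


# Illustrative instance/destination names, not taken from the patch.
senders = ["federation_sender1", "federation_sender2"]
for destination in ["matrix.org", "example.com", "chat.example.org"]:
    print(destination, "->", pick_instance(senders, destination))
```

Because the SHA-256 digest is vastly larger than the number of instances, the modulo bias mentioned in the code comment is negligible; the property that matters is that the mapping is deterministic, so every worker independently agrees on which instance owns a given key.
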
diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1391e5fc43..fd137853b1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -19,9 +19,11 @@ import argparse import errno import os from collections import OrderedDict +from hashlib import sha256 from textwrap import dedent -from typing import Any, MutableMapping, Optional +from typing import Any, List, MutableMapping, Optional +import attr import yaml @@ -717,4 +719,36 @@ def find_config_files(search_paths): return config_files -__all__ = ["Config", "RootConfig"] +@attr.s +class ShardedWorkerHandlingConfig: + """Algorithm for choosing which instance is responsible for handling some + sharded work. + + For example, the federation senders use this to determine which instances + handles sending stuff to a given destination (which is used as the `key` + below). + """ + + instances = attr.ib(type=List[str]) + + def should_handle(self, instance_name: str, key: str) -> bool: + """Whether this instance is responsible for handling the given key. + """ + + # If multiple instances are not defined we always return true. + if not self.instances or len(self.instances) == 1: + return True + + # We shard by taking the hash, modulo it by the number of instances and + # then checking whether this instance matches the instance at that + # index. + # + # (Technically this introduces some bias and is not entirely uniform, + # but since the hash is so large the bias is ridiculously small). + dest_hash = sha256(key.encode("utf8")).digest() + dest_int = int.from_bytes(dest_hash, byteorder="little") + remainder = dest_int % (len(self.instances)) + return self.instances[remainder] == instance_name + + +__all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 9e576060d4..eb911e8f9f 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -137,3 +137,8 @@ class Config: def read_config_files(config_files: List[str]): ... def find_config_files(search_paths: List[str]): ... + +class ShardedWorkerHandlingConfig: + instances: List[str] + def __init__(self, instances: List[str]) -> None: ... + def should_handle(self, instance_name: str, key: str) -> bool: ... diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 7782ab4c9d..82ff9664de 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -13,42 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from hashlib import sha256 -from typing import List, Optional +from typing import Optional -import attr from netaddr import IPSet -from ._base import Config, ConfigError - - -@attr.s -class ShardedFederationSendingConfig: - """Algorithm for choosing which federation sender instance is responsible - for which destionation host. - """ - - instances = attr.ib(type=List[str]) - - def should_send_to(self, instance_name: str, destination: str) -> bool: - """Whether this instance is responsible for sending transcations for - the given host. - """ - - # If multiple federation senders are not defined we always return true. - if not self.instances or len(self.instances) == 1: - return True - - # We shard by taking the hash, modulo it by the number of federation - # senders and then checking whether this instance matches the instance - # at that index. - # - # (Technically this introduces some bias and is not entirely uniform, but - # since the hash is so large the bias is ridiculously small). 
- dest_hash = sha256(destination.encode("utf8")).digest() - dest_int = int.from_bytes(dest_hash, byteorder="little") - remainder = dest_int % (len(self.instances)) - return self.instances[remainder] == instance_name +from ._base import Config, ConfigError, ShardedWorkerHandlingConfig class FederationConfig(Config): @@ -61,7 +30,7 @@ class FederationConfig(Config): self.send_federation = config.get("send_federation", True) federation_sender_instances = config.get("federation_sender_instances") or [] - self.federation_shard_config = ShardedFederationSendingConfig( + self.federation_shard_config = ShardedWorkerHandlingConfig( federation_sender_instances ) diff --git a/synapse/config/push.py b/synapse/config/push.py index 6f2b3a7faa..a1f3752c8a 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import Config +from ._base import Config, ShardedWorkerHandlingConfig class PushConfig(Config): @@ -24,6 +24,9 @@ class PushConfig(Config): push_config = config.get("push", {}) self.push_include_content = push_config.get("include_content", True) + pusher_instances = config.get("pusher_instances") or [] + self.pusher_shard_config = ShardedWorkerHandlingConfig(pusher_instances) + # There was a a 'redact_content' setting but mistakenly read from the # 'email'section'. Check for the flag in the 'push' section, and log, # but do not honour it to avoid nasty surprises when people upgrade. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 4b63a0755f..b328a4df09 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -197,7 +197,7 @@ class FederationSender(object): destinations = { d for d in destinations - if self._federation_shard_config.should_send_to( + if self._federation_shard_config.should_handle( self._instance_name, d ) } @@ -335,7 +335,7 @@ class FederationSender(object): d for d in domains if d != self.server_name - and self._federation_shard_config.should_send_to(self._instance_name, d) + and self._federation_shard_config.should_handle(self._instance_name, d) ] if not domains: return @@ -441,7 +441,7 @@ class FederationSender(object): for destination in destinations: if destination == self.server_name: continue - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): continue @@ -460,7 +460,7 @@ class FederationSender(object): if destination == self.server_name: continue - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): continue @@ -486,7 +486,7 @@ class FederationSender(object): logger.info("Not sending EDU to ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): return @@ -507,7 +507,7 @@ class FederationSender(object): edu: edu to send key: clobbering key for this edu """ - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, edu.destination ): return @@ -523,7 +523,7 @@ class FederationSender(object): logger.warning("Not sending device update to ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, 
destination ): return @@ -541,7 +541,7 @@ class FederationSender(object): logger.warning("Not waking up ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): return diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 6402136e8a..3436741783 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -78,7 +78,7 @@ class PerDestinationQueue(object): self._federation_shard_config = hs.config.federation.federation_shard_config self._should_send_on_this_instance = True - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): # We don't raise an exception here to avoid taking out any other diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index f6a5458681..2456f12f46 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -15,13 +15,12 @@ # limitations under the License. import logging -from collections import defaultdict -from threading import Lock -from typing import Dict, Tuple, Union +from typing import TYPE_CHECKING, Dict, Union + +from prometheus_client import Gauge from twisted.internet import defer -from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException from synapse.push.emailpusher import EmailPusher @@ -29,9 +28,18 @@ from synapse.push.httppusher import HttpPusher from synapse.push.pusher import PusherFactory from synapse.util.async_helpers import concurrently_execute +if TYPE_CHECKING: + from synapse.server import HomeServer + + logger = logging.getLogger(__name__) +synapse_pushers = Gauge( + "synapse_pushers", "Number of active synapse pushers", ["kind", "app_id"] +) + + class PusherPool: """ The pusher pool. This is responsible for dispatching notifications of new events to @@ -47,36 +55,20 @@ class PusherPool: Pusher.on_new_receipts are not expected to return deferreds. """ - def __init__(self, _hs): - self.hs = _hs - self.pusher_factory = PusherFactory(_hs) - self._should_start_pushers = _hs.config.start_pushers + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.pusher_factory = PusherFactory(hs) + self._should_start_pushers = hs.config.start_pushers self.store = self.hs.get_datastore() self.clock = self.hs.get_clock() + # We shard the handling of push notifications by user ID. + self._pusher_shard_config = hs.config.push.pusher_shard_config + self._instance_name = hs.get_instance_name() + # map from user id to app_id:pushkey to pusher self.pushers = {} # type: Dict[str, Dict[str, Union[HttpPusher, EmailPusher]]] - # a lock for the pushers dict, since `count_pushers` is called from an different - # and we otherwise get concurrent modification errors - self._pushers_lock = Lock() - - def count_pushers(): - results = defaultdict(int) # type: Dict[Tuple[str, str], int] - with self._pushers_lock: - for pushers in self.pushers.values(): - for pusher in pushers.values(): - k = (type(pusher).__name__, pusher.app_id) - results[k] += 1 - return results - - LaterGauge( - name="synapse_pushers", - desc="the number of active pushers", - labels=["kind", "app_id"], - caller=count_pushers, - ) - def start(self): """Starts the pushers off in a background process. 
""" @@ -104,6 +96,7 @@ class PusherPool: Returns: Deferred[EmailPusher|HttpPusher] """ + time_now_msec = self.clock.time_msec() # we try to create the pusher just to validate the config: it @@ -176,6 +169,9 @@ class PusherPool: access_tokens (Iterable[int]): access token *ids* to remove pushers for """ + if not self._pusher_shard_config.should_handle(self._instance_name, user_id): + return + tokens = set(access_tokens) for p in (yield self.store.get_pushers_by_user_id(user_id)): if p["access_token"] in tokens: @@ -237,6 +233,9 @@ class PusherPool: if not self._should_start_pushers: return + if not self._pusher_shard_config.should_handle(self._instance_name, user_id): + return + resultlist = yield self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) pusher_dict = None @@ -275,6 +274,11 @@ class PusherPool: Returns: Deferred[EmailPusher|HttpPusher] """ + if not self._pusher_shard_config.should_handle( + self._instance_name, pusherdict["user_name"] + ): + return + try: p = self.pusher_factory.create_pusher(pusherdict) except PusherConfigException as e: @@ -298,11 +302,12 @@ class PusherPool: appid_pushkey = "%s:%s" % (pusherdict["app_id"], pusherdict["pushkey"]) - with self._pushers_lock: - byuser = self.pushers.setdefault(pusherdict["user_name"], {}) - if appid_pushkey in byuser: - byuser[appid_pushkey].on_stop() - byuser[appid_pushkey] = p + byuser = self.pushers.setdefault(pusherdict["user_name"], {}) + if appid_pushkey in byuser: + byuser[appid_pushkey].on_stop() + byuser[appid_pushkey] = p + + synapse_pushers.labels(type(p).__name__, p.app_id).inc() # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to @@ -330,9 +335,10 @@ class PusherPool: if appid_pushkey in byuser: logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) - byuser[appid_pushkey].on_stop() - with self._pushers_lock: - del byuser[appid_pushkey] + pusher = byuser.pop(appid_pushkey) + pusher.on_stop() + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() yield self.store.delete_pusher_by_app_id_pushkey_user_id( app_id, pushkey, user_id diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py new file mode 100644 index 0000000000..2bdc6edbb1 --- /dev/null +++ b/tests/replication/test_pusher_shard.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +from mock import Mock + +from twisted.internet import defer + +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests.replication._base import BaseMultiWorkerStreamTestCase + +logger = logging.getLogger(__name__) + + +class PusherShardTestCase(BaseMultiWorkerStreamTestCase): + """Checks pusher sharding works + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + # Register a user who sends a message that we'll get notified about + self.other_user_id = self.register_user("otheruser", "pass") + self.other_access_token = self.login("otheruser", "pass") + + def default_config(self): + conf = super().default_config() + conf["start_pushers"] = False + return conf + + def _create_pusher_and_send_msg(self, localpart): + # Create a user that will get push notifications + user_id = self.register_user(localpart, "pass") + access_token = self.login(localpart, "pass") + + # Register a pusher + user_dict = self.get_success( + self.hs.get_datastore().get_user_by_access_token(access_token) + ) + token_id = user_dict["token_id"] + + self.get_success( + self.hs.get_pusherpool().add_pusher( + user_id=user_id, + access_token=token_id, + kind="http", + app_id="m.http", + app_display_name="HTTP Push Notifications", + device_display_name="pushy push", + pushkey="a@example.com", + lang=None, + data={"url": "https://push.example.com/push"}, + ) + ) + + self.pump() + + # Create a room + room = self.helper.create_room_as(user_id, tok=access_token) + + # The other user joins + self.helper.join( + room=room, user=self.other_user_id, tok=self.other_access_token + ) + + # The other user sends some messages + response = self.helper.send(room, body="Hi!", tok=self.other_access_token) + event_id = response["event_id"] + + return event_id + + def test_send_push_single_worker(self): + """Test that registration works when using a pusher worker. + """ + http_client_mock = Mock(spec_set=["post_json_get_json"]) + http_client_mock.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + {"start_pushers": True}, + proxied_http_client=http_client_mock, + ) + + event_id = self._create_pusher_and_send_msg("user") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock.post_json_get_json.assert_called_once() + self.assertEqual( + http_client_mock.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) + + def test_send_push_multiple_workers(self): + """Test that registration works when using sharded pusher workers. 
+ """ + http_client_mock1 = Mock(spec_set=["post_json_get_json"]) + http_client_mock1.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + { + "start_pushers": True, + "worker_name": "pusher1", + "pusher_instances": ["pusher1", "pusher2"], + }, + proxied_http_client=http_client_mock1, + ) + + http_client_mock2 = Mock(spec_set=["post_json_get_json"]) + http_client_mock2.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + { + "start_pushers": True, + "worker_name": "pusher2", + "pusher_instances": ["pusher1", "pusher2"], + }, + proxied_http_client=http_client_mock2, + ) + + # We choose a user name that we know should go to pusher1. + event_id = self._create_pusher_and_send_msg("user2") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock1.post_json_get_json.assert_called_once() + http_client_mock2.post_json_get_json.assert_not_called() + self.assertEqual( + http_client_mock1.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock1.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) + + http_client_mock1.post_json_get_json.reset_mock() + http_client_mock2.post_json_get_json.reset_mock() + + # Now we choose a user name that we know should go to pusher2. + event_id = self._create_pusher_and_send_msg("user4") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock1.post_json_get_json.assert_not_called() + http_client_mock2.post_json_get_json.assert_called_once() + self.assertEqual( + http_client_mock2.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock2.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) -- cgit 1.5.1 From 2c1b9d676322fad8cb57c92f97f81393bcfcbe56 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Jul 2020 23:22:13 +0100 Subject: Update worker docs with recent enhancements (#7969) --- changelog.d/7969.doc | 1 + docs/sample_config.yaml | 54 +++ docs/synctl_workers.md | 32 ++ docs/workers.md | 459 +++++++++++---------- synapse/app/generic_worker.py | 6 +- synapse/config/federation.py | 12 +- synapse/config/homeserver.py | 2 +- synapse/config/logger.py | 2 +- synapse/config/redis.py | 23 +- synapse/config/workers.py | 49 ++- synapse/federation/send_queue.py | 2 +- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/per_destination_queue.py | 2 +- synapse/storage/data_stores/main/stream.py | 2 +- 14 files changed, 413 insertions(+), 235 deletions(-) create mode 100644 changelog.d/7969.doc create mode 100644 docs/synctl_workers.md (limited to 'synapse/config/federation.py') diff --git a/changelog.d/7969.doc b/changelog.d/7969.doc new file mode 100644 index 0000000000..68d2ed5fad --- /dev/null +++ b/changelog.d/7969.doc @@ -0,0 +1 @@ +Update worker docs with latest enhancements. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 3227294e0b..b21e36bb6d 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2398,3 +2398,57 @@ opentracing: # # logging: # false + + +## Workers ## + +# Disables sending of outbound federation transactions on the main process. +# Uncomment if using a federation sender worker. 
+# +#send_federation: false + +# It is possible to run multiple federation sender workers, in which case the +# work is balanced across them. +# +# This configuration must be shared between all federation sender workers, and if +# changed all federation sender workers must be stopped at the same time and then +# started, to ensure that all instances are running with the same config (otherwise +# events may be dropped). +# +#federation_sender_instances: +# - federation_sender1 + +# When using workers this should be a map from `worker_name` to the +# HTTP replication listener of the worker, if configured. +# +#instance_map: +# worker1: +# host: localhost +# port: 8034 + +# Experimental: When using workers you can define which workers should +# handle event persistence and typing notifications. Any worker +# specified here must also be in the `instance_map`. +# +#stream_writers: +# events: worker1 +# typing: worker1 + + +# Configuration for Redis when using workers. This *must* be enabled when +# using workers (unless using old style direct TCP configuration). +# +redis: + # Uncomment the below to enable Redis support. + # + #enabled: true + + # Optional host and port to use to connect to redis. Defaults to + # localhost and 6379 + # + #host: localhost + #port: 6379 + + # Optional password if configured on the Redis instance + # + #password: diff --git a/docs/synctl_workers.md b/docs/synctl_workers.md new file mode 100644 index 0000000000..8da4a31852 --- /dev/null +++ b/docs/synctl_workers.md @@ -0,0 +1,32 @@ +### Using synctl with workers + +If you want to use `synctl` to manage your synapse processes, you will need to +create an an additional configuration file for the main synapse process. That +configuration should look like this: + +```yaml +worker_app: synapse.app.homeserver +``` + +Additionally, each worker app must be configured with the name of a "pid file", +to which it will write its process ID when it starts. For example, for a +synchrotron, you might write: + +```yaml +worker_pid_file: /home/matrix/synapse/worker1.pid +``` + +Finally, to actually run your worker-based synapse, you must pass synctl the `-a` +commandline option to tell it to operate on all the worker configurations found +in the given directory, e.g.: + + synctl -a $CONFIG/workers start + +Currently one should always restart all workers when restarting or upgrading +synapse, unless you explicitly know it's safe not to. For instance, restarting +synapse without restarting all the synchrotrons may result in broken typing +notifications. + +To manipulate a specific worker, you pass the -w option to synctl: + + synctl -w $CONFIG/workers/worker1.yaml restart diff --git a/docs/workers.md b/docs/workers.md index f4cbbc0400..38bd758e57 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -16,69 +16,106 @@ workers only work with PostgreSQL-based Synapse deployments. SQLite should only be used for demo purposes and any admin considering workers should already be running PostgreSQL. -## Master/worker communication +## Main process/worker communication -The workers communicate with the master process via a Synapse-specific protocol -called 'replication' (analogous to MySQL- or Postgres-style database -replication) which feeds a stream of relevant data from the master to the -workers so they can be kept in sync with the master process and database state. 
+The processes communicate with each other via a Synapse-specific protocol called +'replication' (analogous to MySQL- or Postgres-style database replication) which +feeds streams of newly written data between processes so they can be kept in +sync with the database state. -Additionally, workers may make HTTP requests to the master, to send information -in the other direction. Typically this is used for operations which need to -wait for a reply - such as sending an event. +Additionally, processes may make HTTP requests to each other. Typically this is +used for operations which need to wait for a reply - such as sending an event. -## Configuration +As of Synapse v1.13.0, it is possible to configure Synapse to send replication +via a [Redis pub/sub channel](https://redis.io/topics/pubsub), and is now the +recommended way of configuring replication. This is an alternative to the old +direct TCP connections to the main process: rather than all the workers +connecting to the main process, all the workers and the main process connect to +Redis, which relays replication commands between processes. This can give a +significant cpu saving on the main process and will be a prerequisite for +upcoming performance improvements. + +(See the [Architectural diagram](#architectural-diagram) section at the end for +a visualisation of what this looks like) + + +## Setting up workers + +A Redis server is required to manage the communication between the processes. +(The older direct TCP connections are now deprecated.) The Redis server +should be installed following the normal procedure for your distribution (e.g. +`apt install redis-server` on Debian). It is safe to use an existing Redis +deployment if you have one. + +Once installed, check that Redis is running and accessible from the host running +Synapse, for example by executing `echo PING | nc -q1 localhost 6379` and seeing +a response of `+PONG`. + +The appropriate dependencies must also be installed for Synapse. If using a +virtualenv, these can be installed with: + +```sh +pip install matrix-synapse[redis] +``` + +Note that these dependencies are included when synapse is installed with `pip +install matrix-synapse[all]`. They are also included in the debian packages from +`matrix.org` and in the docker images at +https://hub.docker.com/r/matrixdotorg/synapse/. To make effective use of the workers, you will need to configure an HTTP reverse-proxy such as nginx or haproxy, which will direct incoming requests to -the correct worker, or to the main synapse instance. Note that this includes -requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md) +the correct worker, or to the main synapse instance. See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. -To enable workers, you need to add *two* replication listeners to the -main Synapse configuration file (`homeserver.yaml`). For example: +To enable workers you should create a configuration file for each worker +process. Each worker configuration file inherits the configuration of the shared +homeserver configuration file. You can then override configuration specific to +that worker, e.g. the HTTP listener that it provides (if any); logging +configuration; etc. You should minimise the number of overrides though to +maintain a usable config. + +Next you need to add both a HTTP replication listener and redis config to the +shared Synapse configuration file (`homeserver.yaml`). For example: ```yaml +# extend the existing `listeners` section. 
This defines the ports that the +# main process will listen on. listeners: - # The TCP replication port - - port: 9092 - bind_address: '127.0.0.1' - type: replication - # The HTTP replication port - port: 9093 bind_address: '127.0.0.1' type: http resources: - names: [replication] + +redis: + enabled: true ``` -Under **no circumstances** should these replication API listeners be exposed to -the public internet; they have no authentication and are unencrypted. +See the sample config for the full documentation of each option. -You should then create a set of configs for the various worker processes. Each -worker configuration file inherits the configuration of the main homeserver -configuration file. You can then override configuration specific to that -worker, e.g. the HTTP listener that it provides (if any); logging -configuration; etc. You should minimise the number of overrides though to -maintain a usable config. +Under **no circumstances** should the replication listener be exposed to the +public internet; it has no authentication and is unencrypted. In the config file for each worker, you must specify the type of worker -application (`worker_app`). The currently available worker applications are -listed below. You must also specify the replication endpoints that it should -talk to on the main synapse process. `worker_replication_host` should specify -the host of the main synapse, `worker_replication_port` should point to the TCP -replication listener port and `worker_replication_http_port` should point to -the HTTP replication port. +application (`worker_app`), and you should specify a unqiue name for the worker +(`worker_name`). The currently available worker applications are listed below. +You must also specify the HTTP replication endpoint that it should talk to on +the main synapse process. `worker_replication_host` should specify the host of +the main synapse and `worker_replication_http_port` should point to the HTTP +replication port. If the worker will handle HTTP requests then the +`worker_listeners` option should be set with a `http` listener, in the same way +as the `listeners` option in the shared config. For example: ```yaml -worker_app: synapse.app.synchrotron +worker_app: synapse.app.generic_worker +worker_name: worker1 -# The replication listener on the synapse to talk to. +# The replication listener on the main synapse process. worker_replication_host: 127.0.0.1 -worker_replication_port: 9092 worker_replication_http_port: 9093 worker_listeners: @@ -87,13 +124,14 @@ worker_listeners: resources: - names: - client + - federation -worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml +worker_log_config: /home/matrix/synapse/config/worker1_log_config.yaml ``` -...is a full configuration for a synchrotron worker instance, which will expose a -plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided -by the main synapse. +...is a full configuration for a generic worker instance, which will expose a +plain HTTP endpoint on port 8083 separately serving various endpoints, e.g. +`/sync`, which are listed below. Obviously you should configure your reverse-proxy to route the relevant endpoints to the worker (`localhost:8083` in the above example). @@ -102,127 +140,24 @@ Finally, you need to start your worker processes. This can be done with either `synctl` or your distribution's preferred service manager such as `systemd`. 
We recommend the use of `systemd` where available: for information on setting up `systemd` to start synapse workers, see -[systemd-with-workers](systemd-with-workers). To use `synctl`, see below. +[systemd-with-workers](systemd-with-workers). To use `synctl`, see +[synctl_workers.md](synctl_workers.md). -### **Experimental** support for replication over redis - -As of Synapse v1.13.0, it is possible to configure Synapse to send replication -via a [Redis pub/sub channel](https://redis.io/topics/pubsub). This is an -alternative to direct TCP connections to the master: rather than all the -workers connecting to the master, all the workers and the master connect to -Redis, which relays replication commands between processes. This can give a -significant cpu saving on the master and will be a prerequisite for upcoming -performance improvements. - -Note that this support is currently experimental; you may experience lost -messages and similar problems! It is strongly recommended that admins setting -up workers for the first time use direct TCP replication as above. - -To configure Synapse to use Redis: - -1. Install Redis following the normal procedure for your distribution - for - example, on Debian, `apt install redis-server`. (It is safe to use an - existing Redis deployment if you have one: we use a pub/sub stream named - according to the `server_name` of your synapse server.) -2. Check Redis is running and accessible: you should be able to `echo PING | nc -q1 - localhost 6379` and get a response of `+PONG`. -3. Install the python prerequisites. If you installed synapse into a - virtualenv, this can be done with: - ```sh - pip install matrix-synapse[redis] - ``` - The debian packages from matrix.org already include the required - dependencies. -4. Add config to the shared configuration (`homeserver.yaml`): - ```yaml - redis: - enabled: true - ``` - Optional parameters which can go alongside `enabled` are `host`, `port`, - `password`. Normally none of these are required. -5. Restart master and all workers. - -Once redis replication is in use, `worker_replication_port` is redundant and -can be removed from the worker configuration files. Similarly, the -configuration for the `listener` for the TCP replication port can be removed -from the main configuration file. Note that the HTTP replication port is -still required. - -### Using synctl - -If you want to use `synctl` to manage your synapse processes, you will need to -create an an additional configuration file for the master synapse process. That -configuration should look like this: - -```yaml -worker_app: synapse.app.homeserver -``` - -Additionally, each worker app must be configured with the name of a "pid file", -to which it will write its process ID when it starts. For example, for a -synchrotron, you might write: - -```yaml -worker_pid_file: /home/matrix/synapse/synchrotron.pid -``` - -Finally, to actually run your worker-based synapse, you must pass synctl the `-a` -commandline option to tell it to operate on all the worker configurations found -in the given directory, e.g.: - - synctl -a $CONFIG/workers start - -Currently one should always restart all workers when restarting or upgrading -synapse, unless you explicitly know it's safe not to. For instance, restarting -synapse without restarting all the synchrotrons may result in broken typing -notifications. 
- -To manipulate a specific worker, you pass the -w option to synctl: - - synctl -w $CONFIG/workers/synchrotron.yaml restart ## Available worker applications -### `synapse.app.pusher` - -Handles sending push notifications to sygnal and email. Doesn't handle any -REST endpoints itself, but you should set `start_pushers: False` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -### `synapse.app.synchrotron` +### `synapse.app.generic_worker` -The synchrotron handles `sync` requests from clients. In particular, it can -handle REST endpoints matching the following regular expressions: +This worker can handle API requests matching the following regular +expressions: + # Sync requests ^/_matrix/client/(v2_alpha|r0)/sync$ ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ ^/_matrix/client/(api/v1|r0)/initialSync$ ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ -The above endpoints should all be routed to the synchrotron worker by the -reverse-proxy configuration. - -It is possible to run multiple instances of the synchrotron to scale -horizontally. In this case the reverse-proxy should be configured to -load-balance across the instances, though it will be more efficient if all -requests from a particular user are routed to a single instance. Extracting -a userid from the access token is currently left as an exercise for the reader. - -### `synapse.app.appservice` - -Handles sending output traffic to Application Services. Doesn't handle any -REST endpoints itself, but you should set `notify_appservices: False` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -### `synapse.app.federation_reader` - -Handles a subset of federation endpoints. 
In particular, it can handle REST -endpoints matching the following regular expressions: - + # Federation requests ^/_matrix/federation/v1/event/ ^/_matrix/federation/v1/state/ ^/_matrix/federation/v1/state_ids/ @@ -242,40 +177,145 @@ endpoints matching the following regular expressions: ^/_matrix/federation/v1/event_auth/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/user/devices/ - ^/_matrix/federation/v1/send/ ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query + # Inbound federation transaction request + ^/_matrix/federation/v1/send/ + + # Client API requests + ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ + ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ + ^/_matrix/client/versions$ + ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ + ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ + ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ + ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ + + # Registration/login requests + ^/_matrix/client/(api/v1|r0|unstable)/login$ + ^/_matrix/client/(r0|unstable)/register$ + ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$ + + # Event sending requests + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ + ^/_matrix/client/(api/v1|r0|unstable)/join/ + ^/_matrix/client/(api/v1|r0|unstable)/profile/ + + Additionally, the following REST endpoints can be handled for GET requests: ^/_matrix/federation/v1/groups/ -The above endpoints should all be routed to the federation_reader worker by the -reverse-proxy configuration. +Pagination requests can also be handled, but all requests for a given +room must be routed to the same instance. Additionally, care must be taken to +ensure that the purge history admin API is not used while pagination requests +for the room are in flight: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ + +Note that a HTTP listener with `client` and `federation` resources must be +configured in the `worker_listeners` option in the worker config. + + +#### Load balancing + +It is possible to run multiple instances of this worker app, with incoming requests +being load-balanced between them by the reverse-proxy. However, different endpoints +have different characteristics and so admins +may wish to run multiple groups of workers handling different endpoints so that +load balancing can be done in different ways. + +For `/sync` and `/initialSync` requests it will be more efficient if all +requests from a particular user are routed to a single instance. Extracting a +user ID from the access token or `Authorization` header is currently left as an +exercise for the reader. Admins may additionally wish to separate out `/sync` +requests that have a `since` query parameter from those that don't (and +`/initialSync`), as requests that don't are known as "initial sync" that happens +when a user logs in on a new device and can be *very* resource intensive, so +isolating these requests will stop them from interfering with other users ongoing +syncs. 
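
As a toy illustration of the affinity idea (not something provided by this patch), the same hash-modulo trick used for worker sharding can be applied to the `Authorization` header at the routing layer, so that requests bearing the same access token always land on the same sync worker. In practice this would normally be done in the reverse proxy itself; the worker names and token below are assumptions:

```python
from hashlib import sha256
from typing import List


def pick_sync_worker(authorization_header: str, workers: List[str]) -> str:
    """Toy affinity routing: hash the raw Authorization header so the same
    access token is always sent to the same sync worker."""
    digest = sha256(authorization_header.encode("utf8")).digest()
    return workers[int.from_bytes(digest, byteorder="little") % len(workers)]


# Hypothetical worker names and token, for illustration only.
sync_workers = ["sync1", "sync2", "sync3"]
print(pick_sync_worker("Bearer syt_example_token", sync_workers))
```
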
+ +Federation and client requests can be balanced via simple round robin. -The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single -instance. +The inbound federation transaction request `^/_matrix/federation/v1/send/` +should be balanced by source IP so that transactions from the same remote server +go to the same process. -Note that `federation` must be added to the listener resources in the worker config: +Registration/login requests can be handled separately purely to help ensure that +unexpected load doesn't affect new logins and sign ups. + +Finally, event sending requests can be balanced by the room ID in the URI (or +the full URI, or even just round robin), the room ID is the path component after +`/rooms/`. If there is a large bridge connected that is sending or may send lots +of events, then a dedicated set of workers can be provisioned to limit the +effects of bursts of events from that bridge on events sent by normal users. + +#### Stream writers + +Additionally, there is *experimental* support for moving writing of specific +streams (such as events) off of the main process to a particular worker. (This +is only supported with Redis-based replication.) + +Currently support streams are `events` and `typing`. + +To enable this, the worker must have a HTTP replication listener configured, +have a `worker_name` and be listed in the `instance_map` config. For example to +move event persistence off to a dedicated worker, the shared configuration would +include: ```yaml -worker_app: synapse.app.federation_reader -... -worker_listeners: - - type: http - port: - resources: - - names: - - federation +instance_map: + event_persister1: + host: localhost + port: 8034 + +streams_writers: + events: event_persister1 ``` + +### `synapse.app.pusher` + +Handles sending push notifications to sygnal and email. Doesn't handle any +REST endpoints itself, but you should set `start_pushers: False` in the +shared configuration file to stop the main synapse sending push notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.appservice` + +Handles sending output traffic to Application Services. Doesn't handle any +REST endpoints itself, but you should set `notify_appservices: False` in the +shared configuration file to stop the main synapse sending appservice notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + + ### `synapse.app.federation_sender` Handles sending federation traffic to other servers. Doesn't handle any REST endpoints itself, but you should set `send_federation: False` in the shared configuration file to stop the main synapse sending this traffic. -Note this worker cannot be load-balanced: only one instance should be active. +If running multiple federation senders then you must list each +instance in the `federation_sender_instances` option by their `worker_name`. +All instances must be stopped and started when adding or removing instances. +For example: + +```yaml +federation_sender_instances: + - federation_sender1 + - federation_sender2 +``` ### `synapse.app.media_repository` @@ -314,46 +354,6 @@ and you must configure a single instance to run the background tasks, e.g.: media_instance_running_background_jobs: "media-repository-1" ``` -### `synapse.app.client_reader` - -Handles client API endpoints. 
It can handle REST endpoints matching the -following regular expressions: - - ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ - ^/_matrix/client/versions$ - ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ - ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ - -Additionally, the following REST endpoints can be handled for GET requests: - - ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/groups/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/account_data/ - ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/rooms/[^/]*/account_data/ - -Additionally, the following REST endpoints can be handled, but all requests must -be routed to the same instance: - - ^/_matrix/client/(r0|unstable)/register$ - ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$ - -Pagination requests can also be handled, but all requests with the same path -room must be routed to the same instance. Additionally, care must be taken to -ensure that the purge history admin API is not used while pagination requests -for the room are in flight: - - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ - ### `synapse.app.user_dir` Handles searches in the user directory. It can handle REST endpoints matching @@ -388,15 +388,48 @@ file. For example: worker_main_http_uri: http://127.0.0.1:8008 -### `synapse.app.event_creator` +### Historical apps -Handles some event creation. It can handle REST endpoints matching: +*Note:* Historically there used to be more apps, however they have been +amalgamated into a single `synapse.app.generic_worker` app. The remaining apps +are ones that do specific processing unrelated to requests, e.g. the `pusher` +that handles sending out push notifications for new events. The intention is for +all these to be folded into the `generic_worker` app and to use config to define +which processes handle the various proccessing such as push notifications. - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ - ^/_matrix/client/(api/v1|r0|unstable)/join/ - ^/_matrix/client/(api/v1|r0|unstable)/profile/ -It will create events locally and then send them on to the main synapse -instance to be persisted and handled. 
+## Architectural diagram + +The following shows an example setup using Redis and a reverse proxy: + +``` + Clients & Federation + | + v + +-----------+ + | | + | Reverse | + | Proxy | + | | + +-----------+ + | | | + | | | HTTP requests + +-------------------+ | +-----------+ + | +---+ | + | | | + v v v ++--------------+ +--------------+ +--------------+ +--------------+ +| Main | | Generic | | Generic | | Event | +| Process | | Worker 1 | | Worker 2 | | Persister | ++--------------+ +--------------+ +--------------+ +--------------+ + ^ ^ | ^ | | ^ | ^ ^ + | | | | | | | | | | + | | | | | HTTP | | | | | + | +----------+<--|---|---------+ | | | | + | | +-------------|-->+----------+ | + | | | | + | | | | + v v v v +==================================================================== + Redis pub/sub channel +``` diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index ec0dbddb8c..5841454c9a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -940,7 +940,7 @@ def start(config_options): config.server.update_user_directory = False if config.worker_app == "synapse.app.federation_sender": - if config.federation.send_federation: + if config.worker.send_federation: sys.stderr.write( "\nThe send_federation must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -950,10 +950,10 @@ def start(config_options): sys.exit(1) # Force the pushers to start since they will be disabled in the main config - config.federation.send_federation = True + config.worker.send_federation = True else: # For other worker types we force this to off. - config.federation.send_federation = False + config.worker.send_federation = False synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 82ff9664de..2c77d8f85b 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -17,23 +17,13 @@ from typing import Optional from netaddr import IPSet -from ._base import Config, ConfigError, ShardedWorkerHandlingConfig +from ._base import Config, ConfigError class FederationConfig(Config): section = "federation" def read_config(self, config, **kwargs): - # Whether to send federation traffic out in this process. 
This only - # applies to some federation traffic, and so shouldn't be used to - # "disable" federation - self.send_federation = config.get("send_federation", True) - - federation_sender_instances = config.get("federation_sender_instances") or [] - self.federation_shard_config = ShardedWorkerHandlingConfig( - federation_sender_instances - ) - # FIXME: federation_domain_whitelist needs sytests self.federation_domain_whitelist = None # type: Optional[dict] federation_domain_whitelist = config.get("federation_domain_whitelist", None) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 8e93d31394..556e291495 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -78,7 +78,6 @@ class HomeServerConfig(RootConfig): JWTConfig, PasswordConfig, EmailConfig, - WorkerConfig, PasswordAuthProviderConfig, PushConfig, SpamCheckerConfig, @@ -91,6 +90,7 @@ class HomeServerConfig(RootConfig): RoomDirectoryConfig, ThirdPartyRulesConfig, TracerConfig, + WorkerConfig, RedisConfig, FederationConfig, ] diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 49f6c32beb..dd775a97e8 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -214,7 +214,7 @@ def setup_logging( Set up the logging subsystem. Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): + config (LoggingConfig | synapse.config.worker.WorkerConfig): configuration data use_worker_options (bool): True to use the 'worker_log_config' option diff --git a/synapse/config/redis.py b/synapse/config/redis.py index d5d3ca1c9e..1373302335 100644 --- a/synapse/config/redis.py +++ b/synapse/config/redis.py @@ -21,7 +21,7 @@ class RedisConfig(Config): section = "redis" def read_config(self, config, **kwargs): - redis_config = config.get("redis", {}) + redis_config = config.get("redis") or {} self.redis_enabled = redis_config.get("enabled", False) if not self.redis_enabled: @@ -32,3 +32,24 @@ class RedisConfig(Config): self.redis_host = redis_config.get("host", "localhost") self.redis_port = redis_config.get("port", 6379) self.redis_password = redis_config.get("password") + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Configuration for Redis when using workers. This *must* be enabled when + # using workers (unless using old style direct TCP configuration). + # + redis: + # Uncomment the below to enable Redis support. + # + #enabled: true + + # Optional host and port to use to connect to redis. Defaults to + # localhost and 6379 + # + #host: localhost + #port: 6379 + + # Optional password if configured on the Redis instance + # + #password: + """ diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 2574cd3aa1..c784a71508 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -15,7 +15,7 @@ import attr -from ._base import Config, ConfigError +from ._base import Config, ConfigError, ShardedWorkerHandlingConfig from .server import ListenerConfig, parse_listener_def @@ -85,6 +85,16 @@ class WorkerConfig(Config): ) ) + # Whether to send federation traffic out in this process. 
This only + # applies to some federation traffic, and so shouldn't be used to + # "disable" federation + self.send_federation = config.get("send_federation", True) + + federation_sender_instances = config.get("federation_sender_instances") or [] + self.federation_shard_config = ShardedWorkerHandlingConfig( + federation_sender_instances + ) + # A map from instance name to host/port of their HTTP replication endpoint. instance_map = config.get("instance_map") or {} self.instance_map = { @@ -105,6 +115,43 @@ class WorkerConfig(Config): % (instance, stream) ) + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + ## Workers ## + + # Disables sending of outbound federation transactions on the main process. + # Uncomment if using a federation sender worker. + # + #send_federation: false + + # It is possible to run multiple federation sender workers, in which case the + # work is balanced across them. + # + # This configuration must be shared between all federation sender workers, and if + # changed all federation sender workers must be stopped at the same time and then + # started, to ensure that all instances are running with the same config (otherwise + # events may be dropped). + # + #federation_sender_instances: + # - federation_sender1 + + # When using workers this should be a map from `worker_name` to the + # HTTP replication listener of the worker, if configured. + # + #instance_map: + # worker1: + # host: localhost + # port: 8034 + + # Experimental: When using workers you can define which workers should + # handle event persistence and typing notifications. Any worker + # specified here must also be in the `instance_map`. + # + #stream_writers: + # events: worker1 + # typing: worker1 + """ + def read_arguments(self, args): # We support a bunch of command line arguments that override options in # the config. A lot of these options have a worker_* prefix when running diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 4fc9ff92e5..2b0ab2dcbf 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -57,7 +57,7 @@ class FederationRemoteSendQueue(object): # We may have multiple federation sender instances, so we need to track # their positions separately. 
- self._sender_instances = hs.config.federation.federation_shard_config.instances + self._sender_instances = hs.config.worker.federation_shard_config.instances self._sender_positions = {} # Pending presence map user_id -> UserPresenceState diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index ba4ddd2370..6ae6522f87 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -70,7 +70,7 @@ class FederationSender(object): self._transaction_manager = TransactionManager(hs) self._instance_name = hs.get_instance_name() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config # map from destination to PerDestinationQueue self._per_destination_queues = {} # type: Dict[str, PerDestinationQueue] diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 3436741783..dd150f89a6 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -75,7 +75,7 @@ class PerDestinationQueue(object): self._store = hs.get_datastore() self._transaction_manager = transaction_manager self._instance_name = hs.get_instance_name() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config self._should_send_on_this_instance = True if not self._federation_shard_config.should_handle( diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 5e32c7aa1e..10d39b3699 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -255,7 +255,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self._instance_name = hs.get_instance_name() self._send_federation = hs.should_send_federation() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config # If we're a process that sends federation we may need to reset the # `federation_stream_position` table to match the current sharding -- cgit 1.5.1
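
A closing note on why the docs above insist that all federation senders be stopped and started together when `federation_sender_instances` changes: the hash-modulo assignment depends on the full instance list, so two workers running with different lists can disagree about which of them owns a destination. The sketch below (instance and destination names are illustrative, not from the patches) makes that visible by computing the assignment under an old and a new list:

```python
from hashlib import sha256
from typing import List


def pick_instance(instances: List[str], key: str) -> str:
    # Same scheme as ShardedWorkerHandlingConfig.should_handle.
    digest = sha256(key.encode("utf8")).digest()
    return instances[int.from_bytes(digest, byteorder="little") % len(instances)]


old = ["federation_sender1", "federation_sender2"]
new = ["federation_sender1", "federation_sender2", "federation_sender3"]

# Illustrative destination hosts, not taken from the patches.
for destination in ["matrix.org", "example.com", "chat.example.org", "a.test"]:
    before, after = pick_instance(old, destination), pick_instance(new, destination)
    print(f"{destination}: {before} -> {after} ({'moved' if before != after else 'stays'})")
```

While the two configurations coexist, a destination may be claimed by two instances at once (duplicate sends) or, if an instance was removed, by none of the running ones, which is the kind of inconsistency (dropped or duplicated transactions) that the coordinated restart is meant to avoid.
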