From f299441cc67f31dcd47b8fdfda4a218bee9df9ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Jul 2020 18:26:36 +0100 Subject: Add ability to shard the federation sender (#7798) --- synapse/config/federation.py | 129 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 synapse/config/federation.py (limited to 'synapse/config/federation.py') diff --git a/synapse/config/federation.py b/synapse/config/federation.py new file mode 100644 index 0000000000..7782ab4c9d --- /dev/null +++ b/synapse/config/federation.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hashlib import sha256 +from typing import List, Optional + +import attr +from netaddr import IPSet + +from ._base import Config, ConfigError + + +@attr.s +class ShardedFederationSendingConfig: + """Algorithm for choosing which federation sender instance is responsible + for which destionation host. + """ + + instances = attr.ib(type=List[str]) + + def should_send_to(self, instance_name: str, destination: str) -> bool: + """Whether this instance is responsible for sending transcations for + the given host. + """ + + # If multiple federation senders are not defined we always return true. + if not self.instances or len(self.instances) == 1: + return True + + # We shard by taking the hash, modulo it by the number of federation + # senders and then checking whether this instance matches the instance + # at that index. + # + # (Technically this introduces some bias and is not entirely uniform, but + # since the hash is so large the bias is ridiculously small). + dest_hash = sha256(destination.encode("utf8")).digest() + dest_int = int.from_bytes(dest_hash, byteorder="little") + remainder = dest_int % (len(self.instances)) + return self.instances[remainder] == instance_name + + +class FederationConfig(Config): + section = "federation" + + def read_config(self, config, **kwargs): + # Whether to send federation traffic out in this process. 
This only + # applies to some federation traffic, and so shouldn't be used to + # "disable" federation + self.send_federation = config.get("send_federation", True) + + federation_sender_instances = config.get("federation_sender_instances") or [] + self.federation_shard_config = ShardedFederationSendingConfig( + federation_sender_instances + ) + + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None # type: Optional[dict] + federation_domain_whitelist = config.get("federation_domain_whitelist", None) + + if federation_domain_whitelist is not None: + # turn the whitelist into a hash for speed of lookup + self.federation_domain_whitelist = {} + + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + + self.federation_ip_range_blacklist = config.get( + "federation_ip_range_blacklist", [] + ) + + # Attempt to create an IPSet from the given ranges + try: + self.federation_ip_range_blacklist = IPSet( + self.federation_ip_range_blacklist + ) + + # Always blacklist 0.0.0.0, :: + self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) + except Exception as e: + raise ConfigError( + "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e + ) + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + #federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + + # Prevent federation requests from being sent to the following + # blacklist IP address CIDR ranges. If this option is not specified, or + # specified with an empty list, no ip range blacklist will be enforced. + # + # As of Synapse v1.4.0 this option also affects any outbound requests to identity + # servers provided by user input. + # + # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly + # listed here, since they correspond to unroutable addresses.) + # + federation_ip_range_blacklist: + - '127.0.0.0/8' + - '10.0.0.0/8' + - '172.16.0.0/12' + - '192.168.0.0/16' + - '100.64.0.0/10' + - '169.254.0.0/16' + - '::1/128' + - 'fe80::/64' + - 'fc00::/7' + """ -- cgit 1.5.1 From 649a7ead5c4bd2d8b7c486ac1a68ce4e41d49767 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 Jul 2020 14:06:28 +0100 Subject: Add ability to run multiple pusher instances (#7855) This reuses the same scheme as federation sender sharding --- changelog.d/7855.feature | 1 + synapse/config/_base.py | 38 +++- synapse/config/_base.pyi | 5 + synapse/config/federation.py | 37 +--- synapse/config/push.py | 5 +- synapse/federation/sender/__init__.py | 16 +- synapse/federation/sender/per_destination_queue.py | 2 +- synapse/push/pusherpool.py | 78 +++++---- tests/replication/test_pusher_shard.py | 193 +++++++++++++++++++++ 9 files changed, 293 insertions(+), 82 deletions(-) create mode 100644 changelog.d/7855.feature create mode 100644 tests/replication/test_pusher_shard.py (limited to 'synapse/config/federation.py') diff --git a/changelog.d/7855.feature b/changelog.d/7855.feature new file mode 100644 index 0000000000..2b6a9f0e71 --- /dev/null +++ b/changelog.d/7855.feature @@ -0,0 +1 @@ +Add experimental support for running multiple pusher workers. 
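
For reference, the assignment scheme introduced in the commit above (and generalised into `ShardedWorkerHandlingConfig` by this commit) can be reproduced as a small standalone sketch. It is not part of the patch itself; the instance and destination names below are purely illustrative:

```python
from hashlib import sha256
from typing import List


def pick_instance(instances: List[str], key: str) -> str:
    """Return the instance responsible for `key` (e.g. a destination host).

    Mirrors the hash-modulo scheme used by ShardedWorkerHandlingConfig:
    hash the key, interpret the digest as an integer, and index into the
    instance list.
    """
    digest = sha256(key.encode("utf8")).digest()
    return instances[int.from_bytes(digest, byteorder="little") % len(instances)]


# Illustrative instance/destination names, not taken from the patch.
senders = ["federation_sender1", "federation_sender2"]
for destination in ["matrix.org", "example.com", "chat.example.org"]:
    print(destination, "->", pick_instance(senders, destination))
```

Because the SHA-256 digest is vastly larger than the number of instances, the modulo bias mentioned in the code comment is negligible; the property that matters is that the mapping is deterministic, so every worker independently agrees on which instance owns a given key.
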
diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 1391e5fc43..fd137853b1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -19,9 +19,11 @@ import argparse import errno import os from collections import OrderedDict +from hashlib import sha256 from textwrap import dedent -from typing import Any, MutableMapping, Optional +from typing import Any, List, MutableMapping, Optional +import attr import yaml @@ -717,4 +719,36 @@ def find_config_files(search_paths): return config_files -__all__ = ["Config", "RootConfig"] +@attr.s +class ShardedWorkerHandlingConfig: + """Algorithm for choosing which instance is responsible for handling some + sharded work. + + For example, the federation senders use this to determine which instances + handles sending stuff to a given destination (which is used as the `key` + below). + """ + + instances = attr.ib(type=List[str]) + + def should_handle(self, instance_name: str, key: str) -> bool: + """Whether this instance is responsible for handling the given key. + """ + + # If multiple instances are not defined we always return true. + if not self.instances or len(self.instances) == 1: + return True + + # We shard by taking the hash, modulo it by the number of instances and + # then checking whether this instance matches the instance at that + # index. + # + # (Technically this introduces some bias and is not entirely uniform, + # but since the hash is so large the bias is ridiculously small). + dest_hash = sha256(key.encode("utf8")).digest() + dest_int = int.from_bytes(dest_hash, byteorder="little") + remainder = dest_int % (len(self.instances)) + return self.instances[remainder] == instance_name + + +__all__ = ["Config", "RootConfig", "ShardedWorkerHandlingConfig"] diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 9e576060d4..eb911e8f9f 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -137,3 +137,8 @@ class Config: def read_config_files(config_files: List[str]): ... def find_config_files(search_paths: List[str]): ... + +class ShardedWorkerHandlingConfig: + instances: List[str] + def __init__(self, instances: List[str]) -> None: ... + def should_handle(self, instance_name: str, key: str) -> bool: ... diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 7782ab4c9d..82ff9664de 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -13,42 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from hashlib import sha256 -from typing import List, Optional +from typing import Optional -import attr from netaddr import IPSet -from ._base import Config, ConfigError - - -@attr.s -class ShardedFederationSendingConfig: - """Algorithm for choosing which federation sender instance is responsible - for which destionation host. - """ - - instances = attr.ib(type=List[str]) - - def should_send_to(self, instance_name: str, destination: str) -> bool: - """Whether this instance is responsible for sending transcations for - the given host. - """ - - # If multiple federation senders are not defined we always return true. - if not self.instances or len(self.instances) == 1: - return True - - # We shard by taking the hash, modulo it by the number of federation - # senders and then checking whether this instance matches the instance - # at that index. - # - # (Technically this introduces some bias and is not entirely uniform, but - # since the hash is so large the bias is ridiculously small). 
- dest_hash = sha256(destination.encode("utf8")).digest() - dest_int = int.from_bytes(dest_hash, byteorder="little") - remainder = dest_int % (len(self.instances)) - return self.instances[remainder] == instance_name +from ._base import Config, ConfigError, ShardedWorkerHandlingConfig class FederationConfig(Config): @@ -61,7 +30,7 @@ class FederationConfig(Config): self.send_federation = config.get("send_federation", True) federation_sender_instances = config.get("federation_sender_instances") or [] - self.federation_shard_config = ShardedFederationSendingConfig( + self.federation_shard_config = ShardedWorkerHandlingConfig( federation_sender_instances ) diff --git a/synapse/config/push.py b/synapse/config/push.py index 6f2b3a7faa..a1f3752c8a 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import Config +from ._base import Config, ShardedWorkerHandlingConfig class PushConfig(Config): @@ -24,6 +24,9 @@ class PushConfig(Config): push_config = config.get("push", {}) self.push_include_content = push_config.get("include_content", True) + pusher_instances = config.get("pusher_instances") or [] + self.pusher_shard_config = ShardedWorkerHandlingConfig(pusher_instances) + # There was a a 'redact_content' setting but mistakenly read from the # 'email'section'. Check for the flag in the 'push' section, and log, # but do not honour it to avoid nasty surprises when people upgrade. diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 4b63a0755f..b328a4df09 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -197,7 +197,7 @@ class FederationSender(object): destinations = { d for d in destinations - if self._federation_shard_config.should_send_to( + if self._federation_shard_config.should_handle( self._instance_name, d ) } @@ -335,7 +335,7 @@ class FederationSender(object): d for d in domains if d != self.server_name - and self._federation_shard_config.should_send_to(self._instance_name, d) + and self._federation_shard_config.should_handle(self._instance_name, d) ] if not domains: return @@ -441,7 +441,7 @@ class FederationSender(object): for destination in destinations: if destination == self.server_name: continue - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): continue @@ -460,7 +460,7 @@ class FederationSender(object): if destination == self.server_name: continue - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): continue @@ -486,7 +486,7 @@ class FederationSender(object): logger.info("Not sending EDU to ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): return @@ -507,7 +507,7 @@ class FederationSender(object): edu: edu to send key: clobbering key for this edu """ - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, edu.destination ): return @@ -523,7 +523,7 @@ class FederationSender(object): logger.warning("Not sending device update to ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, 
destination ): return @@ -541,7 +541,7 @@ class FederationSender(object): logger.warning("Not waking up ourselves") return - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): return diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 6402136e8a..3436741783 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -78,7 +78,7 @@ class PerDestinationQueue(object): self._federation_shard_config = hs.config.federation.federation_shard_config self._should_send_on_this_instance = True - if not self._federation_shard_config.should_send_to( + if not self._federation_shard_config.should_handle( self._instance_name, destination ): # We don't raise an exception here to avoid taking out any other diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index f6a5458681..2456f12f46 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -15,13 +15,12 @@ # limitations under the License. import logging -from collections import defaultdict -from threading import Lock -from typing import Dict, Tuple, Union +from typing import TYPE_CHECKING, Dict, Union + +from prometheus_client import Gauge from twisted.internet import defer -from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException from synapse.push.emailpusher import EmailPusher @@ -29,9 +28,18 @@ from synapse.push.httppusher import HttpPusher from synapse.push.pusher import PusherFactory from synapse.util.async_helpers import concurrently_execute +if TYPE_CHECKING: + from synapse.server import HomeServer + + logger = logging.getLogger(__name__) +synapse_pushers = Gauge( + "synapse_pushers", "Number of active synapse pushers", ["kind", "app_id"] +) + + class PusherPool: """ The pusher pool. This is responsible for dispatching notifications of new events to @@ -47,36 +55,20 @@ class PusherPool: Pusher.on_new_receipts are not expected to return deferreds. """ - def __init__(self, _hs): - self.hs = _hs - self.pusher_factory = PusherFactory(_hs) - self._should_start_pushers = _hs.config.start_pushers + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.pusher_factory = PusherFactory(hs) + self._should_start_pushers = hs.config.start_pushers self.store = self.hs.get_datastore() self.clock = self.hs.get_clock() + # We shard the handling of push notifications by user ID. + self._pusher_shard_config = hs.config.push.pusher_shard_config + self._instance_name = hs.get_instance_name() + # map from user id to app_id:pushkey to pusher self.pushers = {} # type: Dict[str, Dict[str, Union[HttpPusher, EmailPusher]]] - # a lock for the pushers dict, since `count_pushers` is called from an different - # and we otherwise get concurrent modification errors - self._pushers_lock = Lock() - - def count_pushers(): - results = defaultdict(int) # type: Dict[Tuple[str, str], int] - with self._pushers_lock: - for pushers in self.pushers.values(): - for pusher in pushers.values(): - k = (type(pusher).__name__, pusher.app_id) - results[k] += 1 - return results - - LaterGauge( - name="synapse_pushers", - desc="the number of active pushers", - labels=["kind", "app_id"], - caller=count_pushers, - ) - def start(self): """Starts the pushers off in a background process. 
""" @@ -104,6 +96,7 @@ class PusherPool: Returns: Deferred[EmailPusher|HttpPusher] """ + time_now_msec = self.clock.time_msec() # we try to create the pusher just to validate the config: it @@ -176,6 +169,9 @@ class PusherPool: access_tokens (Iterable[int]): access token *ids* to remove pushers for """ + if not self._pusher_shard_config.should_handle(self._instance_name, user_id): + return + tokens = set(access_tokens) for p in (yield self.store.get_pushers_by_user_id(user_id)): if p["access_token"] in tokens: @@ -237,6 +233,9 @@ class PusherPool: if not self._should_start_pushers: return + if not self._pusher_shard_config.should_handle(self._instance_name, user_id): + return + resultlist = yield self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) pusher_dict = None @@ -275,6 +274,11 @@ class PusherPool: Returns: Deferred[EmailPusher|HttpPusher] """ + if not self._pusher_shard_config.should_handle( + self._instance_name, pusherdict["user_name"] + ): + return + try: p = self.pusher_factory.create_pusher(pusherdict) except PusherConfigException as e: @@ -298,11 +302,12 @@ class PusherPool: appid_pushkey = "%s:%s" % (pusherdict["app_id"], pusherdict["pushkey"]) - with self._pushers_lock: - byuser = self.pushers.setdefault(pusherdict["user_name"], {}) - if appid_pushkey in byuser: - byuser[appid_pushkey].on_stop() - byuser[appid_pushkey] = p + byuser = self.pushers.setdefault(pusherdict["user_name"], {}) + if appid_pushkey in byuser: + byuser[appid_pushkey].on_stop() + byuser[appid_pushkey] = p + + synapse_pushers.labels(type(p).__name__, p.app_id).inc() # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to @@ -330,9 +335,10 @@ class PusherPool: if appid_pushkey in byuser: logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) - byuser[appid_pushkey].on_stop() - with self._pushers_lock: - del byuser[appid_pushkey] + pusher = byuser.pop(appid_pushkey) + pusher.on_stop() + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() yield self.store.delete_pusher_by_app_id_pushkey_user_id( app_id, pushkey, user_id diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py new file mode 100644 index 0000000000..2bdc6edbb1 --- /dev/null +++ b/tests/replication/test_pusher_shard.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging + +from mock import Mock + +from twisted.internet import defer + +from synapse.rest import admin +from synapse.rest.client.v1 import login, room + +from tests.replication._base import BaseMultiWorkerStreamTestCase + +logger = logging.getLogger(__name__) + + +class PusherShardTestCase(BaseMultiWorkerStreamTestCase): + """Checks pusher sharding works + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + # Register a user who sends a message that we'll get notified about + self.other_user_id = self.register_user("otheruser", "pass") + self.other_access_token = self.login("otheruser", "pass") + + def default_config(self): + conf = super().default_config() + conf["start_pushers"] = False + return conf + + def _create_pusher_and_send_msg(self, localpart): + # Create a user that will get push notifications + user_id = self.register_user(localpart, "pass") + access_token = self.login(localpart, "pass") + + # Register a pusher + user_dict = self.get_success( + self.hs.get_datastore().get_user_by_access_token(access_token) + ) + token_id = user_dict["token_id"] + + self.get_success( + self.hs.get_pusherpool().add_pusher( + user_id=user_id, + access_token=token_id, + kind="http", + app_id="m.http", + app_display_name="HTTP Push Notifications", + device_display_name="pushy push", + pushkey="a@example.com", + lang=None, + data={"url": "https://push.example.com/push"}, + ) + ) + + self.pump() + + # Create a room + room = self.helper.create_room_as(user_id, tok=access_token) + + # The other user joins + self.helper.join( + room=room, user=self.other_user_id, tok=self.other_access_token + ) + + # The other user sends some messages + response = self.helper.send(room, body="Hi!", tok=self.other_access_token) + event_id = response["event_id"] + + return event_id + + def test_send_push_single_worker(self): + """Test that registration works when using a pusher worker. + """ + http_client_mock = Mock(spec_set=["post_json_get_json"]) + http_client_mock.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + {"start_pushers": True}, + proxied_http_client=http_client_mock, + ) + + event_id = self._create_pusher_and_send_msg("user") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock.post_json_get_json.assert_called_once() + self.assertEqual( + http_client_mock.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) + + def test_send_push_multiple_workers(self): + """Test that registration works when using sharded pusher workers. 
+ """ + http_client_mock1 = Mock(spec_set=["post_json_get_json"]) + http_client_mock1.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + { + "start_pushers": True, + "worker_name": "pusher1", + "pusher_instances": ["pusher1", "pusher2"], + }, + proxied_http_client=http_client_mock1, + ) + + http_client_mock2 = Mock(spec_set=["post_json_get_json"]) + http_client_mock2.post_json_get_json.side_effect = lambda *_, **__: defer.succeed( + {} + ) + + self.make_worker_hs( + "synapse.app.pusher", + { + "start_pushers": True, + "worker_name": "pusher2", + "pusher_instances": ["pusher1", "pusher2"], + }, + proxied_http_client=http_client_mock2, + ) + + # We choose a user name that we know should go to pusher1. + event_id = self._create_pusher_and_send_msg("user2") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock1.post_json_get_json.assert_called_once() + http_client_mock2.post_json_get_json.assert_not_called() + self.assertEqual( + http_client_mock1.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock1.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) + + http_client_mock1.post_json_get_json.reset_mock() + http_client_mock2.post_json_get_json.reset_mock() + + # Now we choose a user name that we know should go to pusher2. + event_id = self._create_pusher_and_send_msg("user4") + + # Advance time a bit, so the pusher will register something has happened + self.pump() + + http_client_mock1.post_json_get_json.assert_not_called() + http_client_mock2.post_json_get_json.assert_called_once() + self.assertEqual( + http_client_mock2.post_json_get_json.call_args[0][0], + "https://push.example.com/push", + ) + self.assertEqual( + event_id, + http_client_mock2.post_json_get_json.call_args[0][1]["notification"][ + "event_id" + ], + ) -- cgit 1.5.1 From 2c1b9d676322fad8cb57c92f97f81393bcfcbe56 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Jul 2020 23:22:13 +0100 Subject: Update worker docs with recent enhancements (#7969) --- changelog.d/7969.doc | 1 + docs/sample_config.yaml | 54 +++ docs/synctl_workers.md | 32 ++ docs/workers.md | 459 +++++++++++---------- synapse/app/generic_worker.py | 6 +- synapse/config/federation.py | 12 +- synapse/config/homeserver.py | 2 +- synapse/config/logger.py | 2 +- synapse/config/redis.py | 23 +- synapse/config/workers.py | 49 ++- synapse/federation/send_queue.py | 2 +- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/per_destination_queue.py | 2 +- synapse/storage/data_stores/main/stream.py | 2 +- 14 files changed, 413 insertions(+), 235 deletions(-) create mode 100644 changelog.d/7969.doc create mode 100644 docs/synctl_workers.md (limited to 'synapse/config/federation.py') diff --git a/changelog.d/7969.doc b/changelog.d/7969.doc new file mode 100644 index 0000000000..68d2ed5fad --- /dev/null +++ b/changelog.d/7969.doc @@ -0,0 +1 @@ +Update worker docs with latest enhancements. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 3227294e0b..b21e36bb6d 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2398,3 +2398,57 @@ opentracing: # # logging: # false + + +## Workers ## + +# Disables sending of outbound federation transactions on the main process. +# Uncomment if using a federation sender worker. 
+# +#send_federation: false + +# It is possible to run multiple federation sender workers, in which case the +# work is balanced across them. +# +# This configuration must be shared between all federation sender workers, and if +# changed all federation sender workers must be stopped at the same time and then +# started, to ensure that all instances are running with the same config (otherwise +# events may be dropped). +# +#federation_sender_instances: +# - federation_sender1 + +# When using workers this should be a map from `worker_name` to the +# HTTP replication listener of the worker, if configured. +# +#instance_map: +# worker1: +# host: localhost +# port: 8034 + +# Experimental: When using workers you can define which workers should +# handle event persistence and typing notifications. Any worker +# specified here must also be in the `instance_map`. +# +#stream_writers: +# events: worker1 +# typing: worker1 + + +# Configuration for Redis when using workers. This *must* be enabled when +# using workers (unless using old style direct TCP configuration). +# +redis: + # Uncomment the below to enable Redis support. + # + #enabled: true + + # Optional host and port to use to connect to redis. Defaults to + # localhost and 6379 + # + #host: localhost + #port: 6379 + + # Optional password if configured on the Redis instance + # + #password: diff --git a/docs/synctl_workers.md b/docs/synctl_workers.md new file mode 100644 index 0000000000..8da4a31852 --- /dev/null +++ b/docs/synctl_workers.md @@ -0,0 +1,32 @@ +### Using synctl with workers + +If you want to use `synctl` to manage your synapse processes, you will need to +create an an additional configuration file for the main synapse process. That +configuration should look like this: + +```yaml +worker_app: synapse.app.homeserver +``` + +Additionally, each worker app must be configured with the name of a "pid file", +to which it will write its process ID when it starts. For example, for a +synchrotron, you might write: + +```yaml +worker_pid_file: /home/matrix/synapse/worker1.pid +``` + +Finally, to actually run your worker-based synapse, you must pass synctl the `-a` +commandline option to tell it to operate on all the worker configurations found +in the given directory, e.g.: + + synctl -a $CONFIG/workers start + +Currently one should always restart all workers when restarting or upgrading +synapse, unless you explicitly know it's safe not to. For instance, restarting +synapse without restarting all the synchrotrons may result in broken typing +notifications. + +To manipulate a specific worker, you pass the -w option to synctl: + + synctl -w $CONFIG/workers/worker1.yaml restart diff --git a/docs/workers.md b/docs/workers.md index f4cbbc0400..38bd758e57 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -16,69 +16,106 @@ workers only work with PostgreSQL-based Synapse deployments. SQLite should only be used for demo purposes and any admin considering workers should already be running PostgreSQL. -## Master/worker communication +## Main process/worker communication -The workers communicate with the master process via a Synapse-specific protocol -called 'replication' (analogous to MySQL- or Postgres-style database -replication) which feeds a stream of relevant data from the master to the -workers so they can be kept in sync with the master process and database state. 
+The processes communicate with each other via a Synapse-specific protocol called +'replication' (analogous to MySQL- or Postgres-style database replication) which +feeds streams of newly written data between processes so they can be kept in +sync with the database state. -Additionally, workers may make HTTP requests to the master, to send information -in the other direction. Typically this is used for operations which need to -wait for a reply - such as sending an event. +Additionally, processes may make HTTP requests to each other. Typically this is +used for operations which need to wait for a reply - such as sending an event. -## Configuration +As of Synapse v1.13.0, it is possible to configure Synapse to send replication +via a [Redis pub/sub channel](https://redis.io/topics/pubsub), and is now the +recommended way of configuring replication. This is an alternative to the old +direct TCP connections to the main process: rather than all the workers +connecting to the main process, all the workers and the main process connect to +Redis, which relays replication commands between processes. This can give a +significant cpu saving on the main process and will be a prerequisite for +upcoming performance improvements. + +(See the [Architectural diagram](#architectural-diagram) section at the end for +a visualisation of what this looks like) + + +## Setting up workers + +A Redis server is required to manage the communication between the processes. +(The older direct TCP connections are now deprecated.) The Redis server +should be installed following the normal procedure for your distribution (e.g. +`apt install redis-server` on Debian). It is safe to use an existing Redis +deployment if you have one. + +Once installed, check that Redis is running and accessible from the host running +Synapse, for example by executing `echo PING | nc -q1 localhost 6379` and seeing +a response of `+PONG`. + +The appropriate dependencies must also be installed for Synapse. If using a +virtualenv, these can be installed with: + +```sh +pip install matrix-synapse[redis] +``` + +Note that these dependencies are included when synapse is installed with `pip +install matrix-synapse[all]`. They are also included in the debian packages from +`matrix.org` and in the docker images at +https://hub.docker.com/r/matrixdotorg/synapse/. To make effective use of the workers, you will need to configure an HTTP reverse-proxy such as nginx or haproxy, which will direct incoming requests to -the correct worker, or to the main synapse instance. Note that this includes -requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md) +the correct worker, or to the main synapse instance. See [reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse proxy. -To enable workers, you need to add *two* replication listeners to the -main Synapse configuration file (`homeserver.yaml`). For example: +To enable workers you should create a configuration file for each worker +process. Each worker configuration file inherits the configuration of the shared +homeserver configuration file. You can then override configuration specific to +that worker, e.g. the HTTP listener that it provides (if any); logging +configuration; etc. You should minimise the number of overrides though to +maintain a usable config. + +Next you need to add both a HTTP replication listener and redis config to the +shared Synapse configuration file (`homeserver.yaml`). For example: ```yaml +# extend the existing `listeners` section. 
This defines the ports that the +# main process will listen on. listeners: - # The TCP replication port - - port: 9092 - bind_address: '127.0.0.1' - type: replication - # The HTTP replication port - port: 9093 bind_address: '127.0.0.1' type: http resources: - names: [replication] + +redis: + enabled: true ``` -Under **no circumstances** should these replication API listeners be exposed to -the public internet; they have no authentication and are unencrypted. +See the sample config for the full documentation of each option. -You should then create a set of configs for the various worker processes. Each -worker configuration file inherits the configuration of the main homeserver -configuration file. You can then override configuration specific to that -worker, e.g. the HTTP listener that it provides (if any); logging -configuration; etc. You should minimise the number of overrides though to -maintain a usable config. +Under **no circumstances** should the replication listener be exposed to the +public internet; it has no authentication and is unencrypted. In the config file for each worker, you must specify the type of worker -application (`worker_app`). The currently available worker applications are -listed below. You must also specify the replication endpoints that it should -talk to on the main synapse process. `worker_replication_host` should specify -the host of the main synapse, `worker_replication_port` should point to the TCP -replication listener port and `worker_replication_http_port` should point to -the HTTP replication port. +application (`worker_app`), and you should specify a unqiue name for the worker +(`worker_name`). The currently available worker applications are listed below. +You must also specify the HTTP replication endpoint that it should talk to on +the main synapse process. `worker_replication_host` should specify the host of +the main synapse and `worker_replication_http_port` should point to the HTTP +replication port. If the worker will handle HTTP requests then the +`worker_listeners` option should be set with a `http` listener, in the same way +as the `listeners` option in the shared config. For example: ```yaml -worker_app: synapse.app.synchrotron +worker_app: synapse.app.generic_worker +worker_name: worker1 -# The replication listener on the synapse to talk to. +# The replication listener on the main synapse process. worker_replication_host: 127.0.0.1 -worker_replication_port: 9092 worker_replication_http_port: 9093 worker_listeners: @@ -87,13 +124,14 @@ worker_listeners: resources: - names: - client + - federation -worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml +worker_log_config: /home/matrix/synapse/config/worker1_log_config.yaml ``` -...is a full configuration for a synchrotron worker instance, which will expose a -plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided -by the main synapse. +...is a full configuration for a generic worker instance, which will expose a +plain HTTP endpoint on port 8083 separately serving various endpoints, e.g. +`/sync`, which are listed below. Obviously you should configure your reverse-proxy to route the relevant endpoints to the worker (`localhost:8083` in the above example). @@ -102,127 +140,24 @@ Finally, you need to start your worker processes. This can be done with either `synctl` or your distribution's preferred service manager such as `systemd`. 
We recommend the use of `systemd` where available: for information on setting up `systemd` to start synapse workers, see -[systemd-with-workers](systemd-with-workers). To use `synctl`, see below. +[systemd-with-workers](systemd-with-workers). To use `synctl`, see +[synctl_workers.md](synctl_workers.md). -### **Experimental** support for replication over redis - -As of Synapse v1.13.0, it is possible to configure Synapse to send replication -via a [Redis pub/sub channel](https://redis.io/topics/pubsub). This is an -alternative to direct TCP connections to the master: rather than all the -workers connecting to the master, all the workers and the master connect to -Redis, which relays replication commands between processes. This can give a -significant cpu saving on the master and will be a prerequisite for upcoming -performance improvements. - -Note that this support is currently experimental; you may experience lost -messages and similar problems! It is strongly recommended that admins setting -up workers for the first time use direct TCP replication as above. - -To configure Synapse to use Redis: - -1. Install Redis following the normal procedure for your distribution - for - example, on Debian, `apt install redis-server`. (It is safe to use an - existing Redis deployment if you have one: we use a pub/sub stream named - according to the `server_name` of your synapse server.) -2. Check Redis is running and accessible: you should be able to `echo PING | nc -q1 - localhost 6379` and get a response of `+PONG`. -3. Install the python prerequisites. If you installed synapse into a - virtualenv, this can be done with: - ```sh - pip install matrix-synapse[redis] - ``` - The debian packages from matrix.org already include the required - dependencies. -4. Add config to the shared configuration (`homeserver.yaml`): - ```yaml - redis: - enabled: true - ``` - Optional parameters which can go alongside `enabled` are `host`, `port`, - `password`. Normally none of these are required. -5. Restart master and all workers. - -Once redis replication is in use, `worker_replication_port` is redundant and -can be removed from the worker configuration files. Similarly, the -configuration for the `listener` for the TCP replication port can be removed -from the main configuration file. Note that the HTTP replication port is -still required. - -### Using synctl - -If you want to use `synctl` to manage your synapse processes, you will need to -create an an additional configuration file for the master synapse process. That -configuration should look like this: - -```yaml -worker_app: synapse.app.homeserver -``` - -Additionally, each worker app must be configured with the name of a "pid file", -to which it will write its process ID when it starts. For example, for a -synchrotron, you might write: - -```yaml -worker_pid_file: /home/matrix/synapse/synchrotron.pid -``` - -Finally, to actually run your worker-based synapse, you must pass synctl the `-a` -commandline option to tell it to operate on all the worker configurations found -in the given directory, e.g.: - - synctl -a $CONFIG/workers start - -Currently one should always restart all workers when restarting or upgrading -synapse, unless you explicitly know it's safe not to. For instance, restarting -synapse without restarting all the synchrotrons may result in broken typing -notifications. 
- -To manipulate a specific worker, you pass the -w option to synctl: - - synctl -w $CONFIG/workers/synchrotron.yaml restart ## Available worker applications -### `synapse.app.pusher` - -Handles sending push notifications to sygnal and email. Doesn't handle any -REST endpoints itself, but you should set `start_pushers: False` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -### `synapse.app.synchrotron` +### `synapse.app.generic_worker` -The synchrotron handles `sync` requests from clients. In particular, it can -handle REST endpoints matching the following regular expressions: +This worker can handle API requests matching the following regular +expressions: + # Sync requests ^/_matrix/client/(v2_alpha|r0)/sync$ ^/_matrix/client/(api/v1|v2_alpha|r0)/events$ ^/_matrix/client/(api/v1|r0)/initialSync$ ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$ -The above endpoints should all be routed to the synchrotron worker by the -reverse-proxy configuration. - -It is possible to run multiple instances of the synchrotron to scale -horizontally. In this case the reverse-proxy should be configured to -load-balance across the instances, though it will be more efficient if all -requests from a particular user are routed to a single instance. Extracting -a userid from the access token is currently left as an exercise for the reader. - -### `synapse.app.appservice` - -Handles sending output traffic to Application Services. Doesn't handle any -REST endpoints itself, but you should set `notify_appservices: False` in the -shared configuration file to stop the main synapse sending these notifications. - -Note this worker cannot be load-balanced: only one instance should be active. - -### `synapse.app.federation_reader` - -Handles a subset of federation endpoints. 
In particular, it can handle REST -endpoints matching the following regular expressions: - + # Federation requests ^/_matrix/federation/v1/event/ ^/_matrix/federation/v1/state/ ^/_matrix/federation/v1/state_ids/ @@ -242,40 +177,145 @@ endpoints matching the following regular expressions: ^/_matrix/federation/v1/event_auth/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/user/devices/ - ^/_matrix/federation/v1/send/ ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query + # Inbound federation transaction request + ^/_matrix/federation/v1/send/ + + # Client API requests + ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ + ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ + ^/_matrix/client/versions$ + ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ + ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ + ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ + ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ + + # Registration/login requests + ^/_matrix/client/(api/v1|r0|unstable)/login$ + ^/_matrix/client/(r0|unstable)/register$ + ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$ + + # Event sending requests + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ + ^/_matrix/client/(api/v1|r0|unstable)/join/ + ^/_matrix/client/(api/v1|r0|unstable)/profile/ + + Additionally, the following REST endpoints can be handled for GET requests: ^/_matrix/federation/v1/groups/ -The above endpoints should all be routed to the federation_reader worker by the -reverse-proxy configuration. +Pagination requests can also be handled, but all requests for a given +room must be routed to the same instance. Additionally, care must be taken to +ensure that the purge history admin API is not used while pagination requests +for the room are in flight: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ + +Note that a HTTP listener with `client` and `federation` resources must be +configured in the `worker_listeners` option in the worker config. + + +#### Load balancing + +It is possible to run multiple instances of this worker app, with incoming requests +being load-balanced between them by the reverse-proxy. However, different endpoints +have different characteristics and so admins +may wish to run multiple groups of workers handling different endpoints so that +load balancing can be done in different ways. + +For `/sync` and `/initialSync` requests it will be more efficient if all +requests from a particular user are routed to a single instance. Extracting a +user ID from the access token or `Authorization` header is currently left as an +exercise for the reader. Admins may additionally wish to separate out `/sync` +requests that have a `since` query parameter from those that don't (and +`/initialSync`), as requests that don't are known as "initial sync" that happens +when a user logs in on a new device and can be *very* resource intensive, so +isolating these requests will stop them from interfering with other users ongoing +syncs. 
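
As a toy illustration of the affinity idea (not something provided by this patch), the same hash-modulo trick used for worker sharding can be applied to the `Authorization` header at the routing layer, so that requests bearing the same access token always land on the same sync worker. In practice this would normally be done in the reverse proxy itself; the worker names and token below are assumptions:

```python
from hashlib import sha256
from typing import List


def pick_sync_worker(authorization_header: str, workers: List[str]) -> str:
    """Toy affinity routing: hash the raw Authorization header so the same
    access token is always sent to the same sync worker."""
    digest = sha256(authorization_header.encode("utf8")).digest()
    return workers[int.from_bytes(digest, byteorder="little") % len(workers)]


# Hypothetical worker names and token, for illustration only.
sync_workers = ["sync1", "sync2", "sync3"]
print(pick_sync_worker("Bearer syt_example_token", sync_workers))
```
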
+ +Federation and client requests can be balanced via simple round robin. -The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single -instance. +The inbound federation transaction request `^/_matrix/federation/v1/send/` +should be balanced by source IP so that transactions from the same remote server +go to the same process. -Note that `federation` must be added to the listener resources in the worker config: +Registration/login requests can be handled separately purely to help ensure that +unexpected load doesn't affect new logins and sign ups. + +Finally, event sending requests can be balanced by the room ID in the URI (or +the full URI, or even just round robin), the room ID is the path component after +`/rooms/`. If there is a large bridge connected that is sending or may send lots +of events, then a dedicated set of workers can be provisioned to limit the +effects of bursts of events from that bridge on events sent by normal users. + +#### Stream writers + +Additionally, there is *experimental* support for moving writing of specific +streams (such as events) off of the main process to a particular worker. (This +is only supported with Redis-based replication.) + +Currently support streams are `events` and `typing`. + +To enable this, the worker must have a HTTP replication listener configured, +have a `worker_name` and be listed in the `instance_map` config. For example to +move event persistence off to a dedicated worker, the shared configuration would +include: ```yaml -worker_app: synapse.app.federation_reader -... -worker_listeners: - - type: http - port: - resources: - - names: - - federation +instance_map: + event_persister1: + host: localhost + port: 8034 + +streams_writers: + events: event_persister1 ``` + +### `synapse.app.pusher` + +Handles sending push notifications to sygnal and email. Doesn't handle any +REST endpoints itself, but you should set `start_pushers: False` in the +shared configuration file to stop the main synapse sending push notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + +### `synapse.app.appservice` + +Handles sending output traffic to Application Services. Doesn't handle any +REST endpoints itself, but you should set `notify_appservices: False` in the +shared configuration file to stop the main synapse sending appservice notifications. + +Note this worker cannot be load-balanced: only one instance should be active. + + ### `synapse.app.federation_sender` Handles sending federation traffic to other servers. Doesn't handle any REST endpoints itself, but you should set `send_federation: False` in the shared configuration file to stop the main synapse sending this traffic. -Note this worker cannot be load-balanced: only one instance should be active. +If running multiple federation senders then you must list each +instance in the `federation_sender_instances` option by their `worker_name`. +All instances must be stopped and started when adding or removing instances. +For example: + +```yaml +federation_sender_instances: + - federation_sender1 + - federation_sender2 +``` ### `synapse.app.media_repository` @@ -314,46 +354,6 @@ and you must configure a single instance to run the background tasks, e.g.: media_instance_running_background_jobs: "media-repository-1" ``` -### `synapse.app.client_reader` - -Handles client API endpoints. 
It can handle REST endpoints matching the -following regular expressions: - - ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ - ^/_matrix/client/(api/v1|r0|unstable)/login$ - ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ - ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ - ^/_matrix/client/versions$ - ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ - ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$ - ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/ - -Additionally, the following REST endpoints can be handled for GET requests: - - ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/groups/.*$ - ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/account_data/ - ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/rooms/[^/]*/account_data/ - -Additionally, the following REST endpoints can be handled, but all requests must -be routed to the same instance: - - ^/_matrix/client/(r0|unstable)/register$ - ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$ - -Pagination requests can also be handled, but all requests with the same path -room must be routed to the same instance. Additionally, care must be taken to -ensure that the purge history admin API is not used while pagination requests -for the room are in flight: - - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$ - ### `synapse.app.user_dir` Handles searches in the user directory. It can handle REST endpoints matching @@ -388,15 +388,48 @@ file. For example: worker_main_http_uri: http://127.0.0.1:8008 -### `synapse.app.event_creator` +### Historical apps -Handles some event creation. It can handle REST endpoints matching: +*Note:* Historically there used to be more apps, however they have been +amalgamated into a single `synapse.app.generic_worker` app. The remaining apps +are ones that do specific processing unrelated to requests, e.g. the `pusher` +that handles sending out push notifications for new events. The intention is for +all these to be folded into the `generic_worker` app and to use config to define +which processes handle the various proccessing such as push notifications. - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/ - ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$ - ^/_matrix/client/(api/v1|r0|unstable)/join/ - ^/_matrix/client/(api/v1|r0|unstable)/profile/ -It will create events locally and then send them on to the main synapse -instance to be persisted and handled. 
+## Architectural diagram + +The following shows an example setup using Redis and a reverse proxy: + +``` + Clients & Federation + | + v + +-----------+ + | | + | Reverse | + | Proxy | + | | + +-----------+ + | | | + | | | HTTP requests + +-------------------+ | +-----------+ + | +---+ | + | | | + v v v ++--------------+ +--------------+ +--------------+ +--------------+ +| Main | | Generic | | Generic | | Event | +| Process | | Worker 1 | | Worker 2 | | Persister | ++--------------+ +--------------+ +--------------+ +--------------+ + ^ ^ | ^ | | ^ | ^ ^ + | | | | | | | | | | + | | | | | HTTP | | | | | + | +----------+<--|---|---------+ | | | | + | | +-------------|-->+----------+ | + | | | | + | | | | + v v v v +==================================================================== + Redis pub/sub channel +``` diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index ec0dbddb8c..5841454c9a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -940,7 +940,7 @@ def start(config_options): config.server.update_user_directory = False if config.worker_app == "synapse.app.federation_sender": - if config.federation.send_federation: + if config.worker.send_federation: sys.stderr.write( "\nThe send_federation must be disabled in the main synapse process" "\nbefore they can be run in a separate worker." @@ -950,10 +950,10 @@ def start(config_options): sys.exit(1) # Force the pushers to start since they will be disabled in the main config - config.federation.send_federation = True + config.worker.send_federation = True else: # For other worker types we force this to off. - config.federation.send_federation = False + config.worker.send_federation = False synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 82ff9664de..2c77d8f85b 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -17,23 +17,13 @@ from typing import Optional from netaddr import IPSet -from ._base import Config, ConfigError, ShardedWorkerHandlingConfig +from ._base import Config, ConfigError class FederationConfig(Config): section = "federation" def read_config(self, config, **kwargs): - # Whether to send federation traffic out in this process. 
This only - # applies to some federation traffic, and so shouldn't be used to - # "disable" federation - self.send_federation = config.get("send_federation", True) - - federation_sender_instances = config.get("federation_sender_instances") or [] - self.federation_shard_config = ShardedWorkerHandlingConfig( - federation_sender_instances - ) - # FIXME: federation_domain_whitelist needs sytests self.federation_domain_whitelist = None # type: Optional[dict] federation_domain_whitelist = config.get("federation_domain_whitelist", None) diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 8e93d31394..556e291495 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -78,7 +78,6 @@ class HomeServerConfig(RootConfig): JWTConfig, PasswordConfig, EmailConfig, - WorkerConfig, PasswordAuthProviderConfig, PushConfig, SpamCheckerConfig, @@ -91,6 +90,7 @@ class HomeServerConfig(RootConfig): RoomDirectoryConfig, ThirdPartyRulesConfig, TracerConfig, + WorkerConfig, RedisConfig, FederationConfig, ] diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 49f6c32beb..dd775a97e8 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -214,7 +214,7 @@ def setup_logging( Set up the logging subsystem. Args: - config (LoggingConfig | synapse.config.workers.WorkerConfig): + config (LoggingConfig | synapse.config.worker.WorkerConfig): configuration data use_worker_options (bool): True to use the 'worker_log_config' option diff --git a/synapse/config/redis.py b/synapse/config/redis.py index d5d3ca1c9e..1373302335 100644 --- a/synapse/config/redis.py +++ b/synapse/config/redis.py @@ -21,7 +21,7 @@ class RedisConfig(Config): section = "redis" def read_config(self, config, **kwargs): - redis_config = config.get("redis", {}) + redis_config = config.get("redis") or {} self.redis_enabled = redis_config.get("enabled", False) if not self.redis_enabled: @@ -32,3 +32,24 @@ class RedisConfig(Config): self.redis_host = redis_config.get("host", "localhost") self.redis_port = redis_config.get("port", 6379) self.redis_password = redis_config.get("password") + + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + # Configuration for Redis when using workers. This *must* be enabled when + # using workers (unless using old style direct TCP configuration). + # + redis: + # Uncomment the below to enable Redis support. + # + #enabled: true + + # Optional host and port to use to connect to redis. Defaults to + # localhost and 6379 + # + #host: localhost + #port: 6379 + + # Optional password if configured on the Redis instance + # + #password: + """ diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 2574cd3aa1..c784a71508 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -15,7 +15,7 @@ import attr -from ._base import Config, ConfigError +from ._base import Config, ConfigError, ShardedWorkerHandlingConfig from .server import ListenerConfig, parse_listener_def @@ -85,6 +85,16 @@ class WorkerConfig(Config): ) ) + # Whether to send federation traffic out in this process. 
This only + # applies to some federation traffic, and so shouldn't be used to + # "disable" federation + self.send_federation = config.get("send_federation", True) + + federation_sender_instances = config.get("federation_sender_instances") or [] + self.federation_shard_config = ShardedWorkerHandlingConfig( + federation_sender_instances + ) + # A map from instance name to host/port of their HTTP replication endpoint. instance_map = config.get("instance_map") or {} self.instance_map = { @@ -105,6 +115,43 @@ class WorkerConfig(Config): % (instance, stream) ) + def generate_config_section(self, config_dir_path, server_name, **kwargs): + return """\ + ## Workers ## + + # Disables sending of outbound federation transactions on the main process. + # Uncomment if using a federation sender worker. + # + #send_federation: false + + # It is possible to run multiple federation sender workers, in which case the + # work is balanced across them. + # + # This configuration must be shared between all federation sender workers, and if + # changed all federation sender workers must be stopped at the same time and then + # started, to ensure that all instances are running with the same config (otherwise + # events may be dropped). + # + #federation_sender_instances: + # - federation_sender1 + + # When using workers this should be a map from `worker_name` to the + # HTTP replication listener of the worker, if configured. + # + #instance_map: + # worker1: + # host: localhost + # port: 8034 + + # Experimental: When using workers you can define which workers should + # handle event persistence and typing notifications. Any worker + # specified here must also be in the `instance_map`. + # + #stream_writers: + # events: worker1 + # typing: worker1 + """ + def read_arguments(self, args): # We support a bunch of command line arguments that override options in # the config. A lot of these options have a worker_* prefix when running diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 4fc9ff92e5..2b0ab2dcbf 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -57,7 +57,7 @@ class FederationRemoteSendQueue(object): # We may have multiple federation sender instances, so we need to track # their positions separately. 
- self._sender_instances = hs.config.federation.federation_shard_config.instances + self._sender_instances = hs.config.worker.federation_shard_config.instances self._sender_positions = {} # Pending presence map user_id -> UserPresenceState diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index ba4ddd2370..6ae6522f87 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -70,7 +70,7 @@ class FederationSender(object): self._transaction_manager = TransactionManager(hs) self._instance_name = hs.get_instance_name() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config # map from destination to PerDestinationQueue self._per_destination_queues = {} # type: Dict[str, PerDestinationQueue] diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 3436741783..dd150f89a6 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -75,7 +75,7 @@ class PerDestinationQueue(object): self._store = hs.get_datastore() self._transaction_manager = transaction_manager self._instance_name = hs.get_instance_name() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config self._should_send_on_this_instance = True if not self._federation_shard_config.should_handle( diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 5e32c7aa1e..10d39b3699 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -255,7 +255,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self._instance_name = hs.get_instance_name() self._send_federation = hs.should_send_federation() - self._federation_shard_config = hs.config.federation.federation_shard_config + self._federation_shard_config = hs.config.worker.federation_shard_config # If we're a process that sends federation we may need to reset the # `federation_stream_position` table to match the current sharding -- cgit 1.5.1
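
A closing note on why the docs above insist that all federation senders be stopped and started together when `federation_sender_instances` changes: the hash-modulo assignment depends on the full instance list, so two workers running with different lists can disagree about which of them owns a destination. The sketch below (instance and destination names are illustrative, not from the patches) makes that visible by computing the assignment under an old and a new list:

```python
from hashlib import sha256
from typing import List


def pick_instance(instances: List[str], key: str) -> str:
    # Same scheme as ShardedWorkerHandlingConfig.should_handle.
    digest = sha256(key.encode("utf8")).digest()
    return instances[int.from_bytes(digest, byteorder="little") % len(instances)]


old = ["federation_sender1", "federation_sender2"]
new = ["federation_sender1", "federation_sender2", "federation_sender3"]

# Illustrative destination hosts, not taken from the patches.
for destination in ["matrix.org", "example.com", "chat.example.org", "a.test"]:
    before, after = pick_instance(old, destination), pick_instance(new, destination)
    print(f"{destination}: {before} -> {after} ({'moved' if before != after else 'stays'})")
```

While the two configurations coexist, a destination may be claimed by two instances at once (duplicate sends) or, if an instance was removed, by none of the running ones, which is the kind of inconsistency (dropped or duplicated transactions) that the coordinated restart is meant to avoid.
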