From 84d099ae1192af0f38d26f9a32e38bd4c0ad304e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 27 Jul 2020 14:10:53 +0100
Subject: Fix typing replication not being handled on master (#7959)

Handling of incoming typing stream updates from replication was not
hooked up on master, affecting setups where typing was handled on a
different worker.

This is really only a problem if the master process is also handling
sync requests, which is unlikely for those that are at the stage of
moving typing off.

The other observable effect is that if a worker restarts or a
replication connection drops then the typing worker will issue a
`POSITION typing`, triggering master process to try and stream *all*
typing updates from position 0.

Fixes #7907
---
 synapse/app/generic_worker.py | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'synapse/app/generic_worker.py')

diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index c1b76d827b..ec0dbddb8c 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -87,7 +87,6 @@ from synapse.replication.tcp.streams import (
     ReceiptsStream,
     TagAccountDataStream,
     ToDeviceStream,
-    TypingStream,
 )
 from synapse.rest.admin import register_servlets_for_media_repo
 from synapse.rest.client.v1 import events
@@ -644,7 +643,6 @@ class GenericWorkerReplicationHandler(ReplicationDataHandler):
         super(GenericWorkerReplicationHandler, self).__init__(hs)
 
         self.store = hs.get_datastore()
-        self.typing_handler = hs.get_typing_handler()
         self.presence_handler = hs.get_presence_handler()  # type: GenericWorkerPresence
         self.notifier = hs.get_notifier()
 
@@ -681,11 +679,6 @@ class GenericWorkerReplicationHandler(ReplicationDataHandler):
                 await self.pusher_pool.on_new_receipts(
                     token, token, {row.room_id for row in rows}
                 )
-            elif stream_name == TypingStream.NAME:
-                self.typing_handler.process_replication_rows(token, rows)
-                self.notifier.on_new_event(
-                    "typing_key", token, rooms=[row.room_id for row in rows]
-                )
             elif stream_name == ToDeviceStream.NAME:
                 entities = [row.entity for row in rows if row.entity.startswith("@")]
                 if entities:
-- 
cgit 1.5.1


From 2c1b9d676322fad8cb57c92f97f81393bcfcbe56 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 29 Jul 2020 23:22:13 +0100
Subject: Update worker docs with recent enhancements  (#7969)

---
 changelog.d/7969.doc                               |   1 +
 docs/sample_config.yaml                            |  54 +++
 docs/synctl_workers.md                             |  32 ++
 docs/workers.md                                    | 459 +++++++++++----------
 synapse/app/generic_worker.py                      |   6 +-
 synapse/config/federation.py                       |  12 +-
 synapse/config/homeserver.py                       |   2 +-
 synapse/config/logger.py                           |   2 +-
 synapse/config/redis.py                            |  23 +-
 synapse/config/workers.py                          |  49 ++-
 synapse/federation/send_queue.py                   |   2 +-
 synapse/federation/sender/__init__.py              |   2 +-
 synapse/federation/sender/per_destination_queue.py |   2 +-
 synapse/storage/data_stores/main/stream.py         |   2 +-
 14 files changed, 413 insertions(+), 235 deletions(-)
 create mode 100644 changelog.d/7969.doc
 create mode 100644 docs/synctl_workers.md

(limited to 'synapse/app/generic_worker.py')

diff --git a/changelog.d/7969.doc b/changelog.d/7969.doc
new file mode 100644
index 0000000000..68d2ed5fad
--- /dev/null
+++ b/changelog.d/7969.doc
@@ -0,0 +1 @@
+Update worker docs with latest enhancements.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 3227294e0b..b21e36bb6d 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -2398,3 +2398,57 @@ opentracing:
     #
     #  logging:
     #    false
+
+
+## Workers ##
+
+# Disables sending of outbound federation transactions on the main process.
+# Uncomment if using a federation sender worker.
+#
+#send_federation: false
+
+# It is possible to run multiple federation sender workers, in which case the
+# work is balanced across them.
+#
+# This configuration must be shared between all federation sender workers, and if
+# changed all federation sender workers must be stopped at the same time and then
+# started, to ensure that all instances are running with the same config (otherwise
+# events may be dropped).
+#
+#federation_sender_instances:
+#  - federation_sender1
+
+# When using workers this should be a map from `worker_name` to the
+# HTTP replication listener of the worker, if configured.
+#
+#instance_map:
+#  worker1:
+#    host: localhost
+#    port: 8034
+
+# Experimental: When using workers you can define which workers should
+# handle event persistence and typing notifications. Any worker
+# specified here must also be in the `instance_map`.
+#
+#stream_writers:
+#  events: worker1
+#  typing: worker1
+
+
+# Configuration for Redis when using workers. This *must* be enabled when
+# using workers (unless using old style direct TCP configuration).
+#
+redis:
+  # Uncomment the below to enable Redis support.
+  #
+  #enabled: true
+
+  # Optional host and port to use to connect to redis. Defaults to
+  # localhost and 6379
+  #
+  #host: localhost
+  #port: 6379
+
+  # Optional password if configured on the Redis instance
+  #
+  #password: <secret_password>
diff --git a/docs/synctl_workers.md b/docs/synctl_workers.md
new file mode 100644
index 0000000000..8da4a31852
--- /dev/null
+++ b/docs/synctl_workers.md
@@ -0,0 +1,32 @@
+### Using synctl with workers
+
+If you want to use `synctl` to manage your synapse processes, you will need to
+create an additional configuration file for the main synapse process. That
+configuration should look like this:
+
+```yaml
+worker_app: synapse.app.homeserver
+```
+
+Additionally, each worker app must be configured with the name of a "pid file",
+to which it will write its process ID when it starts. For example, for a
+synchrotron, you might write:
+
+```yaml
+worker_pid_file: /home/matrix/synapse/worker1.pid
+```
+
+Finally, to actually run your worker-based synapse, you must pass synctl the `-a`
+commandline option to tell it to operate on all the worker configurations found
+in the given directory, e.g.:
+
+    synctl -a $CONFIG/workers start
+
+Currently one should always restart all workers when restarting or upgrading
+synapse, unless you explicitly know it's safe not to.  For instance, restarting
+synapse without restarting all the synchrotrons may result in broken typing
+notifications.
+
+To manipulate a specific worker, you pass the -w option to synctl:
+
+    synctl -w $CONFIG/workers/worker1.yaml restart
diff --git a/docs/workers.md b/docs/workers.md
index f4cbbc0400..38bd758e57 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -16,69 +16,106 @@ workers only work with PostgreSQL-based Synapse deployments. SQLite should only
 be used for demo purposes and any admin considering workers should already be
 running PostgreSQL.
 
-## Master/worker communication
+## Main process/worker communication
 
-The workers communicate with the master process via a Synapse-specific protocol
-called 'replication' (analogous to MySQL- or Postgres-style database
-replication) which feeds a stream of relevant data from the master to the
-workers so they can be kept in sync with the master process and database state.
+The processes communicate with each other via a Synapse-specific protocol called
+'replication' (analogous to MySQL- or Postgres-style database replication) which
+feeds streams of newly written data between processes so they can be kept in
+sync with the database state.
 
-Additionally, workers may make HTTP requests to the master, to send information
-in the other direction. Typically this is used for operations which need to
-wait for a reply - such as sending an event.
+Additionally, processes may make HTTP requests to each other. Typically this is
+used for operations which need to wait for a reply - such as sending an event.
 
-## Configuration
+As of Synapse v1.13.0, it is possible to configure Synapse to send replication
+via a [Redis pub/sub channel](https://redis.io/topics/pubsub), which is now the
+recommended way of configuring replication. This is an alternative to the old
+direct TCP connections to the main process: rather than all the workers
+connecting to the main process, all the workers and the main process connect to
+Redis, which relays replication commands between processes. This can give a
+significant cpu saving on the main process and will be a prerequisite for
+upcoming performance improvements.
+
+(See the [Architectural diagram](#architectural-diagram) section at the end for
+a visualisation of what this looks like)
+
+
+## Setting up workers
+
+A Redis server is required to manage the communication between the processes.
+(The older direct TCP connections are now deprecated.) The Redis server
+should be installed following the normal procedure for your distribution (e.g.
+`apt install redis-server` on Debian). It is safe to use an existing Redis
+deployment if you have one.
+
+Once installed, check that Redis is running and accessible from the host running
+Synapse, for example by executing `echo PING | nc -q1 localhost 6379` and seeing
+a response of `+PONG`.
+
+The appropriate dependencies must also be installed for Synapse. If using a
+virtualenv, these can be installed with:
+
+```sh
+pip install matrix-synapse[redis]
+```
+
+Note that these dependencies are included when synapse is installed with `pip
+install matrix-synapse[all]`. They are also included in the debian packages from
+`matrix.org` and in the docker images at
+https://hub.docker.com/r/matrixdotorg/synapse/.
 
 To make effective use of the workers, you will need to configure an HTTP
 reverse-proxy such as nginx or haproxy, which will direct incoming requests to
-the correct worker, or to the main synapse instance. Note that this includes
-requests made to the federation port. See [reverse_proxy.md](reverse_proxy.md)
+the correct worker, or to the main synapse instance. See [reverse_proxy.md](reverse_proxy.md)
 for information on setting up a reverse proxy.
 
-To enable workers, you need to add *two* replication listeners to the
-main Synapse configuration file (`homeserver.yaml`). For example:
+To enable workers you should create a configuration file for each worker
+process. Each worker configuration file inherits the configuration of the shared
+homeserver configuration file.  You can then override configuration specific to
+that worker, e.g. the HTTP listener that it provides (if any); logging
+configuration; etc.  You should minimise the number of overrides though to
+maintain a usable config.
+
+Next you need to add both a HTTP replication listener and redis config to the
+shared Synapse configuration file (`homeserver.yaml`). For example:
 
 ```yaml
+# extend the existing `listeners` section. This defines the ports that the
+# main process will listen on.
 listeners:
-  # The TCP replication port
-  - port: 9092
-    bind_address: '127.0.0.1'
-    type: replication
-
   # The HTTP replication port
   - port: 9093
     bind_address: '127.0.0.1'
     type: http
     resources:
      - names: [replication]
+
+redis:
+    enabled: true
 ```
 
-Under **no circumstances** should these replication API listeners be exposed to
-the public internet; they have no authentication and are unencrypted.
+See the sample config for the full documentation of each option.
 
-You should then create a set of configs for the various worker processes.  Each
-worker configuration file inherits the configuration of the main homeserver
-configuration file.  You can then override configuration specific to that
-worker, e.g. the HTTP listener that it provides (if any); logging
-configuration; etc.  You should minimise the number of overrides though to
-maintain a usable config.
+Under **no circumstances** should the replication listener be exposed to the
+public internet; it has no authentication and is unencrypted.
 
 In the config file for each worker, you must specify the type of worker
-application (`worker_app`). The currently available worker applications are
-listed below. You must also specify the replication endpoints that it should
-talk to on the main synapse process.  `worker_replication_host` should specify
-the host of the main synapse, `worker_replication_port` should point to the TCP
-replication listener port and `worker_replication_http_port` should point to
-the HTTP replication port.
+application (`worker_app`), and you should specify a unique name for the worker
+(`worker_name`). The currently available worker applications are listed below.
+You must also specify the HTTP replication endpoint that it should talk to on
+the main synapse process.  `worker_replication_host` should specify the host of
+the main synapse and `worker_replication_http_port` should point to the HTTP
+replication port. If the worker will handle HTTP requests then the
+`worker_listeners` option should be set with a `http` listener, in the same way
+as the `listeners` option in the shared config.
 
 For example:
 
 ```yaml
-worker_app: synapse.app.synchrotron
+worker_app: synapse.app.generic_worker
+worker_name: worker1
 
-# The replication listener on the synapse to talk to.
+# The replication listener on the main synapse process.
 worker_replication_host: 127.0.0.1
-worker_replication_port: 9092
 worker_replication_http_port: 9093
 
 worker_listeners:
@@ -87,13 +124,14 @@ worker_listeners:
    resources:
      - names:
        - client
+       - federation
 
-worker_log_config: /home/matrix/synapse/config/synchrotron_log_config.yaml
+worker_log_config: /home/matrix/synapse/config/worker1_log_config.yaml
 ```
 
-...is a full configuration for a synchrotron worker instance, which will expose a
-plain HTTP `/sync` endpoint on port 8083 separately from the `/sync` endpoint provided
-by the main synapse.
+...is a full configuration for a generic worker instance, which will expose a
+plain HTTP endpoint on port 8083 separately serving various endpoints, e.g.
+`/sync`, which are listed below.
 
 Obviously you should configure your reverse-proxy to route the relevant
 endpoints to the worker (`localhost:8083` in the above example).
@@ -102,127 +140,24 @@ Finally, you need to start your worker processes. This can be done with either
 `synctl` or your distribution's preferred service manager such as `systemd`. We
 recommend the use of `systemd` where available: for information on setting up
 `systemd` to start synapse workers, see
-[systemd-with-workers](systemd-with-workers). To use `synctl`, see below.
+[systemd-with-workers](systemd-with-workers). To use `synctl`, see
+[synctl_workers.md](synctl_workers.md).
 
-### **Experimental** support for replication over redis
-
-As of Synapse v1.13.0, it is possible to configure Synapse to send replication
-via a [Redis pub/sub channel](https://redis.io/topics/pubsub). This is an
-alternative to direct TCP connections to the master: rather than all the
-workers connecting to the master, all the workers and the master connect to
-Redis, which relays replication commands between processes. This can give a
-significant cpu saving on the master and will be a prerequisite for upcoming
-performance improvements.
-
-Note that this support is currently experimental; you may experience lost
-messages and similar problems! It is strongly recommended that admins setting
-up workers for the first time use direct TCP replication as above.
-
-To configure Synapse to use Redis:
-
-1. Install Redis following the normal procedure for your distribution - for
-   example, on Debian, `apt install redis-server`. (It is safe to use an
-   existing Redis deployment if you have one: we use a pub/sub stream named
-   according to the `server_name` of your synapse server.)
-2. Check Redis is running and accessible: you should be able to `echo PING | nc -q1
-   localhost 6379` and get a response of `+PONG`.
-3. Install the python prerequisites. If you installed synapse into a
-   virtualenv, this can be done with:
-   ```sh
-   pip install matrix-synapse[redis]
-   ```
-   The debian packages from matrix.org already include the required
-   dependencies.
-4. Add config to the shared configuration (`homeserver.yaml`):
-    ```yaml
-    redis:
-      enabled: true
-    ```
-    Optional parameters which can go alongside `enabled` are `host`, `port`,
-    `password`. Normally none of these are required.
-5. Restart master and all workers.
-
-Once redis replication is in use, `worker_replication_port` is redundant and
-can be removed from the worker configuration files. Similarly, the
-configuration for the `listener` for the TCP replication port can be removed
-from the main configuration file. Note that the HTTP replication port is
-still required.
-
-### Using synctl
-
-If you want to use `synctl` to manage your synapse processes, you will need to
-create an an additional configuration file for the master synapse process. That
-configuration should look like this:
-
-```yaml
-worker_app: synapse.app.homeserver
-```
-
-Additionally, each worker app must be configured with the name of a "pid file",
-to which it will write its process ID when it starts. For example, for a
-synchrotron, you might write:
-
-```yaml
-worker_pid_file: /home/matrix/synapse/synchrotron.pid
-```
-
-Finally, to actually run your worker-based synapse, you must pass synctl the `-a`
-commandline option to tell it to operate on all the worker configurations found
-in the given directory, e.g.:
-
-    synctl -a $CONFIG/workers start
-
-Currently one should always restart all workers when restarting or upgrading
-synapse, unless you explicitly know it's safe not to.  For instance, restarting
-synapse without restarting all the synchrotrons may result in broken typing
-notifications.
-
-To manipulate a specific worker, you pass the -w option to synctl:
-
-    synctl -w $CONFIG/workers/synchrotron.yaml restart
 
 ## Available worker applications
 
-### `synapse.app.pusher`
-
-Handles sending push notifications to sygnal and email. Doesn't handle any
-REST endpoints itself, but you should set `start_pushers: False` in the
-shared configuration file to stop the main synapse sending these notifications.
-
-Note this worker cannot be load-balanced: only one instance should be active.
-
-### `synapse.app.synchrotron`
+### `synapse.app.generic_worker`
 
-The synchrotron handles `sync` requests from clients. In particular, it can
-handle REST endpoints matching the following regular expressions:
+This worker can handle API requests matching the following regular
+expressions:
 
+    # Sync requests
     ^/_matrix/client/(v2_alpha|r0)/sync$
     ^/_matrix/client/(api/v1|v2_alpha|r0)/events$
     ^/_matrix/client/(api/v1|r0)/initialSync$
     ^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$
 
-The above endpoints should all be routed to the synchrotron worker by the
-reverse-proxy configuration.
-
-It is possible to run multiple instances of the synchrotron to scale
-horizontally. In this case the reverse-proxy should be configured to
-load-balance across the instances, though it will be more efficient if all
-requests from a particular user are routed to a single instance. Extracting
-a userid from the access token is currently left as an exercise for the reader.
-
-### `synapse.app.appservice`
-
-Handles sending output traffic to Application Services. Doesn't handle any
-REST endpoints itself, but you should set `notify_appservices: False` in the
-shared configuration file to stop the main synapse sending these notifications.
-
-Note this worker cannot be load-balanced: only one instance should be active.
-
-### `synapse.app.federation_reader`
-
-Handles a subset of federation endpoints. In particular, it can handle REST
-endpoints matching the following regular expressions:
-
+    # Federation requests
     ^/_matrix/federation/v1/event/
     ^/_matrix/federation/v1/state/
     ^/_matrix/federation/v1/state_ids/
@@ -242,40 +177,145 @@ endpoints matching the following regular expressions:
     ^/_matrix/federation/v1/event_auth/
     ^/_matrix/federation/v1/exchange_third_party_invite/
     ^/_matrix/federation/v1/user/devices/
-    ^/_matrix/federation/v1/send/
     ^/_matrix/federation/v1/get_groups_publicised$
     ^/_matrix/key/v2/query
 
+    # Inbound federation transaction request
+    ^/_matrix/federation/v1/send/
+
+    # Client API requests
+    ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$
+    ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$
+    ^/_matrix/client/(api/v1|r0|unstable)/keys/query$
+    ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$
+    ^/_matrix/client/versions$
+    ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
+    ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$
+    ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$
+    ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/
+
+    # Registration/login requests
+    ^/_matrix/client/(api/v1|r0|unstable)/login$
+    ^/_matrix/client/(r0|unstable)/register$
+    ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$
+
+    # Event sending requests
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
+    ^/_matrix/client/(api/v1|r0|unstable)/join/
+    ^/_matrix/client/(api/v1|r0|unstable)/profile/
+
+
 Additionally, the following REST endpoints can be handled for GET requests:
 
     ^/_matrix/federation/v1/groups/
 
-The above endpoints should all be routed to the federation_reader worker by the
-reverse-proxy configuration.
+Pagination requests can also be handled, but all requests for a given
+room must be routed to the same instance. Additionally, care must be taken to
+ensure that the purge history admin API is not used while pagination requests
+for the room are in flight:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$
+
+Note that a HTTP listener with `client` and `federation` resources must be
+configured in the `worker_listeners` option in the worker config.
+
+
+#### Load balancing
+
+It is possible to run multiple instances of this worker app, with incoming requests
+being load-balanced between them by the reverse-proxy. However, different endpoints
+have different characteristics and so admins
+may wish to run multiple groups of workers handling different endpoints so that
+load balancing can be done in different ways.
+
+For `/sync` and `/initialSync` requests it will be more efficient if all
+requests from a particular user are routed to a single instance. Extracting a
+user ID from the access token or `Authorization` header is currently left as an
+exercise for the reader. Admins may additionally wish to separate out `/sync`
+requests that have a `since` query parameter from those that don't (and
+`/initialSync`), as requests that don't are known as "initial sync" that happens
+when a user logs in on a new device and can be *very* resource intensive, so
+isolating these requests will stop them from interfering with other users' ongoing
+syncs.
+
+Federation and client requests can be balanced via simple round robin.
 
-The `^/_matrix/federation/v1/send/` endpoint must only be handled by a single
-instance.
+The inbound federation transaction request `^/_matrix/federation/v1/send/`
+should be balanced by source IP so that transactions from the same remote server
+go to the same process.
 
-Note that `federation` must be added to the listener resources in the worker config:
+Registration/login requests can be handled separately purely to help ensure that
+unexpected load doesn't affect new logins and sign ups.
+
+Finally, event sending requests can be balanced by the room ID in the URI (or
+the full URI, or even just round robin); the room ID is the path component after
+`/rooms/`. If there is a large bridge connected that is sending or may send lots
+of events, then a dedicated set of workers can be provisioned to limit the
+effects of bursts of events from that bridge on events sent by normal users.
+
+#### Stream writers
+
+Additionally, there is *experimental* support for moving writing of specific
+streams (such as events) off of the main process to a particular worker. (This
+is only supported with Redis-based replication.)
+
+Currently supported streams are `events` and `typing`.
+
+To enable this, the worker must have a HTTP replication listener configured,
+have a `worker_name` and be listed in the `instance_map` config. For example to
+move event persistence off to a dedicated worker, the shared configuration would
+include:
 
 ```yaml
-worker_app: synapse.app.federation_reader
-...
-worker_listeners:
- - type: http
-   port: <port>
-   resources:
-     - names:
-       - federation
+instance_map:
+    event_persister1:
+        host: localhost
+        port: 8034
+
+stream_writers:
+    events: event_persister1
 ```
 
+
+### `synapse.app.pusher`
+
+Handles sending push notifications to sygnal and email. Doesn't handle any
+REST endpoints itself, but you should set `start_pushers: False` in the
+shared configuration file to stop the main synapse sending push notifications.
+
+Note this worker cannot be load-balanced: only one instance should be active.
+
+### `synapse.app.appservice`
+
+Handles sending output traffic to Application Services. Doesn't handle any
+REST endpoints itself, but you should set `notify_appservices: False` in the
+shared configuration file to stop the main synapse sending appservice notifications.
+
+Note this worker cannot be load-balanced: only one instance should be active.
+
+
 ### `synapse.app.federation_sender`
 
 Handles sending federation traffic to other servers. Doesn't handle any
 REST endpoints itself, but you should set `send_federation: False` in the
 shared configuration file to stop the main synapse sending this traffic.
 
-Note this worker cannot be load-balanced: only one instance should be active.
+If running multiple federation senders then you must list each
+instance in the `federation_sender_instances` option by their `worker_name`.
+All instances must be stopped and started when adding or removing instances.
+For example:
+
+```yaml
+federation_sender_instances:
+    - federation_sender1
+    - federation_sender2
+```
 
 ### `synapse.app.media_repository`
 
@@ -314,46 +354,6 @@ and you must configure a single instance to run the background tasks, e.g.:
     media_instance_running_background_jobs: "media-repository-1"
 ```
 
-### `synapse.app.client_reader`
-
-Handles client API endpoints. It can handle REST endpoints matching the
-following regular expressions:
-
-    ^/_matrix/client/(api/v1|r0|unstable)/publicRooms$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$
-    ^/_matrix/client/(api/v1|r0|unstable)/login$
-    ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$
-    ^/_matrix/client/(api/v1|r0|unstable)/keys/query$
-    ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$
-    ^/_matrix/client/versions$
-    ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
-    ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$
-    ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$
-    ^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/
-
-Additionally, the following REST endpoints can be handled for GET requests:
-
-    ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$
-    ^/_matrix/client/(api/v1|r0|unstable)/groups/.*$
-    ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/account_data/
-    ^/_matrix/client/(api/v1|r0|unstable)/user/[^/]*/rooms/[^/]*/account_data/
-
-Additionally, the following REST endpoints can be handled, but all requests must
-be routed to the same instance:
-
-    ^/_matrix/client/(r0|unstable)/register$
-    ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$
-
-Pagination requests can also be handled, but all requests with the same path
-room must be routed to the same instance. Additionally, care must be taken to
-ensure that the purge history admin API is not used while pagination requests
-for the room are in flight:
-
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/messages$
-
 ### `synapse.app.user_dir`
 
 Handles searches in the user directory. It can handle REST endpoints matching
@@ -388,15 +388,48 @@ file. For example:
 
     worker_main_http_uri: http://127.0.0.1:8008
 
-### `synapse.app.event_creator`
+### Historical apps
 
-Handles some event creation. It can handle REST endpoints matching:
+*Note:* Historically there used to be more apps, however they have been
+amalgamated into a single `synapse.app.generic_worker` app. The remaining apps
+are ones that do specific processing unrelated to requests, e.g. the `pusher`
+that handles sending out push notifications for new events. The intention is for
+all these to be folded into the `generic_worker` app and to use config to define
+which processes handle the various processing such as push notifications.
 
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/
-    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
-    ^/_matrix/client/(api/v1|r0|unstable)/join/
-    ^/_matrix/client/(api/v1|r0|unstable)/profile/
 
-It will create events locally and then send them on to the main synapse
-instance to be persisted and handled.
+## Architectural diagram
+
+The following shows an example setup using Redis and a reverse proxy:
+
+```
+                     Clients & Federation
+                              |
+                              v
+                        +-----------+
+                        |           |
+                        |  Reverse  |
+                        |  Proxy    |
+                        |           |
+                        +-----------+
+                            | | |
+                            | | | HTTP requests
+        +-------------------+ | +-----------+
+        |                 +---+             |
+        |                 |                 |
+        v                 v                 v
++--------------+  +--------------+  +--------------+  +--------------+
+|   Main       |  |   Generic    |  |   Generic    |  |  Event       |
+|   Process    |  |   Worker 1   |  |   Worker 2   |  |  Persister   |
++--------------+  +--------------+  +--------------+  +--------------+
+      ^    ^          |   ^   |         |   ^   |          ^    ^
+      |    |          |   |   |         |   |   |          |    |
+      |    |          |   |   |  HTTP   |   |   |          |    |
+      |    +----------+<--|---|---------+   |   |          |    |
+      |                   |   +-------------|-->+----------+    |
+      |                   |                 |                   |
+      |                   |                 |                   |
+      v                   v                 v                   v
+====================================================================
+                                                         Redis pub/sub channel
+```
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index ec0dbddb8c..5841454c9a 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -940,7 +940,7 @@ def start(config_options):
         config.server.update_user_directory = False
 
     if config.worker_app == "synapse.app.federation_sender":
-        if config.federation.send_federation:
+        if config.worker.send_federation:
             sys.stderr.write(
                 "\nThe send_federation must be disabled in the main synapse process"
                 "\nbefore they can be run in a separate worker."
@@ -950,10 +950,10 @@ def start(config_options):
             sys.exit(1)
 
         # Force the pushers to start since they will be disabled in the main config
-        config.federation.send_federation = True
+        config.worker.send_federation = True
     else:
         # For other worker types we force this to off.
-        config.federation.send_federation = False
+        config.worker.send_federation = False
 
     synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts
 
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 82ff9664de..2c77d8f85b 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -17,23 +17,13 @@ from typing import Optional
 
 from netaddr import IPSet
 
-from ._base import Config, ConfigError, ShardedWorkerHandlingConfig
+from ._base import Config, ConfigError
 
 
 class FederationConfig(Config):
     section = "federation"
 
     def read_config(self, config, **kwargs):
-        # Whether to send federation traffic out in this process. This only
-        # applies to some federation traffic, and so shouldn't be used to
-        # "disable" federation
-        self.send_federation = config.get("send_federation", True)
-
-        federation_sender_instances = config.get("federation_sender_instances") or []
-        self.federation_shard_config = ShardedWorkerHandlingConfig(
-            federation_sender_instances
-        )
-
         # FIXME: federation_domain_whitelist needs sytests
         self.federation_domain_whitelist = None  # type: Optional[dict]
         federation_domain_whitelist = config.get("federation_domain_whitelist", None)
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 8e93d31394..556e291495 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -78,7 +78,6 @@ class HomeServerConfig(RootConfig):
         JWTConfig,
         PasswordConfig,
         EmailConfig,
-        WorkerConfig,
         PasswordAuthProviderConfig,
         PushConfig,
         SpamCheckerConfig,
@@ -91,6 +90,7 @@ class HomeServerConfig(RootConfig):
         RoomDirectoryConfig,
         ThirdPartyRulesConfig,
         TracerConfig,
+        WorkerConfig,
         RedisConfig,
         FederationConfig,
     ]
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 49f6c32beb..dd775a97e8 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -214,7 +214,7 @@ def setup_logging(
     Set up the logging subsystem.
 
     Args:
-        config (LoggingConfig | synapse.config.workers.WorkerConfig):
+        config (LoggingConfig | synapse.config.worker.WorkerConfig):
             configuration data
 
         use_worker_options (bool): True to use the 'worker_log_config' option
diff --git a/synapse/config/redis.py b/synapse/config/redis.py
index d5d3ca1c9e..1373302335 100644
--- a/synapse/config/redis.py
+++ b/synapse/config/redis.py
@@ -21,7 +21,7 @@ class RedisConfig(Config):
     section = "redis"
 
     def read_config(self, config, **kwargs):
-        redis_config = config.get("redis", {})
+        redis_config = config.get("redis") or {}
         self.redis_enabled = redis_config.get("enabled", False)
 
         if not self.redis_enabled:
@@ -32,3 +32,24 @@ class RedisConfig(Config):
         self.redis_host = redis_config.get("host", "localhost")
         self.redis_port = redis_config.get("port", 6379)
         self.redis_password = redis_config.get("password")
+
+    def generate_config_section(self, config_dir_path, server_name, **kwargs):
+        return """\
+        # Configuration for Redis when using workers. This *must* be enabled when
+        # using workers (unless using old style direct TCP configuration).
+        #
+        redis:
+          # Uncomment the below to enable Redis support.
+          #
+          #enabled: true
+
+          # Optional host and port to use to connect to redis. Defaults to
+          # localhost and 6379
+          #
+          #host: localhost
+          #port: 6379
+
+          # Optional password if configured on the Redis instance
+          #
+          #password: <secret_password>
+        """
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 2574cd3aa1..c784a71508 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -15,7 +15,7 @@
 
 import attr
 
-from ._base import Config, ConfigError
+from ._base import Config, ConfigError, ShardedWorkerHandlingConfig
 from .server import ListenerConfig, parse_listener_def
 
 
@@ -85,6 +85,16 @@ class WorkerConfig(Config):
                 )
             )
 
+        # Whether to send federation traffic out in this process. This only
+        # applies to some federation traffic, and so shouldn't be used to
+        # "disable" federation
+        self.send_federation = config.get("send_federation", True)
+
+        federation_sender_instances = config.get("federation_sender_instances") or []
+        self.federation_shard_config = ShardedWorkerHandlingConfig(
+            federation_sender_instances
+        )
+
         # A map from instance name to host/port of their HTTP replication endpoint.
         instance_map = config.get("instance_map") or {}
         self.instance_map = {
@@ -105,6 +115,43 @@ class WorkerConfig(Config):
                     % (instance, stream)
                 )
 
+    def generate_config_section(self, config_dir_path, server_name, **kwargs):
+        return """\
+        ## Workers ##
+
+        # Disables sending of outbound federation transactions on the main process.
+        # Uncomment if using a federation sender worker.
+        #
+        #send_federation: false
+
+        # It is possible to run multiple federation sender workers, in which case the
+        # work is balanced across them.
+        #
+        # This configuration must be shared between all federation sender workers, and if
+        # changed all federation sender workers must be stopped at the same time and then
+        # started, to ensure that all instances are running with the same config (otherwise
+        # events may be dropped).
+        #
+        #federation_sender_instances:
+        #  - federation_sender1
+
+        # When using workers this should be a map from `worker_name` to the
+        # HTTP replication listener of the worker, if configured.
+        #
+        #instance_map:
+        #  worker1:
+        #    host: localhost
+        #    port: 8034
+
+        # Experimental: When using workers you can define which workers should
+        # handle event persistence and typing notifications. Any worker
+        # specified here must also be in the `instance_map`.
+        #
+        #stream_writers:
+        #  events: worker1
+        #  typing: worker1
+        """
+
     def read_arguments(self, args):
         # We support a bunch of command line arguments that override options in
         # the config. A lot of these options have a worker_* prefix when running
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 4fc9ff92e5..2b0ab2dcbf 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -57,7 +57,7 @@ class FederationRemoteSendQueue(object):
 
         # We may have multiple federation sender instances, so we need to track
         # their positions separately.
-        self._sender_instances = hs.config.federation.federation_shard_config.instances
+        self._sender_instances = hs.config.worker.federation_shard_config.instances
         self._sender_positions = {}
 
         # Pending presence map user_id -> UserPresenceState
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index ba4ddd2370..6ae6522f87 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -70,7 +70,7 @@ class FederationSender(object):
         self._transaction_manager = TransactionManager(hs)
 
         self._instance_name = hs.get_instance_name()
-        self._federation_shard_config = hs.config.federation.federation_shard_config
+        self._federation_shard_config = hs.config.worker.federation_shard_config
 
         # map from destination to PerDestinationQueue
         self._per_destination_queues = {}  # type: Dict[str, PerDestinationQueue]
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3436741783..dd150f89a6 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -75,7 +75,7 @@ class PerDestinationQueue(object):
         self._store = hs.get_datastore()
         self._transaction_manager = transaction_manager
         self._instance_name = hs.get_instance_name()
-        self._federation_shard_config = hs.config.federation.federation_shard_config
+        self._federation_shard_config = hs.config.worker.federation_shard_config
 
         self._should_send_on_this_instance = True
         if not self._federation_shard_config.should_handle(
diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py
index 5e32c7aa1e..10d39b3699 100644
--- a/synapse/storage/data_stores/main/stream.py
+++ b/synapse/storage/data_stores/main/stream.py
@@ -255,7 +255,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         self._instance_name = hs.get_instance_name()
         self._send_federation = hs.should_send_federation()
-        self._federation_shard_config = hs.config.federation.federation_shard_config
+        self._federation_shard_config = hs.config.worker.federation_shard_config
 
         # If we're a process that sends federation we may need to reset the
         # `federation_stream_position` table to match the current sharding
-- 
cgit 1.5.1


From 3950ae51ef3e7d0bdbe5002dbe8ef5c35a9e8eea Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 30 Jul 2020 06:56:55 -0400
Subject: Ensure that remove_pusher is always async (#7981)

---
 changelog.d/7981.misc         | 1 +
 synapse/app/generic_worker.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/7981.misc

(limited to 'synapse/app/generic_worker.py')

diff --git a/changelog.d/7981.misc b/changelog.d/7981.misc
new file mode 100644
index 0000000000..dfe4c03171
--- /dev/null
+++ b/changelog.d/7981.misc
@@ -0,0 +1 @@
+Convert various parts of the codebase to async/await.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index ec0dbddb8c..6e8130351c 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -628,7 +628,7 @@ class GenericWorkerServer(HomeServer):
 
         self.get_tcp_replication().start_replication(self)
 
-    def remove_pusher(self, app_id, push_key, user_id):
+    async def remove_pusher(self, app_id, push_key, user_id):
         self.get_tcp_replication().send_remove_pusher(app_id, push_key, user_id)
 
     def build_replication_data_handler(self):
-- 
cgit 1.5.1