9 files changed, 184 insertions, 68 deletions
diff --git a/docker/Dockerfile b/docker/Dockerfile
index b20951d4cf..7f8756e8a4 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -106,7 +106,13 @@ ENV CARGO_HOME=/cargo
 ENV PATH=/cargo/bin:/rust/bin:$PATH
 RUN mkdir /rust /cargo
 
-RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable
+RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable --profile minimal
+
+
+# arm64 builds consume a lot of memory if `CARGO_NET_GIT_FETCH_WITH_CLI` is not
+# set to true, so we expose it as a build-arg.
+ARG CARGO_NET_GIT_FETCH_WITH_CLI=false
+ENV CARGO_NET_GIT_FETCH_WITH_CLI=$CARGO_NET_GIT_FETCH_WITH_CLI
 
 # To speed up rebuilds, install all of the dependencies before we copy over
 # the whole synapse project, so that this layer in the Docker cache can be
@@ -121,7 +127,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 COPY synapse /synapse/synapse/
 COPY rust /synapse/rust/
 # ... and what we need to `pip install`.
-COPY pyproject.toml README.rst build_rust.py /synapse/
+COPY pyproject.toml README.rst build_rust.py Cargo.toml Cargo.lock /synapse/
 
 # Repeat of earlier build argument declaration, as this is a new build stage.
 ARG TEST_ONLY_IGNORE_POETRY_LOCKFILE
@@ -129,7 +135,9 @@ ARG TEST_ONLY_IGNORE_POETRY_LOCKFILE
 # Install the synapse package itself.
 # If we have populated requirements.txt, we don't install any dependencies
 # as we should already have those from the previous `pip install` step.
-RUN if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \
+RUN --mount=type=cache,target=/synapse/target,sharing=locked \
+  --mount=type=cache,target=${CARGO_HOME}/registry,sharing=locked \
+  if [ -z "$TEST_ONLY_IGNORE_POETRY_LOCKFILE" ]; then \
     pip install --prefix="/install" --no-deps --no-warn-script-location /synapse[all]; \
   else \
     pip install --prefix="/install" --no-warn-script-location /synapse[all]; \
diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv
index ca3a259081..73165f6f85 100644
--- a/docker/Dockerfile-dhvirtualenv
+++ b/docker/Dockerfile-dhvirtualenv
@@ -92,7 +92,7 @@ ENV CARGO_HOME=/cargo
 ENV PATH=/cargo/bin:/rust/bin:$PATH
 RUN mkdir /rust /cargo
 
-RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable
+RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable --profile minimal
 
 
 COPY --from=builder /dh-virtualenv_1.2.2-1_all.deb /
diff --git a/docker/Dockerfile-workers b/docker/Dockerfile-workers
index 003a1cc3bf..0c2d4f3047 100644
--- a/docker/Dockerfile-workers
+++ b/docker/Dockerfile-workers
@@ -40,7 +40,11 @@ FROM matrixdotorg/synapse:$SYNAPSE_VERSION
     COPY --from=deps_base /etc/nginx /etc/nginx
     RUN rm /etc/nginx/sites-enabled/default
     RUN mkdir /var/log/nginx /var/lib/nginx
-    RUN chown www-data /var/log/nginx /var/lib/nginx
+    RUN chown www-data /var/lib/nginx
+
+    # have nginx log to stderr/out
+    RUN ln -sf /dev/stdout /var/log/nginx/access.log
+    RUN ln -sf /dev/stderr /var/log/nginx/error.log
 
     # Copy Synapse worker, nginx and supervisord configuration template files
     COPY ./docker/conf-workers/* /conf/
diff --git a/docker/README.md b/docker/README.md
index 017f046c58..eda3221c23 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -241,4 +241,4 @@ healthcheck:
 
 Jemalloc is embedded in the image and will be used instead of the default allocator.
 You can read about jemalloc by reading the Synapse
-[README](https://github.com/matrix-org/synapse/blob/HEAD/README.rst#help-synapse-is-slow-and-eats-all-my-ram-cpu).
+[Admin FAQ](https://matrix-org.github.io/synapse/latest/usage/administration/admin_faq.html#help-synapse-is-slow-and-eats-all-my-ramcpu).
diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile
index 0e13722d1c..c0935c99a8 100644
--- a/docker/complement/Dockerfile
+++ b/docker/complement/Dockerfile
@@ -8,19 +8,15 @@
 
 ARG SYNAPSE_VERSION=latest
 
-# first of all, we create a base image with a postgres server and database,
-# which we can copy into the target image. For repeated rebuilds, this is
-# much faster than apt installing postgres each time.
-#
-# This trick only works because (a) the Synapse image happens to have all the
-# shared libraries that postgres wants, (b) we use a postgres image based on
-# the same debian version as Synapse's docker image (so the versions of the
-# shared libraries match).
-
-# now build the final image, based on the Synapse image.
-
 FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION
-    # copy the postgres installation over from the image we built above
+    # First of all, we copy postgres server from the official postgres image,
+    # since for repeated rebuilds, this is much faster than apt installing
+    # postgres each time.
+
+    # This trick only works because (a) the Synapse image happens to have all the
+    # shared libraries that postgres wants, (b) we use a postgres image based on
+    # the same debian version as Synapse's docker image (so the versions of the
+    # shared libraries match).
     RUN adduser --system --uid 999 postgres --home /var/lib/postgresql
     COPY --from=postgres:13-bullseye /usr/lib/postgresql /usr/lib/postgresql
     COPY --from=postgres:13-bullseye /usr/share/postgresql /usr/share/postgresql
@@ -28,7 +24,7 @@ FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION
     ENV PATH="${PATH}:/usr/lib/postgresql/13/bin"
     ENV PGDATA=/var/lib/postgresql/data
 
-    # initialise the database cluster in /var/lib/postgresql
+    # We also initialize the database at build time, rather than runtime, so that it's faster to spin up the image.
     RUN gosu postgres initdb --locale=C --encoding=UTF-8 --auth-host password
 
     # Configure a password and create a database for Synapse
diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh
index cc6482f763..49d79745b0 100755
--- a/docker/complement/conf/start_for_complement.sh
+++ b/docker/complement/conf/start_for_complement.sh
@@ -45,7 +45,12 @@ esac
 
 if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
   # Specify the workers to test with
-  export SYNAPSE_WORKER_TYPES="\
+  # Allow overriding by explicitly setting SYNAPSE_WORKER_TYPES outside, while still
+  # utilizing WORKERS=1 for backwards compatibility.
+  # -n True if the length of string is non-zero.
+  # -z True if the length of string is zero.
+  if [[ -z "$SYNAPSE_WORKER_TYPES" ]]; then
+    export SYNAPSE_WORKER_TYPES="\
       event_persister, \
       event_persister, \
       background_worker, \
@@ -57,9 +62,12 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
       federation_reader, \
       federation_sender, \
       synchrotron, \
+      client_reader, \
       appservice, \
       pusher"
 
+  fi
+  log "Workers requested: $SYNAPSE_WORKER_TYPES"
   # Improve startup times by using a launcher based on fork()
   export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1
 else
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 9e554a865e..883a87159c 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -12,6 +12,8 @@ trusted_key_servers: []
 enable_registration: true
 enable_registration_without_verification: true
 bcrypt_rounds: 4
+url_preview_enabled: true
+url_preview_ip_range_blacklist: []
 
 ## Registration ##
 
@@ -90,8 +92,6 @@ allow_device_name_lookup_over_federation: true
 ## Experimental Features ##
 
 experimental_features:
-  # Enable spaces support
-  spaces_enabled: true
   # Enable history backfilling support
   msc2716_enabled: true
   # server-side support for partial state in /send_join responses
@@ -102,6 +102,8 @@ experimental_features:
   {% endif %}
   # Enable jump to date endpoint
   msc3030_enabled: true
+  # Filtering /messages by relation type.
+  msc3874_enabled: true
 
 server_notices:
   system_mxid_localpart: _server
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 51583dc13d..c1e1544536 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -20,7 +20,7 @@
 #   * SYNAPSE_SERVER_NAME: The desired server_name of the homeserver.
 #   * SYNAPSE_REPORT_STATS: Whether to report stats.
 #   * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG
-#         below. Leave empty for no workers, or set to '*' for all possible workers.
+#         below. Leave empty for no workers.
 #   * SYNAPSE_AS_REGISTRATION_DIR: If specified, a directory in which .yaml and .yml files
 #         will be treated as Application Service registration files.
 #   * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format.
@@ -39,6 +39,7 @@
 # continue to work if so.
 
 import os
+import platform
 import subprocess
 import sys
 from pathlib import Path
@@ -49,13 +50,18 @@ from jinja2 import Environment, FileSystemLoader
 
 MAIN_PROCESS_HTTP_LISTENER_PORT = 8080
 
-
+# Workers with exposed endpoints needs either "client", "federation", or "media" listener_resources
+# Watching /_matrix/client needs a "client" listener
+# Watching /_matrix/federation needs a "federation" listener
+# Watching /_matrix/media and related needs a "media" listener
+# Stream Writers require "client" and "replication" listeners because they
+#   have to attach by instance_map to the master process and have client endpoints.
 WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
     "pusher": {
-        "app": "synapse.app.pusher",
+        "app": "synapse.app.generic_worker",
         "listener_resources": [],
         "endpoint_patterns": [],
-        "shared_extra_conf": {"start_pushers": False},
+        "shared_extra_conf": {},
         "worker_extra_conf": "",
     },
     "user_dir": {
@@ -78,7 +84,11 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_synapse/admin/v1/media/.*$",
             "^/_synapse/admin/v1/quarantine_media/.*$",
         ],
-        "shared_extra_conf": {"enable_media_repo": False},
+        # The first configured media worker will run the media background jobs
+        "shared_extra_conf": {
+            "enable_media_repo": False,
+            "media_instance_running_background_jobs": "media_repository1",
+        },
         "worker_extra_conf": "enable_media_repo: true",
     },
     "appservice": {
@@ -89,10 +99,10 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "worker_extra_conf": "",
     },
     "federation_sender": {
-        "app": "synapse.app.federation_sender",
+        "app": "synapse.app.generic_worker",
         "listener_resources": [],
         "endpoint_patterns": [],
-        "shared_extra_conf": {"send_federation": False},
+        "shared_extra_conf": {},
         "worker_extra_conf": "",
     },
     "synchrotron": {
@@ -107,6 +117,34 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "shared_extra_conf": {},
         "worker_extra_conf": "",
     },
+    "client_reader": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client"],
+        "endpoint_patterns": [
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/publicRooms$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/joined_members$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$",
+            "^/_matrix/client/v1/rooms/.*/hierarchy$",
+            "^/_matrix/client/(v1|unstable)/rooms/.*/relations/",
+            "^/_matrix/client/v1/rooms/.*/threads$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/login$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/account/whoami$",
+            "^/_matrix/client/versions$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$",
+            "^/_matrix/client/(r0|v3|unstable)/register$",
+            "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms",
+            "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
+        ],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
     "federation_reader": {
         "app": "synapse.app.generic_worker",
         "listener_resources": ["federation"],
@@ -171,14 +209,54 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "worker_extra_conf": "",
     },
     "frontend_proxy": {
-        "app": "synapse.app.frontend_proxy",
+        "app": "synapse.app.generic_worker",
         "listener_resources": ["client", "replication"],
         "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"],
         "shared_extra_conf": {},
-        "worker_extra_conf": (
-            "worker_main_http_uri: http://127.0.0.1:%d"
-            % (MAIN_PROCESS_HTTP_LISTENER_PORT,)
-        ),
+        "worker_extra_conf": "",
+    },
+    "account_data": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client", "replication"],
+        "endpoint_patterns": [
+            "^/_matrix/client/(r0|v3|unstable)/.*/tags",
+            "^/_matrix/client/(r0|v3|unstable)/.*/account_data",
+        ],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
+    "presence": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client", "replication"],
+        "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/presence/"],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
+    "receipts": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client", "replication"],
+        "endpoint_patterns": [
+            "^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt",
+            "^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers",
+        ],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
+    "to_device": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client", "replication"],
+        "endpoint_patterns": ["^/_matrix/client/(r0|v3|unstable)/sendToDevice/"],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
+    "typing": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client", "replication"],
+        "endpoint_patterns": [
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/typing"
+        ],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
     },
 }
 
@@ -201,24 +279,19 @@ upstream {upstream_worker_type} {{
 
 # Utility functions
 def log(txt: str) -> None:
-    """Log something to the stdout.
-
-    Args:
-        txt: The text to log.
-    """
     print(txt)
 
 
 def error(txt: str) -> NoReturn:
-    """Log something and exit with an error code.
-
-    Args:
-        txt: The text to log in error.
-    """
-    log(txt)
+    print(txt, file=sys.stderr)
     sys.exit(2)
 
 
+def flush_buffers() -> None:
+    sys.stdout.flush()
+    sys.stderr.flush()
+
+
 def convert(src: str, dst: str, **template_vars: object) -> None:
     """Generate a file from a template
 
@@ -247,14 +320,14 @@ def convert(src: str, dst: str, **template_vars: object) -> None:
         outfile.write(rendered)
 
 
-def add_sharding_to_shared_config(
+def add_worker_roles_to_shared_config(
     shared_config: dict,
     worker_type: str,
     worker_name: str,
     worker_port: int,
 ) -> None:
     """Given a dictionary representing a config file shared across all workers,
-    append sharded worker information to it for the current worker_type instance.
+    append appropriate worker information to it for the current worker_type instance.
 
     Args:
         shared_config: The config dict that all worker instances share (after being converted to YAML)
@@ -285,9 +358,19 @@ def add_sharding_to_shared_config(
             "port": worker_port,
         }
 
-    elif worker_type == "media_repository":
-        # The first configured media worker will run the media background jobs
-        shared_config.setdefault("media_instance_running_background_jobs", worker_name)
+    elif worker_type in ["account_data", "presence", "receipts", "to_device", "typing"]:
+        # Update the list of stream writers
+        # It's convenient that the name of the worker type is the same as the stream to write
+        shared_config.setdefault("stream_writers", {}).setdefault(
+            worker_type, []
+        ).append(worker_name)
+
+        # Map of stream writer instance names to host/ports combos
+        # For now, all stream writers need http replication ports
+        instance_map[worker_name] = {
+            "host": "localhost",
+            "port": worker_port,
+        }
 
 
 def generate_base_homeserver_config() -> None:
@@ -299,7 +382,7 @@ def generate_base_homeserver_config() -> None:
     # start.py already does this for us, so just call that.
     # note that this script is copied in in the official, monolith dockerfile
     os.environ["SYNAPSE_HTTP_PORT"] = str(MAIN_PROCESS_HTTP_LISTENER_PORT)
-    subprocess.check_output(["/usr/local/bin/python", "/start.py", "migrate_config"])
+    subprocess.run(["/usr/local/bin/python", "/start.py", "migrate_config"], check=True)
 
 
 def generate_worker_files(
@@ -373,8 +456,8 @@ def generate_worker_files(
         # No workers, just the main process
         worker_types = []
     else:
-        # Split type names by comma
-        worker_types = worker_types_env.split(",")
+        # Split type names by comma, ignoring whitespace.
+        worker_types = [x.strip() for x in worker_types_env.split(",")]
 
     # Create the worker configuration directory if it doesn't already exist
     os.makedirs("/conf/workers", exist_ok=True)
@@ -393,14 +476,11 @@ def generate_worker_files(
 
     # For each worker type specified by the user, create config values
     for worker_type in worker_types:
-        worker_type = worker_type.strip()
-
         worker_config = WORKERS_CONFIG.get(worker_type)
         if worker_config:
             worker_config = worker_config.copy()
         else:
-            log(worker_type + " is an unknown worker type! It will be ignored")
-            continue
+            error(worker_type + " is an unknown worker type! Please fix!")
 
         new_worker_count = worker_type_counter.setdefault(worker_type, 0) + 1
         worker_type_counter[worker_type] = new_worker_count
@@ -419,11 +499,11 @@ def generate_worker_files(
 
         # Check if more than one instance of this worker type has been specified
         worker_type_total_count = worker_types.count(worker_type)
-        if worker_type_total_count > 1:
-            # Update the shared config with sharding-related options if necessary
-            add_sharding_to_shared_config(
-                shared_config, worker_type, worker_name, worker_port
-            )
+
+        # Update the shared config with sharding-related options if necessary
+        add_worker_roles_to_shared_config(
+            shared_config, worker_type, worker_name, worker_port
+        )
 
         # Enable the worker in supervisord
         worker_descriptors.append(worker_config)
@@ -604,14 +684,24 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None:
         with open(mark_filepath, "w") as f:
             f.write("")
 
+    # Lifted right out of start.py
+    jemallocpath = "/usr/lib/%s-linux-gnu/libjemalloc.so.2" % (platform.machine(),)
+
+    if os.path.isfile(jemallocpath):
+        environ["LD_PRELOAD"] = jemallocpath
+    else:
+        log("Could not find %s, will not use" % (jemallocpath,))
+
     # Start supervisord, which will start Synapse, all of the configured worker
     # processes, redis, nginx etc. according to the config we created above.
     log("Starting supervisord")
-    os.execl(
+    flush_buffers()
+    os.execle(
         "/usr/local/bin/supervisord",
         "supervisord",
         "-c",
         "/etc/supervisor/supervisord.conf",
+        environ,
     )
 
 
diff --git a/docker/start.py b/docker/start.py
index 5a98dce551..ebcc599f04 100755
--- a/docker/start.py
+++ b/docker/start.py
@@ -13,14 +13,19 @@ import jinja2
 
 # Utility functions
 def log(txt: str) -> None:
-    print(txt, file=sys.stderr)
+    print(txt)
 
 
 def error(txt: str) -> NoReturn:
-    log(txt)
+    print(txt, file=sys.stderr)
     sys.exit(2)
 
 
+def flush_buffers() -> None:
+    sys.stdout.flush()
+    sys.stderr.flush()
+
+
 def convert(src: str, dst: str, environ: Mapping[str, object]) -> None:
     """Generate a file from a template
 
@@ -131,10 +136,10 @@ def generate_config_from_template(
 
     if ownership is not None:
         log(f"Setting ownership on /data to {ownership}")
-        subprocess.check_output(["chown", "-R", ownership, "/data"])
+        subprocess.run(["chown", "-R", ownership, "/data"], check=True)
         args = ["gosu", ownership] + args
 
-    subprocess.check_output(args)
+    subprocess.run(args, check=True)
 
 
 def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) -> None:
@@ -158,7 +163,7 @@ def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) ->
     if ownership is not None:
         # make sure that synapse has perms to write to the data dir.
         log(f"Setting ownership on {data_dir} to {ownership}")
-        subprocess.check_output(["chown", ownership, data_dir])
+        subprocess.run(["chown", ownership, data_dir], check=True)
 
     # create a suitable log config from our template
     log_config_file = "%s/%s.log.config" % (config_dir, server_name)
@@ -185,6 +190,7 @@ def run_generate_config(environ: Mapping[str, str], ownership: Optional[str]) ->
         "--open-private-ports",
     ]
     # log("running %s" % (args, ))
+    flush_buffers()
     os.execv(sys.executable, args)
 
 
@@ -267,8 +273,10 @@ running with 'migrate_config'. See the README for more details.
     args = [sys.executable] + args
     if ownership is not None:
         args = ["gosu", ownership] + args
+        flush_buffers()
         os.execve("/usr/sbin/gosu", args, environ)
     else:
+        flush_buffers()
         os.execve(sys.executable, args, environ)