2 files changed, 509 insertions, 0 deletions
diff --git a/scripts-dev/gen_config_documentation.py b/scripts-dev/gen_config_documentation.py
new file mode 100755
index 0000000000..8e9d402c6a
--- /dev/null
+++ b/scripts-dev/gen_config_documentation.py
@@ -0,0 +1,503 @@
+#!/usr/bin/env python3
+"""Generate Synapse documentation from JSON Schema file."""
+
+import json
+import re
+import sys
+from typing import Any, Optional
+
+import yaml
+
+# Static Markdown preamble of the generated manual. `main()` prints it verbatim
+# ahead of the per-option sections.
+HEADER = """<!-- Document auto-generated by scripts-dev/gen_config_documentation.py -->
+
+# Configuring Synapse
+
+This is intended as a guide to the Synapse configuration. The behavior of a Synapse instance can be modified
+through the many configuration settings documented here — each config option is explained,
+including what the default is, how to change the default and what sort of behaviour the setting governs.
+Also included is an example configuration for each setting. If you don't want to spend a lot of time
+thinking about options, the config as generated sets sensible defaults for all values. Do note however that the
+database defaults to SQLite, which is not recommended for production usage. You can read more on this subject
+[here](../../setup/installation.md#using-postgresql).
+
+## Config Conventions
+
+Configuration options that take a time period can be set using a number
+followed by a letter. Letters have the following meanings:
+
+* `s` = second
+* `m` = minute
+* `h` = hour
+* `d` = day
+* `w` = week
+* `y` = year
+
+For example, setting `redaction_retention_period: 5m` would remove redacted
+messages from the database after 5 minutes, rather than 5 months.
+
+In addition, configuration options referring to size use the following suffixes:
+
+* `K` = KiB, or 1024 bytes
+* `M` = MiB, or 1,048,576 bytes
+* `G` = GiB, or 1,073,741,824 bytes
+* `T` = TiB, or 1,099,511,627,776 bytes
+
+For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes
+for a user avatar.
+
+## Config Validation
+
+The configuration file can be validated with the following command:
+```bash
+python -m synapse.config read <config key to print> -c <path to config>
+```
+
+To validate the entire file, omit `read <config key to print>`:
+```bash
+python -m synapse.config -c <path to config>
+```
+
+To see how to set other options, check the help reference:
+```bash
+python -m synapse.config --help
+```
+
+### YAML
+The configuration file is a [YAML](https://yaml.org/) file, which means that certain syntax rules
+apply if you want your config file to be read properly. A few helpful things to know:
+* `#` before any option in the config will comment out that setting and either a default (if available) will
+   be applied or Synapse will ignore the setting. Thus, in example #1 below, the setting will be read and
+   applied, but in example #2 the setting will not be read and a default will be applied.
+
+   Example #1:
+   ```yaml
+   pid_file: DATADIR/homeserver.pid
+   ```
+   Example #2:
+   ```yaml
+   #pid_file: DATADIR/homeserver.pid
+   ```
+* Indentation matters! The indentation before a setting
+  will determine whether a given setting is read as part of another
+  setting, or considered on its own. Thus, in example #1, the `enabled` setting
+  is read as a sub-option of the `presence` setting, and will be properly applied.
+
+  However, the lack of indentation before the `enabled` setting in example #2 means
+  that when reading the config, Synapse will consider both `presence` and `enabled` as
+  different settings. In this case, `presence` has no value, and thus a default applied, and `enabled`
+  is an option that Synapse doesn't recognize and thus ignores.
+
+  Example #1:
+  ```yaml
+  presence:
+    enabled: false
+  ```
+  Example #2:
+  ```yaml
+  presence:
+  enabled: false
+  ```
+  In this manual, all top-level settings (ones with no indentation) are identified
+  at the beginning of their section (i.e. "### `example_setting`") and
+  the sub-options, if any, are identified and listed in the body of the section.
+  In addition, each setting has an example of its usage, with the proper indentation
+  shown.
+"""
+# Chapter headings of the generated manual, keyed by config option name.
+# When `section()` renders an option whose name is a key here, it emits the
+# chapter "title" and "description" immediately before that option's own
+# section.
+SECTION_HEADERS = {
+    "modules": {
+        "title": "Modules",
+        "description": (
+            "Server admins can expand Synapse's functionality with external "
+            "modules.\n\n"
+            "See [here](../../modules/index.md) for more documentation on how "
+            "to configure or create custom modules for Synapse."
+        ),
+    },
+    "server_name": {
+        "title": "Server",
+        "description": "Define your homeserver name and other base options.",
+    },
+    "admin_contact": {
+        "title": "Homeserver blocking",
+        "description": "Useful options for Synapse admins.",
+    },
+    "tls_certificate_path": {
+        "title": "TLS",
+        "description": "Options related to TLS.",
+    },
+    "federation_domain_whitelist": {
+        "title": "Federation",
+        "description": "Options related to federation.",
+    },
+    "event_cache_size": {
+        "title": "Caching",
+        "description": "Options related to caching.",
+    },
+    "database": {
+        "title": "Database",
+        "description": "Config options related to database settings.",
+    },
+    "log_config": {
+        "title": "Logging",
+        "description": ("Config options related to logging."),
+    },
+    "rc_message": {
+        "title": "Ratelimiting",
+        "description": (
+            "Options related to ratelimiting in Synapse.\n\n"
+            "Each ratelimiting configuration is made of two parameters:\n"
+            "- `per_second`: number of requests a client can send per second.\n"
+            "- `burst_count`: number of requests a client can send before "
+            "being throttled."
+        ),
+    },
+    "enable_authenticated_media": {
+        "title": "Media Store",
+        "description": "Config options related to Synapse's media store.",
+    },
+    "recaptcha_public_key": {
+        "title": "Captcha",
+        "description": (
+            "See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha."
+        ),
+    },
+    "turn_uris": {
+        "title": "TURN",
+        "description": ("Options related to adding a TURN server to Synapse."),
+    },
+    "enable_registration": {
+        "title": "Registration",
+        "description": (
+            "Registration can be rate-limited using the parameters in the "
+            "[Ratelimiting](#ratelimiting) section of this manual."
+        ),
+    },
+    "session_lifetime": {
+        "title": "User session management",
+        "description": ("Config options related to user session management."),
+    },
+    "enable_metrics": {
+        "title": "Metrics",
+        "description": ("Config options related to metrics."),
+    },
+    "room_prejoin_state": {
+        "title": "API Configuration",
+        "description": ("Config settings related to the client/server API."),
+    },
+    "signing_key_path": {
+        "title": "Signing Keys",
+        "description": ("Config options relating to signing keys."),
+    },
+    "saml2_config": {
+        "title": "Single sign-on integration",
+        "description": (
+            "The following settings can be used to make Synapse use a single sign-on provider for authentication, instead of its internal password database.\n\n"
+            "You will probably also want to set the following options to `false` to disable the regular login/registration flows:\n"
+            "* [`enable_registration`](#enable_registration)\n"
+            "* [`password_config.enabled`](#password_config)"
+        ),
+    },
+    "push": {
+        "title": "Push",
+        "description": ("Configuration settings related to push notifications."),
+    },
+    "encryption_enabled_by_default_for_room_type": {
+        "title": "Rooms",
+        "description": ("Config options relating to rooms."),
+    },
+    "opentracing": {
+        "title": "Opentracing",
+        "description": ("Configuration options related to Opentracing support."),
+    },
+    "worker_replication_secret": {
+        "title": "Coordinating workers",
+        "description": (
+            "Configuration options related to workers which belong in the main config file (usually called `homeserver.yaml`). A Synapse deployment can scale horizontally by running multiple Synapse processes called _workers_. Incoming requests are distributed between workers to handle higher loads. Some workers are privileged and can accept requests from other workers.\n\n"
+            "As a result, the worker configuration is divided into two parts.\n\n"
+            "1. The first part (in this section of the manual) defines which shardable tasks are delegated to privileged workers. This allows unprivileged workers to make requests to a privileged worker to act on their behalf.\n"
+            "2. [The second part](#individual-worker-configuration) controls the behaviour of individual workers in isolation.\n\n"
+            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
+        ),
+    },
+    "worker_app": {
+        "title": "Individual worker configuration",
+        "description": (
+            "These options configure an individual worker, in its worker configuration file. They should not be provided when configuring the main process.\n\n"
+            "Note also the configuration above for [coordinating a cluster of workers](#coordinating-workers).\n\n"
+            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
+        ),
+    },
+    "background_updates": {
+        "title": "Background Updates",
+        "description": ("Configuration settings related to background updates."),
+    },
+    "auto_accept_invites": {
+        "title": "Auto Accept Invites",
+        "description": (
+            "Configuration settings related to automatically accepting invites."
+        ),
+    },
+}
+# Indentation unit that `indent()` puts in front of each non-empty line.
+INDENT = "  "
+
+
+# Switched to True by `error()`; `main()` checks it after generating the
+# document to decide whether to exit with a failure status.
+has_error = False
+
+
+def error(text: str) -> None:
+    """Report a problem on stderr and record that the run has failed.
+
+    The message is written with an ``ERROR: `` prefix, after which the
+    module-level ``has_error`` flag is switched on.
+    """
+    global has_error
+    message = f"ERROR: {text}"
+    print(message, file=sys.stderr)
+    has_error = True
+
+
+def indent(text: str, first_line: bool = True) -> str:
+    """Prefix every non-empty line of ``text`` with one ``INDENT`` unit.
+
+    Empty lines are left untouched. When ``first_line`` is false, the first
+    line is returned unchanged as well.
+    """
+    lines = text.split("\n")
+    return "\n".join(
+        INDENT + line if line and (first_line or idx > 0) else line
+        for idx, line in enumerate(lines)
+    )
+
+
+def em(s: Optional[str]) -> str:
+    """Wrap ``s`` in asterisks (Markdown emphasis); empty or ``None`` gives ``""``."""
+    if not s:
+        return ""
+    return "*" + s + "*"
+
+
+def a(s: Optional[str], suffix: str = " ") -> str:
+    """Return ``s`` followed by ``suffix`` (a space by default).
+
+    An empty or ``None`` input yields ``""`` with no suffix.
+    """
+    if not s:
+        return ""
+    return f"{s}{suffix}"
+
+
+def p(s: Optional[str], prefix: str = " ") -> str:
+    """Return ``s`` preceded by ``prefix`` (a space by default).
+
+    An empty or ``None`` input yields ``""`` with no prefix.
+    """
+    if not s:
+        return ""
+    return f"{prefix}{s}"
+
+
+def resolve_local_refs(schema: dict) -> dict:
+    """Return a copy of ``schema`` with local ``$ref`` keywords expanded.
+
+    Every mapping that carries a string ``$ref`` of the form
+    ``#/$defs/<name>`` is replaced by the union of its own keywords and the
+    keywords of the referenced definition. This is a crude approximation:
+
+    * on a conflict the definition's keyword wins (a warning is printed to
+      stderr), except for ``properties``, whose entries are merged;
+    * definitions are spliced in as they are, so a ``$ref`` nested inside a
+      definition is not expanded.
+
+    The input schema is not modified. A schema without ``$defs`` is accepted
+    as long as it contains no ``$ref``.
+
+    Args:
+        schema: The parsed JSON Schema document.
+
+    Returns:
+        The schema with local references replaced.
+
+    Raises:
+        KeyError: If a ``$ref`` names a definition missing from ``$defs``.
+    """
+    defs = schema.get("$defs", {})
+
+    def replace_ref(d: Any) -> Any:
+        if isinstance(d, list):
+            return [replace_ref(v) for v in d]
+        if not isinstance(d, dict):
+            return d
+
+        def_name = None
+        the_def: dict = {}
+        ref = d.get("$ref")
+        # Only a string is a reference; anything else (e.g. an option that is
+        # literally named "$ref") is treated like any other key.
+        if isinstance(ref, str):
+            def_name = ref.removeprefix("#/$defs/")
+            try:
+                the_def = defs[def_name]
+            except KeyError:
+                raise KeyError(
+                    f"unknown schema definition referenced by $ref: {ref!r}"
+                ) from None
+            # Drop the reference on a shallow copy so the caller's schema is
+            # left untouched.
+            d = {k: v for k, v in d.items() if k != "$ref"}
+
+        new_dict = {k: replace_ref(v) for k, v in d.items()}
+        if common_keys := (new_dict.keys() & the_def.keys()) - {"properties"}:
+            print(
+                f"WARN: '{def_name}' overrides keys '{common_keys}'",
+                file=sys.stderr,
+            )
+
+        new_dict_props = new_dict.get("properties", {})
+        the_def_props = the_def.get("properties", {})
+        if common_props := new_dict_props.keys() & the_def_props.keys():
+            print(
+                f"WARN: '{def_name}' overrides properties '{common_props}'",
+                file=sys.stderr,
+            )
+
+        merged = {**new_dict, **the_def}
+        if merged_props := {**new_dict_props, **the_def_props}:
+            merged["properties"] = merged_props
+        return merged
+
+    return replace_ref(schema)
+
+
+def sep(values: dict) -> str:
+    """Pick the separator placed between the description and appended notes.
+
+    A description that already spans several paragraphs gets a paragraph
+    break; otherwise the note continues the same paragraph after a space.
+    """
+    description = values.get("description", "")
+    if "\n\n" in description:
+        return "\n\n"
+    return " "
+
+
+def type_str(values: dict) -> str:
+    """Return the parenthesised type label for a schema node.
+
+    ``io.element.type_name`` takes precedence, so the documentation can show
+    a custom type name. Otherwise the ``type`` keyword is used; a list of
+    types is joined with ``|``. Without either keyword the result is ``""``.
+    """
+    override = values.get("io.element.type_name")
+    if override:
+        return f"({override})"
+
+    declared = values.get("type")
+    if not declared:
+        return ""
+
+    names = declared if isinstance(declared, list) else [declared]
+    return "(" + "|".join(names) + ")"
+
+
+def items(values: dict) -> str:
+    """Render a block listing the properties of array items.
+
+    Args:
+        values: Schema keywords of the option being documented.
+
+    Returns:
+        An introductory line followed by one bullet per item property, or
+        ``""`` when ``items`` is absent, is not an object schema (JSON Schema
+        also allows a boolean or a list there), or declares no properties.
+    """
+    item_schema = values.get("items")
+    # Only an object schema can carry "properties"; calling .get() on a
+    # boolean or list would raise.
+    if not isinstance(item_schema, dict):
+        return ""
+    item_props = item_schema.get("properties")
+    if not item_props:
+        return ""
+    return "\nOptions for each entry include:\n\n" + "\n".join(
+        sub_section(k, v) for k, v in item_props.items()
+    )
+
+
+def properties(values: dict) -> str:
+    """Render a block listing the sub-options of an object.
+
+    Returns ``""`` when the schema node has no (or empty) ``properties``;
+    otherwise an introductory line followed by one bullet per sub-option.
+    """
+    sub_options = values.get("properties")
+    if not sub_options:
+        return ""
+    bullets = [sub_section(name, spec) for name, spec in sub_options.items()]
+    return "\nThis setting has the following sub-options:\n\n" + "\n".join(bullets)
+
+
+def sub_section(prop: str, values: dict) -> str:
+    """Render one sub-option as a Markdown bullet point.
+
+    The bullet shows the option name, its type label (if any) and its
+    description with the default appended. Array item options and nested
+    sub-options follow, indented one level. A missing description is
+    reported through ``error()`` and replaced by a placeholder.
+
+    Args:
+        prop: Name of the sub-option.
+        values: Schema keywords describing the sub-option.
+
+    Returns:
+        The Markdown text of the bullet, including nested blocks.
+    """
+    text = values.get("description")
+    if text:
+        # The default note is only built when there is a description to
+        # attach it to.
+        default_note = (
+            f"Defaults to `{json.dumps(values['default'])}`."
+            if "default" in values
+            else ""
+        )
+        body = f"{text}{p(default_note, sep(values))}\n"
+    else:
+        error(f"missing description for {prop}")
+        body = "MISSING DESCRIPTION\n"
+
+    bullet = f"* `{prop}`{p(type_str(values))}: "
+    pieces = [
+        bullet,
+        # The first line continues the bullet, so only later lines are indented.
+        indent(body, first_line=False),
+        indent(items(values)),
+        indent(properties(values)),
+    ]
+    return "".join(pieces)
+
+
+def section(prop: str, values: dict) -> str:
+    """Render the full Markdown section for one top-level config option.
+
+    The output is assembled, in order, from: a horizontal rule, an optional
+    chapter heading taken from ``SECTION_HEADERS``, the option title, its
+    description (with type label and default note), the array item options,
+    the sub-options, a YAML block for non-trivial defaults, the YAML examples
+    and the optional ``io.element.post_description`` text. A missing
+    description is reported through ``error()`` and replaced by a placeholder.
+
+    Args:
+        prop: Name of the config option.
+        values: Schema keywords describing the option.
+
+    Returns:
+        The Markdown text of the section.
+    """
+    default_value = values.get("default")
+    # Falsy defaults (including a missing one) and scalars fit on one line;
+    # non-empty dicts and lists are shown as a YAML block instead.
+    one_line_default = not default_value or not isinstance(default_value, (dict, list))
+
+    def yaml_block(value: Any) -> str:
+        """Fenced YAML snippet that sets ``prop`` to ``value``."""
+        dumped = yaml.dump({prop: value}, sort_keys=False)
+        return f"```yaml\n{dumped}```\n"
+
+    parts = ["---\n"]
+
+    # Chapter heading, only for options registered in SECTION_HEADERS.
+    chapter = SECTION_HEADERS.get(prop, {})
+    if "title" in chapter and "description" in chapter:
+        parts.append(f"## {chapter['title']}\n\n{chapter['description']}\n\n---\n")
+
+    parts.append(f"### `{prop}`\n")
+
+    text = values.get("description")
+    if text:
+        if "default" not in values:
+            declared = values.get("type", [])
+            # Skip objects as they probably have child defaults.
+            if "object" == declared or "object" in declared:
+                default_note = ""
+            else:
+                default_note = "There is no default for this option."
+        elif one_line_default:
+            default_note = f"Defaults to `{json.dumps(default_value)}`."
+        else:
+            # Complex defaults are shown as a code block further down.
+            default_note = ""
+        parts.append(
+            f"\n{a(em(type_str(values)))}{text}{p(default_note, sep(values))}\n"
+        )
+    else:
+        error(f"missing description for {prop}")
+        parts.append("MISSING DESCRIPTION\n")
+
+    parts.append(items(values))
+    parts.append(properties(values))
+
+    if not one_line_default:
+        parts.append(f"\nDefault configuration:\n{yaml_block(default_value)}")
+
+    samples = values.get("examples")
+    if samples:
+        rendered = "\n".join(yaml_block(sample) for sample in samples)
+        if len(samples) >= 2:
+            parts.append(f"\nExample configurations:\n{rendered}")
+        else:
+            parts.append(f"\nExample configuration:\n{rendered}")
+
+    # Sometimes it's helpful to have a description after the list of fields,
+    # e.g. with a subsection that consists only of text.
+    trailer = values.get("io.element.post_description")
+    if trailer:
+        parts.append(f"\n{trailer}\n\n")
+
+    return "".join(parts)
+
+
+def main() -> None:
+    """Generate the configuration manual from the schema named on the command line.
+
+    Reads the schema file given as the only argument, expands local ``$ref``
+    keywords, and prints ``HEADER`` followed by one section per top-level
+    property to stdout.
+
+    Exit status: 1 for a usage error (missing or surplus arguments), 2 when
+    any problem was reported through ``error()`` while generating.
+    """
+
+    def usage(err_msg: str) -> int:
+        """Print ``err_msg`` and the usage text to stderr; return exit status 1."""
+        script_name = (sys.argv[:1] or ["__main__.py"])[0]
+        print(err_msg, file=sys.stderr)
+        print(f"Usage: {script_name} <JSON Schema file>", file=sys.stderr)
+        print(f"\n{__doc__}", file=sys.stderr)
+        return 1
+
+    def read_json_file_arg() -> Any:
+        """Parse the schema file named by the single command-line argument."""
+        if len(sys.argv) > 2:
+            sys.exit(usage("Too many arguments."))
+        if not (filepath := (sys.argv[1:] or [""])[0]):
+            sys.exit(usage("No schema file provided."))
+        # Read as UTF-8 explicitly so the result does not depend on the
+        # platform's locale encoding. The YAML loader also accepts JSON-style
+        # documents.
+        with open(filepath, encoding="utf-8") as f:
+            return yaml.safe_load(f)
+
+    schema = read_json_file_arg()
+    schema = resolve_local_refs(schema)
+
+    sections = (section(k, v) for k, v in schema["properties"].items())
+    print(HEADER + "".join(sections), end="")
+
+    if has_error:
+        print("There were errors.", file=sys.stderr)
+        # sys.exit is used instead of the `exit` helper, which the `site`
+        # module only installs for interactive use.
+        sys.exit(2)
+
+
+# Run the generator only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index c556ccaf86..5de5814b17 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -254,6 +254,12 @@ def _prepare() -> None:
     # Update the version specified in pyproject.toml.
     subprocess.check_output(["poetry", "version", new_version])
 
+    # Update config schema $id.
+    schema_file = "schema/synapse-config.schema.yaml"
+    major_minor_version = ".".join(new_version.split(".")[:2])
+    url = f"https://element-hq.github.io/synapse/schema/synapse/v{major_minor_version}/synapse-config.schema.json"
+    subprocess.check_output(["sed", "-i", f"0,/^\\$id: .*/s||$id: {url}|", schema_file])
+
     # Generate changelogs.
     generate_and_write_changelog(synapse_repo, current_version, new_version)