scripts-dev/gen_config_documentation.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494

#!/usr/bin/env python3
"""Generate Synapse documentation from JSON Schema file."""

import json
import re
import sys
from typing import Any, Optional

import yaml

# Static Markdown preamble of the generated document. `main` prints this text
# verbatim, immediately followed by the rendered per-option sections.
HEADER = """<!-- Document auto-generated by scripts-dev/gen_config_documentation.py -->

# Configuring Synapse

This is intended as a guide to the Synapse configuration. The behavior of a Synapse instance can be modified
through the many configuration settings documented here — each config option is explained,
including what the default is, how to change the default and what sort of behaviour the setting governs.
Also included is an example configuration for each setting. If you don't want to spend a lot of time
thinking about options, the config as generated sets sensible defaults for all values. Do note however that the
database defaults to SQLite, which is not recommended for production usage. You can read more on this subject
[here](../../setup/installation.md#using-postgresql).

## Config Conventions

Configuration options that take a time period can be set using a number
followed by a letter. Letters have the following meanings:

* `s` = second
* `m` = minute
* `h` = hour
* `d` = day
* `w` = week
* `y` = year

For example, setting `redaction_retention_period: 5m` would remove redacted
messages from the database after 5 minutes, rather than 5 months.

In addition, configuration options referring to size use the following suffixes:

* `K` = KiB, or 1024 bytes
* `M` = MiB, or 1,048,576 bytes
* `G` = GiB, or 1,073,741,824 bytes
* `T` = TiB, or 1,099,511,627,776 bytes

For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes
for a user avatar.

## Config Validation

The configuration file can be validated with the following command:
```bash
python -m synapse.config read <config key to print> -c <path to config>
```

To validate the entire file, omit `read <config key to print>`:
```bash
python -m synapse.config -c <path to config>
```

To see how to set other options, check the help reference:
```bash
python -m synapse.config --help
```

### YAML
The configuration file is a [YAML](https://yaml.org/) file, which means that certain syntax rules
apply if you want your config file to be read properly. A few helpful things to know:
* `#` before any option in the config will comment out that setting and either a default (if available) will
   be applied or Synapse will ignore the setting. Thus, in example #1 below, the setting will be read and
   applied, but in example #2 the setting will not be read and a default will be applied.

   Example #1:
   ```yaml
   pid_file: DATADIR/homeserver.pid
   ```
   Example #2:
   ```yaml
   #pid_file: DATADIR/homeserver.pid
   ```
* Indentation matters! The indentation before a setting
  will determine whether a given setting is read as part of another
  setting, or considered on its own. Thus, in example #1, the `enabled` setting
  is read as a sub-option of the `presence` setting, and will be properly applied.

  However, the lack of indentation before the `enabled` setting in example #2 means
  that when reading the config, Synapse will consider both `presence` and `enabled` as
  different settings. In this case, `presence` has no value, and thus a default applied, and `enabled`
  is an option that Synapse doesn't recognize and thus ignores.

  Example #1:
  ```yaml
  presence:
    enabled: false
  ```
  Example #2:
  ```yaml
  presence:
  enabled: false
  ```
  In this manual, all top-level settings (ones with no indentation) are identified
  at the beginning of their section (i.e. "### `example_setting`") and
  the sub-options, if any, are identified and listed in the body of the section.
  In addition, each setting has an example of its usage, with the proper indentation
  shown.
"""
# Chapter headings of the generated document, keyed by config option name.
# When `section` renders an option whose name is a key here, it first emits
# "## <title>", the description paragraph(s) and a horizontal rule, and only
# then the option's own "###" section. Both "title" and "description" are
# required for the heading to be emitted.
SECTION_HEADERS = {
    "modules": {
        "title": "Modules",
        "description": (
            "Server admins can expand Synapse's functionality with external "
            "modules.\n\n"
            "See [here](../../modules/index.md) for more documentation on how "
            "to configure or create custom modules for Synapse."
        ),
    },
    "server_name": {
        "title": "Server",
        "description": "Define your homeserver name and other base options.",
    },
    "admin_contact": {
        "title": "Homeserver blocking",
        "description": "Useful options for Synapse admins.",
    },
    "tls_certificate_path": {
        "title": "TLS",
        "description": "Options related to TLS.",
    },
    "federation_domain_whitelist": {
        "title": "Federation",
        "description": "Options related to federation.",
    },
    "event_cache_size": {
        "title": "Caching",
        "description": "Options related to caching.",
    },
    "database": {
        "title": "Database",
        "description": "Config options related to database settings.",
    },
    "log_config": {
        "title": "Logging",
        "description": ("Config options related to logging."),
    },
    "rc_message": {
        "title": "Ratelimiting",
        "description": (
            "Options related to ratelimiting in Synapse.\n\n"
            "Each ratelimiting configuration is made of two parameters:\n"
            "- `per_second`: number of requests a client can send per second.\n"
            "- `burst_count`: number of requests a client can send before "
            "being throttled."
        ),
    },
    "enable_authenticated_media": {
        "title": "Media Store",
        "description": "Config options related to Synapse's media store.",
    },
    "recaptcha_public_key": {
        "title": "Captcha",
        "description": (
            "See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha."
        ),
    },
    "turn_uris": {
        "title": "TURN",
        "description": ("Options related to adding a TURN server to Synapse."),
    },
    "enable_registration": {
        "title": "Registration",
        "description": (
            "Registration can be rate-limited using the parameters in the "
            "[Ratelimiting](#ratelimiting) section of this manual."
        ),
    },
    "session_lifetime": {
        "title": "User session management",
        "description": ("Config options related to user session management."),
    },
    "enable_metrics": {
        "title": "Metrics",
        "description": ("Config options related to metrics."),
    },
    "room_prejoin_state": {
        "title": "API Configuration",
        "description": ("Config settings related to the client/server API."),
    },
    "signing_key_path": {
        "title": "Signing Keys",
        "description": ("Config options relating to signing keys."),
    },
    "push": {
        "title": "Push",
        "description": ("Configuration settings related to push notifications."),
    },
    "encryption_enabled_by_default_for_room_type": {
        "title": "Rooms",
        "description": ("Config options relating to rooms."),
    },
    "opentracing": {
        "title": "Opentracing",
        "description": ("Configuration options related to Opentracing support."),
    },
    "worker_replication_secret": {
        "title": "Coordinating workers",
        "description": (
            "Configuration options related to workers which belong in the main config file (usually called `homeserver.yaml`). A Synapse deployment can scale horizontally by running multiple Synapse processes called _workers_. Incoming requests are distributed between workers to handle higher loads. Some workers are privileged and can accept requests from other workers.\n\n"
            "As a result, the worker configuration is divided into two parts.\n\n"
            "1. The first part (in this section of the manual) defines which shardable tasks are delegated to privileged workers. This allows unprivileged workers to make requests to a privileged worker to act on their behalf.\n"
            "2. [The second part](#individual-worker-configuration) controls the behaviour of individual workers in isolation.\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "worker_app": {
        "title": "Individual worker configuration",
        "description": (
            # Wording fixed: this previously read "should be not be provided".
            "These options configure an individual worker, in its worker configuration file. They should not be provided when configuring the main process.\n\n"
            "Note also the configuration above for [coordinating a cluster of workers](#coordinating-workers).\n\n"
            "For guidance on setting up workers, see the [worker documentation](../../workers.md)."
        ),
    },
    "background_updates": {
        "title": "Background Updates",
        "description": ("Configuration settings related to background updates."),
    },
    "auto_accept_invites": {
        "title": "Auto Accept Invites",
        "description": (
            "Configuration settings related to automatically accepting invites."
        ),
    },
}
# Indentation unit (two spaces) that `indent` prepends to each non-empty line.
INDENT = "  "


# Module-wide flag: flipped to True by `error` the first time a problem is
# reported, and checked at the end of `main` to choose the exit status.
has_error = False


def error(text: str) -> None:
    """Report a problem on stderr and record that generation was not clean.

    Args:
        text: Human-readable description of the problem; it is printed with
            an ``ERROR: `` prefix.
    """
    global has_error
    has_error = True
    print(f"ERROR: {text}", file=sys.stderr)


def indent(text: str, first_line: bool = True) -> str:
    """Prefix every non-empty line of ``text`` with ``INDENT``.

    Lines are delimited by ``"\\n"`` only. Empty lines are left untouched so
    that no trailing whitespace is introduced.

    Args:
        text: The (possibly multi-line) text to indent.
        first_line: When false, the first line is left as is and only the
            following lines are indented.

    Returns:
        The indented text.
    """
    return "\n".join(
        INDENT + line if line and (first_line or position > 0) else line
        for position, line in enumerate(text.split("\n"))
    )


def em(s: Optional[str]) -> str:
    """Wrap ``s`` in asterisks (Markdown emphasis).

    Returns an empty string when ``s`` is empty or ``None``.
    """
    if not s:
        return ""
    return f"*{s}*"


def a(s: Optional[str], suffix: str = " ") -> str:
    """Append ``suffix`` (a single space by default) to ``s``.

    Returns an empty string, without the suffix, when ``s`` is empty or
    ``None``.
    """
    if s:
        return s + suffix
    return ""


def p(s: Optional[str], prefix: str = " ") -> str:
    """Prepend ``prefix`` (a single space by default) to ``s``.

    Returns an empty string, without the prefix, when ``s`` is empty or
    ``None``.
    """
    if s:
        return prefix + s
    return ""


def resolve_local_refs(schema: dict) -> dict:
    """Return a copy of ``schema`` with local ``$ref`` keywords expanded.

    Every object containing ``"$ref": "#/$defs/<name>"`` is replaced by the
    union of its own keywords and those of the referenced definition. This is
    a crude approximation of JSON Schema semantics: on a clash the keyword from
    the definition wins (a warning is printed to stderr), except for
    ``properties``, whose members are merged (definition members win).

    The input schema is never modified. References inside a definition are
    expanded as well; a definition that (directly or indirectly) refers to
    itself is left unexpanded at the point where the cycle closes.

    Args:
        schema: The root JSON Schema object. Definitions are looked up in its
            top-level ``$defs`` mapping, which may be absent if the schema
            contains no references.

    Returns:
        A new schema dict (still containing ``$defs``) without local refs.

    Raises:
        KeyError: If a ``$ref`` names a definition missing from ``$defs``.
    """
    defs = schema.get("$defs", {})
    # Definitions that are already fully expanded, so each one is processed
    # (and warned about) only once no matter how often it is referenced.
    resolved_defs: dict = {}
    # Names of the definitions currently being expanded; used to stop cycles.
    in_progress: set = set()

    def resolve_def(def_name: str) -> dict:
        """Returns the named definition with its own nested refs expanded."""
        if def_name in resolved_defs:
            return resolved_defs[def_name]

        raw_def = defs[def_name]
        if def_name in in_progress:
            # Circular reference: hand back the unexpanded definition instead
            # of recursing forever.
            return dict(raw_def)

        in_progress.add(def_name)
        try:
            resolved = replace_ref(raw_def)
        finally:
            in_progress.discard(def_name)
        resolved_defs[def_name] = resolved
        return resolved

    def replace_ref(d: Any) -> Any:
        if isinstance(d, list):
            return [replace_ref(v) for v in d]
        if not isinstance(d, dict):
            return d

        def_name = None
        the_def: dict = {}
        if "$ref" in d:
            # Found a "$ref" key.
            def_name = d["$ref"].removeprefix("#/$defs/")
            the_def = resolve_def(def_name)

        # Build a fresh dict rather than deleting "$ref" from `d`: `d` belongs
        # to the caller and may be a definition shared by several references.
        new_dict = {k: replace_ref(v) for k, v in d.items() if k != "$ref"}
        if common_keys := (new_dict.keys() & the_def.keys()) - {"properties"}:
            print(
                f"WARN: '{def_name}' overrides keys '{common_keys}'",
                file=sys.stderr,
            )

        new_dict_props = new_dict.get("properties", {})
        the_def_props = the_def.get("properties", {})
        if common_props := new_dict_props.keys() & the_def_props.keys():
            print(
                f"WARN: '{def_name}' overrides properties '{common_props}'",
                file=sys.stderr,
            )
        if merged_props := {**new_dict_props, **the_def_props}:
            return {**new_dict, **the_def, "properties": merged_props}
        return {**new_dict, **the_def}

    return replace_ref(schema)


def sep(values: dict) -> str:
    """Pick the separator to put between a description and appended text.

    A description that already spans several paragraphs (contains a blank
    line) gets the extra text as a new paragraph; otherwise the text
    continues the same paragraph after a single space.
    """
    description = values.get("description", "")
    if "\n\n" in description:
        return "\n\n"
    return " "


def type_str(values: dict) -> str:
    """Render the type of a schema node as ``(type)`` or ``(type1|type2)``.

    The ``io.element.type_name`` keyword, when set, overrides the JSON Schema
    ``type`` so that the documentation can show a friendlier name. An empty
    string is returned when the node declares no type at all.
    """
    custom_name = values.get("io.element.type_name")
    if custom_name:
        return f"({custom_name})"

    declared = values.get("type")
    if not declared:
        return ""

    type_names = declared if isinstance(declared, list) else [declared]
    return "(" + "|".join(type_names) + ")"


def items(values: dict) -> str:
    """Render a bullet list describing the properties of each array item.

    Returns an empty string unless ``values`` has an ``items`` schema which in
    turn declares ``properties``; each such property becomes one bullet
    produced by ``sub_section``.
    """
    item_schema = values.get("items")
    if not item_schema:
        return ""

    item_props = item_schema.get("properties")
    if not item_props:
        return ""

    bullets = [sub_section(name, spec) for name, spec in item_props.items()]
    return "\nOptions for each entry include:\n\n" + "\n".join(bullets)


def properties(values: dict) -> str:
    """Render a bullet list describing the sub-options of an object setting.

    Returns an empty string when ``values`` declares no ``properties``;
    otherwise each property becomes one bullet produced by ``sub_section``.
    """
    sub_options = values.get("properties")
    if not sub_options:
        return ""

    bullets = "\n".join(sub_section(name, spec) for name, spec in sub_options.items())
    return "\nThis setting has the following sub-options:\n\n" + bullets


def sub_section(prop: str, values: dict) -> str:
    """Render one Markdown bullet documenting the sub-property ``prop``.

    The bullet consists of the property name, its type (if any), its
    description followed by the default value (if any), and then nested,
    indented bullet lists for array item properties and object sub-options.
    A missing description is reported through ``error`` and replaced by a
    placeholder.

    Args:
        prop: Name of the sub-property.
        values: JSON Schema node describing the sub-property.
    """

    def render_default() -> str:
        if "default" not in values:
            return ""
        encoded = json.dumps(values["default"])
        return f"Defaults to `{encoded}`."

    def render_description() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        return f"{text}{p(render_default(), sep(values))}\n"

    # The pieces are computed in output order so that any errors reported
    # while rendering nested properties appear in document order.
    bullet_head = f"* `{prop}`{p(type_str(values))}: "
    # Continuation lines of the description are aligned under the bullet.
    bullet_text = indent(render_description(), first_line=False)
    nested_items = indent(items(values))
    nested_properties = indent(properties(values))
    return bullet_head + bullet_text + nested_items + nested_properties


def section(prop: str, values: dict) -> str:
    """Render the full Markdown section documenting the top-level option ``prop``.

    The section is made of, in order: a horizontal rule, an optional chapter
    heading taken from ``SECTION_HEADERS``, the ``###`` title, the description
    (with type and simple default), bullet lists for array item properties
    and sub-options, a YAML block for a complex default, YAML example blocks
    and an optional trailing description (``io.element.post_description``).
    A missing description is reported through ``error`` and replaced by a
    placeholder.

    Args:
        prop: Name of the configuration option.
        values: JSON Schema node describing the option.
    """

    def has_simple_default() -> bool:
        # Falsy defaults (including a missing one, `{}` and `[]`) and scalars
        # fit in a one-liner; non-empty dicts/lists are shown as a code block.
        default = values.get("default")
        if not default:
            return True
        return not isinstance(default, (dict, list))

    def default_sentence() -> str:
        if "default" not in values:
            declared_type = values.get("type", [])
            if declared_type == "object" or "object" in declared_type:
                # Skip objects as they probably have child defaults.
                return ""
            return "There is no default for this option."

        if not has_simple_default():
            # Show complex defaults as a code block instead.
            return ""
        encoded = json.dumps(values["default"])
        return f"Defaults to `{encoded}`."

    def chapter_header() -> str:
        try:
            entry = SECTION_HEADERS[prop]
            heading, intro = entry["title"], entry["description"]
        except KeyError:
            return ""
        return f"## {heading}\n\n{intro}\n\n---\n"

    def describe() -> str:
        text = values.get("description")
        if not text:
            error(f"missing description for {prop}")
            return "MISSING DESCRIPTION\n"
        type_prefix = a(em(type_str(values)))
        default_suffix = p(default_sentence(), sep(values))
        return f"\n{type_prefix}{text}{default_suffix}\n"

    def yaml_block(example: Any) -> str:
        dumped = yaml.dump({prop: example}, sort_keys=False)
        return f"```yaml\n{dumped}```\n"

    def complex_default_block() -> str:
        if has_simple_default():
            return ""
        return "\nDefault configuration:\n" + yaml_block(values["default"])

    def example_blocks() -> str:
        samples = values.get("examples")
        if not samples:
            return ""

        rendered = "\n".join(yaml_block(sample) for sample in samples)
        if len(samples) >= 2:
            return f"\nExample configurations:\n{rendered}"
        return f"\nExample configuration:\n{rendered}"

    def trailing_notes() -> str:
        # Sometimes it's helpful to have a description after the list of fields,
        # e.g. with a subsection that consists only of text.
        notes = values.get("io.element.post_description")
        if not notes:
            return ""
        return f"\n{notes}\n\n"

    # Parts are evaluated in document order so that reported errors follow
    # the order of the generated text.
    parts = [
        "---\n",
        chapter_header(),
        f"### `{prop}`\n",
        describe(),
        items(values),
        properties(values),
        complex_default_block(),
        example_blocks(),
        trailing_notes(),
    ]
    return "".join(parts)


def main() -> None:
    """Generate the configuration manual from the schema file given in argv.

    Reads the schema file named by the single command-line argument, expands
    local ``$ref``s, renders one section per top-level property and prints
    ``HEADER`` followed by those sections to stdout.

    Exit status:
        1 if the command line is invalid (usage is printed to stderr),
        2 if any problem was reported through ``error`` while rendering.
    """

    def usage(err_msg: str) -> int:
        """Prints the error and usage help to stderr; returns the exit code."""
        script_name = (sys.argv[:1] or ["__main__.py"])[0]
        print(err_msg, file=sys.stderr)
        print(f"Usage: {script_name} <JSON Schema file>", file=sys.stderr)
        print(f"\n{__doc__}", file=sys.stderr)
        return 1

    def read_json_file_arg() -> Any:
        # `sys.exit` is used rather than the `exit` builtin, which is only
        # injected by the `site` module and is missing under `python -S`.
        if len(sys.argv) > 2:
            sys.exit(usage("Too many arguments."))
        if not (filepath := (sys.argv[1:] or [""])[0]):
            sys.exit(usage("No schema file provided."))
        # Read as UTF-8 explicitly instead of relying on the locale encoding.
        # The file is parsed with the YAML loader, as in the original code.
        with open(filepath, encoding="utf-8") as f:
            return yaml.safe_load(f)

    schema = read_json_file_arg()
    schema = resolve_local_refs(schema)

    sections = (section(k, v) for k, v in schema["properties"].items())
    print(HEADER + "".join(sections), end="")

    if has_error:
        print("There were errors.", file=sys.stderr)
        sys.exit(2)


if __name__ == "__main__":
    main()