summary refs log tree commit diff
path: root/synapse/config/workers.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/config/workers.py')
-rw-r--r--synapse/config/workers.py122
1 files changed, 100 insertions, 22 deletions
diff --git a/synapse/config/workers.py b/synapse/config/workers.py

index 1dfbe27e89..d2311cc857 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py
@@ -18,6 +18,7 @@ import logging from typing import Any, Dict, List, Union import attr +from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr from synapse.config._base import ( Config, @@ -25,6 +26,7 @@ from synapse.config._base import ( RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) +from synapse.config._util import parse_and_validate_mapping from synapse.config.server import ( DIRECT_TCP_ERROR, TCPListenerConfig, @@ -37,6 +39,19 @@ The '%s' configuration option is deprecated and will be removed in a future Synapse version. Please use ``%s: name_of_worker`` instead. """ +_MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA = """ +Missing data for a worker to connect to main process. Please include '%s' in the +`instance_map` declared in your shared yaml configuration, or optionally(as a deprecated +solution) in every worker's yaml as various `worker_replication_*` settings as defined +in workers documentation here: +`https://matrix-org.github.io/synapse/latest/workers.html#worker-configuration` +""" +# This allows for a handy knob when it's time to change from 'master' to +# something with less 'history' +MAIN_PROCESS_INSTANCE_NAME = "master" +# Use this to adjust what the main process is known as in the yaml instance_map +MAIN_PROCESS_INSTANCE_MAP_NAME = "main" + logger = logging.getLogger(__name__) @@ -50,13 +65,43 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: return obj -@attr.s(auto_attribs=True) -class InstanceLocationConfig: +class ConfigModel(BaseModel): + """A custom version of Pydantic's BaseModel which + + - ignores unknown fields and + - does not allow fields to be overwritten after construction, + + but otherwise uses Pydantic's default behaviour. + + For now, ignore unknown fields. In the future, we could change this so that unknown + config values cause a ValidationError, provided the error messages are meaningful to + server operators. + + Subclassing in this way is recommended by + https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally + """ + + class Config: + # By default, ignore fields that we don't recognise. + extra = Extra.ignore + # By default, don't allow fields to be reassigned after parsing. + allow_mutation = False + + +class InstanceLocationConfig(ConfigModel): """The host and port to talk to an instance via HTTP replication.""" - host: str - port: int - tls: bool = False + host: StrictStr + port: StrictInt + tls: StrictBool = False + + def scheme(self) -> str: + """Hardcode a retrievable scheme based on self.tls""" + return "https" if self.tls else "http" + + def netloc(self) -> str: + """Nicely format the network location data""" + return f"{self.host}:{self.port}" @attr.s @@ -129,27 +174,15 @@ class WorkerConfig(Config): raise ConfigError("worker_log_config must be a string") self.worker_log_config = worker_log_config - # The host used to connect to the main synapse - self.worker_replication_host = config.get("worker_replication_host", None) - # The port on the main synapse for TCP replication if "worker_replication_port" in config: raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",)) - # The port on the main synapse for HTTP replication endpoint - self.worker_replication_http_port = config.get("worker_replication_http_port") - - # The tls mode on the main synapse for HTTP replication endpoint. - # For backward compatibility this defaults to False. - self.worker_replication_http_tls = config.get( - "worker_replication_http_tls", False - ) - # The shared secret used for authentication when connecting to the main synapse. self.worker_replication_secret = config.get("worker_replication_secret", None) self.worker_name = config.get("worker_name", self.worker_app) - self.instance_name = self.worker_name or "master" + self.instance_name = self.worker_name or MAIN_PROCESS_INSTANCE_NAME # FIXME: Remove this check after a suitable amount of time. self.worker_main_http_uri = config.get("worker_main_http_uri", None) @@ -183,10 +216,55 @@ class WorkerConfig(Config): ) # A map from instance name to host/port of their HTTP replication endpoint. - instance_map = config.get("instance_map") or {} - self.instance_map = { - name: InstanceLocationConfig(**c) for name, c in instance_map.items() - } + # Check if the main process is declared. Inject it into the map if it's not, + # based first on if a 'main' block is declared then on 'worker_replication_*' + # data. If both are available, default to instance_map. The main process + # itself doesn't need this data as it would never have to talk to itself. + instance_map: Dict[str, Any] = config.get("instance_map", {}) + + if instance_map and self.instance_name is not MAIN_PROCESS_INSTANCE_NAME: + # The host used to connect to the main synapse + main_host = config.get("worker_replication_host", None) + + # The port on the main synapse for HTTP replication endpoint + main_port = config.get("worker_replication_http_port") + + # The tls mode on the main synapse for HTTP replication endpoint. + # For backward compatibility this defaults to False. + main_tls = config.get("worker_replication_http_tls", False) + + # For now, accept 'main' in the instance_map, but the replication system + # expects 'master', force that into being until it's changed later. + if MAIN_PROCESS_INSTANCE_MAP_NAME in instance_map: + instance_map[MAIN_PROCESS_INSTANCE_NAME] = instance_map[ + MAIN_PROCESS_INSTANCE_MAP_NAME + ] + del instance_map[MAIN_PROCESS_INSTANCE_MAP_NAME] + + # This is the backwards compatibility bit that handles the + # worker_replication_* bits using setdefault() to not overwrite anything. + elif main_host is not None and main_port is not None: + instance_map.setdefault( + MAIN_PROCESS_INSTANCE_NAME, + { + "host": main_host, + "port": main_port, + "tls": main_tls, + }, + ) + + else: + # If we've gotten here, it means that the main process is not on the + # instance_map and that not enough worker_replication_* variables + # were declared in the worker's yaml. + raise ConfigError( + _MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA + % MAIN_PROCESS_INSTANCE_MAP_NAME + ) + + self.instance_map: Dict[ + str, InstanceLocationConfig + ] = parse_and_validate_mapping(instance_map, InstanceLocationConfig) # Map from type of streams to source, c.f. WriterLocations. writers = config.get("stream_writers") or {}