Diffstat (limited to 'scripts-dev')
-rwxr-xr-x  scripts-dev/build_debian_packages.py  |   1
-rwxr-xr-x  scripts-dev/check_pydantic_models.py  | 424
-rwxr-xr-x  scripts-dev/complement.sh             |  10
-rwxr-xr-x  scripts-dev/federation_client.py      | 122
-rwxr-xr-x  scripts-dev/lint.sh                   |   1
-rwxr-xr-x  scripts-dev/make_full_schema.sh       | 206
-rw-r--r--  scripts-dev/mypy_synapse_plugin.py    |   4
-rwxr-xr-x  scripts-dev/release.py                | 189
8 files changed, 850 insertions, 107 deletions
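Before the diffs themselves, a note on the largest addition: check_pydantic_models.py exists because pydantic v1 has no strict mode, so a plain `int` annotation silently coerces strings -- the same class of bug as the stringy power levels fixed in room version 10, which the script's docstring cites. Below is a minimal sketch of that difference, assuming pydantic v1 semantics (as Synapse used at the time); the model names are illustrative only, not taken from the patch.

    # Sketch of the coercion pitfall the new linter guards against (pydantic v1).
    from pydantic import BaseModel, StrictInt, ValidationError

    class LaxEvent(BaseModel):
        power_level: int        # lax: the string "100" is silently coerced to 100

    class StrictEvent(BaseModel):
        power_level: StrictInt  # strict: the string "100" is rejected

    assert LaxEvent(power_level="100").power_level == 100  # coerced, no error
    try:
        StrictEvent(power_level="100")
    except ValidationError:
        print("rejected, as the linter requires")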
diff --git a/scripts-dev/build_debian_packages.py b/scripts-dev/build_debian_packages.py index cd2e64b75f..7442300196 100755 --- a/scripts-dev/build_debian_packages.py +++ b/scripts-dev/build_debian_packages.py @@ -27,6 +27,7 @@ DISTS = ( "debian:sid", "ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14) "ubuntu:jammy", # 22.04 LTS (EOL 2027-04) + "ubuntu:kinetic", # 22.10 (EOL 2023-07-20) ) DESC = """\ diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py new file mode 100755 index 0000000000..9f2b7ded5b --- /dev/null +++ b/scripts-dev/check_pydantic_models.py @@ -0,0 +1,424 @@ +#! /usr/bin/env python +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +A script which enforces that Synapse always uses strict types when defining a Pydantic +model. + +Pydantic does not yet offer a strict mode, but it is planned for pydantic v2. See + + https://github.com/pydantic/pydantic/issues/1098 + https://pydantic-docs.helpmanual.io/blog/pydantic-v2/#strict-mode + +until then, this script is a best effort to stop us from introducing type coersion bugs +(like the infamous stringy power levels fixed in room version 10). +""" +import argparse +import contextlib +import functools +import importlib +import logging +import os +import pkgutil +import sys +import textwrap +import traceback +import unittest.mock +from contextlib import contextmanager +from typing import Any, Callable, Dict, Generator, List, Set, Type, TypeVar + +from parameterized import parameterized +from pydantic import BaseModel as PydanticBaseModel, conbytes, confloat, conint, constr +from pydantic.typing import get_args +from typing_extensions import ParamSpec + +logger = logging.getLogger(__name__) + +CONSTRAINED_TYPE_FACTORIES_WITH_STRICT_FLAG: List[Callable] = [ + constr, + conbytes, + conint, + confloat, +] + +TYPES_THAT_PYDANTIC_WILL_COERCE_TO = [ + str, + bytes, + int, + float, + bool, +] + + +P = ParamSpec("P") +R = TypeVar("R") + + +class ModelCheckerException(Exception): + """Dummy exception. 
Allows us to detect unwanted types during a module import.""" + + +class MissingStrictInConstrainedTypeException(ModelCheckerException): + factory_name: str + + def __init__(self, factory_name: str): + self.factory_name = factory_name + + +class FieldHasUnwantedTypeException(ModelCheckerException): + message: str + + def __init__(self, message: str): + self.message = message + + +def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]: + """We patch `constr` and friends with wrappers that enforce strict=True.""" + + @functools.wraps(factory) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + if "strict" not in kwargs: + raise MissingStrictInConstrainedTypeException(factory.__name__) + if not kwargs["strict"]: + raise MissingStrictInConstrainedTypeException(factory.__name__) + return factory(*args, **kwargs) + + return wrapper + + +def field_type_unwanted(type_: Any) -> bool: + """Very rough attempt to detect if a type is unwanted as a Pydantic annotation. + + At present, we exclude types which will coerce, or any generic type involving types + which will coerce.""" + logger.debug("Is %s unwanted?") + if type_ in TYPES_THAT_PYDANTIC_WILL_COERCE_TO: + logger.debug("yes") + return True + logger.debug("Maybe. Subargs are %s", get_args(type_)) + rv = any(field_type_unwanted(t) for t in get_args(type_)) + logger.debug("Conclusion: %s %s unwanted", type_, "is" if rv else "is not") + return rv + + +class PatchedBaseModel(PydanticBaseModel): + """A patched version of BaseModel that inspects fields after models are defined. + + We complain loudly if we see an unwanted type. + + Beware: ModelField.type_ is presumably private; this is likely to be very brittle. + """ + + @classmethod + def __init_subclass__(cls: Type[PydanticBaseModel], **kwargs: object): + for field in cls.__fields__.values(): + # Note that field.type_ and field.outer_type are computed based on the + # annotation type, see pydantic.fields.ModelField._type_analysis + if field_type_unwanted(field.outer_type_): + # TODO: this only reports the first bad field. Can we find all bad ones + # and report them all? + raise FieldHasUnwantedTypeException( + f"{cls.__module__}.{cls.__qualname__} has field '{field.name}' " + f"with unwanted type `{field.outer_type_}`" + ) + + +@contextmanager +def monkeypatch_pydantic() -> Generator[None, None, None]: + """Patch pydantic with our snooping versions of BaseModel and the con* functions. + + If the snooping functions see something they don't like, they'll raise a + ModelCheckingException instance. + """ + with contextlib.ExitStack() as patches: + # Most Synapse code ought to import the patched objects directly from + # `pydantic`. But we also patch their containing modules `pydantic.main` and + # `pydantic.types` for completeness. + patch_basemodel1 = unittest.mock.patch( + "pydantic.BaseModel", new=PatchedBaseModel + ) + patch_basemodel2 = unittest.mock.patch( + "pydantic.main.BaseModel", new=PatchedBaseModel + ) + patches.enter_context(patch_basemodel1) + patches.enter_context(patch_basemodel2) + for factory in CONSTRAINED_TYPE_FACTORIES_WITH_STRICT_FLAG: + wrapper: Callable = make_wrapper(factory) + patch1 = unittest.mock.patch(f"pydantic.{factory.__name__}", new=wrapper) + patch2 = unittest.mock.patch( + f"pydantic.types.{factory.__name__}", new=wrapper + ) + patches.enter_context(patch1) + patches.enter_context(patch2) + yield + + +def format_model_checker_exception(e: ModelCheckerException) -> str: + """Work out which line of code caused e. 
Format the line in a human-friendly way.""" + # TODO. FieldHasUnwantedTypeException gives better error messages. Can we ditch the + # patches of constr() etc, and instead inspect fields to look for ConstrainedStr + # with strict=False? There is some difficulty with the inheritance hierarchy + # because StrictStr < ConstrainedStr < str. + if isinstance(e, FieldHasUnwantedTypeException): + return e.message + elif isinstance(e, MissingStrictInConstrainedTypeException): + frame_summary = traceback.extract_tb(e.__traceback__)[-2] + return ( + f"Missing `strict=True` from {e.factory_name}() call \n" + + traceback.format_list([frame_summary])[0].lstrip() + ) + else: + raise ValueError(f"Unknown exception {e}") from e + + +def lint() -> int: + """Try to import all of Synapse and see if we spot any Pydantic type coercions. + + Print any problems, then return a status code suitable for sys.exit.""" + failures = do_lint() + if failures: + print(f"Found {len(failures)} problem(s)") + for failure in sorted(failures): + print(failure) + return os.EX_DATAERR if failures else os.EX_OK + + +def do_lint() -> Set[str]: + """Try to import all of Synapse and see if we spot any Pydantic type coercions.""" + failures = set() + + with monkeypatch_pydantic(): + logger.debug("Importing synapse") + try: + # TODO: make "synapse" an argument so we can target this script at + # a subpackage + module = importlib.import_module("synapse") + except ModelCheckerException as e: + logger.warning("Bad annotation found when importing synapse") + failures.add(format_model_checker_exception(e)) + return failures + + try: + logger.debug("Fetching subpackages") + module_infos = list( + pkgutil.walk_packages(module.__path__, f"{module.__name__}.") + ) + except ModelCheckerException as e: + logger.warning("Bad annotation found when looking for modules to import") + failures.add(format_model_checker_exception(e)) + return failures + + for module_info in module_infos: + logger.debug("Importing %s", module_info.name) + try: + importlib.import_module(module_info.name) + except ModelCheckerException as e: + logger.warning( + f"Bad annotation found when importing {module_info.name}" + ) + failures.add(format_model_checker_exception(e)) + + return failures + + +def run_test_snippet(source: str) -> None: + """Exec a snippet of source code in an isolated environment.""" + # To emulate `source` being called at the top level of the module, + # the globals and locals we provide apparently have to be the same mapping. + # + # > Remember that at the module level, globals and locals are the same dictionary. + # > If exec gets two separate objects as globals and locals, the code will be + # > executed as if it were embedded in a class definition. 
+ globals_: Dict[str, object] + locals_: Dict[str, object] + globals_ = locals_ = {} + exec(textwrap.dedent(source), globals_, locals_) + + +class TestConstrainedTypesPatch(unittest.TestCase): + def test_expression_without_strict_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import constr + constr() + """ + ) + + def test_called_as_module_attribute_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + import pydantic + pydantic.constr() + """ + ) + + def test_wildcard_import_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import * + constr() + """ + ) + + def test_alternative_import_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic.types import constr + constr() + """ + ) + + def test_alternative_import_attribute_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + import pydantic.types + pydantic.types.constr() + """ + ) + + def test_kwarg_but_no_strict_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import constr + constr(min_length=10) + """ + ) + + def test_kwarg_strict_False_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import constr + constr(strict=False) + """ + ) + + def test_kwarg_strict_True_doesnt_raise(self) -> None: + with monkeypatch_pydantic(): + run_test_snippet( + """ + from pydantic import constr + constr(strict=True) + """ + ) + + def test_annotation_without_strict_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import constr + x: constr() + """ + ) + + def test_field_annotation_without_strict_raises(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic import BaseModel, conint + class C: + x: conint() + """ + ) + + +class TestFieldTypeInspection(unittest.TestCase): + @parameterized.expand( + [ + ("str",), + ("bytes"), + ("int",), + ("float",), + ("bool"), + ("Optional[str]",), + ("Union[None, str]",), + ("List[str]",), + ("List[List[str]]",), + ("Dict[StrictStr, str]",), + ("Dict[str, StrictStr]",), + ("TypedDict('D', x=int)",), + ] + ) + def test_field_holding_unwanted_type_raises(self, annotation: str) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + f""" + from typing import * + from pydantic import * + class C(BaseModel): + f: {annotation} + """ + ) + + @parameterized.expand( + [ + ("StrictStr",), + ("StrictBytes"), + ("StrictInt",), + ("StrictFloat",), + ("StrictBool"), + ("constr(strict=True, min_length=10)",), + ("Optional[StrictStr]",), + ("Union[None, StrictStr]",), + ("List[StrictStr]",), + ("List[List[StrictStr]]",), + ("Dict[StrictStr, StrictStr]",), + ("TypedDict('D', x=StrictInt)",), + ] + ) + def test_field_holding_accepted_type_doesnt_raise(self, annotation: str) -> None: + with monkeypatch_pydantic(): + run_test_snippet( + f""" + from typing import * + from pydantic import * + class C(BaseModel): + f: {annotation} + """ + ) + + def 
test_field_holding_str_raises_with_alternative_import(self) -> None: + with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException): + run_test_snippet( + """ + from pydantic.main import BaseModel + class C(BaseModel): + f: str + """ + ) + + +parser = argparse.ArgumentParser() +parser.add_argument("mode", choices=["lint", "test"], default="lint", nargs="?") +parser.add_argument("-v", "--verbose", action="store_true") + + +if __name__ == "__main__": + args = parser.parse_args(sys.argv[1:]) + logging.basicConfig( + format="%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s", + level=logging.DEBUG if args.verbose else logging.INFO, + ) + # suppress logs we don't care about + logging.getLogger("xmlschema").setLevel(logging.WARNING) + if args.mode == "lint": + sys.exit(lint()) + elif args.mode == "test": + unittest.main(argv=sys.argv[:1]) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index eab23f18f1..803c6ce92d 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -126,7 +126,7 @@ export COMPLEMENT_BASE_IMAGE=complement-synapse extra_test_args=() -test_tags="synapse_blacklist,msc2716,msc3030,msc3787" +test_tags="synapse_blacklist,msc3787,msc3874" # All environment variables starting with PASS_ will be shared. # (The prefix is stripped off before reaching the container.) @@ -139,6 +139,9 @@ if [[ -n "$WORKERS" ]]; then # Use workers. export PASS_SYNAPSE_COMPLEMENT_USE_WORKERS=true + # Pass through the workers defined. If none, it will be an empty string + export PASS_SYNAPSE_WORKER_TYPES="$WORKER_TYPES" + # Workers can only use Postgres as a database. export PASS_SYNAPSE_COMPLEMENT_DATABASE=postgres @@ -158,7 +161,10 @@ else # We only test faster room joins on monoliths, because they are purposefully # being developed without worker support to start with. - test_tags="$test_tags,faster_joins" + # + # The tests for importing historical messages (MSC2716) and jump to date (MSC3030) + # also only pass with monoliths, currently. + test_tags="$test_tags,faster_joins,msc2716,msc3030" fi diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py index 763dd02c47..b1d5e2e616 100755 --- a/scripts-dev/federation_client.py +++ b/scripts-dev/federation_client.py @@ -46,11 +46,12 @@ import signedjson.key import signedjson.types import srvlookup import yaml +from requests import PreparedRequest, Response from requests.adapters import HTTPAdapter from urllib3 import HTTPConnectionPool # uncomment the following to enable debug logging of http requests -# from httplib import HTTPConnection +# from http.client import HTTPConnection # HTTPConnection.debuglevel = 1 @@ -103,6 +104,7 @@ def request( destination: str, path: str, content: Optional[str], + verify_tls: bool, ) -> requests.Response: if method is None: if content is None: @@ -141,7 +143,6 @@ def request( s.mount("matrix://", MatrixConnectionAdapter()) headers: Dict[str, str] = { - "Host": destination, "Authorization": authorization_headers[0], } @@ -152,7 +153,7 @@ def request( method=method, url=dest, headers=headers, - verify=False, + verify=verify_tls, data=content, stream=True, ) @@ -203,6 +204,12 @@ def main() -> None: parser.add_argument("--body", help="Data to send as the body of the HTTP request") parser.add_argument( + "--insecure", + action="store_true", + help="Disable TLS certificate verification", + ) + + parser.add_argument( "path", help="request path, including the '/_matrix/federation/...' prefix." 
) @@ -227,6 +234,7 @@ def main() -> None: args.destination, args.path, content=args.body, + verify_tls=not args.insecure, ) sys.stderr.write("Status Code: %d\n" % (result.status_code,)) @@ -254,36 +262,93 @@ def read_args_from_config(args: argparse.Namespace) -> None: class MatrixConnectionAdapter(HTTPAdapter): + def send( + self, + request: PreparedRequest, + *args: Any, + **kwargs: Any, + ) -> Response: + # overrides the send() method in the base class. + + # We need to look for .well-known redirects before passing the request up to + # HTTPAdapter.send(). + assert isinstance(request.url, str) + parsed = urlparse.urlsplit(request.url) + server_name = parsed.netloc + well_known = self._get_well_known(parsed.netloc) + + if well_known: + server_name = well_known + + # replace the scheme in the uri with https, so that cert verification is done + # also replace the hostname if we got a .well-known result + request.url = urlparse.urlunsplit( + ("https", server_name, parsed.path, parsed.query, parsed.fragment) + ) + + # at this point we also add the host header (otherwise urllib will add one + # based on the `host` from the connection returned by `get_connection`, + # which will be wrong if there is an SRV record). + request.headers["Host"] = server_name + + return super().send(request, *args, **kwargs) + + def get_connection( + self, url: str, proxies: Optional[Dict[str, str]] = None + ) -> HTTPConnectionPool: + # overrides the get_connection() method in the base class + parsed = urlparse.urlsplit(url) + (host, port, ssl_server_name) = self._lookup(parsed.netloc) + print( + f"Connecting to {host}:{port} with SNI {ssl_server_name}", file=sys.stderr + ) + return self.poolmanager.connection_from_host( + host, + port=port, + scheme="https", + pool_kwargs={"server_hostname": ssl_server_name}, + ) + @staticmethod - def lookup(s: str, skip_well_known: bool = False) -> Tuple[str, int]: - if s[-1] == "]": + def _lookup(server_name: str) -> Tuple[str, int, str]: + """ + Do an SRV lookup on a server name and return the host:port to connect to + Given the server_name (after any .well-known lookup), return the host, port and + the ssl server name + """ + if server_name[-1] == "]": # ipv6 literal (with no port) - return s, 8448 + return server_name, 8448, server_name - if ":" in s: - out = s.rsplit(":", 1) + if ":" in server_name: + # explicit port + out = server_name.rsplit(":", 1) try: port = int(out[1]) except ValueError: - raise ValueError("Invalid host:port '%s'" % s) - return out[0], port - - # try a .well-known lookup - if not skip_well_known: - well_known = MatrixConnectionAdapter.get_well_known(s) - if well_known: - return MatrixConnectionAdapter.lookup(well_known, skip_well_known=True) + raise ValueError("Invalid host:port '%s'" % (server_name,)) + return out[0], port, out[0] try: - srv = srvlookup.lookup("matrix", "tcp", s)[0] - return srv.host, srv.port + srv = srvlookup.lookup("matrix", "tcp", server_name)[0] + print( + f"SRV lookup on _matrix._tcp.{server_name} gave {srv}", + file=sys.stderr, + ) + return srv.host, srv.port, server_name except Exception: - return s, 8448 + return server_name, 8448, server_name @staticmethod - def get_well_known(server_name: str) -> Optional[str]: - uri = "https://%s/.well-known/matrix/server" % (server_name,) - print("fetching %s" % (uri,), file=sys.stderr) + def _get_well_known(server_name: str) -> Optional[str]: + if ":" in server_name: + # explicit port, or ipv6 literal. 
Either way, no .well-known + return None + + # TODO: check for ipv4 literals + + uri = f"https://{server_name}/.well-known/matrix/server" + print(f"fetching {uri}", file=sys.stderr) try: resp = requests.get(uri) @@ -304,19 +369,6 @@ class MatrixConnectionAdapter(HTTPAdapter): print("Invalid response from %s: %s" % (uri, e), file=sys.stderr) return None - def get_connection( - self, url: str, proxies: Optional[Dict[str, str]] = None - ) -> HTTPConnectionPool: - parsed = urlparse.urlparse(url) - - (host, port) = self.lookup(parsed.netloc) - netloc = "%s:%d" % (host, port) - print("Connecting to %s" % (netloc,), file=sys.stderr) - url = urlparse.urlunparse( - ("https", netloc, parsed.path, parsed.params, parsed.query, parsed.fragment) - ) - return super().get_connection(url, proxies) - if __name__ == "__main__": main() diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh index 377348b107..bf900645b1 100755 --- a/scripts-dev/lint.sh +++ b/scripts-dev/lint.sh @@ -106,4 +106,5 @@ isort "${files[@]}" python3 -m black "${files[@]}" ./scripts-dev/config-lint.sh flake8 "${files[@]}" +./scripts-dev/check_pydantic_models.py lint mypy diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index f0e22d4ca2..e2bc1640bb 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -2,34 +2,37 @@ # # This script generates SQL files for creating a brand new Synapse DB with the latest # schema, on both SQLite3 and Postgres. -# -# It does so by having Synapse generate an up-to-date SQLite DB, then running -# synapse_port_db to convert it to Postgres. It then dumps the contents of both. export PGHOST="localhost" -POSTGRES_DB_NAME="synapse_full_schema.$$" - -SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite" -POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres" - +POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$" +POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$" +POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$" REQUIRED_DEPS=("matrix-synapse" "psycopg2") usage() { echo - echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]" + echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]" echo echo "-p <postgres_username>" echo " Username to connect to local postgres instance. The password will be requested" echo " during script execution." echo "-c" - echo " CI mode. Enables coverage tracking and prints every command that the script runs." + echo " CI mode. Prints every command that the script runs." echo "-o <path>" echo " Directory to output full schema files to." + echo "-n <schema number>" + echo " Schema number for the new snapshot. Used to set the location of files within " + echo " the output directory, mimicking that of synapse/storage/schemas." + echo " Defaults to 9999." echo "-h" echo " Display this help text." + echo "" + echo " NB: make sure to run this against the *oldest* supported version of postgres," + echo " or else pg_dump might output non-backwards-compatible syntax." 
} -while getopts "p:co:h" opt; do +SCHEMA_NUMBER="9999" +while getopts "p:co:hn:" opt; do case $opt in p) export PGUSER=$OPTARG @@ -37,11 +40,6 @@ while getopts "p:co:h" opt; do c) # Print all commands that are being executed set -x - - # Modify required dependencies for coverage - REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess") - - COVERAGE=1 ;; o) command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1) @@ -51,6 +49,9 @@ while getopts "p:co:h" opt; do usage exit ;; + n) + SCHEMA_NUMBER="$OPTARG" + ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 usage @@ -98,11 +99,21 @@ cd "$(dirname "$0")/.." TMPDIR=$(mktemp -d) KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path SQLITE_CONFIG=$TMPDIR/sqlite.conf -SQLITE_DB=$TMPDIR/homeserver.db +SQLITE_MAIN_DB=$TMPDIR/main.db +SQLITE_STATE_DB=$TMPDIR/state.db +SQLITE_COMMON_DB=$TMPDIR/common.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit -trap 'rm -rf $TMPDIR' EXIT +cleanup() { + echo "Cleaning up temporary sqlite database and config files..." + rm -r "$TMPDIR" + echo "Cleaning up temporary Postgres database..." + dropdb --if-exists "$POSTGRES_COMMON_DB_NAME" + dropdb --if-exists "$POSTGRES_MAIN_DB_NAME" + dropdb --if-exists "$POSTGRES_STATE_DB_NAME" +} +trap 'cleanup' EXIT cat > "$SQLITE_CONFIG" <<EOF server_name: "test" @@ -112,10 +123,22 @@ macaroon_secret_key: "abcde" report_stats: false -database: - name: "sqlite3" - args: - database: "$SQLITE_DB" +databases: + common: + name: "sqlite3" + data_stores: [] + args: + database: "$SQLITE_COMMON_DB" + main: + name: "sqlite3" + data_stores: ["main"] + args: + database: "$SQLITE_MAIN_DB" + state: + name: "sqlite3" + data_stores: ["state"] + args: + database: "$SQLITE_STATE_DB" # Suppress the key server warning. trusted_key_servers: [] @@ -129,13 +152,32 @@ macaroon_secret_key: "abcde" report_stats: false -database: - name: "psycopg2" - args: - user: "$PGUSER" - host: "$PGHOST" - password: "$PGPASSWORD" - database: "$POSTGRES_DB_NAME" +databases: + common: + name: "psycopg2" + data_stores: [] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_COMMON_DB_NAME" + main: + name: "psycopg2" + data_stores: ["main"] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_MAIN_DB_NAME" + state: + name: "psycopg2" + data_stores: ["state"] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_STATE_DB_NAME" + # Suppress the key server warning. trusted_key_servers: [] @@ -147,29 +189,46 @@ python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG" # Make sure the SQLite3 database is using the latest schema and has no pending background update. echo "Running db background jobs..." -synapse/_scripts/update_synapse_database.py --database-config --run-background-updates "$SQLITE_CONFIG" +synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates # Create the PostgreSQL database. -echo "Creating postgres database..." -createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME" - -echo "Copying data from SQLite3 to Postgres with synapse_port_db..." 
-if [ -z "$COVERAGE" ]; then - # No coverage needed - synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG" -else - # Coverage desired - coverage run synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG" -fi +echo "Creating postgres databases..." +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME" +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME" +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME" + +echo "Running db background jobs..." +synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates + -# Delete schema_version, applied_schema_deltas and applied_module_schemas tables -# Also delete any shadow tables from fts4 -# This needs to be done after synapse_port_db is run echo "Dropping unwanted db tables..." -SQL=" + +# Some common tables are created and updated by Synapse itself and do not belong in the +# schema. +DROP_APP_MANAGED_TABLES=" DROP TABLE schema_version; +DROP TABLE schema_compat_version; DROP TABLE applied_schema_deltas; DROP TABLE applied_module_schemas; +" +# Other common tables are not created by Synapse and do belong in the schema. +# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But +# since there's only one table there, I haven't bothered to do so. +DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES +DROP TABLE background_updates; +" + +sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES" +sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES" +sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES" +psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES" + +# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation +# details behind full text search tables. Omit these from the dumps. + +sqlite3 "$SQLITE_MAIN_DB" <<< " DROP TABLE event_search_content; DROP TABLE event_search_segments; DROP TABLE event_search_segdir; @@ -181,16 +240,57 @@ DROP TABLE user_directory_search_segdir; DROP TABLE user_directory_search_docsize; DROP TABLE user_directory_search_stat; " -sqlite3 "$SQLITE_DB" <<< "$SQL" -psql "$POSTGRES_DB_NAME" -w <<< "$SQL" -echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE'..." -sqlite3 "$SQLITE_DB" ".dump" > "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping SQLite3 schema..." + +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" + +cleanup_pg_schema() { + # Cleanup as follows: + # - Remove empty lines. pg_dump likes to output a lot of these. + # - Remove comment-only lines. 
pg_dump also likes to output a lot of these to visually + # separate tables etc. + # - Remove "public." prefix --- the schema name. + # - Remove "SET" commands. Last time I ran this, the output commands were + # SET statement_timeout = 0; + # SET lock_timeout = 0; + # SET idle_in_transaction_session_timeout = 0; + # SET client_encoding = 'UTF8'; + # SET standard_conforming_strings = on; + # SET check_function_bodies = false; + # SET xmloption = content; + # SET client_min_messages = warning; + # SET row_security = off; + # SET default_table_access_method = heap; + # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all + # SELECT statements because some of those have side-effects which we do want in the + # schema. Last time I ran this, the only SELECTS were + # SELECT pg_catalog.set_config('search_path', '', false); + # and + # SELECT pg_catalog.setval(text, bigint, bool); + # We do want to remove the former, but the latter is important. If the last argument + # is `true` or omitted, this marks the given integer as having been consumed and + # will NOT appear as the nextval. + sed -e '/^$/d' \ + -e '/^--/d' \ + -e 's/public\.//g' \ + -e '/^SET /d' \ + -e '/^SELECT pg_catalog.set_config/d' +} -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..." -pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping Postgres schema..." -echo "Cleaning up temporary Postgres database..." -dropdb $POSTGRES_DB_NAME +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! 
Files dumped to: $OUTPUT_DIR" diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py index d08517a953..2c377533c0 100644 --- a/scripts-dev/mypy_synapse_plugin.py +++ b/scripts-dev/mypy_synapse_plugin.py @@ -29,7 +29,7 @@ class SynapsePlugin(Plugin): self, fullname: str ) -> Optional[Callable[[MethodSigContext], CallableType]]: if fullname.startswith( - "synapse.util.caches.descriptors._CachedFunction.__call__" + "synapse.util.caches.descriptors.CachedFunction.__call__" ) or fullname.startswith( "synapse.util.caches.descriptors._LruCachedFunction.__call__" ): @@ -38,7 +38,7 @@ class SynapsePlugin(Plugin): def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: - """Fixes the `_CachedFunction.__call__` signature to be correct. + """Fixes the `CachedFunction.__call__` signature to be correct. It already has *almost* the correct signature, except: diff --git a/scripts-dev/release.py b/scripts-dev/release.py index 46220c4dd3..bf47b6c713 100755 --- a/scripts-dev/release.py +++ b/scripts-dev/release.py @@ -18,10 +18,12 @@ """ import glob +import json import os import re import subprocess import sys +import time import urllib.request from os import path from tempfile import TemporaryDirectory @@ -71,18 +73,21 @@ def cli() -> None: ./scripts-dev/release.py tag - # ... wait for assets to build ... + # wait for assets to build, either manually or with: + ./scripts-dev/release.py wait-for-actions ./scripts-dev/release.py publish ./scripts-dev/release.py upload - # Optional: generate some nice links for the announcement - ./scripts-dev/release.py merge-back + # Optional: generate some nice links for the announcement ./scripts-dev/release.py announce + Alternatively, `./scripts-dev/release.py full` will do all the above + as well as guiding you through the manual steps. + If the env var GH_TOKEN (or GITHUB_TOKEN) is set, or passed into the `tag`/`publish` command, then a new draft release will be created/published. """ @@ -90,6 +95,10 @@ def cli() -> None: @cli.command() def prepare() -> None: + _prepare() + + +def _prepare() -> None: """Do the initial stages of creating a release, including creating release branch, updating changelog and pushing to GitHub. """ @@ -210,9 +219,7 @@ def prepare() -> None: update_branch(repo) # Create the new release branch - # Type ignore will no longer be needed after GitPython 3.1.28. - # See https://github.com/gitpython-developers/GitPython/pull/1419 - repo.create_head(release_branch_name, commit=base_branch) # type: ignore[arg-type] + repo.create_head(release_branch_name, commit=base_branch) # Special-case SyTest: we don't actually prepare any files so we may # as well push it now (and only when we create a release branch; @@ -284,6 +291,10 @@ def prepare() -> None: @cli.command() @click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"]) def tag(gh_token: Optional[str]) -> None: + _tag(gh_token) + + +def _tag(gh_token: Optional[str]) -> None: """Tags the release and generates a draft GitHub release""" # Make sure we're in a git repo. @@ -374,6 +385,10 @@ def tag(gh_token: Optional[str]) -> None: @cli.command() @click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=True) def publish(gh_token: str) -> None: + _publish(gh_token) + + +def _publish(gh_token: str) -> None: """Publish release on GitHub.""" # Make sure we're in a git repo. 
@@ -410,7 +425,12 @@ def publish(gh_token: str) -> None: @cli.command() -def upload() -> None: +@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=False) +def upload(gh_token: Optional[str]) -> None: + _upload(gh_token) + + +def _upload(gh_token: Optional[str]) -> None: """Upload release to pypi.""" current_version = get_package_version() @@ -423,18 +443,40 @@ def upload() -> None: click.echo("Tag {tag_name} (tag.commit) is not currently checked out!") click.get_current_context().abort() - pypi_asset_names = [ - f"matrix_synapse-{current_version}-py3-none-any.whl", - f"matrix-synapse-{current_version}.tar.gz", - ] + # Query all the assets corresponding to this release. + gh = Github(gh_token) + gh_repo = gh.get_repo("matrix-org/synapse") + gh_release = gh_repo.get_release(tag_name) + + all_assets = set(gh_release.get_assets()) + + # Only accept the wheels and sdist. + # Notably: we don't care about debs.tar.xz. + asset_names_and_urls = sorted( + (asset.name, asset.browser_download_url) + for asset in all_assets + if asset.name.endswith((".whl", ".tar.gz")) + ) + + # Print out what we've determined. + print("Found relevant assets:") + for asset_name, _ in asset_names_and_urls: + print(f" - {asset_name}") + + ignored_asset_names = sorted( + {asset.name for asset in all_assets} + - {asset_name for asset_name, _ in asset_names_and_urls} + ) + print("\nIgnoring irrelevant assets:") + for asset_name in ignored_asset_names: + print(f" - {asset_name}") with TemporaryDirectory(prefix=f"synapse_upload_{tag_name}_") as tmpdir: - for name in pypi_asset_names: + for name, asset_download_url in asset_names_and_urls: filename = path.join(tmpdir, name) - url = f"https://github.com/matrix-org/synapse/releases/download/{tag_name}/{name}" click.echo(f"Downloading {name} into {filename}") - urllib.request.urlretrieve(url, filename=filename) + urllib.request.urlretrieve(asset_download_url, filename=filename) if click.confirm("Upload to PyPI?", default=True): subprocess.run("twine upload *", shell=True, cwd=tmpdir) @@ -480,7 +522,74 @@ def _merge_into(repo: Repo, source: str, target: str) -> None: @cli.command() +@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=False) +def wait_for_actions(gh_token: Optional[str]) -> None: + _wait_for_actions(gh_token) + + +def _wait_for_actions(gh_token: Optional[str]) -> None: + # Find out the version and tag name. + current_version = get_package_version() + tag_name = f"v{current_version}" + + # Authentication is optional on this endpoint, + # but use a token if we have one to reduce the chance of being rate-limited. + url = f"https://api.github.com/repos/matrix-org/synapse/actions/runs?branch={tag_name}" + headers = {"Accept": "application/vnd.github+json"} + if gh_token is not None: + headers["authorization"] = f"token {gh_token}" + req = urllib.request.Request(url, headers=headers) + + time.sleep(10 * 60) + while True: + time.sleep(5 * 60) + response = urllib.request.urlopen(req) + resp = json.loads(response.read()) + + if len(resp["workflow_runs"]) == 0: + continue + + if all( + workflow["status"] != "in_progress" for workflow in resp["workflow_runs"] + ): + success = ( + workflow["status"] == "completed" for workflow in resp["workflow_runs"] + ) + if success: + _notify("Workflows successful. You can now continue the release.") + else: + _notify("Workflows failed.") + click.confirm("Continue anyway?", abort=True) + + break + + +def _notify(message: str) -> None: + # Send a bell character. 
Most terminals will play a sound or show a notification + # for this. + click.echo(f"\a{message}") + + # Try and run notify-send, but don't raise an Exception if this fails + # (This is best-effort) + # TODO Support other platforms? + subprocess.run( + [ + "notify-send", + "--app-name", + "Synapse Release Script", + "--expire-time", + "3600000", + message, + ] + ) + + +@cli.command() def merge_back() -> None: + _merge_back() + + +def _merge_back() -> None: """Merge the release branch back into the appropriate branches. All branches will be automatically pulled from the remote and the results will be pushed to the remote.""" @@ -519,6 +628,10 @@ def merge_back() -> None: @cli.command() def announce() -> None: + _announce() + + +def _announce() -> None: """Generate markdown to announce the release.""" current_version = get_package_version() @@ -548,10 +661,56 @@ Announce the release in - #homeowners:matrix.org (Synapse Announcements), bumping the version in the topic - #synapse:matrix.org (Synapse Admins), bumping the version in the topic - #synapse-dev:matrix.org -- #synapse-package-maintainers:matrix.org""" +- #synapse-package-maintainers:matrix.org + +Ask the designated people to do the blog and tweets.""" ) +@cli.command() +@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=True) +def full(gh_token: str) -> None: + click.echo("1. If this is a security release, read the security wiki page.") + click.echo("2. Check for any release blockers before proceeding.") + click.echo(" https://github.com/matrix-org/synapse/labels/X-Release-Blocker") + + click.confirm("Ready?", abort=True) + + click.echo("\n*** prepare ***") + _prepare() + + click.echo("Deploy to matrix.org and ensure that it hasn't fallen over.") + click.echo("Remember to silence the alerts to prevent alert spam.") + click.confirm("Deployed?", abort=True) + + click.echo("\n*** tag ***") + _tag(gh_token) + + click.echo("\n*** wait for actions ***") + _wait_for_actions(gh_token) + + click.echo("\n*** publish ***") + _publish(gh_token) + + click.echo("\n*** upload ***") + _upload(gh_token) + + click.echo("\n*** merge back ***") + _merge_back() + + click.echo("\nUpdate the Debian repository") + click.confirm("Started updating Debian repository?", abort=True) + + click.echo("\nWait for all release methods to be ready.") + # Docker should be ready because it was done by the workflows earlier + # PyPI should be ready because we just ran upload(). + # TODO Automatically poll until the Debs have made it to packages.matrix.org + click.confirm("Debs ready?", abort=True) + + click.echo("\n*** announce ***") + _announce() + + def get_package_version() -> version.Version: version_string = subprocess.check_output(["poetry", "version", "--short"]).decode( "utf-8" |
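A closing note on the release-script changes: the new wait-for-actions step polls the GitHub Actions REST API until every workflow run for the tag has finished. For reference, here is a condensed, self-contained sketch of that polling pattern -- the helper name is hypothetical, the sleep intervals are simplified from the patch, and two details worth noting: per-run outcomes must be collapsed with all(), since a bare generator expression is always truthy, and the API reports each run's pass/fail outcome in its "conclusion" field (its "status" field only says whether it has finished).

    import json
    import time
    import urllib.request
    from typing import Optional

    def wait_for_workflows(tag_name: str, gh_token: Optional[str] = None) -> bool:
        # Hypothetical condensed version of _wait_for_actions() above.
        url = (
            "https://api.github.com/repos/matrix-org/synapse/actions/runs"
            f"?branch={tag_name}"
        )
        headers = {"Accept": "application/vnd.github+json"}
        if gh_token is not None:
            headers["authorization"] = f"token {gh_token}"
        req = urllib.request.Request(url, headers=headers)

        while True:
            time.sleep(5 * 60)  # poll gently; Actions runs take minutes
            runs = json.loads(urllib.request.urlopen(req).read())["workflow_runs"]
            if not runs:
                continue  # runs for the tag may not have been scheduled yet
            if all(run["status"] != "in_progress" for run in runs):
                # all(...) collapses the generator to a bool; a bare generator
                # object would always be truthy.
                return all(run["conclusion"] == "success" for run in runs)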