diff --git a/scripts-dev/build_debian_packages.py b/scripts-dev/build_debian_packages.py
index cd2e64b75f..7442300196 100755
--- a/scripts-dev/build_debian_packages.py
+++ b/scripts-dev/build_debian_packages.py
@@ -27,6 +27,7 @@ DISTS = (
"debian:sid",
"ubuntu:focal", # 20.04 LTS (our EOL forced by Py38 on 2024-10-14)
"ubuntu:jammy", # 22.04 LTS (EOL 2027-04)
+ "ubuntu:kinetic", # 22.10 (EOL 2023-07-20)
)
DESC = """\
diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py
new file mode 100755
index 0000000000..9f2b7ded5b
--- /dev/null
+++ b/scripts-dev/check_pydantic_models.py
@@ -0,0 +1,424 @@
+#! /usr/bin/env python
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A script which enforces that Synapse always uses strict types when defining a Pydantic
+model.
+
+Pydantic does not yet offer a strict mode, but it is planned for pydantic v2. See
+
+ https://github.com/pydantic/pydantic/issues/1098
+ https://pydantic-docs.helpmanual.io/blog/pydantic-v2/#strict-mode
+
+Until then, this script is a best-effort attempt to stop us from introducing type
+coercion bugs (like the infamous stringy power levels fixed in room version 10).
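+
+For example (a hypothetical model, purely for illustration), pydantic's default
+lenient types will silently coerce:
+
+    from pydantic import BaseModel
+
+    class PowerLevels(BaseModel):
+        ban: int
+
+    PowerLevels(ban="50").ban == 50  # the str "50" is silently coerced to int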
+"""
+import argparse
+import contextlib
+import functools
+import importlib
+import logging
+import os
+import pkgutil
+import sys
+import textwrap
+import traceback
+import unittest.mock
+from contextlib import contextmanager
+from typing import Any, Callable, Dict, Generator, List, Set, Type, TypeVar
+
+from parameterized import parameterized
+from pydantic import BaseModel as PydanticBaseModel, conbytes, confloat, conint, constr
+from pydantic.typing import get_args
+from typing_extensions import ParamSpec
+
+logger = logging.getLogger(__name__)
+
+CONSTRAINED_TYPE_FACTORIES_WITH_STRICT_FLAG: List[Callable] = [
+ constr,
+ conbytes,
+ conint,
+ confloat,
+]
+
+TYPES_THAT_PYDANTIC_WILL_COERCE_TO = [
+ str,
+ bytes,
+ int,
+ float,
+ bool,
+]
+
+
+P = ParamSpec("P")
+R = TypeVar("R")
+
+
+class ModelCheckerException(Exception):
+ """Dummy exception. Allows us to detect unwanted types during a module import."""
+
+
+class MissingStrictInConstrainedTypeException(ModelCheckerException):
+ factory_name: str
+
+ def __init__(self, factory_name: str):
+ self.factory_name = factory_name
+
+
+class FieldHasUnwantedTypeException(ModelCheckerException):
+ message: str
+
+ def __init__(self, message: str):
+ self.message = message
+
+
+def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]:
+ """We patch `constr` and friends with wrappers that enforce strict=True."""
+
+ @functools.wraps(factory)
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+ if "strict" not in kwargs:
+ raise MissingStrictInConstrainedTypeException(factory.__name__)
+ if not kwargs["strict"]:
+ raise MissingStrictInConstrainedTypeException(factory.__name__)
+ return factory(*args, **kwargs)
+
+ return wrapper
+
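+# A sketch of the effect (hypothetical calls), once `constr` is wrapped:
+#     constr(min_length=1)               -> raises MissingStrictInConstrainedTypeException
+#     constr(strict=False)               -> raises MissingStrictInConstrainedTypeException
+#     constr(strict=True, min_length=1)  -> behaves as normal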
+
+def field_type_unwanted(type_: Any) -> bool:
+ """Very rough attempt to detect if a type is unwanted as a Pydantic annotation.
+
+ At present, we exclude types which will coerce, or any generic type involving types
+ which will coerce."""
+ logger.debug("Is %s unwanted?")
+ if type_ in TYPES_THAT_PYDANTIC_WILL_COERCE_TO:
+ logger.debug("yes")
+ return True
+ logger.debug("Maybe. Subargs are %s", get_args(type_))
+ rv = any(field_type_unwanted(t) for t in get_args(type_))
+ logger.debug("Conclusion: %s %s unwanted", type_, "is" if rv else "is not")
+ return rv
+
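+# For example (hypothetical inputs): field_type_unwanted(str) is True,
+# field_type_unwanted(StrictStr) is False, and field_type_unwanted(List[str]) is
+# True, because get_args(List[str]) == (str,) and str is a coercing type.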
+
+class PatchedBaseModel(PydanticBaseModel):
+ """A patched version of BaseModel that inspects fields after models are defined.
+
+ We complain loudly if we see an unwanted type.
+
+ Beware: ModelField.type_ is presumably private; this is likely to be very brittle.
+ """
+
+ @classmethod
+ def __init_subclass__(cls: Type[PydanticBaseModel], **kwargs: object):
+ for field in cls.__fields__.values():
+            # Note that field.type_ and field.outer_type_ are computed based on the
+            # annotation type; see pydantic.fields.ModelField._type_analysis
+ if field_type_unwanted(field.outer_type_):
+ # TODO: this only reports the first bad field. Can we find all bad ones
+ # and report them all?
+ raise FieldHasUnwantedTypeException(
+ f"{cls.__module__}.{cls.__qualname__} has field '{field.name}' "
+ f"with unwanted type `{field.outer_type_}`"
+ )
+
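+# For example (hypothetical): defining
+#     class BadModel(PatchedBaseModel):
+#         name: str
+# raises FieldHasUnwantedTypeException at class-definition time, via
+# __init_subclass__ above.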
+
+@contextmanager
+def monkeypatch_pydantic() -> Generator[None, None, None]:
+ """Patch pydantic with our snooping versions of BaseModel and the con* functions.
+
+ If the snooping functions see something they don't like, they'll raise a
+    ModelCheckerException instance.
+ """
+ with contextlib.ExitStack() as patches:
+ # Most Synapse code ought to import the patched objects directly from
+ # `pydantic`. But we also patch their containing modules `pydantic.main` and
+ # `pydantic.types` for completeness.
+ patch_basemodel1 = unittest.mock.patch(
+ "pydantic.BaseModel", new=PatchedBaseModel
+ )
+ patch_basemodel2 = unittest.mock.patch(
+ "pydantic.main.BaseModel", new=PatchedBaseModel
+ )
+ patches.enter_context(patch_basemodel1)
+ patches.enter_context(patch_basemodel2)
+ for factory in CONSTRAINED_TYPE_FACTORIES_WITH_STRICT_FLAG:
+ wrapper: Callable = make_wrapper(factory)
+ patch1 = unittest.mock.patch(f"pydantic.{factory.__name__}", new=wrapper)
+ patch2 = unittest.mock.patch(
+ f"pydantic.types.{factory.__name__}", new=wrapper
+ )
+ patches.enter_context(patch1)
+ patches.enter_context(patch2)
+ yield
+
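+# Usage sketch (with a hypothetical module name):
+#     with monkeypatch_pydantic():
+#         importlib.import_module("synapse.rest.client.account")
+# Any model defined at import time is then checked as a side effect.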
+
+def format_model_checker_exception(e: ModelCheckerException) -> str:
+ """Work out which line of code caused e. Format the line in a human-friendly way."""
+ # TODO. FieldHasUnwantedTypeException gives better error messages. Can we ditch the
+ # patches of constr() etc, and instead inspect fields to look for ConstrainedStr
+ # with strict=False? There is some difficulty with the inheritance hierarchy
+ # because StrictStr < ConstrainedStr < str.
+ if isinstance(e, FieldHasUnwantedTypeException):
+ return e.message
+ elif isinstance(e, MissingStrictInConstrainedTypeException):
+ frame_summary = traceback.extract_tb(e.__traceback__)[-2]
+ return (
+ f"Missing `strict=True` from {e.factory_name}() call \n"
+ + traceback.format_list([frame_summary])[0].lstrip()
+ )
+ else:
+ raise ValueError(f"Unknown exception {e}") from e
+
+
+def lint() -> int:
+ """Try to import all of Synapse and see if we spot any Pydantic type coercions.
+
+ Print any problems, then return a status code suitable for sys.exit."""
+ failures = do_lint()
+ if failures:
+ print(f"Found {len(failures)} problem(s)")
+ for failure in sorted(failures):
+ print(failure)
+ return os.EX_DATAERR if failures else os.EX_OK
+
+
+def do_lint() -> Set[str]:
+ """Try to import all of Synapse and see if we spot any Pydantic type coercions."""
+ failures = set()
+
+ with monkeypatch_pydantic():
+ logger.debug("Importing synapse")
+ try:
+ # TODO: make "synapse" an argument so we can target this script at
+ # a subpackage
+ module = importlib.import_module("synapse")
+ except ModelCheckerException as e:
+ logger.warning("Bad annotation found when importing synapse")
+ failures.add(format_model_checker_exception(e))
+ return failures
+
+ try:
+ logger.debug("Fetching subpackages")
+ module_infos = list(
+ pkgutil.walk_packages(module.__path__, f"{module.__name__}.")
+ )
+ except ModelCheckerException as e:
+ logger.warning("Bad annotation found when looking for modules to import")
+ failures.add(format_model_checker_exception(e))
+ return failures
+
+ for module_info in module_infos:
+ logger.debug("Importing %s", module_info.name)
+ try:
+ importlib.import_module(module_info.name)
+ except ModelCheckerException as e:
+ logger.warning(
+ f"Bad annotation found when importing {module_info.name}"
+ )
+ failures.add(format_model_checker_exception(e))
+
+ return failures
+
+
+def run_test_snippet(source: str) -> None:
+ """Exec a snippet of source code in an isolated environment."""
+ # To emulate `source` being called at the top level of the module,
+ # the globals and locals we provide apparently have to be the same mapping.
+ #
+ # > Remember that at the module level, globals and locals are the same dictionary.
+ # > If exec gets two separate objects as globals and locals, the code will be
+ # > executed as if it were embedded in a class definition.
+ globals_: Dict[str, object]
+ locals_: Dict[str, object]
+ globals_ = locals_ = {}
+ exec(textwrap.dedent(source), globals_, locals_)
+
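+# For example (hypothetical): run_test_snippet("x: int = 5") defines `x` only in the
+# throwaway namespace above; nothing leaks into this module.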
+
+class TestConstrainedTypesPatch(unittest.TestCase):
+ def test_expression_without_strict_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import constr
+ constr()
+ """
+ )
+
+ def test_called_as_module_attribute_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ import pydantic
+ pydantic.constr()
+ """
+ )
+
+ def test_wildcard_import_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import *
+ constr()
+ """
+ )
+
+ def test_alternative_import_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic.types import constr
+ constr()
+ """
+ )
+
+ def test_alternative_import_attribute_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ import pydantic.types
+ pydantic.types.constr()
+ """
+ )
+
+ def test_kwarg_but_no_strict_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import constr
+ constr(min_length=10)
+ """
+ )
+
+ def test_kwarg_strict_False_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import constr
+ constr(strict=False)
+ """
+ )
+
+ def test_kwarg_strict_True_doesnt_raise(self) -> None:
+ with monkeypatch_pydantic():
+ run_test_snippet(
+ """
+ from pydantic import constr
+ constr(strict=True)
+ """
+ )
+
+ def test_annotation_without_strict_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import constr
+ x: constr()
+ """
+ )
+
+ def test_field_annotation_without_strict_raises(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic import BaseModel, conint
+ class C:
+ x: conint()
+ """
+ )
+
+
+class TestFieldTypeInspection(unittest.TestCase):
+ @parameterized.expand(
+ [
+ ("str",),
+ ("bytes"),
+ ("int",),
+ ("float",),
+ ("bool"),
+ ("Optional[str]",),
+ ("Union[None, str]",),
+ ("List[str]",),
+ ("List[List[str]]",),
+ ("Dict[StrictStr, str]",),
+ ("Dict[str, StrictStr]",),
+ ("TypedDict('D', x=int)",),
+ ]
+ )
+ def test_field_holding_unwanted_type_raises(self, annotation: str) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ f"""
+ from typing import *
+ from pydantic import *
+ class C(BaseModel):
+ f: {annotation}
+ """
+ )
+
+ @parameterized.expand(
+ [
+ ("StrictStr",),
+ ("StrictBytes"),
+ ("StrictInt",),
+ ("StrictFloat",),
+ ("StrictBool"),
+ ("constr(strict=True, min_length=10)",),
+ ("Optional[StrictStr]",),
+ ("Union[None, StrictStr]",),
+ ("List[StrictStr]",),
+ ("List[List[StrictStr]]",),
+ ("Dict[StrictStr, StrictStr]",),
+ ("TypedDict('D', x=StrictInt)",),
+ ]
+ )
+ def test_field_holding_accepted_type_doesnt_raise(self, annotation: str) -> None:
+ with monkeypatch_pydantic():
+ run_test_snippet(
+ f"""
+ from typing import *
+ from pydantic import *
+ class C(BaseModel):
+ f: {annotation}
+ """
+ )
+
+ def test_field_holding_str_raises_with_alternative_import(self) -> None:
+ with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
+ run_test_snippet(
+ """
+ from pydantic.main import BaseModel
+ class C(BaseModel):
+ f: str
+ """
+ )
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("mode", choices=["lint", "test"], default="lint", nargs="?")
+parser.add_argument("-v", "--verbose", action="store_true")
+
+
+if __name__ == "__main__":
+ args = parser.parse_args(sys.argv[1:])
+ logging.basicConfig(
+ format="%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s",
+ level=logging.DEBUG if args.verbose else logging.INFO,
+ )
+ # suppress logs we don't care about
+ logging.getLogger("xmlschema").setLevel(logging.WARNING)
+ if args.mode == "lint":
+ sys.exit(lint())
+ elif args.mode == "test":
+ unittest.main(argv=sys.argv[:1])
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index eab23f18f1..803c6ce92d 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -126,7 +126,7 @@ export COMPLEMENT_BASE_IMAGE=complement-synapse
extra_test_args=()
-test_tags="synapse_blacklist,msc2716,msc3030,msc3787"
+test_tags="synapse_blacklist,msc3787,msc3874"
# All environment variables starting with PASS_ will be shared.
# (The prefix is stripped off before reaching the container.)
@@ -139,6 +139,9 @@ if [[ -n "$WORKERS" ]]; then
# Use workers.
export PASS_SYNAPSE_COMPLEMENT_USE_WORKERS=true
+ # Pass through the workers defined. If none, it will be an empty string
+ export PASS_SYNAPSE_WORKER_TYPES="$WORKER_TYPES"
+
# Workers can only use Postgres as a database.
export PASS_SYNAPSE_COMPLEMENT_DATABASE=postgres
@@ -158,7 +161,10 @@ else
# We only test faster room joins on monoliths, because they are purposefully
# being developed without worker support to start with.
- test_tags="$test_tags,faster_joins"
+ #
+ # The tests for importing historical messages (MSC2716) and jump to date (MSC3030)
+ # also only pass with monoliths, currently.
+ test_tags="$test_tags,faster_joins,msc2716,msc3030"
fi
diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py
index 763dd02c47..b1d5e2e616 100755
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -46,11 +46,12 @@ import signedjson.key
import signedjson.types
import srvlookup
import yaml
+from requests import PreparedRequest, Response
from requests.adapters import HTTPAdapter
from urllib3 import HTTPConnectionPool
# uncomment the following to enable debug logging of http requests
-# from httplib import HTTPConnection
+# from http.client import HTTPConnection
# HTTPConnection.debuglevel = 1
@@ -103,6 +104,7 @@ def request(
destination: str,
path: str,
content: Optional[str],
+ verify_tls: bool,
) -> requests.Response:
if method is None:
if content is None:
@@ -141,7 +143,6 @@ def request(
s.mount("matrix://", MatrixConnectionAdapter())
headers: Dict[str, str] = {
- "Host": destination,
"Authorization": authorization_headers[0],
}
@@ -152,7 +153,7 @@ def request(
method=method,
url=dest,
headers=headers,
- verify=False,
+ verify=verify_tls,
data=content,
stream=True,
)
@@ -203,6 +204,12 @@ def main() -> None:
parser.add_argument("--body", help="Data to send as the body of the HTTP request")
parser.add_argument(
+ "--insecure",
+ action="store_true",
+ help="Disable TLS certificate verification",
+ )
+
+ parser.add_argument(
"path", help="request path, including the '/_matrix/federation/...' prefix."
)
@@ -227,6 +234,7 @@ def main() -> None:
args.destination,
args.path,
content=args.body,
+ verify_tls=not args.insecure,
)
sys.stderr.write("Status Code: %d\n" % (result.status_code,))
@@ -254,36 +262,93 @@ def read_args_from_config(args: argparse.Namespace) -> None:
class MatrixConnectionAdapter(HTTPAdapter):
+ def send(
+ self,
+ request: PreparedRequest,
+ *args: Any,
+ **kwargs: Any,
+ ) -> Response:
+ # overrides the send() method in the base class.
+
+ # We need to look for .well-known redirects before passing the request up to
+ # HTTPAdapter.send().
+ assert isinstance(request.url, str)
+ parsed = urlparse.urlsplit(request.url)
+ server_name = parsed.netloc
+ well_known = self._get_well_known(parsed.netloc)
+
+ if well_known:
+ server_name = well_known
+
+ # replace the scheme in the uri with https, so that cert verification is done
+ # also replace the hostname if we got a .well-known result
+ request.url = urlparse.urlunsplit(
+ ("https", server_name, parsed.path, parsed.query, parsed.fragment)
+ )
+
+ # at this point we also add the host header (otherwise urllib will add one
+ # based on the `host` from the connection returned by `get_connection`,
+ # which will be wrong if there is an SRV record).
+ request.headers["Host"] = server_name
+
+ return super().send(request, *args, **kwargs)
+
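+    # For example (hypothetical names): a request for
+    # matrix://example.com/_matrix/key/v2/server whose .well-known delegates to
+    # synapse.example.com would be rewritten to
+    # https://synapse.example.com/_matrix/key/v2/server, with a
+    # `Host: synapse.example.com` header.
+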
+ def get_connection(
+ self, url: str, proxies: Optional[Dict[str, str]] = None
+ ) -> HTTPConnectionPool:
+ # overrides the get_connection() method in the base class
+ parsed = urlparse.urlsplit(url)
+ (host, port, ssl_server_name) = self._lookup(parsed.netloc)
+ print(
+ f"Connecting to {host}:{port} with SNI {ssl_server_name}", file=sys.stderr
+ )
+ return self.poolmanager.connection_from_host(
+ host,
+ port=port,
+ scheme="https",
+ pool_kwargs={"server_hostname": ssl_server_name},
+ )
+
@staticmethod
- def lookup(s: str, skip_well_known: bool = False) -> Tuple[str, int]:
- if s[-1] == "]":
+ def _lookup(server_name: str) -> Tuple[str, int, str]:
+ """
+ Do an SRV lookup on a server name and return the host:port to connect to
+ Given the server_name (after any .well-known lookup), return the host, port and
+ the ssl server name
+ """
+ if server_name[-1] == "]":
# ipv6 literal (with no port)
- return s, 8448
+ return server_name, 8448, server_name
- if ":" in s:
- out = s.rsplit(":", 1)
+ if ":" in server_name:
+ # explicit port
+ out = server_name.rsplit(":", 1)
try:
port = int(out[1])
except ValueError:
- raise ValueError("Invalid host:port '%s'" % s)
- return out[0], port
-
- # try a .well-known lookup
- if not skip_well_known:
- well_known = MatrixConnectionAdapter.get_well_known(s)
- if well_known:
- return MatrixConnectionAdapter.lookup(well_known, skip_well_known=True)
+ raise ValueError("Invalid host:port '%s'" % (server_name,))
+ return out[0], port, out[0]
try:
- srv = srvlookup.lookup("matrix", "tcp", s)[0]
- return srv.host, srv.port
+ srv = srvlookup.lookup("matrix", "tcp", server_name)[0]
+ print(
+ f"SRV lookup on _matrix._tcp.{server_name} gave {srv}",
+ file=sys.stderr,
+ )
+ return srv.host, srv.port, server_name
except Exception:
- return s, 8448
+ return server_name, 8448, server_name
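+
+    # For example (hypothetical results): _lookup("[::1]") returns
+    # ("[::1]", 8448, "[::1]"), and _lookup("example.com:443") returns
+    # ("example.com", 443, "example.com"); otherwise an SRV lookup (falling
+    # back to port 8448) decides.
+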
@staticmethod
- def get_well_known(server_name: str) -> Optional[str]:
- uri = "https://%s/.well-known/matrix/server" % (server_name,)
- print("fetching %s" % (uri,), file=sys.stderr)
+ def _get_well_known(server_name: str) -> Optional[str]:
+ if ":" in server_name:
+ # explicit port, or ipv6 literal. Either way, no .well-known
+ return None
+
+ # TODO: check for ipv4 literals
+
+ uri = f"https://{server_name}/.well-known/matrix/server"
+ print(f"fetching {uri}", file=sys.stderr)
try:
resp = requests.get(uri)
@@ -304,19 +369,6 @@ class MatrixConnectionAdapter(HTTPAdapter):
print("Invalid response from %s: %s" % (uri, e), file=sys.stderr)
return None
- def get_connection(
- self, url: str, proxies: Optional[Dict[str, str]] = None
- ) -> HTTPConnectionPool:
- parsed = urlparse.urlparse(url)
-
- (host, port) = self.lookup(parsed.netloc)
- netloc = "%s:%d" % (host, port)
- print("Connecting to %s" % (netloc,), file=sys.stderr)
- url = urlparse.urlunparse(
- ("https", netloc, parsed.path, parsed.params, parsed.query, parsed.fragment)
- )
- return super().get_connection(url, proxies)
-
if __name__ == "__main__":
main()
diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh
index 377348b107..bf900645b1 100755
--- a/scripts-dev/lint.sh
+++ b/scripts-dev/lint.sh
@@ -106,4 +106,5 @@ isort "${files[@]}"
python3 -m black "${files[@]}"
./scripts-dev/config-lint.sh
flake8 "${files[@]}"
+./scripts-dev/check_pydantic_models.py lint
mypy
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index f0e22d4ca2..e2bc1640bb 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -2,34 +2,37 @@
#
# This script generates SQL files for creating a brand new Synapse DB with the latest
# schema, on both SQLite3 and Postgres.
-#
-# It does so by having Synapse generate an up-to-date SQLite DB, then running
-# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
export PGHOST="localhost"
-POSTGRES_DB_NAME="synapse_full_schema.$$"
-
-SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite"
-POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres"
-
+POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
+POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
+POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"
REQUIRED_DEPS=("matrix-synapse" "psycopg2")
usage() {
echo
- echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]"
+ echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
echo
echo "-p <postgres_username>"
echo " Username to connect to local postgres instance. The password will be requested"
echo " during script execution."
echo "-c"
- echo " CI mode. Enables coverage tracking and prints every command that the script runs."
+ echo " CI mode. Prints every command that the script runs."
echo "-o <path>"
echo " Directory to output full schema files to."
+ echo "-n <schema number>"
+ echo " Schema number for the new snapshot. Used to set the location of files within "
+ echo " the output directory, mimicking that of synapse/storage/schemas."
+ echo " Defaults to 9999."
echo "-h"
echo " Display this help text."
+ echo ""
+ echo " NB: make sure to run this against the *oldest* supported version of postgres,"
+ echo " or else pg_dump might output non-backwards-compatible syntax."
}
-while getopts "p:co:h" opt; do
+SCHEMA_NUMBER="9999"
+while getopts "p:co:hn:" opt; do
case $opt in
p)
export PGUSER=$OPTARG
@@ -37,11 +40,6 @@ while getopts "p:co:h" opt; do
c)
# Print all commands that are being executed
set -x
-
- # Modify required dependencies for coverage
- REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess")
-
- COVERAGE=1
;;
o)
command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1)
@@ -51,6 +49,9 @@ while getopts "p:co:h" opt; do
usage
exit
;;
+ n)
+ SCHEMA_NUMBER="$OPTARG"
+ ;;
\?)
echo "ERROR: Invalid option: -$OPTARG" >&2
usage
@@ -98,11 +99,21 @@ cd "$(dirname "$0")/.."
TMPDIR=$(mktemp -d)
KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
SQLITE_CONFIG=$TMPDIR/sqlite.conf
-SQLITE_DB=$TMPDIR/homeserver.db
+SQLITE_MAIN_DB=$TMPDIR/main.db
+SQLITE_STATE_DB=$TMPDIR/state.db
+SQLITE_COMMON_DB=$TMPDIR/common.db
POSTGRES_CONFIG=$TMPDIR/postgres.conf
 # Ensure these files are deleted on script exit
-trap 'rm -rf $TMPDIR' EXIT
+cleanup() {
+ echo "Cleaning up temporary sqlite database and config files..."
+ rm -r "$TMPDIR"
+ echo "Cleaning up temporary Postgres database..."
+ dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
+ dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
+ dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
+}
+trap 'cleanup' EXIT
cat > "$SQLITE_CONFIG" <<EOF
server_name: "test"
@@ -112,10 +123,22 @@ macaroon_secret_key: "abcde"
report_stats: false
-database:
- name: "sqlite3"
- args:
- database: "$SQLITE_DB"
+databases:
+ common:
+ name: "sqlite3"
+ data_stores: []
+ args:
+ database: "$SQLITE_COMMON_DB"
+ main:
+ name: "sqlite3"
+ data_stores: ["main"]
+ args:
+ database: "$SQLITE_MAIN_DB"
+ state:
+ name: "sqlite3"
+ data_stores: ["state"]
+ args:
+ database: "$SQLITE_STATE_DB"
# Suppress the key server warning.
trusted_key_servers: []
@@ -129,13 +152,32 @@ macaroon_secret_key: "abcde"
report_stats: false
-database:
- name: "psycopg2"
- args:
- user: "$PGUSER"
- host: "$PGHOST"
- password: "$PGPASSWORD"
- database: "$POSTGRES_DB_NAME"
+databases:
+ common:
+ name: "psycopg2"
+ data_stores: []
+ args:
+ user: "$PGUSER"
+ host: "$PGHOST"
+ password: "$PGPASSWORD"
+ database: "$POSTGRES_COMMON_DB_NAME"
+ main:
+ name: "psycopg2"
+ data_stores: ["main"]
+ args:
+ user: "$PGUSER"
+ host: "$PGHOST"
+ password: "$PGPASSWORD"
+ database: "$POSTGRES_MAIN_DB_NAME"
+ state:
+ name: "psycopg2"
+ data_stores: ["state"]
+ args:
+ user: "$PGUSER"
+ host: "$PGHOST"
+ password: "$PGPASSWORD"
+ database: "$POSTGRES_STATE_DB_NAME"
+
# Suppress the key server warning.
trusted_key_servers: []
@@ -147,29 +189,46 @@ python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG"
# Make sure the SQLite3 database is using the latest schema and has no pending background update.
echo "Running db background jobs..."
-synapse/_scripts/update_synapse_database.py --database-config --run-background-updates "$SQLITE_CONFIG"
+synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
# Create the PostgreSQL database.
-echo "Creating postgres database..."
-createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
-
-echo "Copying data from SQLite3 to Postgres with synapse_port_db..."
-if [ -z "$COVERAGE" ]; then
- # No coverage needed
- synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG"
-else
- # Coverage desired
- coverage run synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG"
-fi
+echo "Creating postgres databases..."
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME"
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"
+
+echo "Running db background jobs..."
+synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
+
-# Delete schema_version, applied_schema_deltas and applied_module_schemas tables
-# Also delete any shadow tables from fts4
-# This needs to be done after synapse_port_db is run
echo "Dropping unwanted db tables..."
-SQL="
+
+# Some common tables are created and updated by Synapse itself and do not belong in the
+# schema.
+DROP_APP_MANAGED_TABLES="
DROP TABLE schema_version;
+DROP TABLE schema_compat_version;
DROP TABLE applied_schema_deltas;
DROP TABLE applied_module_schemas;
+"
+# Other common tables are not created by Synapse and do belong in the schema.
+# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But
+# since there's only one table there, I haven't bothered to do so.
+DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES
+DROP TABLE background_updates;
+"
+
+sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES"
+sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
+sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
+psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES"
+psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
+psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
+
+# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
+# details behind full text search tables. Omit these from the dumps.
+
+sqlite3 "$SQLITE_MAIN_DB" <<< "
DROP TABLE event_search_content;
DROP TABLE event_search_segments;
DROP TABLE event_search_segdir;
@@ -181,16 +240,57 @@ DROP TABLE user_directory_search_segdir;
DROP TABLE user_directory_search_docsize;
DROP TABLE user_directory_search_stat;
"
-sqlite3 "$SQLITE_DB" <<< "$SQL"
-psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
-echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE'..."
-sqlite3 "$SQLITE_DB" ".dump" > "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE"
+echo "Dumping SQLite3 schema..."
+
+mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
+sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+
+cleanup_pg_schema() {
+ # Cleanup as follows:
+ # - Remove empty lines. pg_dump likes to output a lot of these.
+ # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
+ # separate tables etc.
+ # - Remove "public." prefix --- the schema name.
+ # - Remove "SET" commands. Last time I ran this, the output commands were
+ # SET statement_timeout = 0;
+ # SET lock_timeout = 0;
+ # SET idle_in_transaction_session_timeout = 0;
+ # SET client_encoding = 'UTF8';
+ # SET standard_conforming_strings = on;
+ # SET check_function_bodies = false;
+ # SET xmloption = content;
+ # SET client_min_messages = warning;
+ # SET row_security = off;
+ # SET default_table_access_method = heap;
+ # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
+ # SELECT statements because some of those have side-effects which we do want in the
+ # schema. Last time I ran this, the only SELECTS were
+ # SELECT pg_catalog.set_config('search_path', '', false);
+ # and
+ # SELECT pg_catalog.setval(text, bigint, bool);
+ # We do want to remove the former, but the latter is important. If the last argument
+  # is `true` or omitted, this marks the given integer as having been consumed, and
+  # it will NOT be returned by a subsequent nextval.
+ sed -e '/^$/d' \
+ -e '/^--/d' \
+ -e 's/public\.//g' \
+ -e '/^SET /d' \
+ -e '/^SELECT pg_catalog.set_config/d'
+}
-echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..."
-pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE"
+echo "Dumping Postgres schema..."
-echo "Cleaning up temporary Postgres database..."
-dropdb $POSTGRES_DB_NAME
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
echo "Done! Files dumped to: $OUTPUT_DIR"
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py
index d08517a953..2c377533c0 100644
--- a/scripts-dev/mypy_synapse_plugin.py
+++ b/scripts-dev/mypy_synapse_plugin.py
@@ -29,7 +29,7 @@ class SynapsePlugin(Plugin):
self, fullname: str
) -> Optional[Callable[[MethodSigContext], CallableType]]:
if fullname.startswith(
- "synapse.util.caches.descriptors._CachedFunction.__call__"
+ "synapse.util.caches.descriptors.CachedFunction.__call__"
) or fullname.startswith(
"synapse.util.caches.descriptors._LruCachedFunction.__call__"
):
@@ -38,7 +38,7 @@ class SynapsePlugin(Plugin):
def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
- """Fixes the `_CachedFunction.__call__` signature to be correct.
+ """Fixes the `CachedFunction.__call__` signature to be correct.
It already has *almost* the correct signature, except:
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index 46220c4dd3..bf47b6c713 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -18,10 +18,12 @@
"""
import glob
+import json
import os
import re
import subprocess
import sys
+import time
import urllib.request
from os import path
from tempfile import TemporaryDirectory
@@ -71,18 +73,21 @@ def cli() -> None:
./scripts-dev/release.py tag
- # ... wait for assets to build ...
+ # wait for assets to build, either manually or with:
+ ./scripts-dev/release.py wait-for-actions
./scripts-dev/release.py publish
./scripts-dev/release.py upload
- # Optional: generate some nice links for the announcement
-
./scripts-dev/release.py merge-back
+ # Optional: generate some nice links for the announcement
./scripts-dev/release.py announce
+    Alternatively, `./scripts-dev/release.py full` will do all of the above,
+    as well as guide you through the manual steps.
+
If the env var GH_TOKEN (or GITHUB_TOKEN) is set, or passed into the
`tag`/`publish` command, then a new draft release will be created/published.
"""
@@ -90,6 +95,10 @@ def cli() -> None:
@cli.command()
def prepare() -> None:
+ _prepare()
+
+
+def _prepare() -> None:
"""Do the initial stages of creating a release, including creating release
branch, updating changelog and pushing to GitHub.
"""
@@ -210,9 +219,7 @@ def prepare() -> None:
update_branch(repo)
# Create the new release branch
- # Type ignore will no longer be needed after GitPython 3.1.28.
- # See https://github.com/gitpython-developers/GitPython/pull/1419
- repo.create_head(release_branch_name, commit=base_branch) # type: ignore[arg-type]
+ repo.create_head(release_branch_name, commit=base_branch)
# Special-case SyTest: we don't actually prepare any files so we may
# as well push it now (and only when we create a release branch;
@@ -284,6 +291,10 @@ def prepare() -> None:
@cli.command()
@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"])
def tag(gh_token: Optional[str]) -> None:
+ _tag(gh_token)
+
+
+def _tag(gh_token: Optional[str]) -> None:
"""Tags the release and generates a draft GitHub release"""
# Make sure we're in a git repo.
@@ -374,6 +385,10 @@ def tag(gh_token: Optional[str]) -> None:
@cli.command()
@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=True)
def publish(gh_token: str) -> None:
+ _publish(gh_token)
+
+
+def _publish(gh_token: str) -> None:
"""Publish release on GitHub."""
# Make sure we're in a git repo.
@@ -410,7 +425,12 @@ def publish(gh_token: str) -> None:
@cli.command()
-def upload() -> None:
+@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=False)
+def upload(gh_token: Optional[str]) -> None:
+ _upload(gh_token)
+
+
+def _upload(gh_token: Optional[str]) -> None:
"""Upload release to pypi."""
current_version = get_package_version()
@@ -423,18 +443,40 @@ def upload() -> None:
click.echo("Tag {tag_name} (tag.commit) is not currently checked out!")
click.get_current_context().abort()
- pypi_asset_names = [
- f"matrix_synapse-{current_version}-py3-none-any.whl",
- f"matrix-synapse-{current_version}.tar.gz",
- ]
+ # Query all the assets corresponding to this release.
+ gh = Github(gh_token)
+ gh_repo = gh.get_repo("matrix-org/synapse")
+ gh_release = gh_repo.get_release(tag_name)
+
+ all_assets = set(gh_release.get_assets())
+
+ # Only accept the wheels and sdist.
+ # Notably: we don't care about debs.tar.xz.
+ asset_names_and_urls = sorted(
+ (asset.name, asset.browser_download_url)
+ for asset in all_assets
+ if asset.name.endswith((".whl", ".tar.gz"))
+ )
+
+ # Print out what we've determined.
+ print("Found relevant assets:")
+ for asset_name, _ in asset_names_and_urls:
+ print(f" - {asset_name}")
+
+ ignored_asset_names = sorted(
+ {asset.name for asset in all_assets}
+ - {asset_name for asset_name, _ in asset_names_and_urls}
+ )
+ print("\nIgnoring irrelevant assets:")
+ for asset_name in ignored_asset_names:
+ print(f" - {asset_name}")
with TemporaryDirectory(prefix=f"synapse_upload_{tag_name}_") as tmpdir:
- for name in pypi_asset_names:
+ for name, asset_download_url in asset_names_and_urls:
filename = path.join(tmpdir, name)
- url = f"https://github.com/matrix-org/synapse/releases/download/{tag_name}/{name}"
click.echo(f"Downloading {name} into {filename}")
- urllib.request.urlretrieve(url, filename=filename)
+ urllib.request.urlretrieve(asset_download_url, filename=filename)
if click.confirm("Upload to PyPI?", default=True):
subprocess.run("twine upload *", shell=True, cwd=tmpdir)
@@ -480,7 +522,74 @@ def _merge_into(repo: Repo, source: str, target: str) -> None:
@cli.command()
+@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=False)
+def wait_for_actions(gh_token: Optional[str]) -> None:
+ _wait_for_actions(gh_token)
+
+
+def _wait_for_actions(gh_token: Optional[str]) -> None:
+ # Find out the version and tag name.
+ current_version = get_package_version()
+ tag_name = f"v{current_version}"
+
+ # Authentication is optional on this endpoint,
+ # but use a token if we have one to reduce the chance of being rate-limited.
+ url = f"https://api.github.com/repos/matrix-org/synapse/actions/runs?branch={tag_name}"
+ headers = {"Accept": "application/vnd.github+json"}
+ if gh_token is not None:
+ headers["authorization"] = f"token {gh_token}"
+ req = urllib.request.Request(url, headers=headers)
+
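+    # Wait before polling at all: workflow runs for the new tag can take a while
+    # to appear, and the builds themselves take tens of minutes. (The delays here
+    # are pragmatic guesses, not API requirements.)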
+ time.sleep(10 * 60)
+ while True:
+ time.sleep(5 * 60)
+ response = urllib.request.urlopen(req)
+ resp = json.loads(response.read())
+
+ if len(resp["workflow_runs"]) == 0:
+ continue
+
+ if all(
+ workflow["status"] != "in_progress" for workflow in resp["workflow_runs"]
+ ):
+            success = all(
+                workflow["status"] == "completed" for workflow in resp["workflow_runs"]
+            )
+ if success:
+ _notify("Workflows successful. You can now continue the release.")
+ else:
+ _notify("Workflows failed.")
+ click.confirm("Continue anyway?", abort=True)
+
+ break
+
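+# For reference (abridged): the GitHub "list workflow runs" endpoint responds with
+# JSON shaped like
+#     {"total_count": 2, "workflow_runs": [{"status": "completed", ...}, ...]}
+# where "status" takes values such as "queued", "in_progress" or "completed".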
+
+def _notify(message: str) -> None:
+ # Send a bell character. Most terminals will play a sound or show a notification
+ # for this.
+ click.echo(f"\a{message}")
+
+ # Try and run notify-send, but don't raise an Exception if this fails
+ # (This is best-effort)
+ # TODO Support other platforms?
+ subprocess.run(
+ [
+ "notify-send",
+ "--app-name",
+ "Synapse Release Script",
+ "--expire-time",
+ "3600000",
+ message,
+ ]
+ )
+
+
+@cli.command()
def merge_back() -> None:
+ _merge_back()
+
+
+def _merge_back() -> None:
"""Merge the release branch back into the appropriate branches.
All branches will be automatically pulled from the remote and the results
will be pushed to the remote."""
@@ -519,6 +628,10 @@ def merge_back() -> None:
@cli.command()
def announce() -> None:
+ _announce()
+
+
+def _announce() -> None:
"""Generate markdown to announce the release."""
current_version = get_package_version()
@@ -548,10 +661,56 @@ Announce the release in
- #homeowners:matrix.org (Synapse Announcements), bumping the version in the topic
- #synapse:matrix.org (Synapse Admins), bumping the version in the topic
- #synapse-dev:matrix.org
-- #synapse-package-maintainers:matrix.org"""
+- #synapse-package-maintainers:matrix.org
+
+Ask the designated people to do the blog and tweets."""
)
+@cli.command()
+@click.option("--gh-token", envvar=["GH_TOKEN", "GITHUB_TOKEN"], required=True)
+def full(gh_token: str) -> None:
+ click.echo("1. If this is a security release, read the security wiki page.")
+ click.echo("2. Check for any release blockers before proceeding.")
+ click.echo(" https://github.com/matrix-org/synapse/labels/X-Release-Blocker")
+
+ click.confirm("Ready?", abort=True)
+
+ click.echo("\n*** prepare ***")
+ _prepare()
+
+ click.echo("Deploy to matrix.org and ensure that it hasn't fallen over.")
+ click.echo("Remember to silence the alerts to prevent alert spam.")
+ click.confirm("Deployed?", abort=True)
+
+ click.echo("\n*** tag ***")
+ _tag(gh_token)
+
+ click.echo("\n*** wait for actions ***")
+ _wait_for_actions(gh_token)
+
+ click.echo("\n*** publish ***")
+ _publish(gh_token)
+
+ click.echo("\n*** upload ***")
+ _upload(gh_token)
+
+ click.echo("\n*** merge back ***")
+ _merge_back()
+
+ click.echo("\nUpdate the Debian repository")
+ click.confirm("Started updating Debian repository?", abort=True)
+
+ click.echo("\nWait for all release methods to be ready.")
+ # Docker should be ready because it was done by the workflows earlier
+ # PyPI should be ready because we just ran upload().
+ # TODO Automatically poll until the Debs have made it to packages.matrix.org
+ click.confirm("Debs ready?", abort=True)
+
+ click.echo("\n*** announce ***")
+ _announce()
+
+
def get_package_version() -> version.Version:
version_string = subprocess.check_output(["poetry", "version", "--short"]).decode(
"utf-8"
|