summary refs log tree commit diff
path: root/scripts-dev/schema_versions.py
blob: 5a79a433556c15d30566123569b4952252fd9a1a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python
# Copyright 2023 The Matrix.org Foundation C.I.C.
# Copyright 2023 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A script to calculate which versions of Synapse have backwards-compatible
database schemas. It creates a Markdown table of Synapse versions and the earliest
compatible version.

It is compatible with the mdbook protocol for preprocessors (see
https://rust-lang.github.io/mdBook/for_developers/preprocessors.html#implementing-a-preprocessor-with-a-different-language):

Exit 0 to denote support for all renderers:

    ./scripts-dev/schema_versions.py supports <mdbook renderer>

Parse a JSON list from stdin and add the table to the proper documentation page:

    ./scripts-dev/schema_versions.py

Additionally, the script supports dumping the table to stdout for debugging:

    ./scripts-dev/schema_versions.py dump
"""

import io
import json
import sys
from collections import defaultdict
from typing import Any, Dict, Iterator, Optional, Tuple

import git
from packaging import version

# The schema version has moved around over the years.
#
# Each entry is a path that get_schema_versions() looks up in a tag's tree;
# paths which are missing from a given tag are skipped.
SCHEMA_VERSION_FILES = (
    # This file is exec'd by get_schema_versions(); the others are only scanned
    # for a line starting with SCHEMA_VERSION.
    "synapse/storage/schema/__init__.py",
    "synapse/storage/prepare_database.py",
    "synapse/storage/__init__.py",
    "synapse/app/homeserver.py",
)


# Skip versions of Synapse < v1.0, they're old and essentially not
# compatible with today's federation.
#
# get_tags() drops every tag whose parsed version compares lower than this.
OLDEST_SHOWN_VERSION = version.parse("v1.0")


def get_schema_versions(tag: git.Tag) -> Tuple[Optional[int], Optional[int]]:
    """Look up the schema version and schema compat version recorded at a tag.

    Every path in SCHEMA_VERSION_FILES that exists in the tag's tree is
    inspected, in order. A value found in a later file replaces a value found
    in an earlier one.

    Returns:
        A ``(schema_version, schema_compat_version)`` tuple. An element is None
        when none of the inspected files provided it.
    """
    found_version = None
    found_compat_version = None

    for path in SCHEMA_VERSION_FILES:
        try:
            blob = tag.commit.tree / path
        except KeyError:
            # The path is not present in this tag's tree.
            continue

        source = blob.data_stream.read()

        if path == "synapse/storage/schema/__init__.py":
            # SCHEMA_COMPAT_VERSION is sometimes spread across multiple lines,
            # so the easiest thing to do is exec the code. Luckily it has only
            # ever existed in a file which imports nothing else from Synapse.
            namespace: Dict[str, Any] = {}
            exec(source.decode("utf-8"), {}, namespace)
            found_version = namespace["SCHEMA_VERSION"]
            found_compat_version = namespace.get("SCHEMA_COMPAT_VERSION")
            continue

        # The other files (usually) can't be executed since they might have
        # unknown imports, so take the first line which starts with
        # SCHEMA_VERSION and parse its third whitespace-separated token.
        assignment = next(
            (
                line
                for line in source.split(b"\n")
                if line.startswith(b"SCHEMA_VERSION")
            ),
            None,
        )
        if assignment is not None:
            found_version = int(assignment.split()[2])

    return found_version, found_compat_version


def get_tags(repo: git.Repo) -> Iterator[git.Tag]:
    """Return an iterator over the release tags of ``repo``, ordered by version.

    Only tags whose name starts with "v" and parses as a version are kept.
    Pre-releases, post-releases, versions with a local segment and versions
    older than OLDEST_SHOWN_VERSION are dropped.
    """
    candidates = []
    for tag in repo.tags:
        name = tag.name

        # All "real" Synapse tags are of the form vX.Y.Z; "v0.1" is a weird
        # tag left over from the initial react UI.
        if not name.startswith("v") or name == "v0.1":
            continue

        try:
            parsed = version.parse(name)
        except version.InvalidVersion:
            # Not a version number at all, ignore it.
            continue

        # Only final releases are of interest.
        if parsed.is_prerelease or parsed.is_postrelease or parsed.local:
            continue

        # Too old to be shown.
        if parsed < OLDEST_SHOWN_VERSION:
            continue

        candidates.append((parsed, tag))

    # Order by the parsed version number (not lexically by tag name).
    ordered = sorted(candidates, key=lambda entry: entry[0])
    return (tag for _, tag in ordered)


def calculate_version_chart() -> str:
    """Build the Markdown table of Synapse version ranges and compatible versions.

    The tags are read from the git repository in the current working directory.

    Returns:
        The table as a string: a header row, a separator row and one row per
        compatible schema version, each terminated by a newline.
    """
    repo = git.Repo(path=".")

    # Map of schema version -> Synapse versions which are at that schema version.
    tags_at_schema = defaultdict(list)
    # Map of schema version -> Synapse versions which are compatible with that
    # schema version.
    tags_compatible_with = defaultdict(list)

    # Find ranges of versions which are compatible with a schema version.
    #
    # There are two modes of operation:
    #
    # 1. Pre-schema_compat_version (i.e. schema_compat_version of None), then
    #    Synapse is compatible up/downgrading to a version with
    #    schema_version >= its current version.
    #
    # 2. Post-schema_compat_version (i.e. schema_compat_version is *not* None),
    #    then Synapse is compatible up/downgrading to a version with
    #    schema version >= schema_compat_version.
    #
    #    This is more generous and avoids versions that cannot be rolled back.
    #
    # See https://github.com/matrix-org/synapse/pull/9933 which was included in v1.37.0.
    for tag in get_tags(repo):
        current, compat = get_schema_versions(tag)
        tag_name = tag.name

        tags_at_schema[current].append(tag_name)
        # If a schema compat version is given, prefer that over the schema version.
        tags_compatible_with[compat or current].append(tag_name)

    # Generate a table which maps each range of Synapse versions to the earliest
    # Synapse version at the schema version they are compatible with.
    rows = [
        f"| {'Versions': ^19} | Compatible version |",
        f"|{'-' * (19 + 2)}|{'-' * (18 + 2)}|",
    ]
    for compatible_schema, tag_names in tags_compatible_with.items():
        version_range = tag_names[0] + ' – ' + tag_names[-1]
        earliest = tags_at_schema[compatible_schema][0]
        rows.append(f"| {version_range: ^19} | {earliest: ^18} |")

    # Every row, including the last, ends with a newline.
    return "".join(row + "\n" for row in rows)


if __name__ == "__main__":
    cli_args = sys.argv[1:]

    if len(cli_args) == 2 and cli_args[0] == "supports":
        # Every renderer is supported, so the renderer name (the second
        # argument) is ignored.
        sys.exit(0)
    elif cli_args == ["dump"]:
        # Debugging aid: write the table straight to stdout.
        print(calculate_version_chart())
    else:
        # Preprocessor mode: expect a two-element JSON list on stdin.
        _context, book = json.load(sys.stdin)

        for section in book["sections"]:
            if "Chapter" not in section:
                continue

            chapter = section["Chapter"]
            if chapter["path"] != "upgrade.md":
                continue

            # Swap the placeholder comment for the generated table.
            chapter["content"] = chapter["content"].replace(
                "<!-- REPLACE_WITH_SCHEMA_VERSIONS -->", calculate_version_chart()
            )

        # Print the result back out to stdout.
        print(json.dumps(book))