diff --git a/changelog.d/18440.misc b/changelog.d/18440.misc
new file mode 100644
index 0000000000..6aaa6dde5c
--- /dev/null
+++ b/changelog.d/18440.misc
@@ -0,0 +1 @@
+Add lint to ensure we don't add a `CREATE/DROP INDEX` in a schema delta.
diff --git a/scripts-dev/check_schema_delta.py b/scripts-dev/check_schema_delta.py
index 467be96fdf..454784c3ae 100755
--- a/scripts-dev/check_schema_delta.py
+++ b/scripts-dev/check_schema_delta.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
# Check that no schema deltas have been added to the wrong version.
+#
+# Also checks that schema deltas do not try and create or drop indices.
import re
from typing import Any, Dict, List
@@ -9,6 +11,13 @@ import click
import git
SCHEMA_FILE_REGEX = re.compile(r"^synapse/storage/schema/(.*)/delta/(.*)/(.*)$")
+INDEX_CREATION_REGEX = re.compile(r"CREATE .*INDEX .*ON ([a-z_]+)", flags=re.IGNORECASE)
+INDEX_DELETION_REGEX = re.compile(r"DROP .*INDEX ([a-z_]+)", flags=re.IGNORECASE)
+TABLE_CREATION_REGEX = re.compile(r"CREATE .*TABLE ([a-z_]+)", flags=re.IGNORECASE)
+
+# The base branch we want to check against. We use the main development branch
+# on the assumption that is what we are developing against.
+DEVELOP_BRANCH = "develop"
@click.command()
@@ -20,6 +29,9 @@ SCHEMA_FILE_REGEX = re.compile(r"^synapse/storage/schema/(.*)/delta/(.*)/(.*)$")
help="Always output ANSI colours",
)
def main(force_colors: bool) -> None:
+ # Return code. Set to non-zero when we encounter an error
+ return_code = 0
+
click.secho(
"+++ Checking schema deltas are in the right folder",
fg="green",
@@ -30,17 +42,17 @@ def main(force_colors: bool) -> None:
click.secho("Updating repo...")
repo = git.Repo()
- repo.remote().fetch()
+ repo.remote().fetch(refspec=DEVELOP_BRANCH)
click.secho("Getting current schema version...")
- r = repo.git.show("origin/develop:synapse/storage/schema/__init__.py")
+ r = repo.git.show(f"origin/{DEVELOP_BRANCH}:synapse/storage/schema/__init__.py")
locals: Dict[str, Any] = {}
exec(r, locals)
current_schema_version = locals["SCHEMA_VERSION"]
- diffs: List[git.Diff] = repo.remote().refs.develop.commit.diff(None)
+ diffs: List[git.Diff] = repo.remote().refs[DEVELOP_BRANCH].commit.diff(None)
# Get the schema version of the local file to check against current schema on develop
with open("synapse/storage/schema/__init__.py") as file:
@@ -53,7 +65,7 @@ def main(force_colors: bool) -> None:
# local schema version must be +/-1 the current schema version on develop
if abs(local_schema_version - current_schema_version) != 1:
click.secho(
- "The proposed schema version has diverged more than one version from develop, please fix!",
+ f"The proposed schema version has diverged more than one version from {DEVELOP_BRANCH}, please fix!",
fg="red",
bold=True,
color=force_colors,
@@ -67,21 +79,28 @@ def main(force_colors: bool) -> None:
click.secho(f"Current schema version: {current_schema_version}")
seen_deltas = False
- bad_files = []
+ bad_delta_files = []
+ changed_delta_files = []
for diff in diffs:
- if not diff.new_file or diff.b_path is None:
+ if diff.b_path is None:
+ # We don't lint deleted files.
continue
match = SCHEMA_FILE_REGEX.match(diff.b_path)
if not match:
continue
+ changed_delta_files.append(diff.b_path)
+
+ if not diff.new_file:
+ continue
+
seen_deltas = True
_, delta_version, _ = match.groups()
if delta_version != str(current_schema_version):
- bad_files.append(diff.b_path)
+ bad_delta_files.append(diff.b_path)
if not seen_deltas:
click.secho(
@@ -92,41 +111,91 @@ def main(force_colors: bool) -> None:
)
return
- if not bad_files:
+ if bad_delta_files:
+ bad_delta_files.sort()
+
click.secho(
- f"All deltas are in the correct folder: {current_schema_version}!",
- fg="green",
+ "Found deltas in the wrong folder!",
+ fg="red",
bold=True,
color=force_colors,
)
- return
- bad_files.sort()
-
- click.secho(
- "Found deltas in the wrong folder!",
- fg="red",
- bold=True,
- color=force_colors,
- )
+ for f in bad_delta_files:
+ click.secho(
+ f"\t{f}",
+ fg="red",
+ bold=True,
+ color=force_colors,
+ )
- for f in bad_files:
+ click.secho()
click.secho(
- f"\t{f}",
+ f"Please move these files to delta/{current_schema_version}/",
fg="red",
bold=True,
color=force_colors,
)
- click.secho()
- click.secho(
- f"Please move these files to delta/{current_schema_version}/",
- fg="red",
- bold=True,
- color=force_colors,
- )
+ else:
+ click.secho(
+ f"All deltas are in the correct folder: {current_schema_version}!",
+ fg="green",
+ bold=True,
+ color=force_colors,
+ )
- click.get_current_context().exit(1)
+ # Make sure we process them in order. This sort works because deltas are numbered
+ # and delta files are also numbered in order.
+ changed_delta_files.sort()
+
+ # Now check that we're not trying to create or drop indices. If we want to
+ # do that they should be in background updates. The exception is when we
+ # create indices on tables we've just created.
+ created_tables = set()
+ for delta_file in changed_delta_files:
+ with open(delta_file) as fd:
+ delta_lines = fd.readlines()
+
+ for line in delta_lines:
+ # Strip SQL comments
+ line = line.split("--", maxsplit=1)[0]
+
+ # Check and track any tables we create
+ match = TABLE_CREATION_REGEX.search(line)
+ if match:
+ table_name = match.group(1)
+ created_tables.add(table_name)
+
+ # Check for dropping indices, these are always banned
+ match = INDEX_DELETION_REGEX.search(line)
+ if match:
+ clause = match.group()
+
+ click.secho(
+ f"Found delta with index deletion: '{clause}' in {delta_file}\nThese should be in background updates.",
+ fg="red",
+ bold=True,
+ color=force_colors,
+ )
+ return_code = 1
+
+ # Check for index creation, which is only allowed for tables we've
+ # created.
+ match = INDEX_CREATION_REGEX.search(line)
+ if match:
+ clause = match.group()
+ table_name = match.group(1)
+ if table_name not in created_tables:
+ click.secho(
+ f"Found delta with index creation: '{clause}' in {delta_file}\nThese should be in background updates.",
+ fg="red",
+ bold=True,
+ color=force_colors,
+ )
+ return_code = 1
+
+ click.get_current_context().exit(return_code)
if __name__ == "__main__":
|