diff --git a/changelog.d/9933.misc b/changelog.d/9933.misc
new file mode 100644
index 0000000000..0860026670
--- /dev/null
+++ b/changelog.d/9933.misc
@@ -0,0 +1 @@
+Update the database schema versioning to support gradual migration away from legacy tables.
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 8f39ae0270..af2c968c9a 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -69,6 +69,7 @@
- [Git Usage](dev/git.md)
- [Testing]()
- [OpenTracing](opentracing.md)
+ - [Database Schemas](development/database_schema.md)
- [Synapse Architecture]()
- [Log Contexts](log_contexts.md)
- [Replication](replication.md)
@@ -84,4 +85,4 @@
- [Scripts]()
# Other
- - [Dependency Deprecation Policy](deprecation_policy.md)
\ No newline at end of file
+ - [Dependency Deprecation Policy](deprecation_policy.md)
diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md
new file mode 100644
index 0000000000..7fe8ec63e1
--- /dev/null
+++ b/docs/development/database_schema.md
@@ -0,0 +1,95 @@
+# Synapse database schema files
+
+Synapse's database schema is stored in the `synapse.storage.schema` module.
+
+## Logical databases
+
+Synapse supports splitting its datastore across multiple physical databases (which can
+be useful for large installations), and the schema files are therefore split according
+to the logical database they apply to.
+
+At the time of writing, the following "logical" databases are supported:
+
+* `state` - used to store Matrix room state (more specifically, `state_groups`,
+ their relationships and contents).
+* `main` - stores everything else.
+
+Additionally, the `common` directory contains schema files for tables which must be
+present on *all* physical databases.
+
+## Synapse schema versions
+
+Synapse manages its database schema via "schema versions". These are mainly used to
+help avoid confusion if the Synapse codebase is rolled back after the database is
+updated. They work as follows:
+
+ * The Synapse codebase defines a constant `synapse.storage.schema.SCHEMA_VERSION`
+ which represents the expectations made about the database by that version. For
+ example, as of Synapse v1.36, this is `59`.
+
+ * The database stores a "compatibility version" in
+ `schema_compat_version.compat_version` which defines the `SCHEMA_VERSION` of the
+ oldest version of Synapse which will work with the database. On startup, if
+ `compat_version` is found to be newer than `SCHEMA_VERSION`, Synapse will refuse to
+ start.
+
+ Synapse automatically updates this field from
+ `synapse.storage.schema.SCHEMA_COMPAT_VERSION`.
+
+ * Whenever a backwards-incompatible change is made to the database format (normally
+ via a `delta` file), `synapse.storage.schema.SCHEMA_COMPAT_VERSION` is also updated
+ so that administrators cannot accidentally roll back to a too-old version of Synapse.
+
+Generally, the goal is to maintain compatibility with at least one or two previous
+releases of Synapse, so any substantial change tends to require multiple releases and a
+bit of forward-planning to get right.
+
+As a worked example: we want to remove the `room_stats_historical` table. Here is how it
+might pan out.
+
+ 1. Replace any code that *reads* from `room_stats_historical` with alternative
+ implementations, but keep writing to it in case of rollback to an earlier version.
+ Also, increase `synapse.storage.schema.SCHEMA_VERSION`. In this
+ instance, there is no existing code which reads from `room_stats_historical`, so
+ our starting point is:
+
+ v1.36.0: `SCHEMA_VERSION=59`, `SCHEMA_COMPAT_VERSION=59`
+
+ 2. Next (say in Synapse v1.37.0): remove the code that *writes* to
+ `room_stats_historical`, but don't yet remove the table in case of rollback to
+ v1.36.0. Again, we increase `synapse.storage.schema.SCHEMA_VERSION`, but
+ because we have not broken compatibility with v1.36, we do not yet update
+ `SCHEMA_COMPAT_VERSION`. We now have:
+
+ v1.37.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=59`.
+
+ 3. Later (say in Synapse v1.38.0): we can remove the table altogether. This will
+ break compatibility with v1.36.0, so we must update `SCHEMA_COMPAT_VERSION` accordingly.
+ There is no need to update `synapse.storage.schema.SCHEMA_VERSION`, since there is no
+ change to the Synapse codebase here. So we end up with:
+
+ v1.38.0: `SCHEMA_VERSION=60`, `SCHEMA_COMPAT_VERSION=60`.
+
+If in doubt about whether to update `SCHEMA_VERSION` or not, it is generally best to
+lean towards doing so.
+
+## Full schema dumps
+
+In the `full_schemas` directories, only the most recently-numbered snapshot is used
+(`54` at the time of writing). Older snapshots (e.g. `16`) are present for historical
+reference only.
+
+### Building full schema dumps
+
+If you want to recreate these schemas, they need to be made from a database that
+has had all background updates run.
+
+To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
+`full.sql.postgres` and `full.sql.sqlite` files.
+
+Ensure postgres is installed, then run:
+
+ ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
+
+NB at the time of writing, this script predates the split into separate `state`/`main`
+databases so will require updates to handle that correctly.
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 3799d46734..683e5e3b90 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -1,5 +1,4 @@
-# Copyright 2014 - 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
+# Copyright 2014 - 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -26,7 +25,7 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import BaseDatabaseEngine
from synapse.storage.engines.postgres import PostgresEngine
-from synapse.storage.schema import SCHEMA_VERSION
+from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION
from synapse.storage.types import Cursor
logger = logging.getLogger(__name__)
@@ -59,6 +58,28 @@ UNAPPLIED_DELTA_ON_WORKER_ERROR = (
)
+@attr.s
+class _SchemaState:
+ current_version: int = attr.ib()
+ """The current schema version of the database"""
+
+ compat_version: Optional[int] = attr.ib()
+ """The SCHEMA_VERSION of the oldest version of Synapse for this database
+
+ If this is None, we have an old version of the database without the necessary
+ table.
+ """
+
+ applied_deltas: Collection[str] = attr.ib(factory=tuple)
+ """Any delta files for `current_version` which have already been applied"""
+
+ upgraded: bool = attr.ib(default=False)
+ """Whether the current state was reached by applying deltas.
+
+ If False, we have run the full schema for `current_version`, and have applied no
+ deltas since. If True, we have run some deltas since the original creation."""
+
+
def prepare_database(
db_conn: LoggingDatabaseConnection,
database_engine: BaseDatabaseEngine,
@@ -96,12 +117,11 @@ def prepare_database(
version_info = _get_or_create_schema_state(cur, database_engine)
if version_info:
- user_version, delta_files, upgraded = version_info
logger.info(
"%r: Existing schema is %i (+%i deltas)",
databases,
- user_version,
- len(delta_files),
+ version_info.current_version,
+ len(version_info.applied_deltas),
)
# config should only be None when we are preparing an in-memory SQLite db,
@@ -113,16 +133,18 @@ def prepare_database(
# if it's a worker app, refuse to upgrade the database, to avoid multiple
# workers doing it at once.
- if config.worker_app is not None and user_version != SCHEMA_VERSION:
+ if (
+ config.worker_app is not None
+ and version_info.current_version != SCHEMA_VERSION
+ ):
raise UpgradeDatabaseException(
- OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, user_version)
+ OUTDATED_SCHEMA_ON_WORKER_ERROR
+ % (SCHEMA_VERSION, version_info.current_version)
)
_upgrade_existing_database(
cur,
- user_version,
- delta_files,
- upgraded,
+ version_info,
database_engine,
config,
databases=databases,
@@ -261,9 +283,7 @@ def _setup_new_database(
_upgrade_existing_database(
cur,
- current_version=max_current_ver,
- applied_delta_files=[],
- upgraded=False,
+ _SchemaState(current_version=max_current_ver, compat_version=None),
database_engine=database_engine,
config=None,
databases=databases,
@@ -273,9 +293,7 @@ def _setup_new_database(
def _upgrade_existing_database(
cur: Cursor,
- current_version: int,
- applied_delta_files: List[str],
- upgraded: bool,
+ current_schema_state: _SchemaState,
database_engine: BaseDatabaseEngine,
config: Optional[HomeServerConfig],
databases: Collection[str],
@@ -321,12 +339,8 @@ def _upgrade_existing_database(
Args:
cur
- current_version: The current version of the schema.
- applied_delta_files: A list of deltas that have already been applied.
- upgraded: Whether the current version was generated by having
- applied deltas or from full schema file. If `True` the function
- will never apply delta files for the given `current_version`, since
- the current_version wasn't generated by applying those delta files.
+ current_schema_state: The current state of the schema (version, compat
+ version and applied deltas), as returned by _get_or_create_schema_state
database_engine
config:
None if we are initialising a blank database, otherwise the application
@@ -337,13 +351,16 @@ def _upgrade_existing_database(
upgrade portions of the delta scripts.
"""
if is_empty:
- assert not applied_delta_files
+ assert not current_schema_state.applied_deltas
else:
assert config
is_worker = config and config.worker_app is not None
- if current_version > SCHEMA_VERSION:
+ if (
+ current_schema_state.compat_version is not None
+ and current_schema_state.compat_version > SCHEMA_VERSION
+ ):
raise ValueError(
"Cannot use this database as it is too "
+ "new for the server to understand"
@@ -357,14 +374,26 @@ def _upgrade_existing_database(
assert config is not None
check_database_before_upgrade(cur, database_engine, config)
- start_ver = current_version
+ # update schema_compat_version before we run any upgrades, so that if synapse
+ # gets downgraded again, it won't try to run against the upgraded database.
+ if (
+ current_schema_state.compat_version is None
+ or current_schema_state.compat_version < SCHEMA_COMPAT_VERSION
+ ):
+ cur.execute("DELETE FROM schema_compat_version")
+ cur.execute(
+ "INSERT INTO schema_compat_version(compat_version) VALUES (?)",
+ (SCHEMA_COMPAT_VERSION,),
+ )
+
+ start_ver = current_schema_state.current_version
# if we got to this schema version by running a full_schema rather than a series
# of deltas, we should not run the deltas for this version.
- if not upgraded:
+ if not current_schema_state.upgraded:
start_ver += 1
- logger.debug("applied_delta_files: %s", applied_delta_files)
+ logger.debug("applied_delta_files: %s", current_schema_state.applied_deltas)
if isinstance(database_engine, PostgresEngine):
specific_engine_extension = ".postgres"
@@ -440,7 +469,7 @@ def _upgrade_existing_database(
absolute_path = entry.absolute_path
logger.debug("Found file: %s (%s)", relative_path, absolute_path)
- if relative_path in applied_delta_files:
+ if relative_path in current_schema_state.applied_deltas:
continue
root_name, ext = os.path.splitext(file_name)
@@ -621,7 +650,7 @@ def execute_statements_from_stream(cur: Cursor, f: TextIO) -> None:
def _get_or_create_schema_state(
txn: Cursor, database_engine: BaseDatabaseEngine
-) -> Optional[Tuple[int, List[str], bool]]:
+) -> Optional[_SchemaState]:
# Bluntly try creating the schema_version tables.
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)
@@ -629,17 +658,31 @@ def _get_or_create_schema_state(
txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()
+ if row is None:
+ # new database
+ return None
+
+ current_version = int(row[0])
+ upgraded = bool(row[1])
+
+ compat_version: Optional[int] = None
+ txn.execute("SELECT compat_version FROM schema_compat_version")
+ row = txn.fetchone()
if row is not None:
- current_version = int(row[0])
- txn.execute(
- "SELECT file FROM applied_schema_deltas WHERE version >= ?",
- (current_version,),
- )
- applied_deltas = [d for d, in txn]
- upgraded = bool(row[1])
- return current_version, applied_deltas, upgraded
+ compat_version = int(row[0])
+
+ txn.execute(
+ "SELECT file FROM applied_schema_deltas WHERE version >= ?",
+ (current_version,),
+ )
+ applied_deltas = tuple(d for d, in txn)
- return None
+ return _SchemaState(
+ current_version=current_version,
+ compat_version=compat_version,
+ applied_deltas=applied_deltas,
+ upgraded=upgraded,
+ )
@attr.s(slots=True)
diff --git a/synapse/storage/schema/README.md b/synapse/storage/schema/README.md
index 030153db64..729f44ea6c 100644
--- a/synapse/storage/schema/README.md
+++ b/synapse/storage/schema/README.md
@@ -1,37 +1,4 @@
# Synapse Database Schemas
-This directory contains the schema files used to build Synapse databases.
-
-Synapse supports splitting its datastore across multiple physical databases (which can
-be useful for large installations), and the schema files are therefore split according
-to the logical database they are apply to.
-
-At the time of writing, the following "logical" databases are supported:
-
-* `state` - used to store Matrix room state (more specifically, `state_groups`,
- their relationships and contents.)
-* `main` - stores everything else.
-
-Addionally, the `common` directory contains schema files for tables which must be
-present on *all* physical databases.
-
-## Full schema dumps
-
-In the `full_schemas` directories, only the most recently-numbered snapshot is useful
-(`54` at the time of writing). Older snapshots (eg, `16`) are present for historical
-reference only.
-
-## Building full schema dumps
-
-If you want to recreate these schemas, they need to be made from a database that
-has had all background updates run.
-
-To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
-`full.sql.postgres` and `full.sql.sqlite` files.
-
-Ensure postgres is installed, then run:
-
- ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
-
-NB at the time of writing, this script predates the split into separate `state`/`main`
-databases so will require updates to handle that correctly.
+This directory contains the schema files used to build Synapse databases. For more
+information, see /docs/development/database_schema.md.
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index f0d9f23167..d36ba1d773 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,6 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Remember to update this number every time a change is made to database
-# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59
+"""Represents the expectations made by the codebase about the database schema
+
+This should be incremented whenever the codebase changes its requirements on the
+shape of the database schema (even if those requirements are backwards-compatible with
+older versions of Synapse).
+
+See `docs/development/database_schema.md` for more information on how this
+works.
+"""
+
+
+SCHEMA_COMPAT_VERSION = 59
+"""Limit on how far the synapse codebase can be rolled back without breaking db compat
+
+This value is stored in the database, and checked on startup. If the value in the
+database is greater than SCHEMA_VERSION, then Synapse will refuse to start.
+"""
diff --git a/synapse/storage/schema/common/schema_version.sql b/synapse/storage/schema/common/schema_version.sql
index 42e5cb6df5..f41fde5d2d 100644
--- a/synapse/storage/schema/common/schema_version.sql
+++ b/synapse/storage/schema/common/schema_version.sql
@@ -20,6 +20,13 @@ CREATE TABLE IF NOT EXISTS schema_version(
CHECK (Lock='X')
);
+CREATE TABLE IF NOT EXISTS schema_compat_version(
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ -- The SCHEMA_VERSION of the oldest synapse this database can be used with
+ compat_version INTEGER NOT NULL,
+ CHECK (Lock='X')
+);
+
CREATE TABLE IF NOT EXISTS applied_schema_deltas(
version INTEGER NOT NULL,
file TEXT NOT NULL,
|