From f2d12ccabef17faa0bf6b34fbb6d944849afc4d4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 14 Sep 2022 12:01:42 -0400 Subject: Use partial indices on SQLIte. (#13802) Partial indices have been supported since SQLite 3.8, but Synapse now requires >= 3.27, so we can enable support for them. This requires rebuilding previous indices which were partial on PostgreSQL, but not on SQLite. --- synapse/storage/background_updates.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'synapse/storage/background_updates.py') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 555b4e77d2..cf1eabc437 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -581,9 +581,6 @@ class BackgroundUpdater: def create_index_sqlite(conn: Connection) -> None: # Sqlite doesn't support concurrent creation of indexes. # - # We don't use partial indices on SQLite as it wasn't introduced - # until 3.8, and wheezy and CentOS 7 have 3.7 - # # We assume that sqlite doesn't give us invalid indices; however # we may still end up with the index existing but the # background_updates not having been recorded if synapse got shut @@ -591,12 +588,13 @@ class BackgroundUpdater: # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.) sql = ( "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s" - " (%(columns)s)" + " (%(columns)s) %(where_clause)s" ) % { "unique": "UNIQUE" if unique else "", "name": index_name, "table": table, "columns": ", ".join(columns), + "where_clause": "WHERE " + where_clause if where_clause else "", } c = conn.cursor() -- cgit 1.5.1 From b2b0c8527957d89b36c0eafea70347c200c1d294 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 15 Sep 2022 14:28:48 -0400 Subject: Support providing an index predicate for upserts. (#13822) This is useful to upsert against a table which has a unique partial index while avoiding conflicts. --- changelog.d/13822.misc | 1 + synapse/storage/background_updates.py | 1 + synapse/storage/database.py | 30 +++++++++++++++++++++++------- 3 files changed, 25 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13822.misc (limited to 'synapse/storage/background_updates.py') diff --git a/changelog.d/13822.misc b/changelog.d/13822.misc new file mode 100644 index 0000000000..dbc77cbcfa --- /dev/null +++ b/changelog.d/13822.misc @@ -0,0 +1 @@ +Support providing an index predicate clause when doing upserts. diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index cf1eabc437..bf5e7ee7be 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -533,6 +533,7 @@ class BackgroundUpdater: index_name: name of index to add table: table to add index to columns: columns/expressions to include in index + where_clause: A WHERE clause to specify a partial unique index. unique: true to make a UNIQUE index psql_only: true to only create this index on psql databases (useful for virtual sqlite tables) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e881bff7fb..921cd4dc5e 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1191,6 +1191,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, lock: bool = True, ) -> bool: """ @@ -1203,6 +1204,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. lock: True to lock the table when doing the upsert. Unused when performing a native upsert. Returns: @@ -1213,7 +1215,12 @@ class DatabasePool: if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_txn_native_upsert( - txn, table, keyvalues, values, insertion_values=insertion_values + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + where_clause=where_clause, ) else: return self.simple_upsert_txn_emulated( @@ -1222,6 +1229,7 @@ class DatabasePool: keyvalues, values, insertion_values=insertion_values, + where_clause=where_clause, lock=lock, ) @@ -1232,6 +1240,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, lock: bool = True, ) -> bool: """ @@ -1240,6 +1249,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. lock: True to lock the table when doing the upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is @@ -1259,14 +1269,17 @@ class DatabasePool: else: return "%s = ?" % (key,) + # Generate a where clause of each keyvalue and optionally the provided + # index predicate. + where = [_getwhere(k) for k in keyvalues] + if where_clause: + where.append(where_clause) + if not values: # If `values` is empty, then all of the values we care about are in # the unique key, so there is nothing to UPDATE. We can just do a # SELECT instead to see if it exists. - sql = "SELECT 1 FROM %s WHERE %s" % ( - table, - " AND ".join(_getwhere(k) for k in keyvalues), - ) + sql = "SELECT 1 FROM %s WHERE %s" % (table, " AND ".join(where)) sqlargs = list(keyvalues.values()) txn.execute(sql, sqlargs) if txn.fetchall(): @@ -1277,7 +1290,7 @@ class DatabasePool: sql = "UPDATE %s SET %s WHERE %s" % ( table, ", ".join("%s = ?" % (k,) for k in values), - " AND ".join(_getwhere(k) for k in keyvalues), + " AND ".join(where), ) sqlargs = list(values.values()) + list(keyvalues.values()) @@ -1307,6 +1320,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, ) -> bool: """ Use the native UPSERT functionality in PostgreSQL. @@ -1316,6 +1330,7 @@ class DatabasePool: keyvalues: The unique key tables and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is @@ -1331,11 +1346,12 @@ class DatabasePool: allvalues.update(values) latter = "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) - sql = ("INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s") % ( + sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) %s DO %s" % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), + f"WHERE {where_clause}" if where_clause else "", latter, ) txn.execute(sql, list(allvalues.values())) -- cgit 1.5.1 From fff9b955fa39bda2cca1fa726b561c7886e746a1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 20 Sep 2022 14:14:12 +0100 Subject: Generate separate snapshots for logical databases (#13792) * Generate separate snapshots for sqlite, postgres and common * Cleanup postgres dbs in the TRAP * Say which logical DB we're applying updates to * Run background updates on the state DB * Add new option for accepting a SCHEMA_NUMBER --- changelog.d/13792.misc | 1 + scripts-dev/make_full_schema.sh | 166 +++++++++++++++++++++------- synapse/_scripts/update_synapse_database.py | 14 ++- synapse/storage/background_updates.py | 5 +- 4 files changed, 140 insertions(+), 46 deletions(-) create mode 100644 changelog.d/13792.misc (limited to 'synapse/storage/background_updates.py') diff --git a/changelog.d/13792.misc b/changelog.d/13792.misc new file mode 100644 index 0000000000..36ac91400a --- /dev/null +++ b/changelog.d/13792.misc @@ -0,0 +1 @@ +Update the script which makes full schema dumps. diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index 61394360ce..d8cd06ee4f 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -2,23 +2,16 @@ # # This script generates SQL files for creating a brand new Synapse DB with the latest # schema, on both SQLite3 and Postgres. -# -# It does so by having Synapse generate an up-to-date SQLite DB, then running -# synapse_port_db to convert it to Postgres. It then dumps the contents of both. export PGHOST="localhost" -POSTGRES_DB_NAME="synapse_full_schema.$$" - -SQLITE_SCHEMA_FILE="schema.sql.sqlite" -SQLITE_ROWS_FILE="rows.sql.sqlite" -POSTGRES_SCHEMA_FILE="full.sql.postgres" -POSTGRES_ROWS_FILE="rows.sql.postgres" - +POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$" +POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$" +POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$" REQUIRED_DEPS=("matrix-synapse" "psycopg2") usage() { echo - echo "Usage: $0 -p -o [-c] [-n] [-h]" + echo "Usage: $0 -p -o [-c] [-n ] [-h]" echo echo "-p " echo " Username to connect to local postgres instance. The password will be requested" @@ -27,11 +20,16 @@ usage() { echo " CI mode. Prints every command that the script runs." echo "-o " echo " Directory to output full schema files to." + echo "-n " + echo " Schema number for the new snapshot. Used to set the location of files within " + echo " the output directory, mimicking that of synapse/storage/schemas." + echo " Defaults to 9999." echo "-h" echo " Display this help text." } -while getopts "p:co:h" opt; do +SCHEMA_NUMBER="9999" +while getopts "p:co:hn:" opt; do case $opt in p) export PGUSER=$OPTARG @@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do usage exit ;; + n) + SCHEMA_NUMBER="$OPTARG" + ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 usage @@ -95,12 +96,21 @@ cd "$(dirname "$0")/.." TMPDIR=$(mktemp -d) KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path SQLITE_CONFIG=$TMPDIR/sqlite.conf -SQLITE_DB=$TMPDIR/homeserver.db +SQLITE_MAIN_DB=$TMPDIR/main.db +SQLITE_STATE_DB=$TMPDIR/state.db +SQLITE_COMMON_DB=$TMPDIR/common.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit -# TODO: the trap should also drop the temp postgres DB -trap 'rm -rf $TMPDIR' EXIT +cleanup() { + echo "Cleaning up temporary sqlite database and config files..." + rm -r "$TMPDIR" + echo "Cleaning up temporary Postgres database..." + dropdb --if-exists "$POSTGRES_COMMON_DB_NAME" + dropdb --if-exists "$POSTGRES_MAIN_DB_NAME" + dropdb --if-exists "$POSTGRES_STATE_DB_NAME" +} +trap 'cleanup' EXIT cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE" -sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE" +echo "Dumping SQLite3 schema..." + +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" + +cleanup_pg_schema() { + sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' +} -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..." -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE" +echo "Dumping Postgres schema..." -echo "Cleaning up temporary Postgres database..." -dropdb $POSTGRES_DB_NAME +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! Files dumped to: $OUTPUT_DIR" diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py index b4aeae6dd5..fb1fb83f50 100755 --- a/synapse/_scripts/update_synapse_database.py +++ b/synapse/_scripts/update_synapse_database.py @@ -48,10 +48,13 @@ class MockHomeserver(HomeServer): def run_background_updates(hs: HomeServer) -> None: - store = hs.get_datastores().main + main = hs.get_datastores().main + state = hs.get_datastores().state async def run_background_updates() -> None: - await store.db_pool.updates.run_background_updates(sleep=False) + await main.db_pool.updates.run_background_updates(sleep=False) + if state: + await state.db_pool.updates.run_background_updates(sleep=False) # Stop the reactor to exit the script once every background update is run. reactor.stop() @@ -97,8 +100,11 @@ def main() -> None: # Load, process and sanity-check the config. hs_config = yaml.safe_load(args.database_config) - if "database" not in hs_config: - sys.stderr.write("The configuration file must have a 'database' section.\n") + if "database" not in hs_config and "databases" not in hs_config: + sys.stderr.write( + "The configuration file must have a 'database' or 'databases' section. " + "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database" + ) sys.exit(4) config = HomeServerConfig() diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index bf5e7ee7be..2056ecb2c3 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -285,7 +285,10 @@ class BackgroundUpdater: back_to_back_failures = 0 try: - logger.info("Starting background schema updates") + logger.info( + "Starting background schema updates for database %s", + self._database_name, + ) while self.enabled: try: result = await self.do_next_background_update(sleep) -- cgit 1.5.1