summary refs log tree commit diff
path: root/scripts-dev/make_full_schema.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts-dev/make_full_schema.sh')
-rwxr-xr-xscripts-dev/make_full_schema.sh206
1 files changed, 153 insertions, 53 deletions
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh

index f0e22d4ca2..e2bc1640bb 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh
@@ -2,34 +2,37 @@ # # This script generates SQL files for creating a brand new Synapse DB with the latest # schema, on both SQLite3 and Postgres. -# -# It does so by having Synapse generate an up-to-date SQLite DB, then running -# synapse_port_db to convert it to Postgres. It then dumps the contents of both. export PGHOST="localhost" -POSTGRES_DB_NAME="synapse_full_schema.$$" - -SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite" -POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres" - +POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$" +POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$" +POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$" REQUIRED_DEPS=("matrix-synapse" "psycopg2") usage() { echo - echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]" + echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]" echo echo "-p <postgres_username>" echo " Username to connect to local postgres instance. The password will be requested" echo " during script execution." echo "-c" - echo " CI mode. Enables coverage tracking and prints every command that the script runs." + echo " CI mode. Prints every command that the script runs." echo "-o <path>" echo " Directory to output full schema files to." + echo "-n <schema number>" + echo " Schema number for the new snapshot. Used to set the location of files within " + echo " the output directory, mimicking that of synapse/storage/schemas." + echo " Defaults to 9999." echo "-h" echo " Display this help text." + echo "" + echo " NB: make sure to run this against the *oldest* supported version of postgres," + echo " or else pg_dump might output non-backwards-compatible syntax." } -while getopts "p:co:h" opt; do +SCHEMA_NUMBER="9999" +while getopts "p:co:hn:" opt; do case $opt in p) export PGUSER=$OPTARG @@ -37,11 +40,6 @@ while getopts "p:co:h" opt; do c) # Print all commands that are being executed set -x - - # Modify required dependencies for coverage - REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess") - - COVERAGE=1 ;; o) command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1) @@ -51,6 +49,9 @@ while getopts "p:co:h" opt; do usage exit ;; + n) + SCHEMA_NUMBER="$OPTARG" + ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 usage @@ -98,11 +99,21 @@ cd "$(dirname "$0")/.." TMPDIR=$(mktemp -d) KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path SQLITE_CONFIG=$TMPDIR/sqlite.conf -SQLITE_DB=$TMPDIR/homeserver.db +SQLITE_MAIN_DB=$TMPDIR/main.db +SQLITE_STATE_DB=$TMPDIR/state.db +SQLITE_COMMON_DB=$TMPDIR/common.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit -trap 'rm -rf $TMPDIR' EXIT +cleanup() { + echo "Cleaning up temporary sqlite database and config files..." + rm -r "$TMPDIR" + echo "Cleaning up temporary Postgres database..." + dropdb --if-exists "$POSTGRES_COMMON_DB_NAME" + dropdb --if-exists "$POSTGRES_MAIN_DB_NAME" + dropdb --if-exists "$POSTGRES_STATE_DB_NAME" +} +trap 'cleanup' EXIT cat > "$SQLITE_CONFIG" <<EOF server_name: "test" @@ -112,10 +123,22 @@ macaroon_secret_key: "abcde" report_stats: false -database: - name: "sqlite3" - args: - database: "$SQLITE_DB" +databases: + common: + name: "sqlite3" + data_stores: [] + args: + database: "$SQLITE_COMMON_DB" + main: + name: "sqlite3" + data_stores: ["main"] + args: + database: "$SQLITE_MAIN_DB" + state: + name: "sqlite3" + data_stores: ["state"] + args: + database: "$SQLITE_STATE_DB" # Suppress the key server warning. trusted_key_servers: [] @@ -129,13 +152,32 @@ macaroon_secret_key: "abcde" report_stats: false -database: - name: "psycopg2" - args: - user: "$PGUSER" - host: "$PGHOST" - password: "$PGPASSWORD" - database: "$POSTGRES_DB_NAME" +databases: + common: + name: "psycopg2" + data_stores: [] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_COMMON_DB_NAME" + main: + name: "psycopg2" + data_stores: ["main"] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_MAIN_DB_NAME" + state: + name: "psycopg2" + data_stores: ["state"] + args: + user: "$PGUSER" + host: "$PGHOST" + password: "$PGPASSWORD" + database: "$POSTGRES_STATE_DB_NAME" + # Suppress the key server warning. trusted_key_servers: [] @@ -147,29 +189,46 @@ python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG" # Make sure the SQLite3 database is using the latest schema and has no pending background update. echo "Running db background jobs..." -synapse/_scripts/update_synapse_database.py --database-config --run-background-updates "$SQLITE_CONFIG" +synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates # Create the PostgreSQL database. -echo "Creating postgres database..." -createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME" - -echo "Copying data from SQLite3 to Postgres with synapse_port_db..." -if [ -z "$COVERAGE" ]; then - # No coverage needed - synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG" -else - # Coverage desired - coverage run synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG" -fi +echo "Creating postgres databases..." +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME" +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME" +createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME" + +echo "Running db background jobs..." +synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates + -# Delete schema_version, applied_schema_deltas and applied_module_schemas tables -# Also delete any shadow tables from fts4 -# This needs to be done after synapse_port_db is run echo "Dropping unwanted db tables..." -SQL=" + +# Some common tables are created and updated by Synapse itself and do not belong in the +# schema. +DROP_APP_MANAGED_TABLES=" DROP TABLE schema_version; +DROP TABLE schema_compat_version; DROP TABLE applied_schema_deltas; DROP TABLE applied_module_schemas; +" +# Other common tables are not created by Synapse and do belong in the schema. +# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But +# since there's only one table there, I haven't bothered to do so. +DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES +DROP TABLE background_updates; +" + +sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES" +sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES" +sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES" +psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES" + +# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation +# details behind full text search tables. Omit these from the dumps. + +sqlite3 "$SQLITE_MAIN_DB" <<< " DROP TABLE event_search_content; DROP TABLE event_search_segments; DROP TABLE event_search_segdir; @@ -181,16 +240,57 @@ DROP TABLE user_directory_search_segdir; DROP TABLE user_directory_search_docsize; DROP TABLE user_directory_search_stat; " -sqlite3 "$SQLITE_DB" <<< "$SQL" -psql "$POSTGRES_DB_NAME" -w <<< "$SQL" -echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE'..." -sqlite3 "$SQLITE_DB" ".dump" > "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping SQLite3 schema..." + +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" + +cleanup_pg_schema() { + # Cleanup as follows: + # - Remove empty lines. pg_dump likes to output a lot of these. + # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually + # separate tables etc. + # - Remove "public." prefix --- the schema name. + # - Remove "SET" commands. Last time I ran this, the output commands were + # SET statement_timeout = 0; + # SET lock_timeout = 0; + # SET idle_in_transaction_session_timeout = 0; + # SET client_encoding = 'UTF8'; + # SET standard_conforming_strings = on; + # SET check_function_bodies = false; + # SET xmloption = content; + # SET client_min_messages = warning; + # SET row_security = off; + # SET default_table_access_method = heap; + # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all + # SELECT statements because some of those have side-effects which we do want in the + # schema. Last time I ran this, the only SELECTS were + # SELECT pg_catalog.set_config('search_path', '', false); + # and + # SELECT pg_catalog.setval(text, bigint, bool); + # We do want to remove the former, but the latter is important. If the last argument + # is `true` or omitted, this marks the given integer as having been consumed and + # will NOT appear as the nextval. + sed -e '/^$/d' \ + -e '/^--/d' \ + -e 's/public\.//g' \ + -e '/^SET /d' \ + -e '/^SELECT pg_catalog.set_config/d' +} -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..." -pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE" +echo "Dumping Postgres schema..." -echo "Cleaning up temporary Postgres database..." -dropdb $POSTGRES_DB_NAME +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! Files dumped to: $OUTPUT_DIR"