From c97ed64db3d99680819ec4dcd88ea76f3d0c7537 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 23 Oct 2019 15:31:59 +0100 Subject: Make synapse_port_db correctly create indexes (#6102) Make `synapse_port_db` correctly create indexes in the PostgreSQL database, by having it run the background updates on the database before migrating the data. To ensure we're migrating the right data, also block the port if the SQLite3 database still has pending or ongoing background updates. Fixes #4877 --- scripts/synapse_port_db | 182 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 130 insertions(+), 52 deletions(-) (limited to 'scripts') diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 3f942abdb6..5a34d6f2f5 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2018 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,9 +30,23 @@ import yaml from twisted.enterprise import adbapi from twisted.internet import defer, reactor -from synapse.storage._base import LoggingTransaction, SQLBaseStore +from synapse.config.homeserver import HomeServerConfig +from synapse.logging.context import PreserveLoggingContext +from synapse.storage._base import LoggingTransaction +from synapse.storage.client_ips import ClientIpBackgroundUpdateStore +from synapse.storage.deviceinbox import DeviceInboxBackgroundUpdateStore +from synapse.storage.devices import DeviceBackgroundUpdateStore from synapse.storage.engines import create_engine +from synapse.storage.events_bg_updates import EventsBackgroundUpdatesStore +from synapse.storage.media_repository import MediaRepositoryBackgroundUpdateStore from synapse.storage.prepare_database import prepare_database +from synapse.storage.registration import RegistrationBackgroundUpdateStore +from synapse.storage.roommember import RoomMemberBackgroundUpdateStore +from synapse.storage.search import SearchBackgroundUpdateStore +from synapse.storage.state import StateBackgroundUpdateStore +from synapse.storage.stats import StatsStore +from synapse.storage.user_directory import UserDirectoryBackgroundUpdateStore +from synapse.util import Clock logger = logging.getLogger("synapse_port_db") @@ -98,33 +113,24 @@ APPEND_ONLY_TABLES = [ end_error_exec_info = None -class Store(object): - """This object is used to pull out some of the convenience API from the - Storage layer. - - *All* database interactions should go through this object. - """ - - def __init__(self, db_pool, engine): - self.db_pool = db_pool - self.database_engine = engine - - _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] - _simple_insert = SQLBaseStore.__dict__["_simple_insert"] - - _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] - _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] - _simple_select_one = SQLBaseStore.__dict__["_simple_select_one"] - _simple_select_one_txn = SQLBaseStore.__dict__["_simple_select_one_txn"] - _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] - _simple_select_one_onecol_txn = SQLBaseStore.__dict__[ - "_simple_select_one_onecol_txn" - ] - - _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] - _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] - _simple_update_txn = SQLBaseStore.__dict__["_simple_update_txn"] +class Store( + ClientIpBackgroundUpdateStore, + DeviceInboxBackgroundUpdateStore, + DeviceBackgroundUpdateStore, + EventsBackgroundUpdatesStore, + MediaRepositoryBackgroundUpdateStore, + RegistrationBackgroundUpdateStore, + RoomMemberBackgroundUpdateStore, + SearchBackgroundUpdateStore, + StateBackgroundUpdateStore, + UserDirectoryBackgroundUpdateStore, + StatsStore, +): + def __init__(self, db_conn, hs): + super().__init__(db_conn, hs) + self.db_pool = hs.get_db_pool() + @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): def r(conn): try: @@ -150,7 +156,8 @@ class Store(object): logger.debug("[TXN FAIL] {%s} %s", desc, e) raise - return self.db_pool.runWithConnection(r) + with PreserveLoggingContext(): + return (yield self.db_pool.runWithConnection(r)) def execute(self, f, *args, **kwargs): return self.runInteraction(f.__name__, f, *args, **kwargs) @@ -176,6 +183,25 @@ class Store(object): raise +class MockHomeserver: + def __init__(self, config, database_engine, db_conn, db_pool): + self.database_engine = database_engine + self.db_conn = db_conn + self.db_pool = db_pool + self.clock = Clock(reactor) + self.config = config + self.hostname = config.server_name + + def get_db_conn(self): + return self.db_conn + + def get_db_pool(self): + return self.db_pool + + def get_clock(self): + return self.clock + + class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -447,31 +473,75 @@ class Porter(object): db_conn.commit() + return db_conn + @defer.inlineCallbacks - def run(self): - try: - sqlite_db_pool = adbapi.ConnectionPool( - self.sqlite_config["name"], **self.sqlite_config["args"] - ) + def build_db_store(self, config): + """Builds and returns a database store using the provided configuration. - postgres_db_pool = adbapi.ConnectionPool( - self.postgres_config["name"], **self.postgres_config["args"] - ) + Args: + config: The database configuration, i.e. a dict following the structure of + the "database" section of Synapse's configuration file. + + Returns: + The built Store object. + """ + engine = create_engine(config) + + self.progress.set_state("Preparing %s" % config["name"]) + conn = self.setup_db(config, engine) + + db_pool = adbapi.ConnectionPool( + config["name"], **config["args"] + ) + + hs = MockHomeserver(self.hs_config, engine, conn, db_pool) + + store = Store(conn, hs) + + yield store.runInteraction( + "%s_engine.check_database" % config["name"], + engine.check_database, + ) - sqlite_engine = create_engine(sqlite_config) - postgres_engine = create_engine(postgres_config) + return store - self.sqlite_store = Store(sqlite_db_pool, sqlite_engine) - self.postgres_store = Store(postgres_db_pool, postgres_engine) + @defer.inlineCallbacks + def run_background_updates_on_postgres(self): + # Manually apply all background updates on the PostgreSQL database. + postgres_ready = yield self.postgres_store.has_completed_background_updates() + + if not postgres_ready: + # Only say that we're running background updates when there are background + # updates to run. + self.progress.set_state("Running background updates on PostgreSQL") + + while not postgres_ready: + yield self.postgres_store.do_next_background_update(100) + postgres_ready = yield ( + self.postgres_store.has_completed_background_updates() + ) - yield self.postgres_store.execute(postgres_engine.check_database) + @defer.inlineCallbacks + def run(self): + try: + self.sqlite_store = yield self.build_db_store(self.sqlite_config) + + # Check if all background updates are done, abort if not. + updates_complete = yield self.sqlite_store.has_completed_background_updates() + if not updates_complete: + sys.stderr.write( + "Pending background updates exist in the SQLite3 database." + " Please start Synapse again and wait until every update has finished" + " before running this script.\n" + ) + defer.returnValue(None) - # Step 1. Set up databases. - self.progress.set_state("Preparing SQLite3") - self.setup_db(sqlite_config, sqlite_engine) + self.postgres_store = yield self.build_db_store( + self.hs_config.database_config + ) - self.progress.set_state("Preparing PostgreSQL") - self.setup_db(postgres_config, postgres_engine) + yield self.run_background_updates_on_postgres() self.progress.set_state("Creating port tables") @@ -563,6 +633,8 @@ class Porter(object): def conv(j, col): if j in bool_cols: return bool(col) + if isinstance(col, bytes): + return bytearray(col) elif isinstance(col, string_types) and "\0" in col: logger.warn( "DROPPING ROW: NUL value in table %s col %s: %r", @@ -926,18 +998,24 @@ if __name__ == "__main__": }, } - postgres_config = yaml.safe_load(args.postgres_config) + hs_config = yaml.safe_load(args.postgres_config) - if "database" in postgres_config: - postgres_config = postgres_config["database"] + if "database" not in hs_config: + sys.stderr.write("The configuration file must have a 'database' section.\n") + sys.exit(4) + + postgres_config = hs_config["database"] if "name" not in postgres_config: - sys.stderr.write("Malformed database config: no 'name'") + sys.stderr.write("Malformed database config: no 'name'\n") sys.exit(2) if postgres_config["name"] != "psycopg2": - sys.stderr.write("Database must use 'psycopg2' connector.") + sys.stderr.write("Database must use the 'psycopg2' connector.\n") sys.exit(3) + config = HomeServerConfig() + config.parse_config_dict(hs_config, "", "") + def start(stdscr=None): if stdscr: progress = CursesProgress(stdscr) @@ -946,9 +1024,9 @@ if __name__ == "__main__": porter = Porter( sqlite_config=sqlite_config, - postgres_config=postgres_config, progress=progress, batch_size=args.batch_size, + hs_config=config, ) reactor.callWhenRunning(porter.run) -- cgit 1.4.1 From b2510dce858cee67229b1d19610b463fb1373b20 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 23 Oct 2019 15:54:17 +0100 Subject: Fix import paths in synapse_port_db --- scripts/synapse_port_db | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'scripts') diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 5a34d6f2f5..33f0bef0f2 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -33,19 +33,27 @@ from twisted.internet import defer, reactor from synapse.config.homeserver import HomeServerConfig from synapse.logging.context import PreserveLoggingContext from synapse.storage._base import LoggingTransaction -from synapse.storage.client_ips import ClientIpBackgroundUpdateStore -from synapse.storage.deviceinbox import DeviceInboxBackgroundUpdateStore -from synapse.storage.devices import DeviceBackgroundUpdateStore +from synapse.storage.data_stores.main.client_ips import ClientIpBackgroundUpdateStore +from synapse.storage.data_stores.main.deviceinbox import DeviceInboxBackgroundUpdateStore +from synapse.storage.data_stores.main.devices import DeviceBackgroundUpdateStore from synapse.storage.engines import create_engine -from synapse.storage.events_bg_updates import EventsBackgroundUpdatesStore -from synapse.storage.media_repository import MediaRepositoryBackgroundUpdateStore +from synapse.storage.data_stores.main.events_bg_updates import ( + EventsBackgroundUpdatesStore +) +from synapse.storage.data_stores.main.media_repository import ( + MediaRepositoryBackgroundUpdateStore +) from synapse.storage.prepare_database import prepare_database -from synapse.storage.registration import RegistrationBackgroundUpdateStore -from synapse.storage.roommember import RoomMemberBackgroundUpdateStore -from synapse.storage.search import SearchBackgroundUpdateStore -from synapse.storage.state import StateBackgroundUpdateStore -from synapse.storage.stats import StatsStore -from synapse.storage.user_directory import UserDirectoryBackgroundUpdateStore +from synapse.storage.data_stores.main.registration import ( + RegistrationBackgroundUpdateStore +) +from synapse.storage.data_stores.main.roommember import RoomMemberBackgroundUpdateStore +from synapse.storage.data_stores.main.search import SearchBackgroundUpdateStore +from synapse.storage.data_stores.main.state import StateBackgroundUpdateStore +from synapse.storage.data_stores.main.stats import StatsStore +from synapse.storage.data_stores.main.user_directory import ( + UserDirectoryBackgroundUpdateStore +) from synapse.util import Clock logger = logging.getLogger("synapse_port_db") -- cgit 1.4.1 From 2891693da608f72fab0d5b68fb8c6bdca586b431 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 24 Oct 2019 12:38:48 +0100 Subject: Add new boolean column to synapse_port_db --- scripts/synapse_port_db | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 33f0bef0f2..f2b1c106da 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -66,6 +66,7 @@ BOOLEAN_COLUMNS = { "presence_list": ["accepted"], "presence_stream": ["currently_active"], "public_room_list_stream": ["visibility"], + "devices": ["hidden"], "device_lists_outbound_pokes": ["sent"], "users_who_share_rooms": ["share_private"], "groups": ["is_public"], -- cgit 1.4.1 From ff39cb46defd575c83ff03fef6cebad69afea1fb Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 24 Oct 2019 13:31:49 +0100 Subject: Fix up changelog --- CHANGES.md | 33 ++++++++++++++++++--------------- scripts/synapse_port_db | 16 +++++++++------- 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'scripts') diff --git a/CHANGES.md b/CHANGES.md index 482d124833..d438c5272a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,13 @@ Synapse 1.5.0rc1 (2019-10-24) ========================== +This release includes a database migration step **which may take a long time to complete**: + +- Allow devices to be marked as hidden, for use by features such as cross-signing. + This adds a new field with a default value to the devices field in the database, + and so the database upgrade may take a long time depending on how many devices + are in the database. ([\#5759](https://github.com/matrix-org/synapse/issues/5759)) + Features -------- @@ -16,22 +23,22 @@ Bugfixes -------- - Return an HTTP 404 instead of 400 when requesting a filter by ID that is unknown to the server. Thanks to @krombel for contributing this! ([\#2380](https://github.com/matrix-org/synapse/issues/2380)) -- Fix a problem where users could be invited twice to the same group. ([\#3436](https://github.com/matrix-org/synapse/issues/3436)) -- Added domain validation when including a list of invitees upon room creation. ([\#4088](https://github.com/matrix-org/synapse/issues/4088)) +- Fix a bug where users could be invited twice to the same group. ([\#3436](https://github.com/matrix-org/synapse/issues/3436)) +- Fix `/createRoom` failing with badly-formatted MXIDs in the invitee list. Thanks to @wener291! ([\#4088](https://github.com/matrix-org/synapse/issues/4088)) - Make the `synapse_port_db` script create the right indexes on a new PostgreSQL database. ([\#6102](https://github.com/matrix-org/synapse/issues/6102), [\#6178](https://github.com/matrix-org/synapse/issues/6178), [\#6243](https://github.com/matrix-org/synapse/issues/6243)) - Fix bug when uploading a large file: Synapse responds with `M_UNKNOWN` while it should be `M_TOO_LARGE` according to spec. Contributed by Anshul Angaria. ([\#6109](https://github.com/matrix-org/synapse/issues/6109)) -- Prevent user push rules being deleted from a room when it is upgraded. ([\#6144](https://github.com/matrix-org/synapse/issues/6144)) +- Fix user push rules being deleted from a room when it is upgraded. ([\#6144](https://github.com/matrix-org/synapse/issues/6144)) - Don't 500 when trying to exchange a revoked 3PID invite. ([\#6147](https://github.com/matrix-org/synapse/issues/6147)) - Fix transferring notifications and tags when joining an upgraded room that is new to your server. ([\#6155](https://github.com/matrix-org/synapse/issues/6155)) - Fix bug where guest account registration can wedge after restart. ([\#6161](https://github.com/matrix-org/synapse/issues/6161)) -- Fix monthly active user reaping where reserved users are specified. ([\#6168](https://github.com/matrix-org/synapse/issues/6168)) -- Fix /federation/v1/state endpoint for recent room versions. ([\#6170](https://github.com/matrix-org/synapse/issues/6170)) +- Fix monthly active user reaping when reserved users are specified. ([\#6168](https://github.com/matrix-org/synapse/issues/6168)) +- Fix `/federation/v1/state` endpoint not supporting newer room versions. ([\#6170](https://github.com/matrix-org/synapse/issues/6170)) - Fix bug where we were updating censored events as bytes rather than text, occaisonally causing invalid JSON being inserted breaking APIs that attempted to fetch such events. ([\#6186](https://github.com/matrix-org/synapse/issues/6186)) - Fix occasional missed updates in the room and user directories. ([\#6187](https://github.com/matrix-org/synapse/issues/6187)) -- Fix tracing of non-JSON APIs, /media, /key etc. ([\#6195](https://github.com/matrix-org/synapse/issues/6195)) +- Fix tracing of non-JSON APIs, `/media`, `/key` etc. ([\#6195](https://github.com/matrix-org/synapse/issues/6195)) - Fix bug where presence would not get timed out correctly if a synchrotron worker is used and restarted. ([\#6212](https://github.com/matrix-org/synapse/issues/6212)) - synapse_port_db: Add 2 additional BOOLEAN_COLUMNS to be able to convert from database schema v56. ([\#6216](https://github.com/matrix-org/synapse/issues/6216)) -- Prevent the demo Synapse's from blacklisting `::1`. ([\#6229](https://github.com/matrix-org/synapse/issues/6229)) +- Fix a bug where the Synapse demo script blacklisted `::1` (ipv6 localhost) from receiving federation traffic. ([\#6229](https://github.com/matrix-org/synapse/issues/6229)) Updates to the Docker image @@ -44,13 +51,9 @@ Internal Changes ---------------- - Update `user_filters` table to have a unique index, and non-null columns. Thanks to @pik for contributing this. ([\#1172](https://github.com/matrix-org/synapse/issues/1172), [\#6175](https://github.com/matrix-org/synapse/issues/6175), [\#6184](https://github.com/matrix-org/synapse/issues/6184)) -- Allow devices to be marked as hidden, for use by features such as cross-signing. - This adds a new field with a default value to the devices field in the database, - and so the database upgrade may take a long time depending on how many devices - are in the database. ([\#5759](https://github.com/matrix-org/synapse/issues/5759)) - Move lookup-related functions from RoomMemberHandler to IdentityHandler. ([\#5978](https://github.com/matrix-org/synapse/issues/5978)) - Improve performance of the public room list directory. ([\#6019](https://github.com/matrix-org/synapse/issues/6019), [\#6152](https://github.com/matrix-org/synapse/issues/6152), [\#6153](https://github.com/matrix-org/synapse/issues/6153), [\#6154](https://github.com/matrix-org/synapse/issues/6154)) -- Edit header dicts docstrings in SimpleHttpClient to note that `str` or `bytes` can be passed as header keys. ([\#6077](https://github.com/matrix-org/synapse/issues/6077)) +- Edit header dicts docstrings in `SimpleHttpClient` to note that `str` or `bytes` can be passed as header keys. ([\#6077](https://github.com/matrix-org/synapse/issues/6077)) - Add snapcraft packaging information. Contributed by @devec0. ([\#6084](https://github.com/matrix-org/synapse/issues/6084), [\#6191](https://github.com/matrix-org/synapse/issues/6191)) - Kill off half-implemented password-reset via sms. ([\#6101](https://github.com/matrix-org/synapse/issues/6101)) - Remove `get_user_by_req` opentracing span and add some tags. ([\#6108](https://github.com/matrix-org/synapse/issues/6108)) @@ -59,7 +62,7 @@ Internal Changes - Refactor configuration loading to allow better typechecking. ([\#6137](https://github.com/matrix-org/synapse/issues/6137)) - Log responder when responding to media request. ([\#6139](https://github.com/matrix-org/synapse/issues/6139)) - Improve performance of `find_next_generated_user_id` DB query. ([\#6148](https://github.com/matrix-org/synapse/issues/6148)) -- Expand type-checking on modules imported by synapse.config. ([\#6150](https://github.com/matrix-org/synapse/issues/6150)) +- Expand type-checking on modules imported by `synapse.config`. ([\#6150](https://github.com/matrix-org/synapse/issues/6150)) - Use Postgres ANY for selecting many values. ([\#6156](https://github.com/matrix-org/synapse/issues/6156)) - Add more caching to `_get_joined_users_from_context` DB query. ([\#6159](https://github.com/matrix-org/synapse/issues/6159)) - Add some metrics on the federation sender. ([\#6160](https://github.com/matrix-org/synapse/issues/6160)) @@ -68,10 +71,10 @@ Internal Changes - Reject (accidental) attempts to insert bytes into postgres tables. ([\#6186](https://github.com/matrix-org/synapse/issues/6186)) - Make `version` optional in body of `PUT /room_keys/version/{version}`, since it's redundant. ([\#6189](https://github.com/matrix-org/synapse/issues/6189)) - Make storage layer responsible for adding device names to key, rather than the handler. ([\#6193](https://github.com/matrix-org/synapse/issues/6193)) -- Port synapse.rest.admin module to use async/await. ([\#6196](https://github.com/matrix-org/synapse/issues/6196)) +- Port `synapse.rest.admin` module to use async/await. ([\#6196](https://github.com/matrix-org/synapse/issues/6196)) - Enforce that all boolean configuration values are lowercase in CI. ([\#6203](https://github.com/matrix-org/synapse/issues/6203)) - Remove some unused event-auth code. ([\#6214](https://github.com/matrix-org/synapse/issues/6214)) -- Remove Auth.check method. ([\#6217](https://github.com/matrix-org/synapse/issues/6217)) +- Remove `Auth.check` method. ([\#6217](https://github.com/matrix-org/synapse/issues/6217)) - Remove `format_tap.py` script in favour of a perl reimplementation in Sytest's repo. ([\#6219](https://github.com/matrix-org/synapse/issues/6219)) - Refactor storage layer in preparation to support having multiple databases. ([\#6231](https://github.com/matrix-org/synapse/issues/6231)) - Remove some extra quotation marks across the codebase. ([\#6236](https://github.com/matrix-org/synapse/issues/6236)) diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 33f0bef0f2..26a6013694 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -34,26 +34,28 @@ from synapse.config.homeserver import HomeServerConfig from synapse.logging.context import PreserveLoggingContext from synapse.storage._base import LoggingTransaction from synapse.storage.data_stores.main.client_ips import ClientIpBackgroundUpdateStore -from synapse.storage.data_stores.main.deviceinbox import DeviceInboxBackgroundUpdateStore +from synapse.storage.data_stores.main.deviceinbox import ( + DeviceInboxBackgroundUpdateStore, +) from synapse.storage.data_stores.main.devices import DeviceBackgroundUpdateStore -from synapse.storage.engines import create_engine from synapse.storage.data_stores.main.events_bg_updates import ( - EventsBackgroundUpdatesStore + EventsBackgroundUpdatesStore, ) from synapse.storage.data_stores.main.media_repository import ( - MediaRepositoryBackgroundUpdateStore + MediaRepositoryBackgroundUpdateStore, ) -from synapse.storage.prepare_database import prepare_database from synapse.storage.data_stores.main.registration import ( - RegistrationBackgroundUpdateStore + RegistrationBackgroundUpdateStore, ) from synapse.storage.data_stores.main.roommember import RoomMemberBackgroundUpdateStore from synapse.storage.data_stores.main.search import SearchBackgroundUpdateStore from synapse.storage.data_stores.main.state import StateBackgroundUpdateStore from synapse.storage.data_stores.main.stats import StatsStore from synapse.storage.data_stores.main.user_directory import ( - UserDirectoryBackgroundUpdateStore + UserDirectoryBackgroundUpdateStore, ) +from synapse.storage.engines import create_engine +from synapse.storage.prepare_database import prepare_database from synapse.util import Clock logger = logging.getLogger("synapse_port_db") -- cgit 1.4.1