From 1bede478430c7a008ddaae195a81eca4ea155630 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Apr 2015 18:47:20 +0100 Subject: Add beginnings of migration sqlite->maria db script --- scripts/port_to_maria.py | 271 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 scripts/port_to_maria.py (limited to 'scripts') diff --git a/scripts/port_to_maria.py b/scripts/port_to_maria.py new file mode 100644 index 0000000000..6e0adf7030 --- /dev/null +++ b/scripts/port_to_maria.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, reactor +from twisted.enterprise import adbapi + +from synapse.storage._base import LoggingTransaction, SQLBaseStore +from synapse.storage.engines import create_engine + +import argparse +import itertools +import logging +import yaml + + +logger = logging.getLogger("port_to_maria") + + +BINARY_COLUMNS = { + "event_content_hashes": ["hash"], + "event_reference_hashes": ["hash"], + "event_signatures": ["signature"], + "event_edge_hashes": ["hash"], + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "application_services_txns": ["event_ids"], + "received_transactions": ["response_json"], + "sent_transactions": ["response_json"], + "server_tls_certificates": ["tls_certificate"], + "server_signature_keys": ["verify_key"], + "pushers": ["pushkey", "data"], + "user_filters": ["filter_json"], +} + +UNICODE_COLUMNS = { + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "users": ["password_hash"], +} + + +class Store(object): + def __init__(self, db_pool, engine): + self.db_pool = db_pool + self.engine = engine + + _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] + _simple_insert = SQLBaseStore.__dict__["_simple_insert"] + + _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] + _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] + + _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] + + def runInteraction(self, desc, func, *args, **kwargs): + def r(conn): + try: + i = 0 + N = 5 + while True: + try: + txn = conn.cursor() + return func( + LoggingTransaction(txn, desc, self.engine), + *args, **kwargs + ) + except self.engine.module.DatabaseError as e: + if self.engine.is_deadlock(e): + logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) + if i < N: + i += 1 + conn.rollback() + continue + raise + except Exception as e: + logger.debug("[TXN FAIL] {%s}", desc, e) + raise + + return self.db_pool.runWithConnection(r) + + def insert_many(self, table, headers, rows): + sql = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, + ", ".join(k for k in headers), + ", ".join("%s" for _ in headers) + ) + + def t(txn): + try: + txn.executemany(sql, rows) + except: + logger.exception( + "Failed to insert: %s", + table, + ) + raise + + return self.runInteraction("insert_many", t) + + +def chunks(n): + for i in itertools.count(0, n): + yield range(i, i+n) + + +@defer.inlineCallbacks +def handle_table(table, sqlite_store, mysql_store): + N = 1000 + + select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) + + uni_col_names = UNICODE_COLUMNS.get(table, []) + + def conv_uni(c): + return sqlite_store.engine.load_unicode(c) + + next_chunk = 0 + while True: + def r(txn): + txn.execute(select, (next_chunk, N,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + return headers, rows + + headers, rows = yield sqlite_store.runInteraction("select", r) + + logger.info("Got %d rows for %s", len(rows), table) + + if rows: + uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] + next_chunk = rows[-1][0] + 1 + + for i, row in enumerate(rows): + rows[i] = tuple( + mysql_store.engine.encode_parameter( + conv_uni(col) if j in uni_cols else col + ) + for j, col in enumerate(row) + if j > 0 + ) + + yield mysql_store.insert_many(table, headers[1:], rows) + else: + return + + +def setup_db(db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } + ) + + database_engine.prepare_database(db_conn) + + db_conn.commit() + + +@defer.inlineCallbacks +def main(sqlite_config, mysql_config): + try: + sqlite_db_pool = adbapi.ConnectionPool( + sqlite_config["name"], + **sqlite_config["args"] + ) + + mysql_db_pool = adbapi.ConnectionPool( + mysql_config["name"], + **mysql_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + mysql_engine = create_engine("mysql.connector") + + sqlite_store = Store(sqlite_db_pool, sqlite_engine) + mysql_store = Store(mysql_db_pool, mysql_engine) + + # Step 1. Set up mysql database. + logger.info("Preparing sqlite database...") + setup_db(sqlite_config, sqlite_engine) + + logger.info("Preparing mysql database...") + setup_db(mysql_config, mysql_engine) + + # Step 2. Get tables. + logger.info("Fetching tables...") + tables = yield sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + logger.info("Found %d tables", len(tables)) + + # Process tables. + yield defer.gatherResults( + [ + handle_table(table, sqlite_store, mysql_store) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + # for table in ["current_state_events"]: # tables: + # if table not in ["schema_version", "applied_schema_deltas"]: + # if not table.startswith("sqlite_"): + # yield handle_table(table, sqlite_store, mysql_store) + except: + logger.exception("") + finally: + reactor.stop() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sqlite-database") + parser.add_argument( + "--mysql-config", type=argparse.FileType('r'), + ) + + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + + sqlite_config = { + "name": "sqlite3", + "args": { + "database": args.sqlite_database, + "cp_min": 1, + "cp_max": 1, + "check_same_thread": False, + }, + } + + mysql_config = yaml.safe_load(args.mysql_config) + mysql_config["args"].update({ + "sql_mode": "TRADITIONAL", + "charset": "utf8mb4", + "use_unicode": True, + "collation": "utf8mb4_bin", + }) + + import codecs + codecs.register( + lambda name: codecs.lookup('utf8') if name == "utf8mb4" else None + ) + + reactor.callWhenRunning( + main, + sqlite_config=sqlite_config, + mysql_config=mysql_config, + ) + + reactor.run() -- cgit 1.4.1 From 90bcb869579fe9c33c223851fece8bd3eeabf942 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 13 Apr 2015 13:45:04 +0100 Subject: Support running porting script multiple times --- scripts/port_to_maria.py | 108 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/port_to_maria.py b/scripts/port_to_maria.py index 6e0adf7030..b14cca8918 100644 --- a/scripts/port_to_maria.py +++ b/scripts/port_to_maria.py @@ -51,16 +51,52 @@ UNICODE_COLUMNS = { } +APPEND_ONLY_TABLES = [ + "event_content_hashes", + "event_reference_hashes", + "event_signatures", + "event_edge_hashes", + "events", + "event_json", + "state_events", + "room_memberships", + "feedback", + "topics", + "room_names", + "rooms", + "local_media_repository", + "local_media_repository_thumbnails", + "remote_media_cache", + "remote_media_cache_thumbnails", + "redactions", + "event_edges", + "event_auth", + "received_transactions", + "sent_transactions", + "transaction_id_to_pdu", + "users", + "state_groups", + "state_groups_state", + "event_to_state_groups", + "rejections", +] + + class Store(object): def __init__(self, db_pool, engine): self.db_pool = db_pool - self.engine = engine + self.database_engine = engine _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] _simple_insert = SQLBaseStore.__dict__["_simple_insert"] _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] + _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] + _simple_select_one_onecol_txn = SQLBaseStore.__dict__["_simple_select_one_onecol_txn"] + + _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] + _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] @@ -73,11 +109,11 @@ class Store(object): try: txn = conn.cursor() return func( - LoggingTransaction(txn, desc, self.engine), + LoggingTransaction(txn, desc, self.database_engine), *args, **kwargs ) - except self.engine.module.DatabaseError as e: - if self.engine.is_deadlock(e): + except self.database_engine.module.DatabaseError as e: + if self.database_engine.is_deadlock(e): logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) if i < N: i += 1 @@ -117,16 +153,50 @@ def chunks(n): @defer.inlineCallbacks def handle_table(table, sqlite_store, mysql_store): - N = 1000 + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield mysql_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) + + if next_chunk is None: + yield mysql_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + next_chunk = 0 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("DELETE FROM %s" % (table,)) + mysql_store._simple_insert_txn( + txn, + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + yield mysql_store.runInteraction( + "delete_non_append_only", delete_all + ) + + next_chunk = 0 + + N = 5000 select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) uni_col_names = UNICODE_COLUMNS.get(table, []) def conv_uni(c): - return sqlite_store.engine.load_unicode(c) + return sqlite_store.database_engine.load_unicode(c) - next_chunk = 0 while True: def r(txn): txn.execute(select, (next_chunk, N,)) @@ -145,7 +215,7 @@ def handle_table(table, sqlite_store, mysql_store): for i, row in enumerate(rows): rows[i] = tuple( - mysql_store.engine.encode_parameter( + mysql_store.database_engine.encode_parameter( conv_uni(col) if j in uni_cols else col ) for j, col in enumerate(row) @@ -153,6 +223,12 @@ def handle_table(table, sqlite_store, mysql_store): ) yield mysql_store.insert_many(table, headers[1:], rows) + + yield mysql_store._simple_update_one( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) else: return @@ -208,6 +284,22 @@ def main(sqlite_config, mysql_config): logger.info("Found %d tables", len(tables)) + def create_port_table(txn): + try: + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " `table_name` varchar(100) NOT NULL UNIQUE," + " `rowid` bigint unsigned NOT NULL" + ")" + ) + except mysql_engine.module.DatabaseError as e: + if e.errno != mysql_engine.module.errorcode.ER_TABLE_EXISTS_ERROR: + raise + + yield mysql_store.runInteraction( + "create_port_table", create_port_table + ) + # Process tables. yield defer.gatherResults( [ -- cgit 1.4.1 From 63677d1f4752fd928fb2b6f1e97c819896bc5323 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Apr 2015 10:23:24 +0100 Subject: Change port script to work with postgres --- scripts/port_to_maria.py | 114 ++++++++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 47 deletions(-) (limited to 'scripts') diff --git a/scripts/port_to_maria.py b/scripts/port_to_maria.py index b14cca8918..0d7ba92357 100644 --- a/scripts/port_to_maria.py +++ b/scripts/port_to_maria.py @@ -22,6 +22,7 @@ from synapse.storage.engines import create_engine import argparse import itertools import logging +import types import yaml @@ -51,6 +52,14 @@ UNICODE_COLUMNS = { } +BOOLEAN_COLUMNS = { + "events": ["processed", "outlier"], + "rooms": ["is_public"], + "event_edges": ["is_state"], + "presence_list": ["accepted"], +} + + APPEND_ONLY_TABLES = [ "event_content_hashes", "event_reference_hashes", @@ -126,24 +135,22 @@ class Store(object): return self.db_pool.runWithConnection(r) - def insert_many(self, table, headers, rows): + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, ", ".join(k for k in headers), ", ".join("%s" for _ in headers) ) - def t(txn): - try: - txn.executemany(sql, rows) - except: - logger.exception( - "Failed to insert: %s", - table, - ) - raise + try: + txn.executemany(sql, rows) + except: + logger.exception( + "Failed to insert: %s", + table, + ) + raise - return self.runInteraction("insert_many", t) def chunks(n): @@ -175,7 +182,7 @@ def handle_table(table, sqlite_store, mysql_store): "DELETE FROM port_from_sqlite3 WHERE table_name = %s", (table,) ) - txn.execute("DELETE FROM %s" % (table,)) + txn.execute("TRUNCATE %s CASCADE" % (table,)) mysql_store._simple_insert_txn( txn, table="port_from_sqlite3", @@ -188,14 +195,15 @@ def handle_table(table, sqlite_store, mysql_store): next_chunk = 0 + logger.info("next_chunk for %s: %d", table, next_chunk) + N = 5000 select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) uni_col_names = UNICODE_COLUMNS.get(table, []) - - def conv_uni(c): - return sqlite_store.database_engine.load_unicode(c) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + bin_col_names = BINARY_COLUMNS.get(table, []) while True: def r(txn): @@ -211,24 +219,42 @@ def handle_table(table, sqlite_store, mysql_store): if rows: uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] + bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] + bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] next_chunk = rows[-1][0] + 1 + def conv(j, col): + if j in uni_cols: + col = sqlite_store.database_engine.load_unicode(col) + if j in bool_cols: + return bool(col) + + if j in bin_cols: + if isinstance(col, types.UnicodeType): + col = buffer(col.encode("utf8")) + + return col + for i, row in enumerate(rows): rows[i] = tuple( mysql_store.database_engine.encode_parameter( - conv_uni(col) if j in uni_cols else col + conv(j, col) ) for j, col in enumerate(row) if j > 0 ) - yield mysql_store.insert_many(table, headers[1:], rows) + def ins(txn): + mysql_store.insert_many_txn(txn, table, headers[1:], rows) - yield mysql_store._simple_update_one( - table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, - ) + mysql_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) + + yield mysql_store.runInteraction("insert_many", ins) else: return @@ -260,7 +286,7 @@ def main(sqlite_config, mysql_config): ) sqlite_engine = create_engine("sqlite3") - mysql_engine = create_engine("mysql.connector") + mysql_engine = create_engine("psycopg2") sqlite_store = Store(sqlite_db_pool, sqlite_engine) mysql_store = Store(mysql_db_pool, mysql_engine) @@ -285,20 +311,19 @@ def main(sqlite_config, mysql_config): logger.info("Found %d tables", len(tables)) def create_port_table(txn): - try: - txn.execute( - "CREATE TABLE port_from_sqlite3 (" - " `table_name` varchar(100) NOT NULL UNIQUE," - " `rowid` bigint unsigned NOT NULL" - ")" - ) - except mysql_engine.module.DatabaseError as e: - if e.errno != mysql_engine.module.errorcode.ER_TABLE_EXISTS_ERROR: - raise + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) - yield mysql_store.runInteraction( - "create_port_table", create_port_table - ) + try: + yield mysql_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) # Process tables. yield defer.gatherResults( @@ -342,17 +367,12 @@ if __name__ == "__main__": } mysql_config = yaml.safe_load(args.mysql_config) - mysql_config["args"].update({ - "sql_mode": "TRADITIONAL", - "charset": "utf8mb4", - "use_unicode": True, - "collation": "utf8mb4_bin", - }) - - import codecs - codecs.register( - lambda name: codecs.lookup('utf8') if name == "utf8mb4" else None - ) + # mysql_config["args"].update({ + # "sql_mode": "TRADITIONAL", + # "charset": "utf8mb4", + # "use_unicode": True, + # "collation": "utf8mb4_bin", + # }) reactor.callWhenRunning( main, -- cgit 1.4.1 From ffad75bd6284873c27efb2cfdfdcf9f909eb9db3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Apr 2015 17:00:50 +0100 Subject: Remove mysql/maria support --- scripts/port_from_sqlite_to_postgres.py | 373 ++++++++++++++++++++ scripts/port_to_maria.py | 383 --------------------- synapse/app/homeserver.py | 9 +- synapse/storage/engines/__init__.py | 2 - synapse/storage/engines/maria.py | 50 --- .../storage/schema/delta/16/unique_constraints.sql | 2 +- 6 files changed, 375 insertions(+), 444 deletions(-) create mode 100644 scripts/port_from_sqlite_to_postgres.py delete mode 100644 scripts/port_to_maria.py delete mode 100644 synapse/storage/engines/maria.py (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py new file mode 100644 index 0000000000..4b3fd9e529 --- /dev/null +++ b/scripts/port_from_sqlite_to_postgres.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, reactor +from twisted.enterprise import adbapi + +from synapse.storage._base import LoggingTransaction, SQLBaseStore +from synapse.storage.engines import create_engine + +import argparse +import itertools +import logging +import types +import yaml + + +logger = logging.getLogger("port_from_sqlite_to_postgres") + + +BINARY_COLUMNS = { + "event_content_hashes": ["hash"], + "event_reference_hashes": ["hash"], + "event_signatures": ["signature"], + "event_edge_hashes": ["hash"], + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "application_services_txns": ["event_ids"], + "received_transactions": ["response_json"], + "sent_transactions": ["response_json"], + "server_tls_certificates": ["tls_certificate"], + "server_signature_keys": ["verify_key"], + "pushers": ["pushkey", "data"], + "user_filters": ["filter_json"], +} + +UNICODE_COLUMNS = { + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "users": ["password_hash"], +} + + +BOOLEAN_COLUMNS = { + "events": ["processed", "outlier"], + "rooms": ["is_public"], + "event_edges": ["is_state"], + "presence_list": ["accepted"], +} + + +APPEND_ONLY_TABLES = [ + "event_content_hashes", + "event_reference_hashes", + "event_signatures", + "event_edge_hashes", + "events", + "event_json", + "state_events", + "room_memberships", + "feedback", + "topics", + "room_names", + "rooms", + "local_media_repository", + "local_media_repository_thumbnails", + "remote_media_cache", + "remote_media_cache_thumbnails", + "redactions", + "event_edges", + "event_auth", + "received_transactions", + "sent_transactions", + "transaction_id_to_pdu", + "users", + "state_groups", + "state_groups_state", + "event_to_state_groups", + "rejections", +] + + +class Store(object): + def __init__(self, db_pool, engine): + self.db_pool = db_pool + self.database_engine = engine + + _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] + _simple_insert = SQLBaseStore.__dict__["_simple_insert"] + + _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] + _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] + _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] + _simple_select_one_onecol_txn = SQLBaseStore.__dict__["_simple_select_one_onecol_txn"] + + _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] + _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] + + _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] + + def runInteraction(self, desc, func, *args, **kwargs): + def r(conn): + try: + i = 0 + N = 5 + while True: + try: + txn = conn.cursor() + return func( + LoggingTransaction(txn, desc, self.database_engine), + *args, **kwargs + ) + except self.database_engine.module.DatabaseError as e: + if self.database_engine.is_deadlock(e): + logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) + if i < N: + i += 1 + conn.rollback() + continue + raise + except Exception as e: + logger.debug("[TXN FAIL] {%s}", desc, e) + raise + + return self.db_pool.runWithConnection(r) + + def insert_many_txn(self, txn, table, headers, rows): + sql = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, + ", ".join(k for k in headers), + ", ".join("%s" for _ in headers) + ) + + try: + txn.executemany(sql, rows) + except: + logger.exception( + "Failed to insert: %s", + table, + ) + raise + + + +def chunks(n): + for i in itertools.count(0, n): + yield range(i, i+n) + + +@defer.inlineCallbacks +def handle_table(table, sqlite_store, postgres_store): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) + + if next_chunk is None: + yield postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + next_chunk = 0 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + postgres_store._simple_insert_txn( + txn, + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + yield postgres_store.runInteraction( + "delete_non_append_only", delete_all + ) + + next_chunk = 0 + + logger.info("next_chunk for %s: %d", table, next_chunk) + + N = 5000 + + select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) + + uni_col_names = UNICODE_COLUMNS.get(table, []) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + bin_col_names = BINARY_COLUMNS.get(table, []) + + while True: + def r(txn): + txn.execute(select, (next_chunk, N,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + return headers, rows + + headers, rows = yield sqlite_store.runInteraction("select", r) + + logger.info("Got %d rows for %s", len(rows), table) + + if rows: + uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] + bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] + bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in uni_cols: + col = sqlite_store.database_engine.load_unicode(col) + if j in bool_cols: + return bool(col) + + if j in bin_cols: + if isinstance(col, types.UnicodeType): + col = buffer(col.encode("utf8")) + + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + def ins(txn): + postgres_store.insert_many_txn(txn, table, headers[1:], rows) + + postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) + + yield postgres_store.runInteraction("insert_many", ins) + else: + return + + +def setup_db(db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } + ) + + database_engine.prepare_database(db_conn) + + db_conn.commit() + + +@defer.inlineCallbacks +def main(sqlite_config, postgress_config): + try: + sqlite_db_pool = adbapi.ConnectionPool( + sqlite_config["name"], + **sqlite_config["args"] + ) + + postgres_db_pool = adbapi.ConnectionPool( + postgress_config["name"], + **postgress_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + sqlite_store = Store(sqlite_db_pool, sqlite_engine) + postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. + logger.info("Preparing sqlite database...") + setup_db(sqlite_config, sqlite_engine) + + logger.info("Preparing postgres database...") + setup_db(postgress_config, postgres_engine) + + # Step 2. Get tables. + logger.info("Fetching tables...") + tables = yield sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. + yield defer.gatherResults( + [ + handle_table(table, sqlite_store, postgres_store) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + except: + logger.exception("") + finally: + reactor.stop() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sqlite-database") + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), + ) + + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + + sqlite_config = { + "name": "sqlite3", + "args": { + "database": args.sqlite_database, + "cp_min": 1, + "cp_max": 1, + "check_same_thread": False, + }, + } + + postgres_config = yaml.safe_load(args.postgres_config) + + reactor.callWhenRunning( + main, + sqlite_config=sqlite_config, + postgres_config=postgres_config, + ) + + reactor.run() diff --git a/scripts/port_to_maria.py b/scripts/port_to_maria.py deleted file mode 100644 index 0d7ba92357..0000000000 --- a/scripts/port_to_maria.py +++ /dev/null @@ -1,383 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from twisted.internet import defer, reactor -from twisted.enterprise import adbapi - -from synapse.storage._base import LoggingTransaction, SQLBaseStore -from synapse.storage.engines import create_engine - -import argparse -import itertools -import logging -import types -import yaml - - -logger = logging.getLogger("port_to_maria") - - -BINARY_COLUMNS = { - "event_content_hashes": ["hash"], - "event_reference_hashes": ["hash"], - "event_signatures": ["signature"], - "event_edge_hashes": ["hash"], - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "application_services_txns": ["event_ids"], - "received_transactions": ["response_json"], - "sent_transactions": ["response_json"], - "server_tls_certificates": ["tls_certificate"], - "server_signature_keys": ["verify_key"], - "pushers": ["pushkey", "data"], - "user_filters": ["filter_json"], -} - -UNICODE_COLUMNS = { - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "users": ["password_hash"], -} - - -BOOLEAN_COLUMNS = { - "events": ["processed", "outlier"], - "rooms": ["is_public"], - "event_edges": ["is_state"], - "presence_list": ["accepted"], -} - - -APPEND_ONLY_TABLES = [ - "event_content_hashes", - "event_reference_hashes", - "event_signatures", - "event_edge_hashes", - "events", - "event_json", - "state_events", - "room_memberships", - "feedback", - "topics", - "room_names", - "rooms", - "local_media_repository", - "local_media_repository_thumbnails", - "remote_media_cache", - "remote_media_cache_thumbnails", - "redactions", - "event_edges", - "event_auth", - "received_transactions", - "sent_transactions", - "transaction_id_to_pdu", - "users", - "state_groups", - "state_groups_state", - "event_to_state_groups", - "rejections", -] - - -class Store(object): - def __init__(self, db_pool, engine): - self.db_pool = db_pool - self.database_engine = engine - - _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] - _simple_insert = SQLBaseStore.__dict__["_simple_insert"] - - _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] - _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] - _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] - _simple_select_one_onecol_txn = SQLBaseStore.__dict__["_simple_select_one_onecol_txn"] - - _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] - _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] - - _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] - - def runInteraction(self, desc, func, *args, **kwargs): - def r(conn): - try: - i = 0 - N = 5 - while True: - try: - txn = conn.cursor() - return func( - LoggingTransaction(txn, desc, self.database_engine), - *args, **kwargs - ) - except self.database_engine.module.DatabaseError as e: - if self.database_engine.is_deadlock(e): - logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) - if i < N: - i += 1 - conn.rollback() - continue - raise - except Exception as e: - logger.debug("[TXN FAIL] {%s}", desc, e) - raise - - return self.db_pool.runWithConnection(r) - - def insert_many_txn(self, txn, table, headers, rows): - sql = "INSERT INTO %s (%s) VALUES (%s)" % ( - table, - ", ".join(k for k in headers), - ", ".join("%s" for _ in headers) - ) - - try: - txn.executemany(sql, rows) - except: - logger.exception( - "Failed to insert: %s", - table, - ) - raise - - - -def chunks(n): - for i in itertools.count(0, n): - yield range(i, i+n) - - -@defer.inlineCallbacks -def handle_table(table, sqlite_store, mysql_store): - if table in APPEND_ONLY_TABLES: - # It's safe to just carry on inserting. - next_chunk = yield mysql_store._simple_select_one_onecol( - table="port_from_sqlite3", - keyvalues={"table_name": table}, - retcol="rowid", - allow_none=True, - ) - - if next_chunk is None: - yield mysql_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} - ) - - next_chunk = 0 - else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - mysql_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} - ) - - yield mysql_store.runInteraction( - "delete_non_append_only", delete_all - ) - - next_chunk = 0 - - logger.info("next_chunk for %s: %d", table, next_chunk) - - N = 5000 - - select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) - - uni_col_names = UNICODE_COLUMNS.get(table, []) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - bin_col_names = BINARY_COLUMNS.get(table, []) - - while True: - def r(txn): - txn.execute(select, (next_chunk, N,)) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] - - return headers, rows - - headers, rows = yield sqlite_store.runInteraction("select", r) - - logger.info("Got %d rows for %s", len(rows), table) - - if rows: - uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] - bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] - bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] - next_chunk = rows[-1][0] + 1 - - def conv(j, col): - if j in uni_cols: - col = sqlite_store.database_engine.load_unicode(col) - if j in bool_cols: - return bool(col) - - if j in bin_cols: - if isinstance(col, types.UnicodeType): - col = buffer(col.encode("utf8")) - - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - mysql_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) - - def ins(txn): - mysql_store.insert_many_txn(txn, table, headers[1:], rows) - - mysql_store._simple_update_one_txn( - txn, - table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, - ) - - yield mysql_store.runInteraction("insert_many", ins) - else: - return - - -def setup_db(db_config, database_engine): - db_conn = database_engine.module.connect( - **{ - k: v for k, v in db_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) - - database_engine.prepare_database(db_conn) - - db_conn.commit() - - -@defer.inlineCallbacks -def main(sqlite_config, mysql_config): - try: - sqlite_db_pool = adbapi.ConnectionPool( - sqlite_config["name"], - **sqlite_config["args"] - ) - - mysql_db_pool = adbapi.ConnectionPool( - mysql_config["name"], - **mysql_config["args"] - ) - - sqlite_engine = create_engine("sqlite3") - mysql_engine = create_engine("psycopg2") - - sqlite_store = Store(sqlite_db_pool, sqlite_engine) - mysql_store = Store(mysql_db_pool, mysql_engine) - - # Step 1. Set up mysql database. - logger.info("Preparing sqlite database...") - setup_db(sqlite_config, sqlite_engine) - - logger.info("Preparing mysql database...") - setup_db(mysql_config, mysql_engine) - - # Step 2. Get tables. - logger.info("Fetching tables...") - tables = yield sqlite_store._simple_select_onecol( - table="sqlite_master", - keyvalues={ - "type": "table", - }, - retcol="name", - ) - - logger.info("Found %d tables", len(tables)) - - def create_port_table(txn): - txn.execute( - "CREATE TABLE port_from_sqlite3 (" - " table_name varchar(100) NOT NULL UNIQUE," - " rowid bigint NOT NULL" - ")" - ) - - try: - yield mysql_store.runInteraction( - "create_port_table", create_port_table - ) - except Exception as e: - logger.info("Failed to create port table: %s", e) - - # Process tables. - yield defer.gatherResults( - [ - handle_table(table, sqlite_store, mysql_store) - for table in tables - if table not in ["schema_version", "applied_schema_deltas"] - and not table.startswith("sqlite_") - ], - consumeErrors=True, - ) - - # for table in ["current_state_events"]: # tables: - # if table not in ["schema_version", "applied_schema_deltas"]: - # if not table.startswith("sqlite_"): - # yield handle_table(table, sqlite_store, mysql_store) - except: - logger.exception("") - finally: - reactor.stop() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--sqlite-database") - parser.add_argument( - "--mysql-config", type=argparse.FileType('r'), - ) - - args = parser.parse_args() - logging.basicConfig(level=logging.INFO) - - sqlite_config = { - "name": "sqlite3", - "args": { - "database": args.sqlite_database, - "cp_min": 1, - "cp_max": 1, - "check_same_thread": False, - }, - } - - mysql_config = yaml.safe_load(args.mysql_config) - # mysql_config["args"].update({ - # "sql_mode": "TRADITIONAL", - # "charset": "utf8mb4", - # "use_unicode": True, - # "collation": "utf8mb4_bin", - # }) - - reactor.callWhenRunning( - main, - sqlite_config=sqlite_config, - mysql_config=mysql_config, - ) - - reactor.run() diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f8a33120b5..93500dd791 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -366,14 +366,7 @@ def setup(config_options): } name = db_config.get("name", None) - if name in ["MySQLdb", "mysql.connector"]: - db_config.setdefault("args", {}).update({ - "sql_mode": "TRADITIONAL", - "charset": "utf8mb4", - "use_unicode": True, - "collation": "utf8mb4_bin", - }) - elif name == "psycopg2": + if name == "psycopg2": pass elif name == "sqlite3": db_config.setdefault("args", {}).update({ diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index 548d4e1b42..eb76df7f01 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .maria import MariaEngine from .postgres import PostgresEngine from .sqlite3 import Sqlite3Engine @@ -22,7 +21,6 @@ import importlib SUPPORTED_MODULE = { "sqlite3": Sqlite3Engine, - "mysql.connector": MariaEngine, "psycopg2": PostgresEngine, } diff --git a/synapse/storage/engines/maria.py b/synapse/storage/engines/maria.py deleted file mode 100644 index 90165f6849..0000000000 --- a/synapse/storage/engines/maria.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from synapse.storage import prepare_database - -import types - - -class MariaEngine(object): - def __init__(self, database_module): - self.module = database_module - - def convert_param_style(self, sql): - return sql.replace("?", "%s") - - def encode_parameter(self, param): - if isinstance(param, types.BufferType): - return bytes(param) - return param - - def on_new_connection(self, db_conn): - pass - - def prepare_database(self, db_conn): - cur = db_conn.cursor() - cur.execute( - "ALTER DATABASE CHARACTER SET utf8mb4 COLLATE utf8mb4_bin" - ) - db_conn.commit() - prepare_database(db_conn, self) - - def is_deadlock(self, error): - if isinstance(error, self.module.DatabaseError): - return error.sqlstate == "40001" and error.errno == 1213 - return False - - def load_unicode(self, v): - return bytes(v).decode("UTF8") diff --git a/synapse/storage/schema/delta/16/unique_constraints.sql b/synapse/storage/schema/delta/16/unique_constraints.sql index f9fbb6b448..3604ea8427 100644 --- a/synapse/storage/schema/delta/16/unique_constraints.sql +++ b/synapse/storage/schema/delta/16/unique_constraints.sql @@ -1,5 +1,5 @@ --- We can use SQLite features here, since mysql support was only added in v16 +-- We can use SQLite features here, since other db support was only added in v16 -- DELETE FROM current_state_events WHERE rowid not in ( -- cgit 1.4.1 From 427bcb76085c005069f8163e035ebce395a14b2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 11:13:05 +0100 Subject: Fix port script after storage changes. Add very simple (off by default) curses UI to see progress. --- scripts/port_from_sqlite_to_postgres.py | 580 ++++++++++++++++++++++---------- 1 file changed, 409 insertions(+), 171 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 4b3fd9e529..fc1603c1c9 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -20,38 +20,17 @@ from synapse.storage._base import LoggingTransaction, SQLBaseStore from synapse.storage.engines import create_engine import argparse -import itertools +import curses import logging -import types +import sys +import time +import traceback import yaml logger = logging.getLogger("port_from_sqlite_to_postgres") -BINARY_COLUMNS = { - "event_content_hashes": ["hash"], - "event_reference_hashes": ["hash"], - "event_signatures": ["signature"], - "event_edge_hashes": ["hash"], - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "application_services_txns": ["event_ids"], - "received_transactions": ["response_json"], - "sent_transactions": ["response_json"], - "server_tls_certificates": ["tls_certificate"], - "server_signature_keys": ["verify_key"], - "pushers": ["pushkey", "data"], - "user_filters": ["filter_json"], -} - -UNICODE_COLUMNS = { - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "users": ["password_hash"], -} - - BOOLEAN_COLUMNS = { "events": ["processed", "outlier"], "rooms": ["is_public"], @@ -91,7 +70,15 @@ APPEND_ONLY_TABLES = [ ] +end_error_exec_info = None + + class Store(object): + """This object is used to pull out some of the convenience API from the + Storage layer. + + *All* database interactions should go through this object. + """ def __init__(self, db_pool, engine): self.db_pool = db_pool self.database_engine = engine @@ -130,11 +117,14 @@ class Store(object): continue raise except Exception as e: - logger.debug("[TXN FAIL] {%s}", desc, e) + logger.debug("[TXN FAIL] {%s} %s", desc, e) raise return self.db_pool.runWithConnection(r) + def execute(self, f): + return self.runInteraction(f.__name__, f) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -152,205 +142,435 @@ class Store(object): raise +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + -def chunks(n): - for i in itertools.count(0, n): - yield range(i, i+n) +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + curses.use_default_colors() + curses.curs_set(0) -@defer.inlineCallbacks -def handle_table(table, sqlite_store, postgres_store): - if table in APPEND_ONLY_TABLES: - # It's safe to just carry on inserting. - next_chunk = yield postgres_store._simple_select_one_onecol( - table="port_from_sqlite3", - keyvalues={"table_name": table}, - retcol="rowid", - allow_none=True, + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, ) - if next_chunk is None: - yield postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, ) - next_chunk = 0 - else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), ) - yield postgres_store.runInteraction( - "delete_non_append_only", delete_all - ) + if self.finished: + self.stdscr.addstr( + self.rows-1, 0 , + "Press any key to exit...", + ) - next_chunk = 0 + self.stdscr.refresh() + self.last_update = time.time() - logger.info("next_chunk for %s: %d", table, next_chunk) + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() - N = 5000 + def on_prepare_sqlite(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing SQLite database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def on_prepare_postgres(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing PostgreSQL database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def fetching_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Fetching tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def preparing_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() - select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) - uni_col_names = UNICODE_COLUMNS.get(table, []) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - bin_col_names = BINARY_COLUMNS.get(table, []) +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) - while True: - def r(txn): - txn.execute(select, (next_chunk, N,)) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] + data = self.tables[table] - return headers, rows + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) - headers, rows = yield sqlite_store.runInteraction("select", r) + def on_prepare_sqlite(self): + print "Preparing SQLite database..." - logger.info("Got %d rows for %s", len(rows), table) + def on_prepare_postgres(self): + print "Preparing PostgreSQL database..." - if rows: - uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] - bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] - bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] - next_chunk = rows[-1][0] + 1 + def fetching_tables(self): + print "Fetching tables..." - def conv(j, col): - if j in uni_cols: - col = sqlite_store.database_engine.load_unicode(col) - if j in bool_cols: - return bool(col) + def preparing_tables(self): + print "Preparing tables..." - if j in bin_cols: - if isinstance(col, types.UnicodeType): - col = buffer(col.encode("utf8")) - return col +class Porter(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) - for i, row in enumerate(rows): - rows[i] = tuple( - postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + @defer.inlineCallbacks + def handle_table(self, table): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield self.postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) - def ins(txn): - postgres_store.insert_many_txn(txn, table, headers[1:], rows) + if next_chunk is None: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - postgres_store._simple_update_one_txn( + next_chunk = 1 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + self.postgres_store._simple_insert_txn( txn, table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, + values={"table_name": table, "rowid": 0} ) + yield self.postgres_store.execute(delete_all) - yield postgres_store.runInteraction("insert_many", ins) - else: + next_chunk = 1 + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute(get_table_size) + postgres_size = yield self.postgres_store.execute(get_table_size) + + if not table_size: return + self.progress.add_table(table, postgres_size, table_size) -def setup_db(db_config, database_engine): - db_conn = database_engine.module.connect( - **{ - k: v for k, v in db_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) + select = ( + "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" + % (table,) + ) - database_engine.prepare_database(db_conn) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) - db_conn.commit() + while True: + def r(txn): + txn.execute(select, (next_chunk, self.batch_size,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + return headers, rows -@defer.inlineCallbacks -def main(sqlite_config, postgress_config): - try: - sqlite_db_pool = adbapi.ConnectionPool( - sqlite_config["name"], - **sqlite_config["args"] - ) + headers, rows = yield self.sqlite_store.runInteraction("select", r) - postgres_db_pool = adbapi.ConnectionPool( - postgress_config["name"], - **postgress_config["args"] - ) + if rows: + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) - sqlite_engine = create_engine("sqlite3") - postgres_engine = create_engine("psycopg2") + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) - sqlite_store = Store(sqlite_db_pool, sqlite_engine) - postgres_store = Store(postgres_db_pool, postgres_engine) + self.postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) - # Step 1. Set up databases. - logger.info("Preparing sqlite database...") - setup_db(sqlite_config, sqlite_engine) + yield self.postgres_store.execute(insert) - logger.info("Preparing postgres database...") - setup_db(postgress_config, postgres_engine) + postgres_size += len(rows) - # Step 2. Get tables. - logger.info("Fetching tables...") - tables = yield sqlite_store._simple_select_onecol( - table="sqlite_master", - keyvalues={ - "type": "table", - }, - retcol="name", + self.progress.update(table, postgres_size) + else: + return + + def setup_db(self, db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } ) - logger.info("Found %d tables", len(tables)) + database_engine.prepare_database(db_conn) - def create_port_table(txn): - txn.execute( - "CREATE TABLE port_from_sqlite3 (" - " table_name varchar(100) NOT NULL UNIQUE," - " rowid bigint NOT NULL" - ")" - ) + db_conn.commit() + @defer.inlineCallbacks + def run(self): try: - yield postgres_store.runInteraction( - "create_port_table", create_port_table + sqlite_db_pool = adbapi.ConnectionPool( + self.sqlite_config["name"], + **self.sqlite_config["args"] ) - except Exception as e: - logger.info("Failed to create port table: %s", e) - - # Process tables. - yield defer.gatherResults( - [ - handle_table(table, sqlite_store, postgres_store) - for table in tables - if table not in ["schema_version", "applied_schema_deltas"] - and not table.startswith("sqlite_") - ], - consumeErrors=True, - ) - except: - logger.exception("") - finally: - reactor.stop() + postgres_db_pool = adbapi.ConnectionPool( + self.postgres_config["name"], + **self.postgres_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + self.sqlite_store = Store(sqlite_db_pool, sqlite_engine) + self.postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. + self.progress.on_prepare_sqlite() + self.setup_db(sqlite_config, sqlite_engine) + + self.progress.on_prepare_postgres() + self.setup_db(postgres_config, postgres_engine) + + # Step 2. Get tables. + self.progress.fetching_tables() + sqlite_tables = yield self.sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + postgres_tables = yield self.postgres_store._simple_select_onecol( + table="information_schema.tables", + keyvalues={ + "table_schema": "public", + }, + retcol="distinct table_name", + ) + + tables = set(sqlite_tables) & set(postgres_tables) + + self.progress.preparing_tables() + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield self.postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(table) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + self.progress.done() + except: + global end_error_exec_info + end_error_exec_info = sys.exc_info() + logger.exception("") + finally: + reactor.stop() if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("-v", action='store_true') + parser.add_argument("--curses", action='store_true') parser.add_argument("--sqlite-database") parser.add_argument( "--postgres-config", type=argparse.FileType('r'), ) + parser.add_argument("--batch-size", type=int, default=1000) + args = parser.parse_args() - logging.basicConfig(level=logging.INFO) + + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + + if args.curses: + logging_config["filename"] = "port-synapse.log" + + logging.basicConfig(**logging_config) sqlite_config = { "name": "sqlite3", @@ -364,10 +584,28 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) - reactor.callWhenRunning( - main, - sqlite_config=sqlite_config, - postgres_config=postgres_config, - ) + def start(stdscr=None): + if stdscr: + progress = CursesProgress(stdscr) + else: + progress = TerminalProgress() + + porter = Porter( + sqlite_config=sqlite_config, + postgres_config=postgres_config, + progress=progress, + batch_size=args.batch_size, + ) + + reactor.callWhenRunning(porter.run) + + reactor.run() + + if args.curses: + curses.wrapper(start) + else: + start() - reactor.run() + if end_error_exec_info: + exc_type, exc_value, exc_traceback = end_error_exec_info + traceback.print_exception(exc_type, exc_value, exc_traceback) -- cgit 1.4.1 From dfc46c6220b7439262de6fc7034e7a6df99d4e40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 12:46:29 +0100 Subject: PEP8 --- scripts/port_from_sqlite_to_postgres.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index fc1603c1c9..f4b6ed0681 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -268,7 +268,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - self.rows-1, 0 , + self.rows-1, 0, "Press any key to exit...", ) @@ -561,7 +561,6 @@ if __name__ == "__main__": args = parser.parse_args() - logging_config = { "level": logging.DEBUG if args.v else logging.INFO, "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" -- cgit 1.4.1 From 4e49f52375acb705b32937115210d055b0e1ce38 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:36:37 +0100 Subject: Don't port over all of the sent_transactions table --- scripts/port_from_sqlite_to_postgres.py | 158 ++++++++++++++++++++++++-------- 1 file changed, 120 insertions(+), 38 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index f4b6ed0681..19e35bf806 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -122,8 +122,8 @@ class Store(object): return self.db_pool.runWithConnection(r) - def execute(self, f): - return self.runInteraction(f.__name__, f) + def execute(self, f, *args, **kwargs): + return self.runInteraction(f.__name__, f, *args, **kwargs) def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( @@ -347,9 +347,118 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) + def convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + @defer.inlineCallbacks def handle_table(self, table): - if table in APPEND_ONLY_TABLES: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + if table == "sent_transactions": + # This is a big table, and we really only need some of the recent + # data + yield self.postgres_store.execute(delete_all) + + # Only save things from the last day + yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self.convert_rows(table, headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" + " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute( + get_sent_table_size + ) + + table_size += inserted_rows + elif table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -365,28 +474,18 @@ class Porter(object): ) next_chunk = 1 + + table_size = yield self.sqlite_store.execute(get_table_size) else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - self.postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} - ) yield self.postgres_store.execute(delete_all) + self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute(get_table_size) postgres_size = yield self.postgres_store.execute(get_table_size) if not table_size: @@ -399,8 +498,6 @@ class Porter(object): % (table,) ) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - while True: def r(txn): txn.execute(select, (next_chunk, self.batch_size,)) @@ -412,24 +509,9 @@ class Porter(object): headers, rows = yield self.sqlite_store.runInteraction("select", r) if rows: - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] next_chunk = rows[-1][0] + 1 - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + self.convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( -- cgit 1.4.1 From 40cbd6b6ee809c23750ea818ebd668be78bbf328 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:15 +0100 Subject: Shuffle progress stuff --- scripts/port_from_sqlite_to_postgres.py | 375 +++++++++++++++----------------- 1 file changed, 174 insertions(+), 201 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 19e35bf806..845e3fcf3b 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -142,207 +142,6 @@ class Store(object): raise -class Progress(object): - """Used to report progress of the port - """ - def __init__(self): - self.tables = {} - - self.start_time = int(time.time()) - - def add_table(self, table, cur, size): - self.tables[table] = { - "start": cur, - "num_done": cur, - "total": size, - "perc": int(cur * 100 / size), - } - - def update(self, table, num_done): - data = self.tables[table] - data["num_done"] = num_done - data["perc"] = int(num_done * 100 / data["total"]) - - def done(self): - pass - - -class CursesProgress(Progress): - """Reports progress to a curses window - """ - def __init__(self, stdscr): - self.stdscr = stdscr - - curses.use_default_colors() - curses.curs_set(0) - - curses.init_pair(1, curses.COLOR_RED, -1) - curses.init_pair(2, curses.COLOR_GREEN, -1) - - self.last_update = 0 - - self.finished = False - - super(CursesProgress, self).__init__() - - def update(self, table, num_done): - super(CursesProgress, self).update(table, num_done) - - self.render() - - def render(self, force=False): - now = time.time() - - if not force and now - self.last_update < 0.2: - # reactor.callLater(1, self.render) - return - - self.stdscr.clear() - - rows, cols = self.stdscr.getmaxyx() - - duration = int(now) - int(self.start_time) - - minutes, seconds = divmod(duration, 60) - duration_str = '%02dm %02ds' % (minutes, seconds,) - - if self.finished: - status = "Time spent: %s (Done!)" % (duration_str,) - else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc - est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) - else: - est_remaining_str = "Unknown" - status = ( - "Time spent: %s (est. remaining: %s)" - % (duration_str, est_remaining_str,) - ) - - self.stdscr.addstr( - 0, 0, - status, - curses.A_BOLD, - ) - - max_len = max([len(t) for t in self.tables.keys()]) - - left_margin = 5 - middle_space = 1 - - items = self.tables.items() - items.sort( - key=lambda i: (i[1]["perc"], i[0]), - ) - - for i, (table, data) in enumerate(items): - if i + 2 >= rows: - break - - perc = data["perc"] - - color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) - - self.stdscr.addstr( - i+2, left_margin + max_len - len(table), - table, - curses.A_BOLD | color, - ) - - size = 20 - - progress = "[%s%s]" % ( - "#" * int(perc*size/100), - " " * (size - int(perc*size/100)), - ) - - self.stdscr.addstr( - i+2, left_margin + max_len + middle_space, - "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), - ) - - if self.finished: - self.stdscr.addstr( - self.rows-1, 0, - "Press any key to exit...", - ) - - self.stdscr.refresh() - self.last_update = time.time() - - def done(self): - self.finished = True - self.render(True) - self.stdscr.getch() - - def on_prepare_sqlite(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing SQLite database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def on_prepare_postgres(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing PostgreSQL database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def fetching_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Fetching tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def preparing_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - -class TerminalProgress(Progress): - """Just prints progress to the terminal - """ - def update(self, table, num_done): - super(TerminalProgress, self).update(table, num_done) - - data = self.tables[table] - - print "%s: %d%% (%d/%d)" % ( - table, data["perc"], - data["num_done"], data["total"], - ) - - def on_prepare_sqlite(self): - print "Preparing SQLite database..." - - def on_prepare_postgres(self): - print "Preparing PostgreSQL database..." - - def fetching_tables(self): - print "Fetching tables..." - - def preparing_tables(self): - print "Preparing tables..." - - class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -630,6 +429,180 @@ class Porter(object): reactor.stop() +############################################## +###### The following is simply UI stuff ###### +############################################## + + +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + + +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + + curses.use_default_colors() + curses.curs_set(0) + + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, + ) + + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, + ) + + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), + ) + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), + ) + + if self.finished: + self.stdscr.addstr( + self.rows-1, 0, + "Press any key to exit...", + ) + + self.stdscr.refresh() + self.last_update = time.time() + + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() + + def set_state(self, state): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + state + "...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) + + data = self.tables[table] + + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) + + def set_state(self, state): + print state + "..." + + +############################################## +############################################## + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-v", action='store_true') -- cgit 1.4.1 From 5b8b1a43bd392559b1960c4a79f5dcdde0d37f23 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:40 +0100 Subject: Split setuping up and processing of tables --- scripts/port_from_sqlite_to_postgres.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 845e3fcf3b..1e7ac072f1 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -168,7 +168,7 @@ class Porter(object): ) @defer.inlineCallbacks - def handle_table(self, table): + def setup_table(self, table): def delete_all(txn): txn.execute( "DELETE FROM port_from_sqlite3 WHERE table_name = %s", @@ -287,6 +287,10 @@ class Porter(object): postgres_size = yield self.postgres_store.execute(get_table_size) + defer.returnValue((table, postgres_size, table_size, next_chunk)) + + @defer.inlineCallbacks + def handle_table(self, table, postgres_size, table_size, next_chunk): if not table_size: return @@ -364,14 +368,14 @@ class Porter(object): self.postgres_store = Store(postgres_db_pool, postgres_engine) # Step 1. Set up databases. - self.progress.on_prepare_sqlite() + self.progress.set_state("Preparing SQLite3") self.setup_db(sqlite_config, sqlite_engine) - self.progress.on_prepare_postgres() + self.progress.set_state("Preparing PostgreSQL") self.setup_db(postgres_config, postgres_engine) # Step 2. Get tables. - self.progress.fetching_tables() + self.progress.set_state("Fetching tables") sqlite_tables = yield self.sqlite_store._simple_select_onecol( table="sqlite_master", keyvalues={ @@ -390,7 +394,7 @@ class Porter(object): tables = set(sqlite_tables) & set(postgres_tables) - self.progress.preparing_tables() + self.progress.set_state("Creating tables") logger.info("Found %d tables", len(tables)) @@ -409,10 +413,12 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - # Process tables. - yield defer.gatherResults( + self.progress.set_state("Preparing tables") + + # Set up tables. + setup_res = yield defer.gatherResults( [ - self.handle_table(table) + self.setup_table(table) for table in tables if table not in ["schema_version", "applied_schema_deltas"] and not table.startswith("sqlite_") @@ -420,6 +426,15 @@ class Porter(object): consumeErrors=True, ) + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(*res) + for res in setup_res + ], + consumeErrors=True, + ) + self.progress.done() except: global end_error_exec_info -- cgit 1.4.1 From 1ccaea5b92e476ebb38a246ea933258a7199bbae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:34:06 +0100 Subject: Typo in port script --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e7ac072f1..56e149c477 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -575,7 +575,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - self.rows-1, 0, + rows-1, 0, "Press any key to exit...", ) -- cgit 1.4.1 From ce8b0b2868af32fac46a8414393760d326b764b7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:45:05 +0100 Subject: Remove accidentally committed debug hardcode hack --- scripts/port_from_sqlite_to_postgres.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 56e149c477..596be75c49 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -184,10 +184,11 @@ class Porter(object): if table == "sent_transactions": # This is a big table, and we really only need some of the recent # data + yield self.postgres_store.execute(delete_all) # Only save things from the last day - yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + yesterday = int(time.time()*1000) - 86400000 # And save the max transaction id from each destination select = ( -- cgit 1.4.1 From 4a13ae72019655ad0531f93af18382c196fb362d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 11:16:44 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 290 ++++++++++++++++++-------------- 1 file changed, 167 insertions(+), 123 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 596be75c49..1e52d82fe0 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -125,6 +125,12 @@ class Store(object): def execute(self, f, *args, **kwargs): return self.runInteraction(f.__name__, f, *args, **kwargs) + def execute_sql(self, sql, *args): + def r(txn): + txn.execute(sql, args) + return txn.fetchall() + return self.runInteraction("execute_sql", r) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -146,119 +152,9 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) - def convert_rows(self, table, headers, rows): - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] - - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) - @defer.inlineCallbacks def setup_table(self, table): - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - if table == "sent_transactions": - # This is a big table, and we really only need some of the recent - # data - - yield self.postgres_store.execute(delete_all) - - # Only save things from the last day - yesterday = int(time.time()*1000) - 86400000 - - # And save the max transaction id from each destination - select = ( - "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" - "SELECT max(rowid) FROM sent_transactions" - " GROUP BY destination" - ")" - ) - - def r(txn): - txn.execute(select) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] - - ts_ind = headers.index('ts') - - return headers, [r for r in rows if r[ts_ind] < yesterday] - - headers, rows = yield self.sqlite_store.runInteraction( - "select", r, - ) - - self.convert_rows(table, headers, rows) - - inserted_rows = len(rows) - max_inserted_rowid = max(r[0] for r in rows) - - def insert(txn): - self.postgres_store.insert_many_txn( - txn, table, headers[1:], rows - ) - - yield self.postgres_store.execute(insert) - - def get_start_id(txn): - txn.execute( - "SELECT rowid FROM sent_transactions WHERE ts >= ?" - " ORDER BY rowid ASC LIMIT 1", - (yesterday,) - ) - - rows = txn.fetchall() - if rows: - return rows[0][0] - else: - return 1 - - next_chunk = yield self.sqlite_store.execute(get_start_id) - next_chunk = max(max_inserted_rowid + 1, next_chunk) - - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": next_chunk} - ) - - def get_sent_table_size(txn): - txn.execute( - "SELECT count(*) FROM sent_transactions" - " WHERE ts >= ?", - (yesterday,) - ) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute( - get_sent_table_size - ) - - table_size += inserted_rows - elif table in APPEND_ONLY_TABLES: + if table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -267,28 +163,47 @@ class Porter(object): allow_none=True, ) + total_to_port = None if next_chunk is None: - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 1} - ) + if table == "sent_transactions": + next_chunk, already_ported, total_to_port = ( + yield self._setup_sent_transactions() + ) + else: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - next_chunk = 1 + next_chunk = 1 + already_ported = 0 - table_size = yield self.sqlite_store.execute(get_table_size) + if total_to_port is None: + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + yield self.postgres_store.execute(delete_all) - self.postgres_store._simple_insert( + + yield self.postgres_store._simple_insert( table="port_from_sqlite3", values={"table_name": table, "rowid": 0} ) - table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - postgres_size = yield self.postgres_store.execute(get_table_size) + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) - defer.returnValue((table, postgres_size, table_size, next_chunk)) + defer.returnValue((table, already_ported, total_to_port, next_chunk)) @defer.inlineCallbacks def handle_table(self, table, postgres_size, table_size, next_chunk): @@ -315,7 +230,7 @@ class Porter(object): if rows: next_chunk = rows[-1][0] + 1 - self.convert_rows(table, headers, rows) + self._convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( @@ -414,7 +329,7 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - self.progress.set_state("Preparing tables") + self.progress.set_state("Setting up") # Set up tables. setup_res = yield defer.gatherResults( @@ -444,6 +359,135 @@ class Porter(object): finally: reactor.stop() + def _convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + @defer.inlineCallbacks + def _setup_sent_transactions(self): + # Only save things from the last day + yesterday = int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self._convert_rows("sent_transactions", headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, "sent_transactions", headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" + " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": "sent_transactions", "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + remaining_count = yield self.sqlite_store.execute( + get_sent_table_size + ) + + total_count = remaining_count + inserted_rows + + defer.returnValue((next_chunk, remaining_count, total_count)) + + @defer.inlineCallbacks + def _get_remaining_count_to_port(self, table, next_chunk): + rows = yield self.sqlite_store.execute_sql( + "SELECT count(*) FROM %s WHERE rowid >= ?" % (table,), + next_chunk, + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_already_ported_count(self, table): + rows = yield self.postgres_store.execute_sql( + "SELECT count(*) FROM %s" % (table,), + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_total_count_to_port(self, table, next_chunk): + remaining, done = yield defer.gatherResults( + [ + self._get_remaining_count_to_port(table, next_chunk), + self._get_already_ported_count(table), + ], + consumeErrors=True, + ) + + remaining = int(remaining) if remaining else 0 + done = int(done) if done else 0 + + defer.returnValue((done, remaining + done)) + ############################################## ###### The following is simply UI stuff ###### -- cgit 1.4.1 From af27b84ff769ceb9fe1aaa10c9435586be4c6867 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:40:04 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e52d82fe0..a40a93ca76 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -454,7 +454,7 @@ class Porter(object): total_count = remaining_count + inserted_rows - defer.returnValue((next_chunk, remaining_count, total_count)) + defer.returnValue((next_chunk, inserted_rows, total_count)) @defer.inlineCallbacks def _get_remaining_count_to_port(self, table, next_chunk): -- cgit 1.4.1 From a1d4813a54ad711d6db7da51f301412e3f139346 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:55:29 +0100 Subject: Quickly fix dodgy est. time remaining --- scripts/port_from_sqlite_to_postgres.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index a40a93ca76..b4ee860a92 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -535,11 +535,20 @@ class CursesProgress(Progress): self.finished = False + self.total_processed = 0 + self.total_remaining = 0 + super(CursesProgress, self).__init__() def update(self, table, num_done): super(CursesProgress, self).update(table, num_done) + self.total_processed = 0 + self.total_remaining = 0 + for table, data in self.tables.items(): + self.total_processed += data["num_done"] - data["start"] + self.total_remaining += data["total"] - data["num_done"] + self.render() def render(self, force=False): @@ -561,13 +570,11 @@ class CursesProgress(Progress): if self.finished: status = "Time spent: %s (Done!)" % (duration_str,) else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc + + if self.total_processed > 0: + left = float(self.total_remaining) / self.total_processed + + est_remaining = (int(now) - self.start_time) * left est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) else: est_remaining_str = "Unknown" -- cgit 1.4.1 From f41a9a1ffcb652c045dea5f4108984d287c6de17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 17:42:36 +0100 Subject: Add better help to scripts/port_from_sqlite_to_postgres.py --- scripts/port_from_sqlite_to_postgres.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index b4ee860a92..3296f1f54f 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -671,15 +671,30 @@ class TerminalProgress(Progress): if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description="A script to port an existing synapse SQLite database to" + " a new PostgreSQL database." + ) parser.add_argument("-v", action='store_true') - parser.add_argument("--curses", action='store_true') - parser.add_argument("--sqlite-database") parser.add_argument( - "--postgres-config", type=argparse.FileType('r'), + "--sqlite-database", required=True, + help="The snapshot of the SQLite database file. This must not be" + " currently used by a running synapse server" + ) + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), required=True, + help="The database config file for the PostgreSQL database" + ) + parser.add_argument( + "--curses", action='store_true', + help="display a curses based progress UI" ) - parser.add_argument("--batch-size", type=int, default=1000) + parser.add_argument( + "--batch-size", type=int, default=1000, + help="The number of rows to select from the SQLite table each" + " iteration [default=1000]", + ) args = parser.parse_args() @@ -705,6 +720,13 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) + if "name" not in postgres_config: + sys.stderr.write("Malformed database config: no 'name'") + sys.exit(2) + if postgres_config["name"] != "psycopg2": + sys.stderr.write("Database must use 'psycopg2' connector.") + sys.exit(3) + def start(stdscr=None): if stdscr: progress = CursesProgress(stdscr) -- cgit 1.4.1