From ffad75bd6284873c27efb2cfdfdcf9f909eb9db3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Apr 2015 17:00:50 +0100 Subject: Remove mysql/maria support --- scripts/port_from_sqlite_to_postgres.py | 373 ++++++++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) create mode 100644 scripts/port_from_sqlite_to_postgres.py (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py new file mode 100644 index 0000000000..4b3fd9e529 --- /dev/null +++ b/scripts/port_from_sqlite_to_postgres.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, reactor +from twisted.enterprise import adbapi + +from synapse.storage._base import LoggingTransaction, SQLBaseStore +from synapse.storage.engines import create_engine + +import argparse +import itertools +import logging +import types +import yaml + + +logger = logging.getLogger("port_from_sqlite_to_postgres") + + +BINARY_COLUMNS = { + "event_content_hashes": ["hash"], + "event_reference_hashes": ["hash"], + "event_signatures": ["signature"], + "event_edge_hashes": ["hash"], + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "application_services_txns": ["event_ids"], + "received_transactions": ["response_json"], + "sent_transactions": ["response_json"], + "server_tls_certificates": ["tls_certificate"], + "server_signature_keys": ["verify_key"], + "pushers": ["pushkey", "data"], + "user_filters": ["filter_json"], +} + +UNICODE_COLUMNS = { + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "users": ["password_hash"], +} + + +BOOLEAN_COLUMNS = { + "events": ["processed", "outlier"], + "rooms": ["is_public"], + "event_edges": ["is_state"], + "presence_list": ["accepted"], +} + + +APPEND_ONLY_TABLES = [ + "event_content_hashes", + "event_reference_hashes", + "event_signatures", + "event_edge_hashes", + "events", + "event_json", + "state_events", + "room_memberships", + "feedback", + "topics", + "room_names", + "rooms", + "local_media_repository", + "local_media_repository_thumbnails", + "remote_media_cache", + "remote_media_cache_thumbnails", + "redactions", + "event_edges", + "event_auth", + "received_transactions", + "sent_transactions", + "transaction_id_to_pdu", + "users", + "state_groups", + "state_groups_state", + "event_to_state_groups", + "rejections", +] + + +class Store(object): + def __init__(self, db_pool, engine): + self.db_pool = db_pool + self.database_engine = engine + + _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] + _simple_insert = SQLBaseStore.__dict__["_simple_insert"] + + _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] + _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] + _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] + _simple_select_one_onecol_txn = SQLBaseStore.__dict__["_simple_select_one_onecol_txn"] + + _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] + _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] + + _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] + + def runInteraction(self, desc, func, *args, **kwargs): + def r(conn): + try: + i = 0 + N = 5 + while True: + try: + txn = conn.cursor() + return func( + LoggingTransaction(txn, desc, self.database_engine), + *args, **kwargs + ) + except self.database_engine.module.DatabaseError as e: + if self.database_engine.is_deadlock(e): + logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) + if i < N: + i += 1 + conn.rollback() + continue + raise + except Exception as e: + logger.debug("[TXN FAIL] {%s}", desc, e) + raise + + return self.db_pool.runWithConnection(r) + + def insert_many_txn(self, txn, table, headers, rows): + sql = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, + ", ".join(k for k in headers), + ", ".join("%s" for _ in headers) + ) + + try: + txn.executemany(sql, rows) + except: + logger.exception( + "Failed to insert: %s", + table, + ) + raise + + + +def chunks(n): + for i in itertools.count(0, n): + yield range(i, i+n) + + +@defer.inlineCallbacks +def handle_table(table, sqlite_store, postgres_store): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) + + if next_chunk is None: + yield postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + next_chunk = 0 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + postgres_store._simple_insert_txn( + txn, + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + yield postgres_store.runInteraction( + "delete_non_append_only", delete_all + ) + + next_chunk = 0 + + logger.info("next_chunk for %s: %d", table, next_chunk) + + N = 5000 + + select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) + + uni_col_names = UNICODE_COLUMNS.get(table, []) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + bin_col_names = BINARY_COLUMNS.get(table, []) + + while True: + def r(txn): + txn.execute(select, (next_chunk, N,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + return headers, rows + + headers, rows = yield sqlite_store.runInteraction("select", r) + + logger.info("Got %d rows for %s", len(rows), table) + + if rows: + uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] + bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] + bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in uni_cols: + col = sqlite_store.database_engine.load_unicode(col) + if j in bool_cols: + return bool(col) + + if j in bin_cols: + if isinstance(col, types.UnicodeType): + col = buffer(col.encode("utf8")) + + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + def ins(txn): + postgres_store.insert_many_txn(txn, table, headers[1:], rows) + + postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) + + yield postgres_store.runInteraction("insert_many", ins) + else: + return + + +def setup_db(db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } + ) + + database_engine.prepare_database(db_conn) + + db_conn.commit() + + +@defer.inlineCallbacks +def main(sqlite_config, postgress_config): + try: + sqlite_db_pool = adbapi.ConnectionPool( + sqlite_config["name"], + **sqlite_config["args"] + ) + + postgres_db_pool = adbapi.ConnectionPool( + postgress_config["name"], + **postgress_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + sqlite_store = Store(sqlite_db_pool, sqlite_engine) + postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. + logger.info("Preparing sqlite database...") + setup_db(sqlite_config, sqlite_engine) + + logger.info("Preparing postgres database...") + setup_db(postgress_config, postgres_engine) + + # Step 2. Get tables. + logger.info("Fetching tables...") + tables = yield sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. + yield defer.gatherResults( + [ + handle_table(table, sqlite_store, postgres_store) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + except: + logger.exception("") + finally: + reactor.stop() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sqlite-database") + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), + ) + + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + + sqlite_config = { + "name": "sqlite3", + "args": { + "database": args.sqlite_database, + "cp_min": 1, + "cp_max": 1, + "check_same_thread": False, + }, + } + + postgres_config = yaml.safe_load(args.postgres_config) + + reactor.callWhenRunning( + main, + sqlite_config=sqlite_config, + postgres_config=postgres_config, + ) + + reactor.run() -- cgit 1.5.1 From 427bcb76085c005069f8163e035ebce395a14b2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 11:13:05 +0100 Subject: Fix port script after storage changes. Add very simple (off by default) curses UI to see progress. --- scripts/port_from_sqlite_to_postgres.py | 580 ++++++++++++++++++++++---------- 1 file changed, 409 insertions(+), 171 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 4b3fd9e529..fc1603c1c9 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -20,38 +20,17 @@ from synapse.storage._base import LoggingTransaction, SQLBaseStore from synapse.storage.engines import create_engine import argparse -import itertools +import curses import logging -import types +import sys +import time +import traceback import yaml logger = logging.getLogger("port_from_sqlite_to_postgres") -BINARY_COLUMNS = { - "event_content_hashes": ["hash"], - "event_reference_hashes": ["hash"], - "event_signatures": ["signature"], - "event_edge_hashes": ["hash"], - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "application_services_txns": ["event_ids"], - "received_transactions": ["response_json"], - "sent_transactions": ["response_json"], - "server_tls_certificates": ["tls_certificate"], - "server_signature_keys": ["verify_key"], - "pushers": ["pushkey", "data"], - "user_filters": ["filter_json"], -} - -UNICODE_COLUMNS = { - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "users": ["password_hash"], -} - - BOOLEAN_COLUMNS = { "events": ["processed", "outlier"], "rooms": ["is_public"], @@ -91,7 +70,15 @@ APPEND_ONLY_TABLES = [ ] +end_error_exec_info = None + + class Store(object): + """This object is used to pull out some of the convenience API from the + Storage layer. + + *All* database interactions should go through this object. + """ def __init__(self, db_pool, engine): self.db_pool = db_pool self.database_engine = engine @@ -130,11 +117,14 @@ class Store(object): continue raise except Exception as e: - logger.debug("[TXN FAIL] {%s}", desc, e) + logger.debug("[TXN FAIL] {%s} %s", desc, e) raise return self.db_pool.runWithConnection(r) + def execute(self, f): + return self.runInteraction(f.__name__, f) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -152,205 +142,435 @@ class Store(object): raise +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + -def chunks(n): - for i in itertools.count(0, n): - yield range(i, i+n) +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + curses.use_default_colors() + curses.curs_set(0) -@defer.inlineCallbacks -def handle_table(table, sqlite_store, postgres_store): - if table in APPEND_ONLY_TABLES: - # It's safe to just carry on inserting. - next_chunk = yield postgres_store._simple_select_one_onecol( - table="port_from_sqlite3", - keyvalues={"table_name": table}, - retcol="rowid", - allow_none=True, + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, ) - if next_chunk is None: - yield postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, ) - next_chunk = 0 - else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), ) - yield postgres_store.runInteraction( - "delete_non_append_only", delete_all - ) + if self.finished: + self.stdscr.addstr( + self.rows-1, 0 , + "Press any key to exit...", + ) - next_chunk = 0 + self.stdscr.refresh() + self.last_update = time.time() - logger.info("next_chunk for %s: %d", table, next_chunk) + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() - N = 5000 + def on_prepare_sqlite(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing SQLite database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def on_prepare_postgres(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing PostgreSQL database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def fetching_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Fetching tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def preparing_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() - select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) - uni_col_names = UNICODE_COLUMNS.get(table, []) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - bin_col_names = BINARY_COLUMNS.get(table, []) +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) - while True: - def r(txn): - txn.execute(select, (next_chunk, N,)) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] + data = self.tables[table] - return headers, rows + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) - headers, rows = yield sqlite_store.runInteraction("select", r) + def on_prepare_sqlite(self): + print "Preparing SQLite database..." - logger.info("Got %d rows for %s", len(rows), table) + def on_prepare_postgres(self): + print "Preparing PostgreSQL database..." - if rows: - uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] - bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] - bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] - next_chunk = rows[-1][0] + 1 + def fetching_tables(self): + print "Fetching tables..." - def conv(j, col): - if j in uni_cols: - col = sqlite_store.database_engine.load_unicode(col) - if j in bool_cols: - return bool(col) + def preparing_tables(self): + print "Preparing tables..." - if j in bin_cols: - if isinstance(col, types.UnicodeType): - col = buffer(col.encode("utf8")) - return col +class Porter(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) - for i, row in enumerate(rows): - rows[i] = tuple( - postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + @defer.inlineCallbacks + def handle_table(self, table): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield self.postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) - def ins(txn): - postgres_store.insert_many_txn(txn, table, headers[1:], rows) + if next_chunk is None: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - postgres_store._simple_update_one_txn( + next_chunk = 1 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + self.postgres_store._simple_insert_txn( txn, table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, + values={"table_name": table, "rowid": 0} ) + yield self.postgres_store.execute(delete_all) - yield postgres_store.runInteraction("insert_many", ins) - else: + next_chunk = 1 + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute(get_table_size) + postgres_size = yield self.postgres_store.execute(get_table_size) + + if not table_size: return + self.progress.add_table(table, postgres_size, table_size) -def setup_db(db_config, database_engine): - db_conn = database_engine.module.connect( - **{ - k: v for k, v in db_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) + select = ( + "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" + % (table,) + ) - database_engine.prepare_database(db_conn) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) - db_conn.commit() + while True: + def r(txn): + txn.execute(select, (next_chunk, self.batch_size,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + return headers, rows -@defer.inlineCallbacks -def main(sqlite_config, postgress_config): - try: - sqlite_db_pool = adbapi.ConnectionPool( - sqlite_config["name"], - **sqlite_config["args"] - ) + headers, rows = yield self.sqlite_store.runInteraction("select", r) - postgres_db_pool = adbapi.ConnectionPool( - postgress_config["name"], - **postgress_config["args"] - ) + if rows: + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) - sqlite_engine = create_engine("sqlite3") - postgres_engine = create_engine("psycopg2") + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) - sqlite_store = Store(sqlite_db_pool, sqlite_engine) - postgres_store = Store(postgres_db_pool, postgres_engine) + self.postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) - # Step 1. Set up databases. - logger.info("Preparing sqlite database...") - setup_db(sqlite_config, sqlite_engine) + yield self.postgres_store.execute(insert) - logger.info("Preparing postgres database...") - setup_db(postgress_config, postgres_engine) + postgres_size += len(rows) - # Step 2. Get tables. - logger.info("Fetching tables...") - tables = yield sqlite_store._simple_select_onecol( - table="sqlite_master", - keyvalues={ - "type": "table", - }, - retcol="name", + self.progress.update(table, postgres_size) + else: + return + + def setup_db(self, db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } ) - logger.info("Found %d tables", len(tables)) + database_engine.prepare_database(db_conn) - def create_port_table(txn): - txn.execute( - "CREATE TABLE port_from_sqlite3 (" - " table_name varchar(100) NOT NULL UNIQUE," - " rowid bigint NOT NULL" - ")" - ) + db_conn.commit() + @defer.inlineCallbacks + def run(self): try: - yield postgres_store.runInteraction( - "create_port_table", create_port_table + sqlite_db_pool = adbapi.ConnectionPool( + self.sqlite_config["name"], + **self.sqlite_config["args"] ) - except Exception as e: - logger.info("Failed to create port table: %s", e) - - # Process tables. - yield defer.gatherResults( - [ - handle_table(table, sqlite_store, postgres_store) - for table in tables - if table not in ["schema_version", "applied_schema_deltas"] - and not table.startswith("sqlite_") - ], - consumeErrors=True, - ) - except: - logger.exception("") - finally: - reactor.stop() + postgres_db_pool = adbapi.ConnectionPool( + self.postgres_config["name"], + **self.postgres_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + self.sqlite_store = Store(sqlite_db_pool, sqlite_engine) + self.postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. + self.progress.on_prepare_sqlite() + self.setup_db(sqlite_config, sqlite_engine) + + self.progress.on_prepare_postgres() + self.setup_db(postgres_config, postgres_engine) + + # Step 2. Get tables. + self.progress.fetching_tables() + sqlite_tables = yield self.sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + postgres_tables = yield self.postgres_store._simple_select_onecol( + table="information_schema.tables", + keyvalues={ + "table_schema": "public", + }, + retcol="distinct table_name", + ) + + tables = set(sqlite_tables) & set(postgres_tables) + + self.progress.preparing_tables() + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield self.postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(table) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + self.progress.done() + except: + global end_error_exec_info + end_error_exec_info = sys.exc_info() + logger.exception("") + finally: + reactor.stop() if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("-v", action='store_true') + parser.add_argument("--curses", action='store_true') parser.add_argument("--sqlite-database") parser.add_argument( "--postgres-config", type=argparse.FileType('r'), ) + parser.add_argument("--batch-size", type=int, default=1000) + args = parser.parse_args() - logging.basicConfig(level=logging.INFO) + + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + + if args.curses: + logging_config["filename"] = "port-synapse.log" + + logging.basicConfig(**logging_config) sqlite_config = { "name": "sqlite3", @@ -364,10 +584,28 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) - reactor.callWhenRunning( - main, - sqlite_config=sqlite_config, - postgres_config=postgres_config, - ) + def start(stdscr=None): + if stdscr: + progress = CursesProgress(stdscr) + else: + progress = TerminalProgress() + + porter = Porter( + sqlite_config=sqlite_config, + postgres_config=postgres_config, + progress=progress, + batch_size=args.batch_size, + ) + + reactor.callWhenRunning(porter.run) + + reactor.run() + + if args.curses: + curses.wrapper(start) + else: + start() - reactor.run() + if end_error_exec_info: + exc_type, exc_value, exc_traceback = end_error_exec_info + traceback.print_exception(exc_type, exc_value, exc_traceback) -- cgit 1.5.1 From dfc46c6220b7439262de6fc7034e7a6df99d4e40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 12:46:29 +0100 Subject: PEP8 --- scripts/port_from_sqlite_to_postgres.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index fc1603c1c9..f4b6ed0681 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -268,7 +268,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - self.rows-1, 0 , + self.rows-1, 0, "Press any key to exit...", ) @@ -561,7 +561,6 @@ if __name__ == "__main__": args = parser.parse_args() - logging_config = { "level": logging.DEBUG if args.v else logging.INFO, "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" -- cgit 1.5.1 From 4e49f52375acb705b32937115210d055b0e1ce38 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:36:37 +0100 Subject: Don't port over all of the sent_transactions table --- scripts/port_from_sqlite_to_postgres.py | 158 ++++++++++++++++++++++++-------- 1 file changed, 120 insertions(+), 38 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index f4b6ed0681..19e35bf806 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -122,8 +122,8 @@ class Store(object): return self.db_pool.runWithConnection(r) - def execute(self, f): - return self.runInteraction(f.__name__, f) + def execute(self, f, *args, **kwargs): + return self.runInteraction(f.__name__, f, *args, **kwargs) def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( @@ -347,9 +347,118 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) + def convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + @defer.inlineCallbacks def handle_table(self, table): - if table in APPEND_ONLY_TABLES: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + if table == "sent_transactions": + # This is a big table, and we really only need some of the recent + # data + yield self.postgres_store.execute(delete_all) + + # Only save things from the last day + yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self.convert_rows(table, headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" + " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute( + get_sent_table_size + ) + + table_size += inserted_rows + elif table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -365,28 +474,18 @@ class Porter(object): ) next_chunk = 1 + + table_size = yield self.sqlite_store.execute(get_table_size) else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - self.postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} - ) yield self.postgres_store.execute(delete_all) + self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute(get_table_size) postgres_size = yield self.postgres_store.execute(get_table_size) if not table_size: @@ -399,8 +498,6 @@ class Porter(object): % (table,) ) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - while True: def r(txn): txn.execute(select, (next_chunk, self.batch_size,)) @@ -412,24 +509,9 @@ class Porter(object): headers, rows = yield self.sqlite_store.runInteraction("select", r) if rows: - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] next_chunk = rows[-1][0] + 1 - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + self.convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( -- cgit 1.5.1 From 40cbd6b6ee809c23750ea818ebd668be78bbf328 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:15 +0100 Subject: Shuffle progress stuff --- scripts/port_from_sqlite_to_postgres.py | 375 +++++++++++++++----------------- 1 file changed, 174 insertions(+), 201 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 19e35bf806..845e3fcf3b 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -142,207 +142,6 @@ class Store(object): raise -class Progress(object): - """Used to report progress of the port - """ - def __init__(self): - self.tables = {} - - self.start_time = int(time.time()) - - def add_table(self, table, cur, size): - self.tables[table] = { - "start": cur, - "num_done": cur, - "total": size, - "perc": int(cur * 100 / size), - } - - def update(self, table, num_done): - data = self.tables[table] - data["num_done"] = num_done - data["perc"] = int(num_done * 100 / data["total"]) - - def done(self): - pass - - -class CursesProgress(Progress): - """Reports progress to a curses window - """ - def __init__(self, stdscr): - self.stdscr = stdscr - - curses.use_default_colors() - curses.curs_set(0) - - curses.init_pair(1, curses.COLOR_RED, -1) - curses.init_pair(2, curses.COLOR_GREEN, -1) - - self.last_update = 0 - - self.finished = False - - super(CursesProgress, self).__init__() - - def update(self, table, num_done): - super(CursesProgress, self).update(table, num_done) - - self.render() - - def render(self, force=False): - now = time.time() - - if not force and now - self.last_update < 0.2: - # reactor.callLater(1, self.render) - return - - self.stdscr.clear() - - rows, cols = self.stdscr.getmaxyx() - - duration = int(now) - int(self.start_time) - - minutes, seconds = divmod(duration, 60) - duration_str = '%02dm %02ds' % (minutes, seconds,) - - if self.finished: - status = "Time spent: %s (Done!)" % (duration_str,) - else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc - est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) - else: - est_remaining_str = "Unknown" - status = ( - "Time spent: %s (est. remaining: %s)" - % (duration_str, est_remaining_str,) - ) - - self.stdscr.addstr( - 0, 0, - status, - curses.A_BOLD, - ) - - max_len = max([len(t) for t in self.tables.keys()]) - - left_margin = 5 - middle_space = 1 - - items = self.tables.items() - items.sort( - key=lambda i: (i[1]["perc"], i[0]), - ) - - for i, (table, data) in enumerate(items): - if i + 2 >= rows: - break - - perc = data["perc"] - - color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) - - self.stdscr.addstr( - i+2, left_margin + max_len - len(table), - table, - curses.A_BOLD | color, - ) - - size = 20 - - progress = "[%s%s]" % ( - "#" * int(perc*size/100), - " " * (size - int(perc*size/100)), - ) - - self.stdscr.addstr( - i+2, left_margin + max_len + middle_space, - "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), - ) - - if self.finished: - self.stdscr.addstr( - self.rows-1, 0, - "Press any key to exit...", - ) - - self.stdscr.refresh() - self.last_update = time.time() - - def done(self): - self.finished = True - self.render(True) - self.stdscr.getch() - - def on_prepare_sqlite(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing SQLite database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def on_prepare_postgres(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing PostgreSQL database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def fetching_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Fetching tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def preparing_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - -class TerminalProgress(Progress): - """Just prints progress to the terminal - """ - def update(self, table, num_done): - super(TerminalProgress, self).update(table, num_done) - - data = self.tables[table] - - print "%s: %d%% (%d/%d)" % ( - table, data["perc"], - data["num_done"], data["total"], - ) - - def on_prepare_sqlite(self): - print "Preparing SQLite database..." - - def on_prepare_postgres(self): - print "Preparing PostgreSQL database..." - - def fetching_tables(self): - print "Fetching tables..." - - def preparing_tables(self): - print "Preparing tables..." - - class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -630,6 +429,180 @@ class Porter(object): reactor.stop() +############################################## +###### The following is simply UI stuff ###### +############################################## + + +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + + +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + + curses.use_default_colors() + curses.curs_set(0) + + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, + ) + + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, + ) + + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), + ) + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), + ) + + if self.finished: + self.stdscr.addstr( + self.rows-1, 0, + "Press any key to exit...", + ) + + self.stdscr.refresh() + self.last_update = time.time() + + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() + + def set_state(self, state): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + state + "...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) + + data = self.tables[table] + + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) + + def set_state(self, state): + print state + "..." + + +############################################## +############################################## + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-v", action='store_true') -- cgit 1.5.1 From 5b8b1a43bd392559b1960c4a79f5dcdde0d37f23 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:40 +0100 Subject: Split setuping up and processing of tables --- scripts/port_from_sqlite_to_postgres.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 845e3fcf3b..1e7ac072f1 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -168,7 +168,7 @@ class Porter(object): ) @defer.inlineCallbacks - def handle_table(self, table): + def setup_table(self, table): def delete_all(txn): txn.execute( "DELETE FROM port_from_sqlite3 WHERE table_name = %s", @@ -287,6 +287,10 @@ class Porter(object): postgres_size = yield self.postgres_store.execute(get_table_size) + defer.returnValue((table, postgres_size, table_size, next_chunk)) + + @defer.inlineCallbacks + def handle_table(self, table, postgres_size, table_size, next_chunk): if not table_size: return @@ -364,14 +368,14 @@ class Porter(object): self.postgres_store = Store(postgres_db_pool, postgres_engine) # Step 1. Set up databases. - self.progress.on_prepare_sqlite() + self.progress.set_state("Preparing SQLite3") self.setup_db(sqlite_config, sqlite_engine) - self.progress.on_prepare_postgres() + self.progress.set_state("Preparing PostgreSQL") self.setup_db(postgres_config, postgres_engine) # Step 2. Get tables. - self.progress.fetching_tables() + self.progress.set_state("Fetching tables") sqlite_tables = yield self.sqlite_store._simple_select_onecol( table="sqlite_master", keyvalues={ @@ -390,7 +394,7 @@ class Porter(object): tables = set(sqlite_tables) & set(postgres_tables) - self.progress.preparing_tables() + self.progress.set_state("Creating tables") logger.info("Found %d tables", len(tables)) @@ -409,10 +413,12 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - # Process tables. - yield defer.gatherResults( + self.progress.set_state("Preparing tables") + + # Set up tables. + setup_res = yield defer.gatherResults( [ - self.handle_table(table) + self.setup_table(table) for table in tables if table not in ["schema_version", "applied_schema_deltas"] and not table.startswith("sqlite_") @@ -420,6 +426,15 @@ class Porter(object): consumeErrors=True, ) + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(*res) + for res in setup_res + ], + consumeErrors=True, + ) + self.progress.done() except: global end_error_exec_info -- cgit 1.5.1 From 1ccaea5b92e476ebb38a246ea933258a7199bbae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:34:06 +0100 Subject: Typo in port script --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e7ac072f1..56e149c477 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -575,7 +575,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - self.rows-1, 0, + rows-1, 0, "Press any key to exit...", ) -- cgit 1.5.1 From ce8b0b2868af32fac46a8414393760d326b764b7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:45:05 +0100 Subject: Remove accidentally committed debug hardcode hack --- scripts/port_from_sqlite_to_postgres.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 56e149c477..596be75c49 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -184,10 +184,11 @@ class Porter(object): if table == "sent_transactions": # This is a big table, and we really only need some of the recent # data + yield self.postgres_store.execute(delete_all) # Only save things from the last day - yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + yesterday = int(time.time()*1000) - 86400000 # And save the max transaction id from each destination select = ( -- cgit 1.5.1 From 4a13ae72019655ad0531f93af18382c196fb362d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 11:16:44 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 290 ++++++++++++++++++-------------- 1 file changed, 167 insertions(+), 123 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 596be75c49..1e52d82fe0 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -125,6 +125,12 @@ class Store(object): def execute(self, f, *args, **kwargs): return self.runInteraction(f.__name__, f, *args, **kwargs) + def execute_sql(self, sql, *args): + def r(txn): + txn.execute(sql, args) + return txn.fetchall() + return self.runInteraction("execute_sql", r) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -146,119 +152,9 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) - def convert_rows(self, table, headers, rows): - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] - - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) - @defer.inlineCallbacks def setup_table(self, table): - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - if table == "sent_transactions": - # This is a big table, and we really only need some of the recent - # data - - yield self.postgres_store.execute(delete_all) - - # Only save things from the last day - yesterday = int(time.time()*1000) - 86400000 - - # And save the max transaction id from each destination - select = ( - "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" - "SELECT max(rowid) FROM sent_transactions" - " GROUP BY destination" - ")" - ) - - def r(txn): - txn.execute(select) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] - - ts_ind = headers.index('ts') - - return headers, [r for r in rows if r[ts_ind] < yesterday] - - headers, rows = yield self.sqlite_store.runInteraction( - "select", r, - ) - - self.convert_rows(table, headers, rows) - - inserted_rows = len(rows) - max_inserted_rowid = max(r[0] for r in rows) - - def insert(txn): - self.postgres_store.insert_many_txn( - txn, table, headers[1:], rows - ) - - yield self.postgres_store.execute(insert) - - def get_start_id(txn): - txn.execute( - "SELECT rowid FROM sent_transactions WHERE ts >= ?" - " ORDER BY rowid ASC LIMIT 1", - (yesterday,) - ) - - rows = txn.fetchall() - if rows: - return rows[0][0] - else: - return 1 - - next_chunk = yield self.sqlite_store.execute(get_start_id) - next_chunk = max(max_inserted_rowid + 1, next_chunk) - - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": next_chunk} - ) - - def get_sent_table_size(txn): - txn.execute( - "SELECT count(*) FROM sent_transactions" - " WHERE ts >= ?", - (yesterday,) - ) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute( - get_sent_table_size - ) - - table_size += inserted_rows - elif table in APPEND_ONLY_TABLES: + if table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -267,28 +163,47 @@ class Porter(object): allow_none=True, ) + total_to_port = None if next_chunk is None: - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 1} - ) + if table == "sent_transactions": + next_chunk, already_ported, total_to_port = ( + yield self._setup_sent_transactions() + ) + else: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - next_chunk = 1 + next_chunk = 1 + already_ported = 0 - table_size = yield self.sqlite_store.execute(get_table_size) + if total_to_port is None: + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + yield self.postgres_store.execute(delete_all) - self.postgres_store._simple_insert( + + yield self.postgres_store._simple_insert( table="port_from_sqlite3", values={"table_name": table, "rowid": 0} ) - table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - postgres_size = yield self.postgres_store.execute(get_table_size) + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) - defer.returnValue((table, postgres_size, table_size, next_chunk)) + defer.returnValue((table, already_ported, total_to_port, next_chunk)) @defer.inlineCallbacks def handle_table(self, table, postgres_size, table_size, next_chunk): @@ -315,7 +230,7 @@ class Porter(object): if rows: next_chunk = rows[-1][0] + 1 - self.convert_rows(table, headers, rows) + self._convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( @@ -414,7 +329,7 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - self.progress.set_state("Preparing tables") + self.progress.set_state("Setting up") # Set up tables. setup_res = yield defer.gatherResults( @@ -444,6 +359,135 @@ class Porter(object): finally: reactor.stop() + def _convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + @defer.inlineCallbacks + def _setup_sent_transactions(self): + # Only save things from the last day + yesterday = int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self._convert_rows("sent_transactions", headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, "sent_transactions", headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" + " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": "sent_transactions", "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + remaining_count = yield self.sqlite_store.execute( + get_sent_table_size + ) + + total_count = remaining_count + inserted_rows + + defer.returnValue((next_chunk, remaining_count, total_count)) + + @defer.inlineCallbacks + def _get_remaining_count_to_port(self, table, next_chunk): + rows = yield self.sqlite_store.execute_sql( + "SELECT count(*) FROM %s WHERE rowid >= ?" % (table,), + next_chunk, + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_already_ported_count(self, table): + rows = yield self.postgres_store.execute_sql( + "SELECT count(*) FROM %s" % (table,), + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_total_count_to_port(self, table, next_chunk): + remaining, done = yield defer.gatherResults( + [ + self._get_remaining_count_to_port(table, next_chunk), + self._get_already_ported_count(table), + ], + consumeErrors=True, + ) + + remaining = int(remaining) if remaining else 0 + done = int(done) if done else 0 + + defer.returnValue((done, remaining + done)) + ############################################## ###### The following is simply UI stuff ###### -- cgit 1.5.1 From af27b84ff769ceb9fe1aaa10c9435586be4c6867 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:40:04 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e52d82fe0..a40a93ca76 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -454,7 +454,7 @@ class Porter(object): total_count = remaining_count + inserted_rows - defer.returnValue((next_chunk, remaining_count, total_count)) + defer.returnValue((next_chunk, inserted_rows, total_count)) @defer.inlineCallbacks def _get_remaining_count_to_port(self, table, next_chunk): -- cgit 1.5.1 From a1d4813a54ad711d6db7da51f301412e3f139346 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:55:29 +0100 Subject: Quickly fix dodgy est. time remaining --- scripts/port_from_sqlite_to_postgres.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index a40a93ca76..b4ee860a92 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -535,11 +535,20 @@ class CursesProgress(Progress): self.finished = False + self.total_processed = 0 + self.total_remaining = 0 + super(CursesProgress, self).__init__() def update(self, table, num_done): super(CursesProgress, self).update(table, num_done) + self.total_processed = 0 + self.total_remaining = 0 + for table, data in self.tables.items(): + self.total_processed += data["num_done"] - data["start"] + self.total_remaining += data["total"] - data["num_done"] + self.render() def render(self, force=False): @@ -561,13 +570,11 @@ class CursesProgress(Progress): if self.finished: status = "Time spent: %s (Done!)" % (duration_str,) else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc + + if self.total_processed > 0: + left = float(self.total_remaining) / self.total_processed + + est_remaining = (int(now) - self.start_time) * left est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) else: est_remaining_str = "Unknown" -- cgit 1.5.1 From f41a9a1ffcb652c045dea5f4108984d287c6de17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 17:42:36 +0100 Subject: Add better help to scripts/port_from_sqlite_to_postgres.py --- scripts/port_from_sqlite_to_postgres.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index b4ee860a92..3296f1f54f 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -671,15 +671,30 @@ class TerminalProgress(Progress): if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description="A script to port an existing synapse SQLite database to" + " a new PostgreSQL database." + ) parser.add_argument("-v", action='store_true') - parser.add_argument("--curses", action='store_true') - parser.add_argument("--sqlite-database") parser.add_argument( - "--postgres-config", type=argparse.FileType('r'), + "--sqlite-database", required=True, + help="The snapshot of the SQLite database file. This must not be" + " currently used by a running synapse server" + ) + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), required=True, + help="The database config file for the PostgreSQL database" + ) + parser.add_argument( + "--curses", action='store_true', + help="display a curses based progress UI" ) - parser.add_argument("--batch-size", type=int, default=1000) + parser.add_argument( + "--batch-size", type=int, default=1000, + help="The number of rows to select from the SQLite table each" + " iteration [default=1000]", + ) args = parser.parse_args() @@ -705,6 +720,13 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) + if "name" not in postgres_config: + sys.stderr.write("Malformed database config: no 'name'") + sys.exit(2) + if postgres_config["name"] != "psycopg2": + sys.stderr.write("Database must use 'psycopg2' connector.") + sys.exit(3) + def start(stdscr=None): if stdscr: progress = CursesProgress(stdscr) -- cgit 1.5.1