From ffad75bd6284873c27efb2cfdfdcf9f909eb9db3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 15 Apr 2015 17:00:50 +0100 Subject: Remove mysql/maria support --- scripts/port_from_sqlite_to_postgres.py | 373 ++++++++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) create mode 100644 scripts/port_from_sqlite_to_postgres.py (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py new file mode 100644 index 0000000000..4b3fd9e529 --- /dev/null +++ b/scripts/port_from_sqlite_to_postgres.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, reactor +from twisted.enterprise import adbapi + +from synapse.storage._base import LoggingTransaction, SQLBaseStore +from synapse.storage.engines import create_engine + +import argparse +import itertools +import logging +import types +import yaml + + +logger = logging.getLogger("port_from_sqlite_to_postgres") + + +BINARY_COLUMNS = { + "event_content_hashes": ["hash"], + "event_reference_hashes": ["hash"], + "event_signatures": ["signature"], + "event_edge_hashes": ["hash"], + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "application_services_txns": ["event_ids"], + "received_transactions": ["response_json"], + "sent_transactions": ["response_json"], + "server_tls_certificates": ["tls_certificate"], + "server_signature_keys": ["verify_key"], + "pushers": ["pushkey", "data"], + "user_filters": ["filter_json"], +} + +UNICODE_COLUMNS = { + "events": ["content", "unrecognized_keys"], + "event_json": ["internal_metadata", "json"], + "users": ["password_hash"], +} + + +BOOLEAN_COLUMNS = { + "events": ["processed", "outlier"], + "rooms": ["is_public"], + "event_edges": ["is_state"], + "presence_list": ["accepted"], +} + + +APPEND_ONLY_TABLES = [ + "event_content_hashes", + "event_reference_hashes", + "event_signatures", + "event_edge_hashes", + "events", + "event_json", + "state_events", + "room_memberships", + "feedback", + "topics", + "room_names", + "rooms", + "local_media_repository", + "local_media_repository_thumbnails", + "remote_media_cache", + "remote_media_cache_thumbnails", + "redactions", + "event_edges", + "event_auth", + "received_transactions", + "sent_transactions", + "transaction_id_to_pdu", + "users", + "state_groups", + "state_groups_state", + "event_to_state_groups", + "rejections", +] + + +class Store(object): + def __init__(self, db_pool, engine): + self.db_pool = db_pool + self.database_engine = engine + + _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] + _simple_insert = SQLBaseStore.__dict__["_simple_insert"] + + _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] + _simple_select_onecol = SQLBaseStore.__dict__["_simple_select_onecol"] + _simple_select_one_onecol = SQLBaseStore.__dict__["_simple_select_one_onecol"] + 
_simple_select_one_onecol_txn = SQLBaseStore.__dict__["_simple_select_one_onecol_txn"] + + _simple_update_one = SQLBaseStore.__dict__["_simple_update_one"] + _simple_update_one_txn = SQLBaseStore.__dict__["_simple_update_one_txn"] + + _execute_and_decode = SQLBaseStore.__dict__["_execute_and_decode"] + + def runInteraction(self, desc, func, *args, **kwargs): + def r(conn): + try: + i = 0 + N = 5 + while True: + try: + txn = conn.cursor() + return func( + LoggingTransaction(txn, desc, self.database_engine), + *args, **kwargs + ) + except self.database_engine.module.DatabaseError as e: + if self.database_engine.is_deadlock(e): + logger.warn("[TXN DEADLOCK] {%s} %d/%d", desc, i, N) + if i < N: + i += 1 + conn.rollback() + continue + raise + except Exception as e: + logger.debug("[TXN FAIL] {%s}", desc, e) + raise + + return self.db_pool.runWithConnection(r) + + def insert_many_txn(self, txn, table, headers, rows): + sql = "INSERT INTO %s (%s) VALUES (%s)" % ( + table, + ", ".join(k for k in headers), + ", ".join("%s" for _ in headers) + ) + + try: + txn.executemany(sql, rows) + except: + logger.exception( + "Failed to insert: %s", + table, + ) + raise + + + +def chunks(n): + for i in itertools.count(0, n): + yield range(i, i+n) + + +@defer.inlineCallbacks +def handle_table(table, sqlite_store, postgres_store): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) + + if next_chunk is None: + yield postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + next_chunk = 0 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + postgres_store._simple_insert_txn( + txn, + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + + yield postgres_store.runInteraction( + "delete_non_append_only", delete_all + ) + + next_chunk = 0 + + logger.info("next_chunk for %s: %d", table, next_chunk) + + N = 5000 + + select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" 
% (table,) + + uni_col_names = UNICODE_COLUMNS.get(table, []) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + bin_col_names = BINARY_COLUMNS.get(table, []) + + while True: + def r(txn): + txn.execute(select, (next_chunk, N,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + return headers, rows + + headers, rows = yield sqlite_store.runInteraction("select", r) + + logger.info("Got %d rows for %s", len(rows), table) + + if rows: + uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] + bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] + bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in uni_cols: + col = sqlite_store.database_engine.load_unicode(col) + if j in bool_cols: + return bool(col) + + if j in bin_cols: + if isinstance(col, types.UnicodeType): + col = buffer(col.encode("utf8")) + + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + def ins(txn): + postgres_store.insert_many_txn(txn, table, headers[1:], rows) + + postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) + + yield postgres_store.runInteraction("insert_many", ins) + else: + return + + +def setup_db(db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } + ) + + database_engine.prepare_database(db_conn) + + db_conn.commit() + + +@defer.inlineCallbacks +def main(sqlite_config, postgress_config): + try: + sqlite_db_pool = adbapi.ConnectionPool( + sqlite_config["name"], + **sqlite_config["args"] + ) + + postgres_db_pool = adbapi.ConnectionPool( + postgress_config["name"], + **postgress_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + sqlite_store = Store(sqlite_db_pool, sqlite_engine) + postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. + logger.info("Preparing sqlite database...") + setup_db(sqlite_config, sqlite_engine) + + logger.info("Preparing postgres database...") + setup_db(postgress_config, postgres_engine) + + # Step 2. Get tables. + logger.info("Fetching tables...") + tables = yield sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. 
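+        # Tables are ported concurrently. The schema bookkeeping tables
+        # ("schema_version", "applied_schema_deltas") and SQLite's internal
+        # sqlite_* tables are skipped: setup_db() has already recreated the
+        # schema on the PostgreSQL side, so only row data needs to move.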
+ yield defer.gatherResults( + [ + handle_table(table, sqlite_store, postgres_store) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + except: + logger.exception("") + finally: + reactor.stop() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sqlite-database") + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), + ) + + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + + sqlite_config = { + "name": "sqlite3", + "args": { + "database": args.sqlite_database, + "cp_min": 1, + "cp_max": 1, + "check_same_thread": False, + }, + } + + postgres_config = yaml.safe_load(args.postgres_config) + + reactor.callWhenRunning( + main, + sqlite_config=sqlite_config, + postgres_config=postgres_config, + ) + + reactor.run() -- cgit 1.5.1 From 427bcb76085c005069f8163e035ebce395a14b2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 11:13:05 +0100 Subject: Fix port script after storage changes. Add very simple (off by default) curses UI to see progress. --- scripts/port_from_sqlite_to_postgres.py | 580 ++++++++++++++++++++++---------- 1 file changed, 409 insertions(+), 171 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 4b3fd9e529..fc1603c1c9 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -20,38 +20,17 @@ from synapse.storage._base import LoggingTransaction, SQLBaseStore from synapse.storage.engines import create_engine import argparse -import itertools +import curses import logging -import types +import sys +import time +import traceback import yaml logger = logging.getLogger("port_from_sqlite_to_postgres") -BINARY_COLUMNS = { - "event_content_hashes": ["hash"], - "event_reference_hashes": ["hash"], - "event_signatures": ["signature"], - "event_edge_hashes": ["hash"], - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "application_services_txns": ["event_ids"], - "received_transactions": ["response_json"], - "sent_transactions": ["response_json"], - "server_tls_certificates": ["tls_certificate"], - "server_signature_keys": ["verify_key"], - "pushers": ["pushkey", "data"], - "user_filters": ["filter_json"], -} - -UNICODE_COLUMNS = { - "events": ["content", "unrecognized_keys"], - "event_json": ["internal_metadata", "json"], - "users": ["password_hash"], -} - - BOOLEAN_COLUMNS = { "events": ["processed", "outlier"], "rooms": ["is_public"], @@ -91,7 +70,15 @@ APPEND_ONLY_TABLES = [ ] +end_error_exec_info = None + + class Store(object): + """This object is used to pull out some of the convenience API from the + Storage layer. + + *All* database interactions should go through this object. 
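+
+    Both the SQLite source and the PostgreSQL target are wrapped in
+    instances of this class, so the copying logic below never has to deal
+    with raw DB-API connections directly.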
+ """ def __init__(self, db_pool, engine): self.db_pool = db_pool self.database_engine = engine @@ -130,11 +117,14 @@ class Store(object): continue raise except Exception as e: - logger.debug("[TXN FAIL] {%s}", desc, e) + logger.debug("[TXN FAIL] {%s} %s", desc, e) raise return self.db_pool.runWithConnection(r) + def execute(self, f): + return self.runInteraction(f.__name__, f) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -152,205 +142,435 @@ class Store(object): raise +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + -def chunks(n): - for i in itertools.count(0, n): - yield range(i, i+n) +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + curses.use_default_colors() + curses.curs_set(0) -@defer.inlineCallbacks -def handle_table(table, sqlite_store, postgres_store): - if table in APPEND_ONLY_TABLES: - # It's safe to just carry on inserting. - next_chunk = yield postgres_store._simple_select_one_onecol( - table="port_from_sqlite3", - keyvalues={"table_name": table}, - retcol="rowid", - allow_none=True, + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. 
remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, ) - if next_chunk is None: - yield postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, ) - next_chunk = 0 - else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), ) - yield postgres_store.runInteraction( - "delete_non_append_only", delete_all - ) + if self.finished: + self.stdscr.addstr( + self.rows-1, 0 , + "Press any key to exit...", + ) - next_chunk = 0 + self.stdscr.refresh() + self.last_update = time.time() - logger.info("next_chunk for %s: %d", table, next_chunk) + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() - N = 5000 + def on_prepare_sqlite(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing SQLite database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def on_prepare_postgres(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing PostgreSQL database...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def fetching_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Fetching tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + def preparing_tables(self): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + "Preparing tables...", + curses.A_BOLD, + ) + self.stdscr.refresh() - select = "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) - uni_col_names = UNICODE_COLUMNS.get(table, []) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - bin_col_names = BINARY_COLUMNS.get(table, []) +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) - while True: - def r(txn): - txn.execute(select, (next_chunk, N,)) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] + data = self.tables[table] - return headers, rows + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) - headers, rows = yield sqlite_store.runInteraction("select", r) + def on_prepare_sqlite(self): + print "Preparing SQLite database..." - logger.info("Got %d rows for %s", len(rows), table) + def on_prepare_postgres(self): + print "Preparing PostgreSQL database..." 
- if rows: - uni_cols = [i for i, h in enumerate(headers) if h in uni_col_names] - bool_cols = [i for i, h in enumerate(headers) if h in bool_col_names] - bin_cols = [i for i, h in enumerate(headers) if h in bin_col_names] - next_chunk = rows[-1][0] + 1 + def fetching_tables(self): + print "Fetching tables..." - def conv(j, col): - if j in uni_cols: - col = sqlite_store.database_engine.load_unicode(col) - if j in bool_cols: - return bool(col) + def preparing_tables(self): + print "Preparing tables..." - if j in bin_cols: - if isinstance(col, types.UnicodeType): - col = buffer(col.encode("utf8")) - return col +class Porter(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) - for i, row in enumerate(rows): - rows[i] = tuple( - postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + @defer.inlineCallbacks + def handle_table(self, table): + if table in APPEND_ONLY_TABLES: + # It's safe to just carry on inserting. + next_chunk = yield self.postgres_store._simple_select_one_onecol( + table="port_from_sqlite3", + keyvalues={"table_name": table}, + retcol="rowid", + allow_none=True, + ) - def ins(txn): - postgres_store.insert_many_txn(txn, table, headers[1:], rows) + if next_chunk is None: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - postgres_store._simple_update_one_txn( + next_chunk = 1 + else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + self.postgres_store._simple_insert_txn( txn, table="port_from_sqlite3", - keyvalues={"table_name": table}, - updatevalues={"rowid": next_chunk}, + values={"table_name": table, "rowid": 0} ) + yield self.postgres_store.execute(delete_all) - yield postgres_store.runInteraction("insert_many", ins) - else: + next_chunk = 1 + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute(get_table_size) + postgres_size = yield self.postgres_store.execute(get_table_size) + + if not table_size: return + self.progress.add_table(table, postgres_size, table_size) -def setup_db(db_config, database_engine): - db_conn = database_engine.module.connect( - **{ - k: v for k, v in db_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) + select = ( + "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" 
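+            # NB: a table name cannot be bound as a query parameter, so it
+            # is interpolated here; rowid and the batch size are bound when
+            # the query is executed.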
+ % (table,) + ) - database_engine.prepare_database(db_conn) + bool_col_names = BOOLEAN_COLUMNS.get(table, []) - db_conn.commit() + while True: + def r(txn): + txn.execute(select, (next_chunk, self.batch_size,)) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + return headers, rows -@defer.inlineCallbacks -def main(sqlite_config, postgress_config): - try: - sqlite_db_pool = adbapi.ConnectionPool( - sqlite_config["name"], - **sqlite_config["args"] - ) + headers, rows = yield self.sqlite_store.runInteraction("select", r) - postgres_db_pool = adbapi.ConnectionPool( - postgress_config["name"], - **postgress_config["args"] - ) + if rows: + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + next_chunk = rows[-1][0] + 1 + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) - sqlite_engine = create_engine("sqlite3") - postgres_engine = create_engine("psycopg2") + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) - sqlite_store = Store(sqlite_db_pool, sqlite_engine) - postgres_store = Store(postgres_db_pool, postgres_engine) + self.postgres_store._simple_update_one_txn( + txn, + table="port_from_sqlite3", + keyvalues={"table_name": table}, + updatevalues={"rowid": next_chunk}, + ) - # Step 1. Set up databases. - logger.info("Preparing sqlite database...") - setup_db(sqlite_config, sqlite_engine) + yield self.postgres_store.execute(insert) - logger.info("Preparing postgres database...") - setup_db(postgress_config, postgres_engine) + postgres_size += len(rows) - # Step 2. Get tables. - logger.info("Fetching tables...") - tables = yield sqlite_store._simple_select_onecol( - table="sqlite_master", - keyvalues={ - "type": "table", - }, - retcol="name", + self.progress.update(table, postgres_size) + else: + return + + def setup_db(self, db_config, database_engine): + db_conn = database_engine.module.connect( + **{ + k: v for k, v in db_config.get("args", {}).items() + if not k.startswith("cp_") + } ) - logger.info("Found %d tables", len(tables)) + database_engine.prepare_database(db_conn) - def create_port_table(txn): - txn.execute( - "CREATE TABLE port_from_sqlite3 (" - " table_name varchar(100) NOT NULL UNIQUE," - " rowid bigint NOT NULL" - ")" - ) + db_conn.commit() + @defer.inlineCallbacks + def run(self): try: - yield postgres_store.runInteraction( - "create_port_table", create_port_table + sqlite_db_pool = adbapi.ConnectionPool( + self.sqlite_config["name"], + **self.sqlite_config["args"] ) - except Exception as e: - logger.info("Failed to create port table: %s", e) - - # Process tables. - yield defer.gatherResults( - [ - handle_table(table, sqlite_store, postgres_store) - for table in tables - if table not in ["schema_version", "applied_schema_deltas"] - and not table.startswith("sqlite_") - ], - consumeErrors=True, - ) - except: - logger.exception("") - finally: - reactor.stop() + postgres_db_pool = adbapi.ConnectionPool( + self.postgres_config["name"], + **self.postgres_config["args"] + ) + + sqlite_engine = create_engine("sqlite3") + postgres_engine = create_engine("psycopg2") + + self.sqlite_store = Store(sqlite_db_pool, sqlite_engine) + self.postgres_store = Store(postgres_db_pool, postgres_engine) + + # Step 1. Set up databases. 
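+            # setup_db() opens a direct (non-pooled) connection and runs
+            # the engine's prepare_database() to create any missing schema.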
+ self.progress.on_prepare_sqlite() + self.setup_db(sqlite_config, sqlite_engine) + + self.progress.on_prepare_postgres() + self.setup_db(postgres_config, postgres_engine) + + # Step 2. Get tables. + self.progress.fetching_tables() + sqlite_tables = yield self.sqlite_store._simple_select_onecol( + table="sqlite_master", + keyvalues={ + "type": "table", + }, + retcol="name", + ) + + postgres_tables = yield self.postgres_store._simple_select_onecol( + table="information_schema.tables", + keyvalues={ + "table_schema": "public", + }, + retcol="distinct table_name", + ) + + tables = set(sqlite_tables) & set(postgres_tables) + + self.progress.preparing_tables() + + logger.info("Found %d tables", len(tables)) + + def create_port_table(txn): + txn.execute( + "CREATE TABLE port_from_sqlite3 (" + " table_name varchar(100) NOT NULL UNIQUE," + " rowid bigint NOT NULL" + ")" + ) + + try: + yield self.postgres_store.runInteraction( + "create_port_table", create_port_table + ) + except Exception as e: + logger.info("Failed to create port table: %s", e) + + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(table) + for table in tables + if table not in ["schema_version", "applied_schema_deltas"] + and not table.startswith("sqlite_") + ], + consumeErrors=True, + ) + + self.progress.done() + except: + global end_error_exec_info + end_error_exec_info = sys.exc_info() + logger.exception("") + finally: + reactor.stop() if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("-v", action='store_true') + parser.add_argument("--curses", action='store_true') parser.add_argument("--sqlite-database") parser.add_argument( "--postgres-config", type=argparse.FileType('r'), ) + parser.add_argument("--batch-size", type=int, default=1000) + args = parser.parse_args() - logging.basicConfig(level=logging.INFO) + + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + + if args.curses: + logging_config["filename"] = "port-synapse.log" + + logging.basicConfig(**logging_config) sqlite_config = { "name": "sqlite3", @@ -364,10 +584,28 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) - reactor.callWhenRunning( - main, - sqlite_config=sqlite_config, - postgres_config=postgres_config, - ) + def start(stdscr=None): + if stdscr: + progress = CursesProgress(stdscr) + else: + progress = TerminalProgress() + + porter = Porter( + sqlite_config=sqlite_config, + postgres_config=postgres_config, + progress=progress, + batch_size=args.batch_size, + ) + + reactor.callWhenRunning(porter.run) + + reactor.run() + + if args.curses: + curses.wrapper(start) + else: + start() - reactor.run() + if end_error_exec_info: + exc_type, exc_value, exc_traceback = end_error_exec_info + traceback.print_exception(exc_type, exc_value, exc_traceback) -- cgit 1.5.1 From dfc46c6220b7439262de6fc7034e7a6df99d4e40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 17 Apr 2015 12:46:29 +0100 Subject: PEP8 --- scripts/port_from_sqlite_to_postgres.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index fc1603c1c9..f4b6ed0681 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -268,7 +268,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - 
self.rows-1, 0 , + self.rows-1, 0, "Press any key to exit...", ) @@ -561,7 +561,6 @@ if __name__ == "__main__": args = parser.parse_args() - logging_config = { "level": logging.DEBUG if args.v else logging.INFO, "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" -- cgit 1.5.1 From 4e49f52375acb705b32937115210d055b0e1ce38 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:36:37 +0100 Subject: Don't port over all of the sent_transactions table --- scripts/port_from_sqlite_to_postgres.py | 158 ++++++++++++++++++++++++-------- 1 file changed, 120 insertions(+), 38 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index f4b6ed0681..19e35bf806 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -122,8 +122,8 @@ class Store(object): return self.db_pool.runWithConnection(r) - def execute(self, f): - return self.runInteraction(f.__name__, f) + def execute(self, f, *args, **kwargs): + return self.runInteraction(f.__name__, f, *args, **kwargs) def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( @@ -347,9 +347,118 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) + def convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + @defer.inlineCallbacks def handle_table(self, table): - if table in APPEND_ONLY_TABLES: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + + def get_table_size(txn): + txn.execute("SELECT count(*) FROM %s" % (table,)) + size, = txn.fetchone() + return int(size) + + if table == "sent_transactions": + # This is a big table, and we really only need some of the recent + # data + yield self.postgres_store.execute(delete_all) + + # Only save things from the last day + yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self.convert_rows(table, headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, table, headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" 
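+                # find the first rowid inside the one-day window; the
+                # regular chunked copy resumes from there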
+ " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + table_size = yield self.sqlite_store.execute( + get_sent_table_size + ) + + table_size += inserted_rows + elif table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -365,28 +474,18 @@ class Porter(object): ) next_chunk = 1 + + table_size = yield self.sqlite_store.execute(get_table_size) else: - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - self.postgres_store._simple_insert_txn( - txn, - table="port_from_sqlite3", - values={"table_name": table, "rowid": 0} - ) yield self.postgres_store.execute(delete_all) + self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 0} + ) + table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute(get_table_size) postgres_size = yield self.postgres_store.execute(get_table_size) if not table_size: @@ -399,8 +498,6 @@ class Porter(object): % (table,) ) - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - while True: def r(txn): txn.execute(select, (next_chunk, self.batch_size,)) @@ -412,24 +509,9 @@ class Porter(object): headers, rows = yield self.sqlite_store.runInteraction("select", r) if rows: - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] next_chunk = rows[-1][0] + 1 - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) + self.convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( -- cgit 1.5.1 From 40cbd6b6ee809c23750ea818ebd668be78bbf328 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:15 +0100 Subject: Shuffle progress stuff --- scripts/port_from_sqlite_to_postgres.py | 375 +++++++++++++++----------------- 1 file changed, 174 insertions(+), 201 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 19e35bf806..845e3fcf3b 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -142,207 +142,6 @@ class Store(object): raise -class Progress(object): - """Used to report progress of the port - """ - def __init__(self): - self.tables = {} - - self.start_time = int(time.time()) - - def add_table(self, table, cur, size): - self.tables[table] = { - "start": cur, - "num_done": cur, - "total": size, - "perc": int(cur * 100 / size), - } - - def update(self, table, num_done): - data = self.tables[table] - data["num_done"] = num_done - 
data["perc"] = int(num_done * 100 / data["total"]) - - def done(self): - pass - - -class CursesProgress(Progress): - """Reports progress to a curses window - """ - def __init__(self, stdscr): - self.stdscr = stdscr - - curses.use_default_colors() - curses.curs_set(0) - - curses.init_pair(1, curses.COLOR_RED, -1) - curses.init_pair(2, curses.COLOR_GREEN, -1) - - self.last_update = 0 - - self.finished = False - - super(CursesProgress, self).__init__() - - def update(self, table, num_done): - super(CursesProgress, self).update(table, num_done) - - self.render() - - def render(self, force=False): - now = time.time() - - if not force and now - self.last_update < 0.2: - # reactor.callLater(1, self.render) - return - - self.stdscr.clear() - - rows, cols = self.stdscr.getmaxyx() - - duration = int(now) - int(self.start_time) - - minutes, seconds = divmod(duration, 60) - duration_str = '%02dm %02ds' % (minutes, seconds,) - - if self.finished: - status = "Time spent: %s (Done!)" % (duration_str,) - else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc - est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) - else: - est_remaining_str = "Unknown" - status = ( - "Time spent: %s (est. remaining: %s)" - % (duration_str, est_remaining_str,) - ) - - self.stdscr.addstr( - 0, 0, - status, - curses.A_BOLD, - ) - - max_len = max([len(t) for t in self.tables.keys()]) - - left_margin = 5 - middle_space = 1 - - items = self.tables.items() - items.sort( - key=lambda i: (i[1]["perc"], i[0]), - ) - - for i, (table, data) in enumerate(items): - if i + 2 >= rows: - break - - perc = data["perc"] - - color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) - - self.stdscr.addstr( - i+2, left_margin + max_len - len(table), - table, - curses.A_BOLD | color, - ) - - size = 20 - - progress = "[%s%s]" % ( - "#" * int(perc*size/100), - " " * (size - int(perc*size/100)), - ) - - self.stdscr.addstr( - i+2, left_margin + max_len + middle_space, - "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), - ) - - if self.finished: - self.stdscr.addstr( - self.rows-1, 0, - "Press any key to exit...", - ) - - self.stdscr.refresh() - self.last_update = time.time() - - def done(self): - self.finished = True - self.render(True) - self.stdscr.getch() - - def on_prepare_sqlite(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing SQLite database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def on_prepare_postgres(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing PostgreSQL database...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def fetching_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Fetching tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - def preparing_tables(self): - self.stdscr.clear() - self.stdscr.addstr( - 0, 0, - "Preparing tables...", - curses.A_BOLD, - ) - self.stdscr.refresh() - - -class TerminalProgress(Progress): - """Just prints progress to the terminal - """ - def update(self, table, num_done): - super(TerminalProgress, self).update(table, num_done) - - data = self.tables[table] - - print "%s: %d%% (%d/%d)" % ( - table, data["perc"], - data["num_done"], data["total"], - ) - - def on_prepare_sqlite(self): - print "Preparing SQLite database..." 
- - def on_prepare_postgres(self): - print "Preparing PostgreSQL database..." - - def fetching_tables(self): - print "Fetching tables..." - - def preparing_tables(self): - print "Preparing tables..." - - class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -630,6 +429,180 @@ class Porter(object): reactor.stop() +############################################## +###### The following is simply UI stuff ###### +############################################## + + +class Progress(object): + """Used to report progress of the port + """ + def __init__(self): + self.tables = {} + + self.start_time = int(time.time()) + + def add_table(self, table, cur, size): + self.tables[table] = { + "start": cur, + "num_done": cur, + "total": size, + "perc": int(cur * 100 / size), + } + + def update(self, table, num_done): + data = self.tables[table] + data["num_done"] = num_done + data["perc"] = int(num_done * 100 / data["total"]) + + def done(self): + pass + + +class CursesProgress(Progress): + """Reports progress to a curses window + """ + def __init__(self, stdscr): + self.stdscr = stdscr + + curses.use_default_colors() + curses.curs_set(0) + + curses.init_pair(1, curses.COLOR_RED, -1) + curses.init_pair(2, curses.COLOR_GREEN, -1) + + self.last_update = 0 + + self.finished = False + + super(CursesProgress, self).__init__() + + def update(self, table, num_done): + super(CursesProgress, self).update(table, num_done) + + self.render() + + def render(self, force=False): + now = time.time() + + if not force and now - self.last_update < 0.2: + # reactor.callLater(1, self.render) + return + + self.stdscr.clear() + + rows, cols = self.stdscr.getmaxyx() + + duration = int(now) - int(self.start_time) + + minutes, seconds = divmod(duration, 60) + duration_str = '%02dm %02ds' % (minutes, seconds,) + + if self.finished: + status = "Time spent: %s (Done!)" % (duration_str,) + else: + min_perc = min( + (v["num_done"] - v["start"]) * 100. / (v["total"] - v["start"]) + if v["total"] - v["start"] else 100 + for v in self.tables.values() + ) + if min_perc > 0: + est_remaining = (int(now) - self.start_time) * 100 / min_perc + est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) + else: + est_remaining_str = "Unknown" + status = ( + "Time spent: %s (est. 
remaining: %s)" + % (duration_str, est_remaining_str,) + ) + + self.stdscr.addstr( + 0, 0, + status, + curses.A_BOLD, + ) + + max_len = max([len(t) for t in self.tables.keys()]) + + left_margin = 5 + middle_space = 1 + + items = self.tables.items() + items.sort( + key=lambda i: (i[1]["perc"], i[0]), + ) + + for i, (table, data) in enumerate(items): + if i + 2 >= rows: + break + + perc = data["perc"] + + color = curses.color_pair(2) if perc == 100 else curses.color_pair(1) + + self.stdscr.addstr( + i+2, left_margin + max_len - len(table), + table, + curses.A_BOLD | color, + ) + + size = 20 + + progress = "[%s%s]" % ( + "#" * int(perc*size/100), + " " * (size - int(perc*size/100)), + ) + + self.stdscr.addstr( + i+2, left_margin + max_len + middle_space, + "%s %3d%% (%d/%d)" % (progress, perc, data["num_done"], data["total"]), + ) + + if self.finished: + self.stdscr.addstr( + self.rows-1, 0, + "Press any key to exit...", + ) + + self.stdscr.refresh() + self.last_update = time.time() + + def done(self): + self.finished = True + self.render(True) + self.stdscr.getch() + + def set_state(self, state): + self.stdscr.clear() + self.stdscr.addstr( + 0, 0, + state + "...", + curses.A_BOLD, + ) + self.stdscr.refresh() + + +class TerminalProgress(Progress): + """Just prints progress to the terminal + """ + def update(self, table, num_done): + super(TerminalProgress, self).update(table, num_done) + + data = self.tables[table] + + print "%s: %d%% (%d/%d)" % ( + table, data["perc"], + data["num_done"], data["total"], + ) + + def set_state(self, state): + print state + "..." + + +############################################## +############################################## + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-v", action='store_true') -- cgit 1.5.1 From 5b8b1a43bd392559b1960c4a79f5dcdde0d37f23 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 27 Apr 2015 17:53:40 +0100 Subject: Split setuping up and processing of tables --- scripts/port_from_sqlite_to_postgres.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 845e3fcf3b..1e7ac072f1 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -168,7 +168,7 @@ class Porter(object): ) @defer.inlineCallbacks - def handle_table(self, table): + def setup_table(self, table): def delete_all(txn): txn.execute( "DELETE FROM port_from_sqlite3 WHERE table_name = %s", @@ -287,6 +287,10 @@ class Porter(object): postgres_size = yield self.postgres_store.execute(get_table_size) + defer.returnValue((table, postgres_size, table_size, next_chunk)) + + @defer.inlineCallbacks + def handle_table(self, table, postgres_size, table_size, next_chunk): if not table_size: return @@ -364,14 +368,14 @@ class Porter(object): self.postgres_store = Store(postgres_db_pool, postgres_engine) # Step 1. Set up databases. - self.progress.on_prepare_sqlite() + self.progress.set_state("Preparing SQLite3") self.setup_db(sqlite_config, sqlite_engine) - self.progress.on_prepare_postgres() + self.progress.set_state("Preparing PostgreSQL") self.setup_db(postgres_config, postgres_engine) # Step 2. Get tables. 
- self.progress.fetching_tables() + self.progress.set_state("Fetching tables") sqlite_tables = yield self.sqlite_store._simple_select_onecol( table="sqlite_master", keyvalues={ @@ -390,7 +394,7 @@ class Porter(object): tables = set(sqlite_tables) & set(postgres_tables) - self.progress.preparing_tables() + self.progress.set_state("Creating tables") logger.info("Found %d tables", len(tables)) @@ -409,10 +413,12 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - # Process tables. - yield defer.gatherResults( + self.progress.set_state("Preparing tables") + + # Set up tables. + setup_res = yield defer.gatherResults( [ - self.handle_table(table) + self.setup_table(table) for table in tables if table not in ["schema_version", "applied_schema_deltas"] and not table.startswith("sqlite_") @@ -420,6 +426,15 @@ class Porter(object): consumeErrors=True, ) + # Process tables. + yield defer.gatherResults( + [ + self.handle_table(*res) + for res in setup_res + ], + consumeErrors=True, + ) + self.progress.done() except: global end_error_exec_info -- cgit 1.5.1 From 1ccaea5b92e476ebb38a246ea933258a7199bbae Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:34:06 +0100 Subject: Typo in port script --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e7ac072f1..56e149c477 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -575,7 +575,7 @@ class CursesProgress(Progress): if self.finished: self.stdscr.addstr( - self.rows-1, 0, + rows-1, 0, "Press any key to exit...", ) -- cgit 1.5.1 From ce8b0b2868af32fac46a8414393760d326b764b7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 10:45:05 +0100 Subject: Remove accidentally committed debug hardcode hack --- scripts/port_from_sqlite_to_postgres.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 56e149c477..596be75c49 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -184,10 +184,11 @@ class Porter(object): if table == "sent_transactions": # This is a big table, and we really only need some of the recent # data + yield self.postgres_store.execute(delete_all) # Only save things from the last day - yesterday = 1429114568820 #int(time.time()*1000) - 86400000 + yesterday = int(time.time()*1000) - 86400000 # And save the max transaction id from each destination select = ( -- cgit 1.5.1 From 4a13ae72019655ad0531f93af18382c196fb362d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 11:16:44 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 290 ++++++++++++++++++-------------- 1 file changed, 167 insertions(+), 123 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 596be75c49..1e52d82fe0 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -125,6 +125,12 @@ class Store(object): def execute(self, f, *args, **kwargs): return self.runInteraction(f.__name__, f, 
*args, **kwargs) + def execute_sql(self, sql, *args): + def r(txn): + txn.execute(sql, args) + return txn.fetchall() + return self.runInteraction("execute_sql", r) + def insert_many_txn(self, txn, table, headers, rows): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, @@ -146,119 +152,9 @@ class Porter(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) - def convert_rows(self, table, headers, rows): - bool_col_names = BOOLEAN_COLUMNS.get(table, []) - - bool_cols = [ - i for i, h in enumerate(headers) if h in bool_col_names - ] - - def conv(j, col): - if j in bool_cols: - return bool(col) - return col - - for i, row in enumerate(rows): - rows[i] = tuple( - self.postgres_store.database_engine.encode_parameter( - conv(j, col) - ) - for j, col in enumerate(row) - if j > 0 - ) - @defer.inlineCallbacks def setup_table(self, table): - def delete_all(txn): - txn.execute( - "DELETE FROM port_from_sqlite3 WHERE table_name = %s", - (table,) - ) - txn.execute("TRUNCATE %s CASCADE" % (table,)) - - def get_table_size(txn): - txn.execute("SELECT count(*) FROM %s" % (table,)) - size, = txn.fetchone() - return int(size) - - if table == "sent_transactions": - # This is a big table, and we really only need some of the recent - # data - - yield self.postgres_store.execute(delete_all) - - # Only save things from the last day - yesterday = int(time.time()*1000) - 86400000 - - # And save the max transaction id from each destination - select = ( - "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" - "SELECT max(rowid) FROM sent_transactions" - " GROUP BY destination" - ")" - ) - - def r(txn): - txn.execute(select) - rows = txn.fetchall() - headers = [column[0] for column in txn.description] - - ts_ind = headers.index('ts') - - return headers, [r for r in rows if r[ts_ind] < yesterday] - - headers, rows = yield self.sqlite_store.runInteraction( - "select", r, - ) - - self.convert_rows(table, headers, rows) - - inserted_rows = len(rows) - max_inserted_rowid = max(r[0] for r in rows) - - def insert(txn): - self.postgres_store.insert_many_txn( - txn, table, headers[1:], rows - ) - - yield self.postgres_store.execute(insert) - - def get_start_id(txn): - txn.execute( - "SELECT rowid FROM sent_transactions WHERE ts >= ?" - " ORDER BY rowid ASC LIMIT 1", - (yesterday,) - ) - - rows = txn.fetchall() - if rows: - return rows[0][0] - else: - return 1 - - next_chunk = yield self.sqlite_store.execute(get_start_id) - next_chunk = max(max_inserted_rowid + 1, next_chunk) - - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": next_chunk} - ) - - def get_sent_table_size(txn): - txn.execute( - "SELECT count(*) FROM sent_transactions" - " WHERE ts >= ?", - (yesterday,) - ) - size, = txn.fetchone() - return int(size) - - table_size = yield self.sqlite_store.execute( - get_sent_table_size - ) - - table_size += inserted_rows - elif table in APPEND_ONLY_TABLES: + if table in APPEND_ONLY_TABLES: # It's safe to just carry on inserting. 
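            # (These tables are never updated or deleted from, so the
            # rowid recorded in port_from_sqlite3 is a safe high-water
            # mark to resume from.)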
next_chunk = yield self.postgres_store._simple_select_one_onecol( table="port_from_sqlite3", @@ -267,28 +163,47 @@ class Porter(object): allow_none=True, ) + total_to_port = None if next_chunk is None: - yield self.postgres_store._simple_insert( - table="port_from_sqlite3", - values={"table_name": table, "rowid": 1} - ) + if table == "sent_transactions": + next_chunk, already_ported, total_to_port = ( + yield self._setup_sent_transactions() + ) + else: + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": table, "rowid": 1} + ) - next_chunk = 1 + next_chunk = 1 + already_ported = 0 - table_size = yield self.sqlite_store.execute(get_table_size) + if total_to_port is None: + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) else: + def delete_all(txn): + txn.execute( + "DELETE FROM port_from_sqlite3 WHERE table_name = %s", + (table,) + ) + txn.execute("TRUNCATE %s CASCADE" % (table,)) + yield self.postgres_store.execute(delete_all) - self.postgres_store._simple_insert( + + yield self.postgres_store._simple_insert( table="port_from_sqlite3", values={"table_name": table, "rowid": 0} ) - table_size = yield self.sqlite_store.execute(get_table_size) next_chunk = 1 - postgres_size = yield self.postgres_store.execute(get_table_size) + already_ported, total_to_port = yield self._get_total_count_to_port( + table, next_chunk + ) - defer.returnValue((table, postgres_size, table_size, next_chunk)) + defer.returnValue((table, already_ported, total_to_port, next_chunk)) @defer.inlineCallbacks def handle_table(self, table, postgres_size, table_size, next_chunk): @@ -315,7 +230,7 @@ class Porter(object): if rows: next_chunk = rows[-1][0] + 1 - self.convert_rows(table, headers, rows) + self._convert_rows(table, headers, rows) def insert(txn): self.postgres_store.insert_many_txn( @@ -414,7 +329,7 @@ class Porter(object): except Exception as e: logger.info("Failed to create port table: %s", e) - self.progress.set_state("Preparing tables") + self.progress.set_state("Setting up") # Set up tables. 
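        # setup_table() returns a (table, already_ported, total_to_port,
        # next_chunk) tuple for each table; handle_table() consumes these
        # to copy the remaining rows in batches.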
setup_res = yield defer.gatherResults( @@ -444,6 +359,135 @@ class Porter(object): finally: reactor.stop() + def _convert_rows(self, table, headers, rows): + bool_col_names = BOOLEAN_COLUMNS.get(table, []) + + bool_cols = [ + i for i, h in enumerate(headers) if h in bool_col_names + ] + + def conv(j, col): + if j in bool_cols: + return bool(col) + return col + + for i, row in enumerate(rows): + rows[i] = tuple( + self.postgres_store.database_engine.encode_parameter( + conv(j, col) + ) + for j, col in enumerate(row) + if j > 0 + ) + + @defer.inlineCallbacks + def _setup_sent_transactions(self): + # Only save things from the last day + yesterday = int(time.time()*1000) - 86400000 + + # And save the max transaction id from each destination + select = ( + "SELECT rowid, * FROM sent_transactions WHERE rowid IN (" + "SELECT max(rowid) FROM sent_transactions" + " GROUP BY destination" + ")" + ) + + def r(txn): + txn.execute(select) + rows = txn.fetchall() + headers = [column[0] for column in txn.description] + + ts_ind = headers.index('ts') + + return headers, [r for r in rows if r[ts_ind] < yesterday] + + headers, rows = yield self.sqlite_store.runInteraction( + "select", r, + ) + + self._convert_rows("sent_transactions", headers, rows) + + inserted_rows = len(rows) + max_inserted_rowid = max(r[0] for r in rows) + + def insert(txn): + self.postgres_store.insert_many_txn( + txn, "sent_transactions", headers[1:], rows + ) + + yield self.postgres_store.execute(insert) + + def get_start_id(txn): + txn.execute( + "SELECT rowid FROM sent_transactions WHERE ts >= ?" + " ORDER BY rowid ASC LIMIT 1", + (yesterday,) + ) + + rows = txn.fetchall() + if rows: + return rows[0][0] + else: + return 1 + + next_chunk = yield self.sqlite_store.execute(get_start_id) + next_chunk = max(max_inserted_rowid + 1, next_chunk) + + yield self.postgres_store._simple_insert( + table="port_from_sqlite3", + values={"table_name": "sent_transactions", "rowid": next_chunk} + ) + + def get_sent_table_size(txn): + txn.execute( + "SELECT count(*) FROM sent_transactions" + " WHERE ts >= ?", + (yesterday,) + ) + size, = txn.fetchone() + return int(size) + + remaining_count = yield self.sqlite_store.execute( + get_sent_table_size + ) + + total_count = remaining_count + inserted_rows + + defer.returnValue((next_chunk, remaining_count, total_count)) + + @defer.inlineCallbacks + def _get_remaining_count_to_port(self, table, next_chunk): + rows = yield self.sqlite_store.execute_sql( + "SELECT count(*) FROM %s WHERE rowid >= ?" 
% (table,), + next_chunk, + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_already_ported_count(self, table): + rows = yield self.postgres_store.execute_sql( + "SELECT count(*) FROM %s" % (table,), + ) + + defer.returnValue(rows[0][0]) + + @defer.inlineCallbacks + def _get_total_count_to_port(self, table, next_chunk): + remaining, done = yield defer.gatherResults( + [ + self._get_remaining_count_to_port(table, next_chunk), + self._get_already_ported_count(table), + ], + consumeErrors=True, + ) + + remaining = int(remaining) if remaining else 0 + done = int(done) if done else 0 + + defer.returnValue((done, remaining + done)) + ############################################## ###### The following is simply UI stuff ###### -- cgit 1.5.1 From af27b84ff769ceb9fe1aaa10c9435586be4c6867 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:40:04 +0100 Subject: Correctly handle total/remaining counts in the presence of sent_transasctions table --- scripts/port_from_sqlite_to_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 1e52d82fe0..a40a93ca76 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -454,7 +454,7 @@ class Porter(object): total_count = remaining_count + inserted_rows - defer.returnValue((next_chunk, remaining_count, total_count)) + defer.returnValue((next_chunk, inserted_rows, total_count)) @defer.inlineCallbacks def _get_remaining_count_to_port(self, table, next_chunk): -- cgit 1.5.1 From a1d4813a54ad711d6db7da51f301412e3f139346 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 12:55:29 +0100 Subject: Quickly fix dodgy est. time remaining --- scripts/port_from_sqlite_to_postgres.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index a40a93ca76..b4ee860a92 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -535,11 +535,20 @@ class CursesProgress(Progress): self.finished = False + self.total_processed = 0 + self.total_remaining = 0 + super(CursesProgress, self).__init__() def update(self, table, num_done): super(CursesProgress, self).update(table, num_done) + self.total_processed = 0 + self.total_remaining = 0 + for table, data in self.tables.items(): + self.total_processed += data["num_done"] - data["start"] + self.total_remaining += data["total"] - data["num_done"] + self.render() def render(self, force=False): @@ -561,13 +570,11 @@ class CursesProgress(Progress): if self.finished: status = "Time spent: %s (Done!)" % (duration_str,) else: - min_perc = min( - (v["num_done"] - v["start"]) * 100. 
/ (v["total"] - v["start"]) - if v["total"] - v["start"] else 100 - for v in self.tables.values() - ) - if min_perc > 0: - est_remaining = (int(now) - self.start_time) * 100 / min_perc + + if self.total_processed > 0: + left = float(self.total_remaining) / self.total_processed + + est_remaining = (int(now) - self.start_time) * left est_remaining_str = '%02dm %02ds remaining' % divmod(est_remaining, 60) else: est_remaining_str = "Unknown" -- cgit 1.5.1 From f41a9a1ffcb652c045dea5f4108984d287c6de17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 Apr 2015 17:42:36 +0100 Subject: Add better help to scripts/port_from_sqlite_to_postgres.py --- scripts/port_from_sqlite_to_postgres.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index b4ee860a92..3296f1f54f 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -671,15 +671,30 @@ class TerminalProgress(Progress): if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description="A script to port an existing synapse SQLite database to" + " a new PostgreSQL database." + ) parser.add_argument("-v", action='store_true') - parser.add_argument("--curses", action='store_true') - parser.add_argument("--sqlite-database") parser.add_argument( - "--postgres-config", type=argparse.FileType('r'), + "--sqlite-database", required=True, + help="The snapshot of the SQLite database file. This must not be" + " currently used by a running synapse server" + ) + parser.add_argument( + "--postgres-config", type=argparse.FileType('r'), required=True, + help="The database config file for the PostgreSQL database" + ) + parser.add_argument( + "--curses", action='store_true', + help="display a curses based progress UI" ) - parser.add_argument("--batch-size", type=int, default=1000) + parser.add_argument( + "--batch-size", type=int, default=1000, + help="The number of rows to select from the SQLite table each" + " iteration [default=1000]", + ) args = parser.parse_args() @@ -705,6 +720,13 @@ if __name__ == "__main__": postgres_config = yaml.safe_load(args.postgres_config) + if "name" not in postgres_config: + sys.stderr.write("Malformed database config: no 'name'") + sys.exit(2) + if postgres_config["name"] != "psycopg2": + sys.stderr.write("Database must use 'psycopg2' connector.") + sys.exit(3) + def start(stdscr=None): if stdscr: progress = CursesProgress(stdscr) -- cgit 1.5.1 From 204132a998ec3be7069a9f2dada323bcdb217908 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Apr 2015 11:42:28 +0100 Subject: Check that postgres database has correct charset set --- scripts/port_from_sqlite_to_postgres.py | 4 ++++ synapse/app/homeserver.py | 11 ++++++++++- synapse/storage/engines/__init__.py | 1 + synapse/storage/engines/_base.py | 18 ++++++++++++++++++ synapse/storage/engines/postgres.py | 11 +++++++++++ synapse/storage/engines/sqlite3.py | 3 +++ 6 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/engines/_base.py (limited to 'scripts/port_from_sqlite_to_postgres.py') diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py index 3296f1f54f..da760af087 100644 --- a/scripts/port_from_sqlite_to_postgres.py +++ b/scripts/port_from_sqlite_to_postgres.py @@ -283,6 +283,10 @@ class Porter(object): self.sqlite_store 
From f41a9a1ffcb652c045dea5f4108984d287c6de17 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 28 Apr 2015 17:42:36 +0100
Subject: Add better help to scripts/port_from_sqlite_to_postgres.py

---
 scripts/port_from_sqlite_to_postgres.py | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

(limited to 'scripts/port_from_sqlite_to_postgres.py')

diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py
index b4ee860a92..3296f1f54f 100644
--- a/scripts/port_from_sqlite_to_postgres.py
+++ b/scripts/port_from_sqlite_to_postgres.py
@@ -671,15 +671,30 @@ class TerminalProgress(Progress):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="A script to port an existing synapse SQLite database to"
+                    " a new PostgreSQL database."
+    )
     parser.add_argument("-v", action='store_true')
-    parser.add_argument("--curses", action='store_true')
-    parser.add_argument("--sqlite-database")
     parser.add_argument(
-        "--postgres-config", type=argparse.FileType('r'),
+        "--sqlite-database", required=True,
+        help="The snapshot of the SQLite database file. This must not be"
+             " currently used by a running synapse server"
+    )
+    parser.add_argument(
+        "--postgres-config", type=argparse.FileType('r'), required=True,
+        help="The database config file for the PostgreSQL database"
+    )
+    parser.add_argument(
+        "--curses", action='store_true',
+        help="display a curses based progress UI"
     )
-    parser.add_argument("--batch-size", type=int, default=1000)
+    parser.add_argument(
+        "--batch-size", type=int, default=1000,
+        help="The number of rows to select from the SQLite table each"
+             " iteration [default=1000]",
+    )
 
     args = parser.parse_args()
 
@@ -705,6 +720,13 @@ if __name__ == "__main__":
 
     postgres_config = yaml.safe_load(args.postgres_config)
 
+    if "name" not in postgres_config:
+        sys.stderr.write("Malformed database config: no 'name'")
+        sys.exit(2)
+    if postgres_config["name"] != "psycopg2":
+        sys.stderr.write("Database must use 'psycopg2' connector.")
+        sys.exit(3)
+
     def start(stdscr=None):
         if stdscr:
             progress = CursesProgress(stdscr)
-- cgit 1.5.1
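With the required flags in place, a typical invocation (file names illustrative) is: python scripts/port_from_sqlite_to_postgres.py --sqlite-database homeserver.db.snapshot --postgres-config database_config.yaml --curses. The --sqlite-database help is worded the way it is because a snapshot is genuinely needed: a running server would keep writing to the live SQLite file mid-port.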
From 204132a998ec3be7069a9f2dada323bcdb217908 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 29 Apr 2015 11:42:28 +0100
Subject: Check that postgres database has correct charset set

---
 scripts/port_from_sqlite_to_postgres.py |  4 ++++
 synapse/app/homeserver.py               | 11 ++++++++++-
 synapse/storage/engines/__init__.py     |  1 +
 synapse/storage/engines/_base.py        | 18 ++++++++++++++++++
 synapse/storage/engines/postgres.py     | 11 +++++++++++
 synapse/storage/engines/sqlite3.py      |  3 +++
 6 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 synapse/storage/engines/_base.py

(limited to 'scripts/port_from_sqlite_to_postgres.py')

diff --git a/scripts/port_from_sqlite_to_postgres.py b/scripts/port_from_sqlite_to_postgres.py
index 3296f1f54f..da760af087 100644
--- a/scripts/port_from_sqlite_to_postgres.py
+++ b/scripts/port_from_sqlite_to_postgres.py
@@ -283,6 +283,10 @@ class Porter(object):
         self.sqlite_store = Store(sqlite_db_pool, sqlite_engine)
         self.postgres_store = Store(postgres_db_pool, postgres_engine)
 
+        yield self.postgres_store.execute(
+            postgres_engine.check_database
+        )
+
         # Step 1. Set up databases.
         self.progress.set_state("Preparing SQLite3")
         self.setup_db(sqlite_config, sqlite_engine)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 694a0125ad..cbd295c0a5 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -17,7 +17,7 @@ import sys
 sys.dont_write_bytecode = True
 
-from synapse.storage.engines import create_engine
+from synapse.storage.engines import create_engine, IncorrectDatabaseSetup
 from synapse.storage import (
     are_all_users_on_domain, UpgradeDatabaseException,
 )
@@ -255,6 +255,15 @@ class SynapseHomeServer(HomeServer):
             )
             sys.exit(1)
 
+        try:
+            database_engine.check_database(db_conn.cursor())
+        except IncorrectDatabaseSetup as e:
+            sys.stderr.write("*" * len(e.message) + '\n')
+            sys.stderr.write(e.message)
+            sys.stderr.write('\n')
+            sys.stderr.write("*" * len(e.message) + '\n')
+            sys.exit(2)
+
 
 def get_version_string():
     try:
diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py
index eb76df7f01..ab070f3428 100644
--- a/synapse/storage/engines/__init__.py
+++ b/synapse/storage/engines/__init__.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from ._base import IncorrectDatabaseSetup
 from .postgres import PostgresEngine
 from .sqlite3 import Sqlite3Engine
 
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
new file mode 100644
index 0000000000..0b549d314b
--- /dev/null
+++ b/synapse/storage/engines/_base.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class IncorrectDatabaseSetup(RuntimeError):
+    pass
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index b8cca9b187..ca858daee9 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -15,12 +15,23 @@
 
 from synapse.storage import prepare_database
 
+from ._base import IncorrectDatabaseSetup
+
 
 class PostgresEngine(object):
     def __init__(self, database_module):
         self.module = database_module
         self.module.extensions.register_type(self.module.extensions.UNICODE)
 
+    def check_database(self, txn):
+        txn.execute("SHOW SERVER_ENCODING")
+        rows = txn.fetchall()
+        if rows and rows[0][0] != "UTF8":
+            raise IncorrectDatabaseSetup(
+                "Database has incorrect encoding: '%s' instead of 'UTF8'"
+                % (rows[0][0],)
+            )
+
     def convert_param_style(self, sql):
         return sql.replace("?", "%s")
 
diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py
index f62d5d1205..7b49157cbd 100644
--- a/synapse/storage/engines/sqlite3.py
+++ b/synapse/storage/engines/sqlite3.py
@@ -20,6 +20,9 @@ class Sqlite3Engine(object):
     def __init__(self, database_module):
         self.module = database_module
 
+    def check_database(self, txn):
+        pass
+
     def convert_param_style(self, sql):
         return sql
 
-- cgit 1.5.1
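The new check_database hook is the first thing the porter runs against PostgreSQL, so a mis-created database fails fast instead of corrupting data mid-port. A standalone sketch of the same probe, assuming psycopg2 and an illustrative DSN (plain RuntimeError stands in for the IncorrectDatabaseSetup raised above):

    import psycopg2

    conn = psycopg2.connect("dbname=synapse user=synapse")  # illustrative DSN
    cur = conn.cursor()
    cur.execute("SHOW SERVER_ENCODING")
    encoding = cur.fetchone()[0]
    if encoding != "UTF8":
        # Same message the engine raises above.
        raise RuntimeError(
            "Database has incorrect encoding: '%s' instead of 'UTF8'" % (encoding,)
        )

A database created with createdb --encoding=UTF8 --template=template0 synapse should pass this check.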
From 119e5d7702a1de0b196a374b53b646c06ee753e5 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 6 May 2015 11:41:19 +0100
Subject: Separate scripts/ into scripts/ and scripts-dev/, where scripts/* are automatically added to the package

---
 register_new_matrix_user                | 153 --------------------------------
 scripts/check_auth.py                   |  65 --------------
 scripts/check_event_hash.py             |  50 -----------
 scripts/check_signature.py              |  73 ---------------
 scripts/copyrighter-sql.pl              |  33 -------
 scripts/copyrighter.pl                  |  33 -------
 scripts/database-save.sh                |  16 ----
 scripts/federation_client.py            | 146 ------------------------------
 scripts/hash_history.py                 |  69 --------------
 scripts/make_identicons.pl              |  39 --------
 scripts/nuke-room-from-db.sh            |  24 -----
 scripts/port_from_sqlite_to_postgres.py |   1 +
 scripts/sphinx_api_docs.sh              |   1 -
 scripts/upgrade_db_to_v0.6.0.py         |   2 +-
 setup.py                                |   3 +-
 15 files changed, 4 insertions(+), 704 deletions(-)
 delete mode 100755 register_new_matrix_user
 delete mode 100644 scripts/check_auth.py
 delete mode 100644 scripts/check_event_hash.py
 delete mode 100644 scripts/check_signature.py
 delete mode 100755 scripts/copyrighter-sql.pl
 delete mode 100755 scripts/copyrighter.pl
 delete mode 100755 scripts/database-save.sh
 delete mode 100644 scripts/federation_client.py
 delete mode 100644 scripts/hash_history.py
 delete mode 100755 scripts/make_identicons.pl
 delete mode 100755 scripts/nuke-room-from-db.sh
 mode change 100644 => 100755 scripts/port_from_sqlite_to_postgres.py
 delete mode 100644 scripts/sphinx_api_docs.sh
 mode change 100644 => 100755 scripts/upgrade_db_to_v0.6.0.py

(limited to 'scripts/port_from_sqlite_to_postgres.py')

diff --git a/register_new_matrix_user b/register_new_matrix_user
deleted file mode 100755
index 0ca83795a3..0000000000
--- a/register_new_matrix_user
+++ /dev/null
@@ -1,153 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright 2015 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import argparse
-import getpass
-import hashlib
-import hmac
-import json
-import sys
-import urllib2
-import yaml
-
-
-def request_registration(user, password, server_location, shared_secret):
-    mac = hmac.new(
-        key=shared_secret,
-        msg=user,
-        digestmod=hashlib.sha1,
-    ).hexdigest()
-
-    data = {
-        "username": user,
-        "password": password,
-        "mac": mac,
-    }
-
-    server_location = server_location.rstrip("/")
-
-    print "Sending registration request..."
-
-    req = urllib2.Request(
-        "%s/_matrix/client/v2_alpha/register" % (server_location,),
-        data=json.dumps(data),
-        headers={'Content-Type': 'application/json'}
-    )
-    try:
-        if sys.version_info[:3] >= (2, 7, 9):
-            # As of version 2.7.9, urllib2 now checks SSL certs
-            import ssl
-            f = urllib2.urlopen(req, context=ssl.SSLContext(ssl.PROTOCOL_SSLv23))
-        else:
-            f = urllib2.urlopen(req)
-        f.read()
-        f.close()
-        print "Success."
-    except urllib2.HTTPError as e:
-        print "ERROR! Received %d %s" % (e.code, e.reason,)
-        if 400 <= e.code < 500:
-            if e.info().type == "application/json":
-                resp = json.load(e)
-                if "error" in resp:
-                    print resp["error"]
-        sys.exit(1)
-
-
-def register_new_user(user, password, server_location, shared_secret):
-    if not user:
-        try:
-            default_user = getpass.getuser()
-        except:
-            default_user = None
-
-        if default_user:
-            user = raw_input("New user localpart [%s]: " % (default_user,))
-            if not user:
-                user = default_user
-        else:
-            user = raw_input("New user localpart: ")
-
-    if not user:
-        print "Invalid user name"
-        sys.exit(1)
-
-    if not password:
-        password = getpass.getpass("Password: ")
-
-        if not password:
-            print "Password cannot be blank."
-            sys.exit(1)
-
-        confirm_password = getpass.getpass("Confirm password: ")
-
-        if password != confirm_password:
-            print "Passwords do not match"
-            sys.exit(1)
-
-    request_registration(user, password, server_location, shared_secret)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Used to register new users with a given home server when"
-                    " registration has been disabled. The home server must be"
-                    " configured with the 'registration_shared_secret' option"
-                    " set.",
-    )
-    parser.add_argument(
-        "-u", "--user",
-        default=None,
-        help="Local part of the new user. Will prompt if omitted.",
-    )
-    parser.add_argument(
-        "-p", "--password",
-        default=None,
-        help="New password for user. Will prompt if omitted.",
-    )
-
-    group = parser.add_mutually_exclusive_group(required=True)
-    group.add_argument(
-        "-c", "--config",
-        type=argparse.FileType('r'),
-        help="Path to server config file. Used to read in shared secret.",
-    )
-
-    group.add_argument(
-        "-k", "--shared-secret",
-        help="Shared secret as defined in server config file.",
-    )
-
-    parser.add_argument(
-        "server_url",
-        default="https://localhost:8448",
-        nargs='?',
-        help="URL to use to talk to the home server. Defaults to"
-             " 'https://localhost:8448'.",
-    )
-
-    args = parser.parse_args()
-
-    if "config" in args and args.config:
-        config = yaml.safe_load(args.config)
-        secret = config.get("registration_shared_secret", None)
-        if not secret:
-            print "No 'registration_shared_secret' defined in config."
-            sys.exit(1)
-    else:
-        secret = args.shared_secret
-
-    register_new_user(args.user, args.password, args.server_url, secret)
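The relocated register_new_matrix_user script authorises registration against a server that has registration disabled by sending an HMAC-SHA1 of the username, keyed with the server's registration_shared_secret. The core of request_registration above reduces to this sketch (secret and username illustrative):

    import hashlib
    import hmac

    shared_secret = b"the registration_shared_secret from the config"  # illustrative
    mac = hmac.new(
        key=shared_secret,
        msg=b"newuser",
        digestmod=hashlib.sha1,
    ).hexdigest()
    # The hex digest is POSTed as "mac" alongside the username and password
    # to /_matrix/client/v2_alpha/register.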
Defaults to " - " 'https://localhost:8448'.", - ) - - args = parser.parse_args() - - if "config" in args and args.config: - config = yaml.safe_load(args.config) - secret = config.get("registration_shared_secret", None) - if not secret: - print "No 'registration_shared_secret' defined in config." - sys.exit(1) - else: - secret = args.shared_secret - - register_new_user(args.user, args.password, args.server_url, secret) diff --git a/scripts/check_auth.py b/scripts/check_auth.py deleted file mode 100644 index b889ac7fa7..0000000000 --- a/scripts/check_auth.py +++ /dev/null @@ -1,65 +0,0 @@ -from synapse.events import FrozenEvent -from synapse.api.auth import Auth - -from mock import Mock - -import argparse -import itertools -import json -import sys - - -def check_auth(auth, auth_chain, events): - auth_chain.sort(key=lambda e: e.depth) - - auth_map = { - e.event_id: e - for e in auth_chain - } - - create_events = {} - for e in auth_chain: - if e.type == "m.room.create": - create_events[e.room_id] = e - - for e in itertools.chain(auth_chain, events): - auth_events_list = [auth_map[i] for i, _ in e.auth_events] - - auth_events = { - (e.type, e.state_key): e - for e in auth_events_list - } - - auth_events[("m.room.create", "")] = create_events[e.room_id] - - try: - auth.check(e, auth_events=auth_events) - except Exception as ex: - print "Failed:", e.event_id, e.type, e.state_key - print "Auth_events:", auth_events - print ex - print json.dumps(e.get_dict(), sort_keys=True, indent=4) - # raise - print "Success:", e.event_id, e.type, e.state_key - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument( - 'json', - nargs='?', - type=argparse.FileType('r'), - default=sys.stdin, - ) - - args = parser.parse_args() - - js = json.load(args.json) - - - auth = Auth(Mock()) - check_auth( - auth, - [FrozenEvent(d) for d in js["auth_chain"]], - [FrozenEvent(d) for d in js["pdus"]], - ) diff --git a/scripts/check_event_hash.py b/scripts/check_event_hash.py deleted file mode 100644 index 679afbd268..0000000000 --- a/scripts/check_event_hash.py +++ /dev/null @@ -1,50 +0,0 @@ -from synapse.crypto.event_signing import * -from syutil.base64util import encode_base64 - -import argparse -import hashlib -import sys -import json - - -class dictobj(dict): - def __init__(self, *args, **kargs): - dict.__init__(self, *args, **kargs) - self.__dict__ = self - - def get_dict(self): - return dict(self) - - def get_full_dict(self): - return dict(self) - - def get_pdu_json(self): - return dict(self) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("input_json", nargs="?", type=argparse.FileType('r'), - default=sys.stdin) - args = parser.parse_args() - logging.basicConfig() - - event_json = dictobj(json.load(args.input_json)) - - algorithms = { - "sha256": hashlib.sha256, - } - - for alg_name in event_json.hashes: - if check_event_content_hash(event_json, algorithms[alg_name]): - print "PASS content hash %s" % (alg_name,) - else: - print "FAIL content hash %s" % (alg_name,) - - for algorithm in algorithms.values(): - name, h_bytes = compute_event_reference_hash(event_json, algorithm) - print "Reference hash %s: %s" % (name, encode_base64(h_bytes)) - -if __name__=="__main__": - main() - diff --git a/scripts/check_signature.py b/scripts/check_signature.py deleted file mode 100644 index 59e3d603ac..0000000000 --- a/scripts/check_signature.py +++ /dev/null @@ -1,73 +0,0 @@ - -from syutil.crypto.jsonsign import verify_signed_json -from syutil.crypto.signing_key import ( - 
diff --git a/scripts/check_signature.py b/scripts/check_signature.py
deleted file mode 100644
index 59e3d603ac..0000000000
--- a/scripts/check_signature.py
+++ /dev/null
@@ -1,73 +0,0 @@
-
-from syutil.crypto.jsonsign import verify_signed_json
-from syutil.crypto.signing_key import (
-    decode_verify_key_bytes, write_signing_keys
-)
-from syutil.base64util import decode_base64
-
-import urllib2
-import json
-import sys
-import dns.resolver
-import pprint
-import argparse
-import logging
-
-def get_targets(server_name):
-    if ":" in server_name:
-        target, port = server_name.split(":")
-        yield (target, int(port))
-        return
-    try:
-        answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV")
-        for srv in answers:
-            yield (srv.target, srv.port)
-    except dns.resolver.NXDOMAIN:
-        yield (server_name, 8448)
-
-def get_server_keys(server_name, target, port):
-    url = "https://%s:%i/_matrix/key/v1" % (target, port)
-    keys = json.load(urllib2.urlopen(url))
-    verify_keys = {}
-    for key_id, key_base64 in keys["verify_keys"].items():
-        verify_key = decode_verify_key_bytes(key_id, decode_base64(key_base64))
-        verify_signed_json(keys, server_name, verify_key)
-        verify_keys[key_id] = verify_key
-    return verify_keys
-
-def main():
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument("signature_name")
-    parser.add_argument("input_json", nargs="?", type=argparse.FileType('r'),
-                        default=sys.stdin)
-
-    args = parser.parse_args()
-    logging.basicConfig()
-
-    server_name = args.signature_name
-    keys = {}
-    for target, port in get_targets(server_name):
-        try:
-            keys = get_server_keys(server_name, target, port)
-            print "Using keys from https://%s:%s/_matrix/key/v1" % (target, port)
-            write_signing_keys(sys.stdout, keys.values())
-            break
-        except:
-            logging.exception("Error talking to %s:%s", target, port)
-
-    json_to_check = json.load(args.input_json)
-    print "Checking JSON:"
-    for key_id in json_to_check["signatures"][args.signature_name]:
-        try:
-            key = keys[key_id]
-            verify_signed_json(json_to_check, args.signature_name, key)
-            print "PASS %s" % (key_id,)
-        except:
-            logging.exception("Check for key %s failed" % (key_id,))
-            print "FAIL %s" % (key_id,)
-
-
-if __name__ == '__main__':
-    main()
-
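check_signature.py discovers where to fetch a server's keys the same way federation routing does: an SRV lookup on _matrix._tcp.<server_name>, falling back to port 8448. With the dnspython package installed, the discovery step alone looks roughly like the get_targets above (a sketch, not a drop-in replacement):

    import dns.resolver

    def get_targets(server_name):
        if ":" in server_name:              # an explicit port wins
            host, port = server_name.split(":")
            yield host, int(port)
            return
        try:
            for srv in dns.resolver.query("_matrix._tcp." + server_name, "SRV"):
                yield str(srv.target), srv.port
        except dns.resolver.NXDOMAIN:
            yield server_name, 8448         # federation default port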
diff --git a/scripts/copyrighter-sql.pl b/scripts/copyrighter-sql.pl
deleted file mode 100755
index 890e51e587..0000000000
--- a/scripts/copyrighter-sql.pl
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/perl -pi
-# Copyright 2015 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-$copyright = <table-save.sql
-.dump users
-.dump access_tokens
-.dump presence
-.dump profiles
-EOF
diff --git a/scripts/federation_client.py b/scripts/federation_client.py
deleted file mode 100644
index ea62dceb36..0000000000
--- a/scripts/federation_client.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import nacl.signing
-import json
-import base64
-import requests
-import sys
-import srvlookup
-
-
-def encode_base64(input_bytes):
-    """Encode bytes as a base64 string without any padding."""
-
-    input_len = len(input_bytes)
-    output_len = 4 * ((input_len + 2) // 3) + (input_len + 2) % 3 - 2
-    output_bytes = base64.b64encode(input_bytes)
-    output_string = output_bytes[:output_len].decode("ascii")
-    return output_string
-
-
-def decode_base64(input_string):
-    """Decode a base64 string to bytes inferring padding from the length of the
-    string."""
-
-    input_bytes = input_string.encode("ascii")
-    input_len = len(input_bytes)
-    padding = b"=" * (3 - ((input_len + 3) % 4))
-    output_len = 3 * ((input_len + 2) // 4) + (input_len + 2) % 4 - 2
-    output_bytes = base64.b64decode(input_bytes + padding)
-    return output_bytes[:output_len]
-
-
-def encode_canonical_json(value):
-    return json.dumps(
-        value,
-        # Encode code-points outside of ASCII as UTF-8 rather than \u escapes
-        ensure_ascii=False,
-        # Remove unecessary white space.
-        separators=(',',':'),
-        # Sort the keys of dictionaries.
-        sort_keys=True,
-        # Encode the resulting unicode as UTF-8 bytes.
-    ).encode("UTF-8")
-
-
-def sign_json(json_object, signing_key, signing_name):
-    signatures = json_object.pop("signatures", {})
-    unsigned = json_object.pop("unsigned", None)
-
-    signed = signing_key.sign(encode_canonical_json(json_object))
-    signature_base64 = encode_base64(signed.signature)
-
-    key_id = "%s:%s" % (signing_key.alg, signing_key.version)
-    signatures.setdefault(signing_name, {})[key_id] = signature_base64
-
-    json_object["signatures"] = signatures
-    if unsigned is not None:
-        json_object["unsigned"] = unsigned
-
-    return json_object
-
-
-NACL_ED25519 = "ed25519"
-
-def decode_signing_key_base64(algorithm, version, key_base64):
-    """Decode a base64 encoded signing key
-    Args:
-        algorithm (str): The algorithm the key is for (currently "ed25519").
-        version (str): Identifies this key out of the keys for this entity.
-        key_base64 (str): Base64 encoded bytes of the key.
-    Returns:
-        A SigningKey object.
-    """
-    if algorithm == NACL_ED25519:
-        key_bytes = decode_base64(key_base64)
-        key = nacl.signing.SigningKey(key_bytes)
-        key.version = version
-        key.alg = NACL_ED25519
-        return key
-    else:
-        raise ValueError("Unsupported algorithm %s" % (algorithm,))
-
-
-def read_signing_keys(stream):
-    """Reads a list of keys from a stream
-    Args:
-        stream : A stream to iterate for keys.
-    Returns:
-        list of SigningKey objects.
- """ - keys = [] - for line in stream: - algorithm, version, key_base64 = line.split() - keys.append(decode_signing_key_base64(algorithm, version, key_base64)) - return keys - - -def lookup(destination, path): - if ":" in destination: - return "https://%s%s" % (destination, path) - else: - try: - srv = srvlookup.lookup("matrix", "tcp", destination)[0] - return "https://%s:%d%s" % (srv.host, srv.port, path) - except: - return "https://%s:%d%s" % (destination, 8448, path) - -def get_json(origin_name, origin_key, destination, path): - request_json = { - "method": "GET", - "uri": path, - "origin": origin_name, - "destination": destination, - } - - signed_json = sign_json(request_json, origin_key, origin_name) - - authorization_headers = [] - - for key, sig in signed_json["signatures"][origin_name].items(): - authorization_headers.append(bytes( - "X-Matrix origin=%s,key=\"%s\",sig=\"%s\"" % ( - origin_name, key, sig, - ) - )) - - result = requests.get( - lookup(destination, path), - headers={"Authorization": authorization_headers[0]}, - verify=False, - ) - return result.json() - - -def main(): - origin_name, keyfile, destination, path = sys.argv[1:] - - with open(keyfile) as f: - key = read_signing_keys(f)[0] - - result = get_json( - origin_name, key, destination, "/_matrix/federation/v1/" + path - ) - - json.dump(result, sys.stdout) - -if __name__ == "__main__": - main() diff --git a/scripts/hash_history.py b/scripts/hash_history.py deleted file mode 100644 index bdad530af8..0000000000 --- a/scripts/hash_history.py +++ /dev/null @@ -1,69 +0,0 @@ -from synapse.storage.pdu import PduStore -from synapse.storage.signatures import SignatureStore -from synapse.storage._base import SQLBaseStore -from synapse.federation.units import Pdu -from synapse.crypto.event_signing import ( - add_event_pdu_content_hash, compute_pdu_event_reference_hash -) -from synapse.api.events.utils import prune_pdu -from syutil.base64util import encode_base64, decode_base64 -from syutil.jsonutil import encode_canonical_json -import sqlite3 -import sys - -class Store(object): - _get_pdu_tuples = PduStore.__dict__["_get_pdu_tuples"] - _get_pdu_content_hashes_txn = SignatureStore.__dict__["_get_pdu_content_hashes_txn"] - _get_prev_pdu_hashes_txn = SignatureStore.__dict__["_get_prev_pdu_hashes_txn"] - _get_pdu_origin_signatures_txn = SignatureStore.__dict__["_get_pdu_origin_signatures_txn"] - _store_pdu_content_hash_txn = SignatureStore.__dict__["_store_pdu_content_hash_txn"] - _store_pdu_reference_hash_txn = SignatureStore.__dict__["_store_pdu_reference_hash_txn"] - _store_prev_pdu_hash_txn = SignatureStore.__dict__["_store_prev_pdu_hash_txn"] - _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] - - -store = Store() - - -def select_pdus(cursor): - cursor.execute( - "SELECT pdu_id, origin FROM pdus ORDER BY depth ASC" - ) - - ids = cursor.fetchall() - - pdu_tuples = store._get_pdu_tuples(cursor, ids) - - pdus = [Pdu.from_pdu_tuple(p) for p in pdu_tuples] - - reference_hashes = {} - - for pdu in pdus: - try: - if pdu.prev_pdus: - print "PROCESS", pdu.pdu_id, pdu.origin, pdu.prev_pdus - for pdu_id, origin, hashes in pdu.prev_pdus: - ref_alg, ref_hsh = reference_hashes[(pdu_id, origin)] - hashes[ref_alg] = encode_base64(ref_hsh) - store._store_prev_pdu_hash_txn(cursor, pdu.pdu_id, pdu.origin, pdu_id, origin, ref_alg, ref_hsh) - print "SUCCESS", pdu.pdu_id, pdu.origin, pdu.prev_pdus - pdu = add_event_pdu_content_hash(pdu) - ref_alg, ref_hsh = compute_pdu_event_reference_hash(pdu) - reference_hashes[(pdu.pdu_id, 
diff --git a/scripts/hash_history.py b/scripts/hash_history.py
deleted file mode 100644
index bdad530af8..0000000000
--- a/scripts/hash_history.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from synapse.storage.pdu import PduStore
-from synapse.storage.signatures import SignatureStore
-from synapse.storage._base import SQLBaseStore
-from synapse.federation.units import Pdu
-from synapse.crypto.event_signing import (
-    add_event_pdu_content_hash, compute_pdu_event_reference_hash
-)
-from synapse.api.events.utils import prune_pdu
-from syutil.base64util import encode_base64, decode_base64
-from syutil.jsonutil import encode_canonical_json
-import sqlite3
-import sys
-
-class Store(object):
-    _get_pdu_tuples = PduStore.__dict__["_get_pdu_tuples"]
-    _get_pdu_content_hashes_txn = SignatureStore.__dict__["_get_pdu_content_hashes_txn"]
-    _get_prev_pdu_hashes_txn = SignatureStore.__dict__["_get_prev_pdu_hashes_txn"]
-    _get_pdu_origin_signatures_txn = SignatureStore.__dict__["_get_pdu_origin_signatures_txn"]
-    _store_pdu_content_hash_txn = SignatureStore.__dict__["_store_pdu_content_hash_txn"]
-    _store_pdu_reference_hash_txn = SignatureStore.__dict__["_store_pdu_reference_hash_txn"]
-    _store_prev_pdu_hash_txn = SignatureStore.__dict__["_store_prev_pdu_hash_txn"]
-    _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"]
-
-
-store = Store()
-
-
-def select_pdus(cursor):
-    cursor.execute(
-        "SELECT pdu_id, origin FROM pdus ORDER BY depth ASC"
-    )
-
-    ids = cursor.fetchall()
-
-    pdu_tuples = store._get_pdu_tuples(cursor, ids)
-
-    pdus = [Pdu.from_pdu_tuple(p) for p in pdu_tuples]
-
-    reference_hashes = {}
-
-    for pdu in pdus:
-        try:
-            if pdu.prev_pdus:
-                print "PROCESS", pdu.pdu_id, pdu.origin, pdu.prev_pdus
-                for pdu_id, origin, hashes in pdu.prev_pdus:
-                    ref_alg, ref_hsh = reference_hashes[(pdu_id, origin)]
-                    hashes[ref_alg] = encode_base64(ref_hsh)
-                    store._store_prev_pdu_hash_txn(cursor, pdu.pdu_id, pdu.origin, pdu_id, origin, ref_alg, ref_hsh)
-                print "SUCCESS", pdu.pdu_id, pdu.origin, pdu.prev_pdus
-            pdu = add_event_pdu_content_hash(pdu)
-            ref_alg, ref_hsh = compute_pdu_event_reference_hash(pdu)
-            reference_hashes[(pdu.pdu_id, pdu.origin)] = (ref_alg, ref_hsh)
-            store._store_pdu_reference_hash_txn(cursor, pdu.pdu_id, pdu.origin, ref_alg, ref_hsh)
-
-            for alg, hsh_base64 in pdu.hashes.items():
-                print alg, hsh_base64
-                store._store_pdu_content_hash_txn(cursor, pdu.pdu_id, pdu.origin, alg, decode_base64(hsh_base64))
-
-        except:
-            print "FAILED_", pdu.pdu_id, pdu.origin, pdu.prev_pdus
-
-def main():
-    conn = sqlite3.connect(sys.argv[1])
-    cursor = conn.cursor()
-    select_pdus(cursor)
-    conn.commit()
-
-if __name__=='__main__':
-    main()
diff --git a/scripts/make_identicons.pl b/scripts/make_identicons.pl
deleted file mode 100755
index cbff63e298..0000000000
--- a/scripts/make_identicons.pl
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-
-use DBI;
-use DBD::SQLite;
-use JSON;
-use Getopt::Long;
-
-my $db; # = "homeserver.db";
-my $server = "http://localhost:8008";
-my $size = 320;
-
-GetOptions("db|d=s",     \$db,
-           "server|s=s", \$server,
-           "width|w=i",  \$size) or usage();
-
-usage() unless $db;
-
-my $dbh = DBI->connect("dbi:SQLite:dbname=$db","","") || die $DBI::errstr;
-
-my $res = $dbh->selectall_arrayref("select token, name from access_tokens, users where access_tokens.user_id = users.id group by user_id") || die $DBI::errstr;
-
-foreach (@$res) {
-    my ($token, $mxid) = ($_->[0], $_->[1]);
-    my ($user_id) = ($mxid =~ m/@(.*):/);
-    my ($url) = $dbh->selectrow_array("select avatar_url from profiles where user_id=?", undef, $user_id);
-    if (!$url || $url =~ /#auto$/) {
-        `curl -s -o tmp.png "$server/_matrix/media/v1/identicon?name=${mxid}&width=$size&height=$size"`;
-        my $json = `curl -s -X POST -H "Content-Type: image/png" -T "tmp.png" $server/_matrix/media/v1/upload?access_token=$token`;
-        my $content_uri = from_json($json)->{content_uri};
-        `curl -X PUT -H "Content-Type: application/json" --data '{ "avatar_url": "${content_uri}#auto"}' $server/_matrix/client/api/v1/profile/${mxid}/avatar_url?access_token=$token`;
-    }
-}
-
-sub usage {
-    die "usage: ./make-identicons.pl\n\t-d database [e.g. homeserver.db]\n\t-s homeserver (default: http://localhost:8008)\n\t-w identicon size in pixels (default 320)";
-}
\ No newline at end of file
diff --git a/scripts/nuke-room-from-db.sh b/scripts/nuke-room-from-db.sh
deleted file mode 100755
index 58c036c896..0000000000
--- a/scripts/nuke-room-from-db.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-## CAUTION:
-## This script will remove (hopefully) all trace of the given room ID from
-## your homeserver.db
-
-## Do not run it lightly.
-
-ROOMID="$1"
-
-sqlite3 homeserver.db <