From cb91ce5bba7ad8644b1e34e627c4238cd3f1aa41 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Dec 2014 13:58:57 +0000 Subject: Rename upgrade script --- scripts/upgrade_db_to_v0.6.0.py | 298 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 scripts/upgrade_db_to_v0.6.0.py (limited to 'scripts/upgrade_db_to_v0.6.0.py') diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py new file mode 100644 index 0000000000..5f6fa3a5aa --- /dev/null +++ b/scripts/upgrade_db_to_v0.6.0.py @@ -0,0 +1,298 @@ +from synapse.storage._base import SQLBaseStore +from synapse.storage.signatures import SignatureStore +from synapse.storage.event_federation import EventFederationStore + +from syutil.base64util import encode_base64, decode_base64 + +from synapse.crypto.event_signing import compute_event_signature + +from synapse.events.builder import EventBuilder +from synapse.events.utils import prune_event + +from synapse.crypto.event_signing import check_event_content_hash + +from syutil.crypto.jsonsign import ( + verify_signed_json, SignatureVerifyException, +) +from syutil.crypto.signing_key import decode_verify_key_bytes + +from syutil.jsonutil import encode_canonical_json + +import argparse +import dns.resolver +import hashlib +import json +import sqlite3 +import syutil +import urllib2 + + +delta_sql = """ +CREATE TABLE IF NOT EXISTS event_json( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + internal_metadata NOT NULL, + json BLOB NOT NULL, + CONSTRAINT ev_j_uniq UNIQUE (event_id) +); + +CREATE INDEX IF NOT EXISTS event_json_id ON event_json(event_id); +CREATE INDEX IF NOT EXISTS event_json_room_id ON event_json(room_id); + +PRAGMA user_version = 10; +""" + + +class Store(object): + _get_event_signatures_txn = SignatureStore.__dict__["_get_event_signatures_txn"] + _get_event_content_hashes_txn = SignatureStore.__dict__["_get_event_content_hashes_txn"] + _get_event_reference_hashes_txn = SignatureStore.__dict__["_get_event_reference_hashes_txn"] + _get_prev_event_hashes_txn = SignatureStore.__dict__["_get_prev_event_hashes_txn"] + _get_prev_events_and_state = EventFederationStore.__dict__["_get_prev_events_and_state"] + _get_auth_events = EventFederationStore.__dict__["_get_auth_events"] + cursor_to_dict = SQLBaseStore.__dict__["cursor_to_dict"] + _simple_select_onecol_txn = SQLBaseStore.__dict__["_simple_select_onecol_txn"] + _simple_select_list_txn = SQLBaseStore.__dict__["_simple_select_list_txn"] + _simple_insert_txn = SQLBaseStore.__dict__["_simple_insert_txn"] + + def _generate_event_json(self, txn, rows): + events = [] + for row in rows: + d = dict(row) + + d.pop("stream_ordering", None) + d.pop("topological_ordering", None) + d.pop("processed", None) + + if "origin_server_ts" not in d: + d["origin_server_ts"] = d.pop("ts", 0) + else: + d.pop("ts", 0) + + d.pop("prev_state", None) + d.update(json.loads(d.pop("unrecognized_keys"))) + + d["sender"] = d.pop("user_id") + + d["content"] = json.loads(d["content"]) + + if "age_ts" not in d: + # For compatibility + d["age_ts"] = d.get("origin_server_ts", 0) + + d.setdefault("unsigned", {})["age_ts"] = d.pop("age_ts") + + outlier = d.pop("outlier", False) + + # d.pop("membership", None) + + d.pop("state_hash", None) + + d.pop("replaces_state", None) + + b = EventBuilder(d) + b.internal_metadata.outlier = outlier + + events.append(b) + + for i, ev in enumerate(events): + signatures = self._get_event_signatures_txn( + txn, ev.event_id, + ) + + ev.signatures = { + n: { + k: encode_base64(v) for k, v in s.items() + } + for n, s in signatures.items() + } + + hashes = self._get_event_content_hashes_txn( + txn, ev.event_id, + ) + + ev.hashes = { + k: encode_base64(v) for k, v in hashes.items() + } + + prevs = self._get_prev_events_and_state(txn, ev.event_id) + + ev.prev_events = [ + (e_id, h) + for e_id, h, is_state in prevs + if is_state == 0 + ] + + # ev.auth_events = self._get_auth_events(txn, ev.event_id) + + hashes = dict(ev.auth_events) + + for e_id, hash in ev.prev_events: + if e_id in hashes and not hash: + hash.update(hashes[e_id]) + # + # if hasattr(ev, "state_key"): + # ev.prev_state = [ + # (e_id, h) + # for e_id, h, is_state in prevs + # if is_state == 1 + # ] + + return [e.build() for e in events] + + +store = Store() + + +def get_key(server_name): + print "Getting keys for: %s" % (server_name,) + targets = [] + if ":" in server_name: + target, port = server_name.split(":") + targets.append((target, int(port))) + return + try: + answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV") + for srv in answers: + targets.append((srv.target, srv.port)) + except dns.resolver.NXDOMAIN: + targets.append((server_name, 8448)) + except: + print "Failed to lookup keys for %s" % (server_name,) + return {} + + for target, port in targets: + url = "https://%s:%i/_matrix/key/v1" % (target, port) + try: + keys = json.load(urllib2.urlopen(url, timeout=2)) + verify_keys = {} + for key_id, key_base64 in keys["verify_keys"].items(): + verify_key = decode_verify_key_bytes( + key_id, decode_base64(key_base64) + ) + verify_signed_json(keys, server_name, verify_key) + verify_keys[key_id] = verify_key + print "Got keys for: %s" % (server_name,) + return verify_keys + except urllib2.URLError: + pass + + print "Failed to get keys for %s" % (server_name,) + return {} + + +def reinsert_events(cursor, server_name, signing_key): + cursor.executescript(delta_sql) + + cursor.execute( + "SELECT * FROM events ORDER BY rowid ASC" + ) + + rows = store.cursor_to_dict(cursor) + + events = store._generate_event_json(cursor, rows) + + print "Got events from DB." + + algorithms = { + "sha256": hashlib.sha256, + } + + key_id = "%s:%s" % (signing_key.alg, signing_key.version) + verify_key = signing_key.verify_key + verify_key.alg = signing_key.alg + verify_key.version = signing_key.version + + server_keys = { + server_name: { + key_id: verify_key + } + } + + for event in events: + for alg_name in event.hashes: + if check_event_content_hash(event, algorithms[alg_name]): + pass + else: + pass + print "FAIL content hash %s %s" % (alg_name, event.event_id, ) + + have_own_correctly_signed = False + for host, sigs in event.signatures.items(): + pruned = prune_event(event) + + for key_id in sigs: + if host not in server_keys: + server_keys[host] = get_key(host) + if key_id in server_keys[host]: + try: + verify_signed_json( + pruned.get_pdu_json(), + host, + server_keys[host][key_id] + ) + + if host == server_name: + have_own_correctly_signed = True + except SignatureVerifyException: + print "FAIL signature check %s %s" % ( + key_id, event.event_id + ) + + # TODO: Re sign with our own server key + if not have_own_correctly_signed: + sigs = compute_event_signature(event, server_name, signing_key) + event.signatures.update(sigs) + + pruned = prune_event(event) + + for key_id in event.signatures[server_name]: + verify_signed_json( + pruned.get_pdu_json(), + server_name, + server_keys[server_name][key_id] + ) + + event_json = encode_canonical_json( + event.get_dict() + ).decode("UTF-8") + + metadata_json = encode_canonical_json( + event.internal_metadata.get_dict() + ).decode("UTF-8") + + store._simple_insert_txn( + cursor, + table="event_json", + values={ + "event_id": event.event_id, + "room_id": event.room_id, + "internal_metadata": metadata_json, + "json": event_json, + }, + or_replace=True, + ) + + +def main(database, server_name, signing_key): + conn = sqlite3.connect(database) + cursor = conn.cursor() + reinsert_events(cursor, server_name, signing_key) + conn.commit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument("database") + parser.add_argument("server_name") + parser.add_argument( + "signing_key", type=argparse.FileType('r'), + ) + args = parser.parse_args() + + signing_key = syutil.crypto.signing_key.read_signing_keys( + args.signing_key + ) + + main(args.database, args.server_name, signing_key[0]) -- cgit 1.4.1 From 592ba14b36bca88aa4517aa4885f89a706cc3b06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Dec 2014 14:07:05 +0000 Subject: Fix bugs in upgrade script. Handle the case when there are colons in server_name. Handle http exceptions more gracefully. --- scripts/upgrade_db_to_v0.6.0.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'scripts/upgrade_db_to_v0.6.0.py') diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py index 5f6fa3a5aa..8f371bb4bb 100644 --- a/scripts/upgrade_db_to_v0.6.0.py +++ b/scripts/upgrade_db_to_v0.6.0.py @@ -21,6 +21,7 @@ from syutil.jsonutil import encode_canonical_json import argparse import dns.resolver import hashlib +import httplib import json import sqlite3 import syutil @@ -150,7 +151,6 @@ def get_key(server_name): if ":" in server_name: target, port = server_name.split(":") targets.append((target, int(port))) - return try: answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV") for srv in answers: @@ -176,6 +176,10 @@ def get_key(server_name): return verify_keys except urllib2.URLError: pass + except urllib2.HTTPError: + pass + except httplib.HTTPException: + pass print "Failed to get keys for %s" % (server_name,) return {} -- cgit 1.4.1 From d2ca24087fe6fdd91535602ffe493a92c3a468c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Dec 2014 14:36:31 +0000 Subject: Bump UPGRADES and CHANGES --- CHANGES.rst | 9 +++++++++ UPGRADE.rst | 16 ++++++++++++++++ scripts/upgrade_db_to_v0.6.0.py | 2 ++ 3 files changed, 27 insertions(+) (limited to 'scripts/upgrade_db_to_v0.6.0.py') diff --git a/CHANGES.rst b/CHANGES.rst index 0d36e8eeff..23bdac6a8a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,12 @@ +Changes in synapse 0.6.0 (2014-12-16) +===================================== + + * Add new API for media upload and download that supports thumbnailing. + * Implement typing notifications. + * Fix bugs where we sent events with invalid signatures due to bugs where + we incorrectly persisted events. + * Improve performance of database queries involving retrieving events. + Changes in synapse 0.5.4a (2014-12-13) ====================================== diff --git a/UPGRADE.rst b/UPGRADE.rst index 5ebdd455c1..a602a9f3eb 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -1,3 +1,19 @@ +Upgrading to v0.6.0 +=================== + +This update includes a change to the database schema. To upgrade you first need +to upgrade the database by running:: + + python scripts/upgrade_db_to_v0.6.0.py + +Where `` is the location of the database, `` is the +server name as specified in the synapse configuration, and `` is +the location of the signing key as specified in the synapse configuration. + +This may take some time to complete. Failures of signatures and content hashes +can safely be ignored. + + Upgrading to v0.5.1 =================== diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py index 8f371bb4bb..add088a818 100644 --- a/scripts/upgrade_db_to_v0.6.0.py +++ b/scripts/upgrade_db_to_v0.6.0.py @@ -284,6 +284,8 @@ def main(database, server_name, signing_key): reinsert_events(cursor, server_name, signing_key) conn.commit() + print "Success!" + if __name__ == "__main__": parser = argparse.ArgumentParser() -- cgit 1.4.1 From 42b725ce52844b3e858193aa12ddc06933c7584a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Dec 2014 15:13:34 +0000 Subject: Fix upgrade script to run all the missing deltas. --- scripts/upgrade_db_to_v0.6.0.py | 20 +++++++++++++ synapse/storage/schema/delta/v9.sql | 58 ++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) (limited to 'scripts/upgrade_db_to_v0.6.0.py') diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py index add088a818..32c415a662 100644 --- a/scripts/upgrade_db_to_v0.6.0.py +++ b/scripts/upgrade_db_to_v0.6.0.py @@ -1,3 +1,5 @@ + +from synapse.storage import SCHEMA_VERSION, read_schema from synapse.storage._base import SQLBaseStore from synapse.storage.signatures import SignatureStore from synapse.storage.event_federation import EventFederationStore @@ -186,12 +188,16 @@ def get_key(server_name): def reinsert_events(cursor, server_name, signing_key): + print "Running delta: v10" + cursor.executescript(delta_sql) cursor.execute( "SELECT * FROM events ORDER BY rowid ASC" ) + print "Getting events..." + rows = store.cursor_to_dict(cursor) events = store._generate_event_json(cursor, rows) @@ -281,7 +287,21 @@ def reinsert_events(cursor, server_name, signing_key): def main(database, server_name, signing_key): conn = sqlite3.connect(database) cursor = conn.cursor() + + # Do other deltas: + cursor.execute("PRAGMA user_version") + row = cursor.fetchone() + + if row and row[0]: + user_version = row[0] + # Run every version since after the current version. + for v in range(user_version + 1, 10): + print "Running delta: %d" % (v,) + sql_script = read_schema("delta/v%d" % (v,)) + cursor.executescript(sql_script) + reinsert_events(cursor, server_name, signing_key) + conn.commit() print "Success!" diff --git a/synapse/storage/schema/delta/v9.sql b/synapse/storage/schema/delta/v9.sql index ad680c64da..0af29733a0 100644 --- a/synapse/storage/schema/delta/v9.sql +++ b/synapse/storage/schema/delta/v9.sql @@ -20,4 +20,60 @@ CREATE TABLE IF NOT EXISTS destinations( retry_interval INTEGER ); -PRAGMA user_version = 9; \ No newline at end of file + +CREATE TABLE IF NOT EXISTS local_media_repository ( + media_id TEXT, -- The id used to refer to the media. + media_type TEXT, -- The MIME-type of the media. + media_length INTEGER, -- Length of the media in bytes. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + user_id TEXT, -- The user who uploaded the file. + CONSTRAINT uniqueness UNIQUE (media_id) +); + +CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails ( + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_method TEXT, -- The method used to make the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + CONSTRAINT uniqueness UNIQUE ( + media_id, thumbnail_width, thumbnail_height, thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS local_media_repository_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); + +CREATE TABLE IF NOT EXISTS remote_media_cache ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media on that server. + media_type TEXT, -- The MIME-type of the media. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + media_length INTEGER, -- Length of the media in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE (media_origin, media_id) +); + +CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_method TEXT, -- The method used to make the thumbnail + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE ( + media_origin, media_id, thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); + + +PRAGMA user_version = 9; -- cgit 1.4.1 From 28f71ecf0da6e162f1f3aeb03a80723f9ff1fdd6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Dec 2014 17:29:22 +0000 Subject: Change upgrade script to not check hashes or signatures --- scripts/upgrade_db_to_v0.6.0.py | 99 ++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 46 deletions(-) (limited to 'scripts/upgrade_db_to_v0.6.0.py') diff --git a/scripts/upgrade_db_to_v0.6.0.py b/scripts/upgrade_db_to_v0.6.0.py index 32c415a662..298e9c0789 100644 --- a/scripts/upgrade_db_to_v0.6.0.py +++ b/scripts/upgrade_db_to_v0.6.0.py @@ -21,7 +21,7 @@ from syutil.crypto.signing_key import decode_verify_key_bytes from syutil.jsonutil import encode_canonical_json import argparse -import dns.resolver +# import dns.resolver import hashlib import httplib import json @@ -147,44 +147,44 @@ class Store(object): store = Store() -def get_key(server_name): - print "Getting keys for: %s" % (server_name,) - targets = [] - if ":" in server_name: - target, port = server_name.split(":") - targets.append((target, int(port))) - try: - answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV") - for srv in answers: - targets.append((srv.target, srv.port)) - except dns.resolver.NXDOMAIN: - targets.append((server_name, 8448)) - except: - print "Failed to lookup keys for %s" % (server_name,) - return {} - - for target, port in targets: - url = "https://%s:%i/_matrix/key/v1" % (target, port) - try: - keys = json.load(urllib2.urlopen(url, timeout=2)) - verify_keys = {} - for key_id, key_base64 in keys["verify_keys"].items(): - verify_key = decode_verify_key_bytes( - key_id, decode_base64(key_base64) - ) - verify_signed_json(keys, server_name, verify_key) - verify_keys[key_id] = verify_key - print "Got keys for: %s" % (server_name,) - return verify_keys - except urllib2.URLError: - pass - except urllib2.HTTPError: - pass - except httplib.HTTPException: - pass - - print "Failed to get keys for %s" % (server_name,) - return {} +# def get_key(server_name): +# print "Getting keys for: %s" % (server_name,) +# targets = [] +# if ":" in server_name: +# target, port = server_name.split(":") +# targets.append((target, int(port))) +# try: +# answers = dns.resolver.query("_matrix._tcp." + server_name, "SRV") +# for srv in answers: +# targets.append((srv.target, srv.port)) +# except dns.resolver.NXDOMAIN: +# targets.append((server_name, 8448)) +# except: +# print "Failed to lookup keys for %s" % (server_name,) +# return {} +# +# for target, port in targets: +# url = "https://%s:%i/_matrix/key/v1" % (target, port) +# try: +# keys = json.load(urllib2.urlopen(url, timeout=2)) +# verify_keys = {} +# for key_id, key_base64 in keys["verify_keys"].items(): +# verify_key = decode_verify_key_bytes( +# key_id, decode_base64(key_base64) +# ) +# verify_signed_json(keys, server_name, verify_key) +# verify_keys[key_id] = verify_key +# print "Got keys for: %s" % (server_name,) +# return verify_keys +# except urllib2.URLError: +# pass +# except urllib2.HTTPError: +# pass +# except httplib.HTTPException: +# pass +# +# print "Failed to get keys for %s" % (server_name,) +# return {} def reinsert_events(cursor, server_name, signing_key): @@ -219,13 +219,20 @@ def reinsert_events(cursor, server_name, signing_key): } } + i = 0 + N = len(events) + for event in events: - for alg_name in event.hashes: - if check_event_content_hash(event, algorithms[alg_name]): - pass - else: - pass - print "FAIL content hash %s %s" % (alg_name, event.event_id, ) + if i % 100 == 0: + print "Processed: %d/%d events" % (i,N,) + i += 1 + + # for alg_name in event.hashes: + # if check_event_content_hash(event, algorithms[alg_name]): + # pass + # else: + # pass + # print "FAIL content hash %s %s" % (alg_name, event.event_id, ) have_own_correctly_signed = False for host, sigs in event.signatures.items(): @@ -233,7 +240,7 @@ def reinsert_events(cursor, server_name, signing_key): for key_id in sigs: if host not in server_keys: - server_keys[host] = get_key(host) + server_keys[host] = {} # get_key(host) if key_id in server_keys[host]: try: verify_signed_json( -- cgit 1.4.1