From c877f0f0345f1ff6d329af2920d7d1a6b5659a86 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 16:41:39 +0100 Subject: Optimise event_search in postgres --- synapse/storage/schema/delta/31/search_update.py | 65 ++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 synapse/storage/schema/delta/31/search_update.py (limited to 'synapse/storage/schema/delta') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py new file mode 100644 index 0000000000..46a3795d12 --- /dev/null +++ b/synapse/storage/schema/delta/31/search_update.py @@ -0,0 +1,65 @@ +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine +from synapse.storage.prepare_database import get_statements + +import logging +import ujson + +logger = logging.getLogger(__name__) + + +ALTER_TABLE = """ +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; + +CREATE INDEX event_search_room_order ON event_search( + room_id, origin_server_ts, stream_ordering +); +CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); +""" + + +def run_create(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + return + + for statement in get_statements(ALTER_TABLE.splitlines()): + cur.execute(statement) + + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] + + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + + sql = ( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)" + ) + + sql = database_engine.convert_param_style(sql) + + cur.execute(sql, ("event_search_order", progress_json)) -- cgit 1.4.1 From b743c1237e68e75056b83ea4ab93ba2e1ec44b7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:12:04 +0100 Subject: Add missing run_upgrade --- synapse/storage/schema/delta/31/search_update.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage/schema/delta') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 46a3795d12..989e990dbd 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -63,3 +63,7 @@ def run_create(cur, database_engine, *args, **kwargs): sql = database_engine.convert_param_style(sql) cur.execute(sql, ("event_search_order", progress_json)) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + pass -- cgit 1.4.1 From 51bb339ab2130ab29ee9fcfec48d8e62f46c75f6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:16:11 +0100 Subject: Create index concurrently --- synapse/storage/schema/delta/31/search_update.py | 6 +----- synapse/storage/search.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'synapse/storage/schema/delta') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 989e990dbd..470ae0c005 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -24,11 +24,6 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; - -CREATE INDEX event_search_room_order ON event_search( - room_id, origin_server_ts, stream_ordering -); -CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); """ @@ -52,6 +47,7 @@ def run_create(cur, database_engine, *args, **kwargs): "target_min_stream_id_inclusive": min_stream_id, "max_stream_id_exclusive": max_stream_id + 1, "rows_inserted": 0, + "have_added_indexes": False, } progress_json = ujson.dumps(progress) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 375057fa3e..548e9eeaef 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -141,10 +141,21 @@ class SearchStore(BackgroundUpdateStore): target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) + have_added_index = progress['have_added_indexes'] INSERT_CLUMP_SIZE = 1000 def reindex_search_txn(txn): + if not have_added_index: + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" + "room_id, origin_server_ts, stream_ordering)" + ) + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" + "origin_server_ts, stream_ordering)" + ) + sql = ( "SELECT stream_ordering, origin_server_ts, event_id FROM events" " INNER JOIN event_search USING (room_id, event_id)" @@ -173,7 +184,8 @@ class SearchStore(BackgroundUpdateStore): progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows) + "rows_inserted": rows_inserted + len(rows), + "have_added_index": True, } self._background_update_progress_txn( -- cgit 1.4.1 From 8fae3d7b1eea87b48db96f1671d850a4a247e926 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:01:49 +0100 Subject: Use special UPDATE syntax --- synapse/storage/schema/delta/31/search_update.py | 4 +-- synapse/storage/search.py | 32 +++++++++--------------- 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'synapse/storage/schema/delta') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 470ae0c005..2c15edd1a4 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dc47425c23..813e1e90ac 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -169,34 +169,26 @@ class SearchStore(BackgroundUpdateStore): self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, ) - INSERT_CLUMP_SIZE = 1000 - def reindex_search_txn(txn): - sql = ( - "SELECT e.stream_ordering, e.origin_server_ts, event_id FROM events as e" - " INNER JOIN event_search USING (room_id, event_id)" - " WHERE ? <= e.stream_ordering AND e.stream_ordering < ?" - " ORDER BY e.stream_ordering DESC" + events_sql = ( + "SELECT stream_ordering, origin_server_ts, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" " LIMIT ?" ) - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + sql = ( + "UPDATE event_search AS es SET es.stream_ordering = e.stream_ordering," + " es.origin_server_ts = e.origin_server_ts" + " FROM (%s) AS e" + " WHERE e.event_id = es.event_id" + " RETURNING es.stream_ordering" + ) % (events_sql,) + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) rows = txn.fetchall() - if not rows: - return 0 - min_stream_id = rows[-1][0] - sql = ( - "UPDATE event_search SET stream_ordering = ?, origin_server_ts = ?" - " WHERE event_id = ?" - ) - - for index in range(0, len(rows), INSERT_CLUMP_SIZE): - clump = rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, -- cgit 1.4.1 From ae571810f2283c1825da62af0e931a0e40f74168 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:09:48 +0100 Subject: Order NULLs first --- synapse/storage/schema/delta/31/search_update.py | 4 ++-- synapse/storage/search.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'synapse/storage/schema/delta') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 2c15edd1a4..470ae0c005 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dd3486783d..2c71db8c96 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -148,13 +148,16 @@ class SearchStore(BackgroundUpdateStore): conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() + + # We create with NULLS FIRST so that when we search *backwards* + # we get the ones with non null origin_server_ts *first* c.execute( "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" - "room_id, origin_server_ts, stream_ordering)" + "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) c.execute( "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" - "origin_server_ts, stream_ordering)" + "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) conn.set_session(autocommit=False) @@ -434,7 +437,15 @@ class SearchStore(BackgroundUpdateStore): # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. - sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + if isinstance(self.database_engine, PostgresEngine): + sql += ( + " ORDER BY origin_server_ts DESC NULLS LAST," + " stream_ordering DESC NULLS LAST LIMIT ?" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + else: + raise Exception("Unrecognized database engine") args.append(limit) -- cgit 1.4.1