From 61c7edfd34abdb9eaa7c8d3dd3dbef95b60de5de Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Apr 2016 17:22:03 +0100 Subject: Add cache to _get_state_groups_from_groups --- synapse/storage/state.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5d2a3a6df..5b743db67a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -174,6 +174,12 @@ class StateStore(SQLBaseStore): return [r[0] for r in results] return self.runInteraction("get_current_state_for_key", f) + @cached(num_args=2, lru=True, max_entries=1000) + def _get_state_group_from_group(self, group, types): + raise NotImplementedError() + + @cachedList(cached_method_name="_get_state_group_from_group", + list_name="groups", num_args=2, inlineCallbacks=True) def _get_state_groups_from_groups(self, groups, types): """Returns dictionary state_group -> (dict of (type, state_key) -> event id) """ @@ -201,18 +207,23 @@ class StateStore(SQLBaseStore): txn.execute(sql, args) rows = self.cursor_to_dict(txn) - results = {} + results = {group: {} for group in groups} for row in rows: key = (row["type"], row["state_key"]) - results.setdefault(row["state_group"], {})[key] = row["event_id"] + results[row["state_group"]][key] = row["event_id"] return results + results = {} + chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)] for chunk in chunks: - return self.runInteraction( + res = yield self.runInteraction( "_get_state_groups_from_groups", f, chunk ) + results.update(res) + + defer.returnValue(results) @defer.inlineCallbacks def get_state_for_events(self, event_ids, types): @@ -359,6 +370,8 @@ class StateStore(SQLBaseStore): a `state_key` of None matches all state_keys. If `types` is None then all events are returned. """ + if types: + types = frozenset(types) results = {} missing_groups = [] if types is not None: -- cgit 1.4.1 From c877f0f0345f1ff6d329af2920d7d1a6b5659a86 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 16:41:39 +0100 Subject: Optimise event_search in postgres --- synapse/storage/room.py | 16 ++++-- synapse/storage/schema/delta/31/search_update.py | 65 ++++++++++++++++++++++++ synapse/storage/search.py | 61 +++++++++++++++++++++- 3 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/31/search_update.py (limited to 'synapse/storage') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9be977f387..70aa64fb31 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -169,20 +169,28 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) else: # This should be unreachable. raise Exception("Unrecognized database engine") - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py new file mode 100644 index 0000000000..46a3795d12 --- /dev/null +++ b/synapse/storage/schema/delta/31/search_update.py @@ -0,0 +1,65 @@ +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine +from synapse.storage.prepare_database import get_statements + +import logging +import ujson + +logger = logging.getLogger(__name__) + + +ALTER_TABLE = """ +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; + +CREATE INDEX event_search_room_order ON event_search( + room_id, origin_server_ts, stream_ordering +); +CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); +""" + + +def run_create(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + return + + for statement in get_statements(ALTER_TABLE.splitlines()): + cur.execute(statement) + + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] + + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + + sql = ( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)" + ) + + sql = database_engine.convert_param_style(sql) + + cur.execute(sql, ("event_search_order", progress_json)) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 59ac7f424c..375057fa3e 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -29,12 +29,17 @@ logger = logging.getLogger(__name__) class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" + EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" def __init__(self, hs): super(SearchStore, self).__init__(hs) self.register_background_update_handler( self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search ) + self.register_background_update_handler( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, + self._background_reindex_search_order + ) @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): @@ -131,6 +136,61 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(result) + @defer.inlineCallbacks + def _background_reindex_search_order(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, origin_server_ts, event_id FROM events" + " INNER JOIN event_search USING (room_id, event_id)" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + + sql = ( + "UPDATE event_search SET stream_ordering = ?, origin_server_ts = ?" + " WHERE event_id = ?" + ) + + for index in range(0, len(rows), INSERT_CLUMP_SIZE): + clump = rows[index:index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows) + } + + self._background_update_progress_txn( + txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress + ) + + return len(rows) + + result = yield self.runInteraction( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_SEARCH_ORDER_UPDATE_NAME) + + defer.returnValue(result) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. @@ -310,7 +370,6 @@ class SearchStore(BackgroundUpdateStore): "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank," " origin_server_ts, stream_ordering, room_id, event_id" " FROM event_search" - " NATURAL JOIN events" " WHERE vector @@ to_tsquery('english', ?) AND " ) args = [search_query, search_query] + args -- cgit 1.4.1 From b743c1237e68e75056b83ea4ab93ba2e1ec44b7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:12:04 +0100 Subject: Add missing run_upgrade --- synapse/storage/schema/delta/31/search_update.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 46a3795d12..989e990dbd 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -63,3 +63,7 @@ def run_create(cur, database_engine, *args, **kwargs): sql = database_engine.convert_param_style(sql) cur.execute(sql, ("event_search_order", progress_json)) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + pass -- cgit 1.4.1 From 51bb339ab2130ab29ee9fcfec48d8e62f46c75f6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:16:11 +0100 Subject: Create index concurrently --- synapse/storage/schema/delta/31/search_update.py | 6 +----- synapse/storage/search.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 989e990dbd..470ae0c005 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -24,11 +24,6 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; - -CREATE INDEX event_search_room_order ON event_search( - room_id, origin_server_ts, stream_ordering -); -CREATE INDEX event_search_order ON event_search(origin_server_ts, stream_ordering); """ @@ -52,6 +47,7 @@ def run_create(cur, database_engine, *args, **kwargs): "target_min_stream_id_inclusive": min_stream_id, "max_stream_id_exclusive": max_stream_id + 1, "rows_inserted": 0, + "have_added_indexes": False, } progress_json = ujson.dumps(progress) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 375057fa3e..548e9eeaef 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -141,10 +141,21 @@ class SearchStore(BackgroundUpdateStore): target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) + have_added_index = progress['have_added_indexes'] INSERT_CLUMP_SIZE = 1000 def reindex_search_txn(txn): + if not have_added_index: + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" + "room_id, origin_server_ts, stream_ordering)" + ) + txn.execute( + "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" + "origin_server_ts, stream_ordering)" + ) + sql = ( "SELECT stream_ordering, origin_server_ts, event_id FROM events" " INNER JOIN event_search USING (room_id, event_id)" @@ -173,7 +184,8 @@ class SearchStore(BackgroundUpdateStore): progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows) + "rows_inserted": rows_inserted + len(rows), + "have_added_index": True, } self._background_update_progress_txn( -- cgit 1.4.1 From 129e4034870956126daf520b41f74a51040ddbf9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:19:25 +0100 Subject: Create index must be on a conn --- synapse/storage/search.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 548e9eeaef..05641fb579 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -143,19 +143,26 @@ class SearchStore(BackgroundUpdateStore): rows_inserted = progress.get("rows_inserted", 0) have_added_index = progress['have_added_indexes'] - INSERT_CLUMP_SIZE = 1000 - - def reindex_search_txn(txn): - if not have_added_index: - txn.execute( + if not have_added_index: + def create_index(conn): + conn.rollback() + conn.set_session(autocommit=True) + c = conn.cursor() + c.execute( "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" "room_id, origin_server_ts, stream_ordering)" ) - txn.execute( + c.execute( "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" "origin_server_ts, stream_ordering)" ) + conn.set_session(autocommit=False) + + yield self.runWithConnection(create_index) + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): sql = ( "SELECT stream_ordering, origin_server_ts, event_id FROM events" " INNER JOIN event_search USING (room_id, event_id)" -- cgit 1.4.1 From 3b0fa77f5050bb54dccc5140a76b171d6603f2e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:37:42 +0100 Subject: Fix SQL statement --- synapse/storage/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 05641fb579..f7730f5d7c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -164,10 +164,10 @@ class SearchStore(BackgroundUpdateStore): def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, origin_server_ts, event_id FROM events" + "SELECT e.stream_ordering, e.origin_server_ts, event_id FROM events as e" " INNER JOIN event_search USING (room_id, event_id)" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" + " WHERE ? <= e.stream_ordering AND e.stream_ordering < ?" + " ORDER BY e.stream_ordering DESC" " LIMIT ?" ) -- cgit 1.4.1 From e395eb1108abac2ada1f846d08285a84fd5042a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:39:24 +0100 Subject: Update progress when creating index --- synapse/storage/search.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f7730f5d7c..1baed674a8 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -158,6 +158,13 @@ class SearchStore(BackgroundUpdateStore): ) conn.set_session(autocommit=False) + pg = dict(progress) + pg["have_added_indexes"] = True + + self._background_update_progress_txn( + conn.cursor(), self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress + ) + yield self.runWithConnection(create_index) INSERT_CLUMP_SIZE = 1000 -- cgit 1.4.1 From 26db18bc90e745e1eb054e2ffd6969918a6253c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:45:56 +0100 Subject: Need to do _background_update_progress_txn in actual transaction --- synapse/storage/search.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 1baed674a8..fd40b44a9a 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -158,14 +158,16 @@ class SearchStore(BackgroundUpdateStore): ) conn.set_session(autocommit=False) - pg = dict(progress) - pg["have_added_indexes"] = True + yield self.runWithConnection(create_index) - self._background_update_progress_txn( - conn.cursor(), self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress - ) + pg = dict(progress) + pg["have_added_indexes"] = True - yield self.runWithConnection(create_index) + yield self.runInteraction( + self.EVENT_SEARCH_ORDER_UPDATE_NAME, + self._background_update_progress_txn, + self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, + ) INSERT_CLUMP_SIZE = 1000 -- cgit 1.4.1 From b57dcb4b51d31914756412ec00d94646bc3b4c79 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 17:49:00 +0100 Subject: Typo --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index fd40b44a9a..dc47425c23 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -201,7 +201,7 @@ class SearchStore(BackgroundUpdateStore): "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, "rows_inserted": rows_inserted + len(rows), - "have_added_index": True, + "have_added_indexes": True, } self._background_update_progress_txn( -- cgit 1.4.1 From 8fae3d7b1eea87b48db96f1671d850a4a247e926 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:01:49 +0100 Subject: Use special UPDATE syntax --- synapse/storage/schema/delta/31/search_update.py | 4 +-- synapse/storage/search.py | 32 +++++++++--------------- 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 470ae0c005..2c15edd1a4 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dc47425c23..813e1e90ac 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -169,34 +169,26 @@ class SearchStore(BackgroundUpdateStore): self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, ) - INSERT_CLUMP_SIZE = 1000 - def reindex_search_txn(txn): - sql = ( - "SELECT e.stream_ordering, e.origin_server_ts, event_id FROM events as e" - " INNER JOIN event_search USING (room_id, event_id)" - " WHERE ? <= e.stream_ordering AND e.stream_ordering < ?" - " ORDER BY e.stream_ordering DESC" + events_sql = ( + "SELECT stream_ordering, origin_server_ts, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" " LIMIT ?" ) - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + sql = ( + "UPDATE event_search AS es SET es.stream_ordering = e.stream_ordering," + " es.origin_server_ts = e.origin_server_ts" + " FROM (%s) AS e" + " WHERE e.event_id = es.event_id" + " RETURNING es.stream_ordering" + ) % (events_sql,) + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) rows = txn.fetchall() - if not rows: - return 0 - min_stream_id = rows[-1][0] - sql = ( - "UPDATE event_search SET stream_ordering = ?, origin_server_ts = ?" - " WHERE event_id = ?" - ) - - for index in range(0, len(rows), INSERT_CLUMP_SIZE): - clump = rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, -- cgit 1.4.1 From 3ddbb1687ced5eb9b9a87367fcd6b754b8d0c5dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:02:36 +0100 Subject: Fix query --- synapse/storage/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 813e1e90ac..dd3486783d 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -178,8 +178,8 @@ class SearchStore(BackgroundUpdateStore): ) sql = ( - "UPDATE event_search AS es SET es.stream_ordering = e.stream_ordering," - " es.origin_server_ts = e.origin_server_ts" + "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," + " origin_server_ts = e.origin_server_ts" " FROM (%s) AS e" " WHERE e.event_id = es.event_id" " RETURNING es.stream_ordering" -- cgit 1.4.1 From ae571810f2283c1825da62af0e931a0e40f74168 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Apr 2016 18:09:48 +0100 Subject: Order NULLs first --- synapse/storage/schema/delta/31/search_update.py | 4 ++-- synapse/storage/search.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py index 2c15edd1a4..470ae0c005 100644 --- a/synapse/storage/schema/delta/31/search_update.py +++ b/synapse/storage/schema/delta/31/search_update.py @@ -22,8 +22,8 @@ logger = logging.getLogger(__name__) ALTER_TABLE = """ -ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT DEFAULT 0; -ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT DEFAULT 0; +ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT; +ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT; """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dd3486783d..2c71db8c96 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -148,13 +148,16 @@ class SearchStore(BackgroundUpdateStore): conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() + + # We create with NULLS FIRST so that when we search *backwards* + # we get the ones with non null origin_server_ts *first* c.execute( "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search(" - "room_id, origin_server_ts, stream_ordering)" + "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) c.execute( "CREATE INDEX CONCURRENTLY event_search_order ON event_search(" - "origin_server_ts, stream_ordering)" + "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)" ) conn.set_session(autocommit=False) @@ -434,7 +437,15 @@ class SearchStore(BackgroundUpdateStore): # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. - sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + if isinstance(self.database_engine, PostgresEngine): + sql += ( + " ORDER BY origin_server_ts DESC NULLS LAST," + " stream_ordering DESC NULLS LAST LIMIT ?" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?" + else: + raise Exception("Unrecognized database engine") args.append(limit) -- cgit 1.4.1 From 183cacac90ca237b448da244270d55920470389b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 Apr 2016 09:37:16 +0100 Subject: Simplify query and handle finishing correctly --- synapse/storage/background_updates.py | 3 ++- synapse/storage/search.py | 30 ++++++++++++++---------------- 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 49904046cf..66a995157d 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -173,11 +173,12 @@ class BackgroundUpdateStore(SQLBaseStore): logger.info( "Updating %r. Updated %r items in %rms." - " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", + " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)", update_name, items_updated, duration_ms, performance.total_items_per_ms(), performance.average_items_per_ms(), performance.total_item_count, + batch_size, ) performance.update(items_updated, duration_ms) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 2c71db8c96..0224299625 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -169,28 +169,26 @@ class SearchStore(BackgroundUpdateStore): yield self.runInteraction( self.EVENT_SEARCH_ORDER_UPDATE_NAME, self._background_update_progress_txn, - self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress, + self.EVENT_SEARCH_ORDER_UPDATE_NAME, pg, ) def reindex_search_txn(txn): - events_sql = ( - "SELECT stream_ordering, origin_server_ts, event_id FROM events" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - sql = ( "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," " origin_server_ts = e.origin_server_ts" - " FROM (%s) AS e" + " FROM events AS e" " WHERE e.event_id = es.event_id" + " AND ? <= e.stream_ordering AND e.stream_ordering < ?" " RETURNING es.stream_ordering" - ) % (events_sql,) + ) - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + min_stream_id = max_stream_id - batch_size + txn.execute(sql, (min_stream_id, max_stream_id)) rows = txn.fetchall() - min_stream_id = rows[-1][0] + + if min_stream_id < target_min_stream_id: + # We've recached the end. + return len(rows), False progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -203,16 +201,16 @@ class SearchStore(BackgroundUpdateStore): txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress ) - return len(rows) + return len(rows), True - result = yield self.runInteraction( + num_rows, finished = yield self.runInteraction( self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn ) - if not result: + if not finished: yield self._end_background_update(self.EVENT_SEARCH_ORDER_UPDATE_NAME) - defer.returnValue(result) + defer.returnValue(num_rows) @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): -- cgit 1.4.1 From 871357d539bfbaf5552a098de2253600bf5f3a51 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 27 Apr 2016 11:54:13 +0100 Subject: Check that somethign has happend before running the selects --- synapse/storage/events.py | 10 ++++++++-- synapse/storage/pusher.py | 3 +++ synapse/storage/receipts.py | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21487724ed..0307b2af3c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1143,6 +1143,12 @@ class EventsStore(SQLBaseStore): current_backfill_id, current_forward_id, limit): """Get all the new events that have arrived at the server either as new events or as backfilled events""" + have_backfill_events = last_backfill_id != current_backfill_id + have_forward_events = last_forward_id != current_forward_id + + if not have_backfill_events and not have_forward_events: + return defer.succeed(AllNewEventsResult([], [], [], [], [])) + def get_all_new_events_txn(txn): sql = ( "SELECT e.stream_ordering, ej.internal_metadata, ej.json, eg.state_group" @@ -1155,7 +1161,7 @@ class EventsStore(SQLBaseStore): " ORDER BY e.stream_ordering ASC" " LIMIT ?" ) - if last_forward_id != current_forward_id: + if have_forward_events: txn.execute(sql, (last_forward_id, current_forward_id, limit)) new_forward_events = txn.fetchall() @@ -1199,7 +1205,7 @@ class EventsStore(SQLBaseStore): " ORDER BY e.stream_ordering DESC" " LIMIT ?" ) - if last_backfill_id != current_backfill_id: + if have_backfill_events: txn.execute(sql, (-last_backfill_id, -current_backfill_id, limit)) new_backfill_events = txn.fetchall() diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index e5755c0aea..11feb3eb11 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -106,6 +106,9 @@ class PusherStore(SQLBaseStore): return self._pushers_id_gen.get_current_token() def get_all_updated_pushers(self, last_id, current_id, limit): + if last_id == current_id: + return defer.succeed(([], [])) + def get_all_updated_pushers_txn(txn): sql = ( "SELECT id, user_name, access_token, profile_tag, kind," diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 3b8805593e..935fc503d9 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -391,6 +391,9 @@ class ReceiptsStore(SQLBaseStore): ) def get_all_updated_receipts(self, last_id, current_id, limit=None): + if last_id == current_id: + return defer.succeed([]) + def get_all_updated_receipts_txn(txn): sql = ( "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" -- cgit 1.4.1