From 9e25443db84f16bca36d1ba605e5b5ea09d1f8c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Sep 2016 14:31:26 +0100 Subject: Move to storing state_groups_state as deltas --- synapse/storage/state.py | 161 ++++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 59 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ec551b0b4f..73cebc7383 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -16,6 +16,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches import intern_string +from synapse.storage.engines import PostgresEngine from twisted.internet import defer @@ -118,20 +119,45 @@ class StateStore(SQLBaseStore): }, ) - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { + if context.prev_group: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in state_event_ids.items() - ], - ) + "prev_state_group": context.prev_group, + }, + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": context.state_group, + "room_id": event.room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in context.delta_ids.items() + ], + ) + else: + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": context.state_group, + "room_id": event.room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in state_event_ids.items() + ], + ) self._simple_insert_many_txn( txn, @@ -214,26 +240,70 @@ class StateStore(SQLBaseStore): else: where_clause = "" - sql = ( - "SELECT state_group, event_id, type, state_key" - " FROM state_groups_state WHERE" - " state_group IN (%s) %s" % ( - ",".join("?" for _ in groups), - where_clause, - ) - ) - - args = list(groups) - if types is not None: - args.extend([i for typ in types for i in typ]) - - txn.execute(sql, args) - rows = self.cursor_to_dict(txn) - results = {group: {} for group in groups} - for row in rows: - key = (row["type"], row["state_key"]) - results[row["state_group"]][key] = row["event_id"] + if isinstance(self.database_engine, PostgresEngine): + sql = (""" + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT type, state_key, event_id FROM state_groups_state + WHERE ROW(type, state_key, state_group) IN ( + SELECT type, state_key, max(state_group) FROM state + INNER JOIN state_groups_state USING (state_group) + GROUP BY type, state_key + ) + %s; + """) % (where_clause,) + + for group in groups: + args = [group] + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + rows = self.cursor_to_dict(txn) + for row in rows: + key = (row["type"], row["state_key"]) + results[group][key] = row["event_id"] + else: + for group in groups: + group_tree = [group] + next_group = group + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + group_tree.append(next_group) + + sql = (""" + SELECT type, state_key, event_id FROM state_groups_state + INNER JOIN ( + SELECT type, state_key, max(state_group) as state_group + FROM state_groups_state + WHERE state_group IN (%s) %s + GROUP BY type, state_key + ) USING (type, state_key, state_group); + """) % (",".join("?" for _ in group_tree), where_clause,) + + args = list(group_tree) + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + rows = self.cursor_to_dict(txn) + for row in rows: + key = (row["type"], row["state_key"]) + results[group][key] = row["event_id"] + return results results = {} @@ -504,32 +574,5 @@ class StateStore(SQLBaseStore): defer.returnValue(results) - def get_all_new_state_groups(self, last_id, current_id, limit): - def get_all_new_state_groups_txn(txn): - sql = ( - "SELECT id, room_id, event_id FROM state_groups" - " WHERE ? < id AND id <= ? ORDER BY id LIMIT ?" - ) - txn.execute(sql, (last_id, current_id, limit)) - groups = txn.fetchall() - - if not groups: - return ([], []) - - lower_bound = groups[0][0] - upper_bound = groups[-1][0] - sql = ( - "SELECT state_group, type, state_key, event_id" - " FROM state_groups_state" - " WHERE ? <= state_group AND state_group <= ?" - ) - - txn.execute(sql, (lower_bound, upper_bound)) - state_group_state = txn.fetchall() - return (groups, state_group_state) - return self.runInteraction( - "get_all_new_state_groups", get_all_new_state_groups_txn - ) - def get_next_state_group(self): return self._state_groups_id_gen.get_next() -- cgit 1.4.1 From 598317927cb8f741528d639f3ce875299fde478e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Sep 2016 10:41:38 +0100 Subject: Limit the length of state chains --- synapse/storage/events.py | 49 +++++++++++++---------- synapse/storage/state.py | 100 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 106 insertions(+), 43 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1a7d4c5199..7e9b351513 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -497,7 +497,11 @@ class EventsStore(SQLBaseStore): # insert into the state_group, state_groups_state and # event_to_state_groups tables. - self._store_mult_state_groups_txn(txn, ((event, context),)) + try: + self._store_mult_state_groups_txn(txn, ((event, context),)) + except Exception: + logger.exception("") + raise metadata_json = encode_json( event.internal_metadata.get_dict() @@ -1543,6 +1547,9 @@ class EventsStore(SQLBaseStore): ) event_rows = txn.fetchall() + for event_id, state_key in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) + # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged txn.execute( @@ -1571,26 +1578,26 @@ class EventsStore(SQLBaseStore): # Get all state groups that are only referenced by events that are # to be deleted. - txn.execute( - "SELECT state_group FROM event_to_state_groups" - " INNER JOIN events USING (event_id)" - " WHERE state_group IN (" - " SELECT DISTINCT state_group FROM events" - " INNER JOIN event_to_state_groups USING (event_id)" - " WHERE room_id = ? AND topological_ordering < ?" - " )" - " GROUP BY state_group HAVING MAX(topological_ordering) < ?", - (room_id, topological_ordering, topological_ordering) - ) - state_rows = txn.fetchall() - txn.executemany( - "DELETE FROM state_groups_state WHERE state_group = ?", - state_rows - ) - txn.executemany( - "DELETE FROM state_groups WHERE id = ?", - state_rows - ) + # txn.execute( + # "SELECT state_group FROM event_to_state_groups" + # " INNER JOIN events USING (event_id)" + # " WHERE state_group IN (" + # " SELECT DISTINCT state_group FROM events" + # " INNER JOIN event_to_state_groups USING (event_id)" + # " WHERE room_id = ? AND topological_ordering < ?" + # " )" + # " GROUP BY state_group HAVING MAX(topological_ordering) < ?", + # (room_id, topological_ordering, topological_ordering) + # ) + # state_rows = txn.fetchall() + # txn.executemany( + # "DELETE FROM state_groups_state WHERE state_group = ?", + # state_rows + # ) + # txn.executemany( + # "DELETE FROM state_groups WHERE id = ?", + # state_rows + # ) # Delete all non-state txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 73cebc7383..7f45c0cd99 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -25,6 +25,9 @@ import logging logger = logging.getLogger(__name__) +MAX_STATE_DELTA_HOPS = 100 + + class StateStore(SQLBaseStore): """ Keeps track of the state at a given event. @@ -104,7 +107,6 @@ class StateStore(SQLBaseStore): state_groups[event.event_id] = context.state_group if self._have_persisted_state_group_txn(txn, context.state_group): - logger.info("Already persisted state_group: %r", context.state_group) continue state_event_ids = dict(context.current_state_ids) @@ -120,29 +122,48 @@ class StateStore(SQLBaseStore): ) if context.prev_group: - self._simple_insert_txn( - txn, - table="state_group_edges", - values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, - }, + potential_hops = self._count_state_group_hops_txn( + txn, context.prev_group ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { + if potential_hops < MAX_STATE_DELTA_HOPS: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.delta_ids.items() - ], - ) + "prev_state_group": context.prev_group, + }, + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": context.state_group, + "room_id": event.room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in context.delta_ids.items() + ], + ) + else: + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": context.state_group, + "room_id": event.room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in context.current_state_ids.items() + ], + ) else: self._simple_insert_many_txn( txn, @@ -171,6 +192,41 @@ class StateStore(SQLBaseStore): ], ) + def _count_state_group_hops_txn(self, txn, state_group): + if isinstance(self.database_engine, PostgresEngine): + sql = (""" + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """) + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + @defer.inlineCallbacks def get_current_state(self, room_id, event_type=None, state_key=""): if event_type and state_key is not None: -- cgit 1.4.1 From a99e9335502df3389ff6f16ef52c43ce391b6955 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 09:34:24 +0100 Subject: Add upgrade script that will slowly prune state_groups_state entries --- synapse/replication/slave/storage/events.py | 3 + synapse/storage/schema/delta/35/state_dedupe.sql | 17 ++ synapse/storage/state.py | 278 +++++++++++++++++------ 3 files changed, 223 insertions(+), 75 deletions(-) create mode 100644 synapse/storage/schema/delta/35/state_dedupe.sql (limited to 'synapse/storage/state.py') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index cbebd5b2f7..15c52774a2 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -86,6 +86,9 @@ class SlavedEventStore(BaseSlavedStore): _get_state_groups_from_groups = ( StateStore.__dict__["_get_state_groups_from_groups"] ) + _get_state_groups_from_groups_txn = ( + DataStore._get_state_groups_from_groups_txn.__func__ + ) _get_state_group_from_group = ( StateStore.__dict__["_get_state_group_from_group"] ) diff --git a/synapse/storage/schema/delta/35/state_dedupe.sql b/synapse/storage/schema/delta/35/state_dedupe.sql new file mode 100644 index 0000000000..97e5067ef4 --- /dev/null +++ b/synapse/storage/schema/delta/35/state_dedupe.sql @@ -0,0 +1,17 @@ +/* Copyright 2016 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('state_group_state_deduplication', '{}'); diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 7f45c0cd99..968b68f462 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -47,6 +47,15 @@ class StateStore(SQLBaseStore): * `state_groups_state`: Maps state group to state events. """ + STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" + + def __init__(self, hs): + super(StateStore, self).__init__(hs) + self.register_background_update_handler( + self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, + self._background_deduplicate_state, + ) + @defer.inlineCallbacks def get_state_groups_ids(self, room_id, event_ids): if not event_ids: @@ -288,92 +297,92 @@ class StateStore(SQLBaseStore): def _get_state_groups_from_groups(self, groups, types): """Returns dictionary state_group -> (dict of (type, state_key) -> event id) """ - def f(txn, groups): - if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) - else: - where_clause = "" - - results = {group: {} for group in groups} - if isinstance(self.database_engine, PostgresEngine): - sql = (""" - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT type, state_key, event_id FROM state_groups_state - WHERE ROW(type, state_key, state_group) IN ( - SELECT type, state_key, max(state_group) FROM state - INNER JOIN state_groups_state USING (state_group) - GROUP BY type, state_key - ) - %s; - """) % (where_clause,) - - for group in groups: - args = [group] - if types is not None: - args.extend([i for typ in types for i in typ]) - - txn.execute(sql, args) - rows = self.cursor_to_dict(txn) - for row in rows: - key = (row["type"], row["state_key"]) - results[group][key] = row["event_id"] - else: - for group in groups: - group_tree = [group] - next_group = group - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - group_tree.append(next_group) - - sql = (""" - SELECT type, state_key, event_id FROM state_groups_state - INNER JOIN ( - SELECT type, state_key, max(state_group) as state_group - FROM state_groups_state - WHERE state_group IN (%s) %s - GROUP BY type, state_key - ) USING (type, state_key, state_group); - """) % (",".join("?" for _ in group_tree), where_clause,) - - args = list(group_tree) - if types is not None: - args.extend([i for typ in types for i in typ]) - - txn.execute(sql, args) - rows = self.cursor_to_dict(txn) - for row in rows: - key = (row["type"], row["state_key"]) - results[group][key] = row["event_id"] - - return results - results = {} chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)] for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - f, chunk + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) + def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + if types is not None: + where_clause = "AND (%s)" % ( + " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), + ) + else: + where_clause = "" + + results = {group: {} for group in groups} + if isinstance(self.database_engine, PostgresEngine): + sql = (""" + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT type, state_key, event_id FROM state_groups_state + WHERE ROW(type, state_key, state_group) IN ( + SELECT type, state_key, max(state_group) FROM state + INNER JOIN state_groups_state USING (state_group) + GROUP BY type, state_key + ) + %s; + """) % (where_clause,) + + for group in groups: + args = [group] + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + rows = self.cursor_to_dict(txn) + for row in rows: + key = (row["type"], row["state_key"]) + results[group][key] = row["event_id"] + else: + for group in groups: + group_tree = [group] + next_group = group + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + group_tree.append(next_group) + + sql = (""" + SELECT type, state_key, event_id FROM state_groups_state + INNER JOIN ( + SELECT type, state_key, max(state_group) as state_group + FROM state_groups_state + WHERE state_group IN (%s) %s + GROUP BY type, state_key + ) USING (type, state_key, state_group); + """) % (",".join("?" for _ in group_tree), where_clause,) + + args = list(group_tree) + if types is not None: + args.extend([i for typ in types for i in typ]) + + txn.execute(sql, args) + rows = self.cursor_to_dict(txn) + for row in rows: + key = (row["type"], row["state_key"]) + results[group][key] = row["event_id"] + + return results + @defer.inlineCallbacks def get_state_for_events(self, event_ids, types): """Given a list of event_ids and type tuples, return a list of state @@ -632,3 +641,122 @@ class StateStore(SQLBaseStore): def get_next_state_group(self): return self._state_groups_id_gen.get_next() + + @defer.inlineCallbacks + def _background_deduplicate_state(self, progress, batch_size): + last_state_group = progress.get("last_state_group", 0) + rows_inserted = progress.get("rows_inserted", 0) + max_group = progress.get("max_group", None) + + if max_group is None: + rows = yield self._execute( + "_background_deduplicate_state", None, + "SELECT coalesce(max(id), 0) FROM state_groups", + ) + max_group = rows[0][0] + + def reindex_txn(txn): + new_last_state_group = last_state_group + for count in xrange(batch_size): + txn.execute( + "SELECT id, room_id FROM state_groups" + " WHERE ? < id AND id <= ?" + " ORDER BY id ASC" + " LIMIT 1", + (new_last_state_group, max_group,) + ) + row = txn.fetchone() + if row: + state_group, room_id = row + + if not row or not state_group: + return True, count + + txn.execute( + "SELECT coalesce(max(id), 0) FROM state_groups" + " WHERE id < ? AND room_id = ?", + (state_group, room_id,) + ) + prev_group, = txn.fetchone() + new_last_state_group = state_group + + if prev_group: + potential_hops = self._count_state_group_hops_txn( + txn, prev_group + ) + if potential_hops >= MAX_STATE_DELTA_HOPS: + # We want to ensure chains are at most this long,# + # otherwise read performance degrades. + continue + + prev_state = self._get_state_groups_from_groups_txn( + txn, [prev_group], types=None + ) + prev_state = prev_state.values()[0] + + curr_state = self._get_state_groups_from_groups_txn( + txn, [state_group], types=None + ) + curr_state = curr_state.values()[0] + + if not set(prev_state.keys()) - set(curr_state.keys()): + # We can only do a delta if the current has a strict super set + # of keys + + delta_state = { + key: value for key, value in curr_state.items() + if prev_state.get(key, None) != value + } + + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ + "state_group": state_group, + "prev_state_group": prev_group, + } + ) + + self._simple_delete_txn( + txn, + table="state_groups_state", + keyvalues={ + "state_group": state_group, + } + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in delta_state.items() + ], + ) + + progress = { + "last_state_group": state_group, + "rows_inserted": rows_inserted + batch_size, + "max_group": max_group, + } + + self._background_update_progress_txn( + txn, self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, progress + ) + + return False, batch_size + + finished, result = yield self.runInteraction( + self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, reindex_txn + ) + + if finished: + yield self._end_background_update(self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME) + + defer.returnValue(result) -- cgit 1.4.1 From 628e65721bdf1fb39e78a833d757a38e614b652d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 10:41:27 +0100 Subject: Add comments --- synapse/events/snapshot.py | 5 +++ synapse/storage/state.py | 79 ++++++++++++++++++++++------------------------ 2 files changed, 43 insertions(+), 41 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index ec32008d5a..11605b34a3 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -27,12 +27,17 @@ class EventContext(object): ] def __init__(self): + # The current state including the current event self.current_state_ids = None + # The current state excluding the current event self.prev_state_ids = None self.state_group = None + self.rejected = False self.push_actions = [] + # A previously persisted state group and a delta between that + # and this state. self.prev_group = None self.delta_ids = None diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 968b68f462..ee8b763008 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -118,8 +118,6 @@ class StateStore(SQLBaseStore): if self._have_persisted_state_group_txn(txn, context.state_group): continue - state_event_ids = dict(context.current_state_ids) - self._simple_insert_txn( txn, table="state_groups", @@ -130,49 +128,36 @@ class StateStore(SQLBaseStore): }, ) + # We persist as a delta if we can, while also ensuring the chain + # of deltas isn't tooo long, as otherwise read performance degrades. if context.prev_group: potential_hops = self._count_state_group_hops_txn( txn, context.prev_group ) - if potential_hops < MAX_STATE_DELTA_HOPS: - self._simple_insert_txn( - txn, - table="state_group_edges", - values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, - }, - ) + if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ + "state_group": context.state_group, + "prev_state_group": context.prev_group, + }, + ) - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.delta_ids.items() - ], - ) - else: - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.current_state_ids.items() - ], - ) + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": context.state_group, + "room_id": event.room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in context.delta_ids.items() + ], + ) else: self._simple_insert_many_txn( txn, @@ -185,7 +170,7 @@ class StateStore(SQLBaseStore): "state_key": key[1], "event_id": state_id, } - for key, state_id in state_event_ids.items() + for key, state_id in context.current_state_ids.items() ], ) @@ -202,6 +187,10 @@ class StateStore(SQLBaseStore): ) def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. + """ if isinstance(self.database_engine, PostgresEngine): sql = (""" WITH RECURSIVE state(state_group) AS ( @@ -319,6 +308,11 @@ class StateStore(SQLBaseStore): results = {group: {} for group in groups} if isinstance(self.database_engine, PostgresEngine): + # The below query walks the state_group tree so that the "state" + # table includes all state_groups in the tree. It then joins + # against `state_groups_state` to fetch the latest state. + # It assumes that previous state groups are always numerically + # lesser. sql = (""" WITH RECURSIVE state(state_group) AS ( VALUES(?::bigint) @@ -644,6 +638,9 @@ class StateStore(SQLBaseStore): @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): + """This background update will slowly deduplicate state by reencoding + them as deltas. + """ last_state_group = progress.get("last_state_group", 0) rows_inserted = progress.get("rows_inserted", 0) max_group = progress.get("max_group", None) -- cgit 1.4.1 From 373654c6354c04b08a6f4dcb0ff7fa9ccae02f55 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 14:50:36 +0100 Subject: Comment about sqlite and WITH RECURSIVE --- synapse/storage/state.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee8b763008..e790793370 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -209,6 +209,8 @@ class StateStore(SQLBaseStore): else: return 0 else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) next_group = state_group count = 0 @@ -340,6 +342,8 @@ class StateStore(SQLBaseStore): key = (row["type"], row["state_key"]) results[group][key] = row["event_id"] else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) for group in groups: group_tree = [group] next_group = group -- cgit 1.4.1 From 70332a12dd0a2ea01e1f8f835dcb5ca15526a5f3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 14:57:14 +0100 Subject: Take value in a better way --- synapse/storage/events.py | 2 +- synapse/storage/state.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bec35ea68d..ed182c8d11 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1616,7 +1616,7 @@ class EventsStore(SQLBaseStore): curr_state = self._get_state_groups_from_groups_txn( txn, [new_state_edge], types=None ) - curr_state = curr_state.values()[0] + curr_state = curr_state[new_state_edge] self._simple_delete_txn( txn, diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e790793370..589a4fec6e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -693,12 +693,12 @@ class StateStore(SQLBaseStore): prev_state = self._get_state_groups_from_groups_txn( txn, [prev_group], types=None ) - prev_state = prev_state.values()[0] + prev_state = prev_state[prev_group] curr_state = self._get_state_groups_from_groups_txn( txn, [state_group], types=None ) - curr_state = curr_state.values()[0] + curr_state = curr_state[state_group] if not set(prev_state.keys()) - set(curr_state.keys()): # We can only do a delta if the current has a strict super set -- cgit 1.4.1 From a7032abb2e64f79be5823b770230cb223cc22ff1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 15:07:23 +0100 Subject: Correctly handle reindexing state groups that already have an edge --- synapse/storage/state.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 589a4fec6e..af3ddd962d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -673,6 +673,17 @@ class StateStore(SQLBaseStore): if not row or not state_group: return True, count + txn.execute( + "SELECT state_group FROM state_group_edges" + " WHERE state_group = ?", + (state_group,) + ) + + # If we reach a point where we've already started inserting + # edges we should stop. + if txn.fetchall(): + return True, count + txn.execute( "SELECT coalesce(max(id), 0) FROM state_groups" " WHERE id < ? AND room_id = ?", @@ -709,6 +720,14 @@ class StateStore(SQLBaseStore): if prev_state.get(key, None) != value } + self._simple_delete_txn( + txn, + table="state_group_edges", + keyvalues={ + "state_group": state_group, + } + ) + self._simple_insert_txn( txn, table="state_group_edges", -- cgit 1.4.1 From 0595413c0fe51d4f400f597bf57cd13d5e3450e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Sep 2016 15:49:57 +0100 Subject: Scale the batch size so that we're not bitten by the minimum --- synapse/storage/state.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index af3ddd962d..0730399b80 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -649,6 +649,10 @@ class StateStore(SQLBaseStore): rows_inserted = progress.get("rows_inserted", 0) max_group = progress.get("max_group", None) + BATCH_SIZE_SCALE_FACTOR = 100 + + batch_size = max(1, int(batch_size / BATCH_SIZE_SCALE_FACTOR)) + if max_group is None: rows = yield self._execute( "_background_deduplicate_state", None, @@ -779,4 +783,4 @@ class StateStore(SQLBaseStore): if finished: yield self._end_background_update(self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME) - defer.returnValue(result) + defer.returnValue(result * BATCH_SIZE_SCALE_FACTOR) -- cgit 1.4.1 From d25c20ccbe0f10fe5d6c0cef2156db7e8d76049c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Sep 2016 14:22:22 +0100 Subject: Use windowing function to make use of index --- synapse/storage/state.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0730399b80..26ecad5907 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -322,11 +322,11 @@ class StateStore(SQLBaseStore): SELECT prev_state_group FROM state_group_edges e, state s WHERE s.state_group = e.state_group ) - SELECT type, state_key, event_id FROM state_groups_state - WHERE ROW(type, state_key, state_group) IN ( - SELECT type, state_key, max(state_group) FROM state - INNER JOIN state_groups_state USING (state_group) - GROUP BY type, state_key + SELECT type, state_key, last_value(event_id) OVER ( + PARTITION BY type, state_key ORDER BY state_group ASC + ) AS event_id FROM state_groups_state + WHERE state_group IN ( + SELECT state_group FROM state ) %s; """) % (where_clause,) -- cgit 1.4.1 From fadb01551a897fdf1a2cbe43ff463c9616bd11ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Sep 2016 14:39:01 +0100 Subject: Add appopriate framing clause --- synapse/storage/state.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 26ecad5907..382f308a60 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -324,6 +324,7 @@ class StateStore(SQLBaseStore): ) SELECT type, state_key, last_value(event_id) OVER ( PARTITION BY type, state_key ORDER BY state_group ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS event_id FROM state_groups_state WHERE state_group IN ( SELECT state_group FROM state -- cgit 1.4.1 From 513188aa56bc680a54dbdf6d40657da72c5c6877 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Sep 2016 14:53:19 +0100 Subject: Comment --- synapse/storage/state.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 382f308a60..d6643473db 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -315,6 +315,10 @@ class StateStore(SQLBaseStore): # against `state_groups_state` to fetch the latest state. # It assumes that previous state groups are always numerically # lesser. + # The PARTITION is used to get the event_id in the greatest state + # group for the given type, state_key. + # This may return multiple rows per (type, state_key), but last_value + # should be the same. sql = (""" WITH RECURSIVE state(state_group) AS ( VALUES(?::bigint) -- cgit 1.4.1 From b568ca309c5724d28b6ebd9c0a3cd8179fa6d6d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 8 Sep 2016 09:38:54 +0100 Subject: Temporarily disable sequential scans for state fetching --- synapse/storage/state.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d6643473db..fef87834ca 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -310,6 +310,10 @@ class StateStore(SQLBaseStore): results = {group: {} for group in groups} if isinstance(self.database_engine, PostgresEngine): + # Temporarily disable sequential scans in this transaction. This is + # a temporary hack until we can add the right indices in + txn.execute("SET LOCAL enable_seqscan=off") + # The below query walks the state_group tree so that the "state" # table includes all state_groups in the tree. It then joins # against `state_groups_state` to fetch the latest state. -- cgit 1.4.1