From d72667fcce6f751c55ea510c964d68499cb67305 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Apr 2017 10:10:49 +0100 Subject: Speed up get_current_state_ids Using _simple_select_list is fairly expensive for functions that return a lot of rows and/or get called a lot. (This is because it carefully constructs a list of dicts). get_current_state_ids gets called a lot on startup and e.g. when the IRC bridge decided to send tonnes of joins/leaves (as it invalidates the cache). We therefore replace it with a custon txn function that builds up the final result dict without building up and intermediate representation. --- synapse/storage/state.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fb23f6f462..86e5fdb76b 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -14,7 +14,7 @@ # limitations under the License. from ._base import SQLBaseStore -from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches import intern_string from synapse.storage.engines import PostgresEngine @@ -69,17 +69,24 @@ class StateStore(SQLBaseStore): where_clause="type='m.room.member'", ) - @cachedInlineCallbacks(max_entries=100000, iterable=True) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): - rows = yield self._simple_select_list( - table="current_state_events", - keyvalues={"room_id": room_id}, - retcols=["event_id", "type", "state_key"], - desc="_calculate_state_delta", + def _get_current_state_ids_txn(txn): + txn.execute( + """SELECT type, state_key, event_id FROM current_state_events + WHERE room_id = ? + """, + (room_id,) + ) + + return { + (r[0], r[1]): r[2] for r in txn + } + + return self.runInteraction( + "get_current_state_ids", + _get_current_state_ids_txn, ) - defer.returnValue({ - (r["type"], r["state_key"]): r["event_id"] for r in rows - }) @defer.inlineCallbacks def get_state_groups_ids(self, room_id, event_ids): -- cgit 1.4.1 From 2a3e822f4494e42c1b118c2fa0132b3a2f13bbfb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Apr 2017 13:47:04 +0100 Subject: Comment --- synapse/storage/state.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 86e5fdb76b..acd69944c4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -71,6 +71,15 @@ class StateStore(SQLBaseStore): @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): + """Get the current state event ids for a room based on the + current_state_events table. + + Args: + room_id (str) + + Returns: + deferred: dict of (type, state_key) -> event_id + """ def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events -- cgit 1.4.1 From e4f34311166cc93ad26fc12969867e1db30b4a30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 24 Apr 2017 13:27:38 +0100 Subject: Remove unused cache --- synapse/replication/slave/storage/events.py | 3 --- synapse/storage/state.py | 7 +------ 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 4ca1e5aa8c..ab48ff925e 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -102,9 +102,6 @@ class SlavedEventStore(BaseSlavedStore): _get_state_groups_from_groups_txn = ( DataStore._get_state_groups_from_groups_txn.__func__ ) - _get_state_group_from_group = ( - StateStore.__dict__["_get_state_group_from_group"] - ) get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index acd69944c4..927a936013 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -279,12 +279,7 @@ class StateStore(SQLBaseStore): return count - @cached(num_args=2, max_entries=100000, iterable=True) - def _get_state_group_from_group(self, group, types): - raise NotImplementedError() - - @cachedList(cached_method_name="_get_state_group_from_group", - list_name="groups", num_args=2, inlineCallbacks=True) + @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): """Returns dictionary state_group -> (dict of (type, state_key) -> event id) """ -- cgit 1.4.1 From 22f3d3ae769a031c42cda1fb233dd91e28e585d0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Apr 2017 11:43:03 +0100 Subject: Reduce _get_state_group_for_event cache size --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 927a936013..e89001d994 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -507,7 +507,7 @@ class StateStore(SQLBaseStore): state_map = yield self.get_state_ids_for_events([event_id], types) defer.returnValue(state_map[event_id]) - @cached(num_args=2, max_entries=100000) + @cached(num_args=2, max_entries=50000) def _get_state_group_for_event(self, room_id, event_id): return self._simple_select_one_onecol( table="event_to_state_groups", -- cgit 1.4.1 From f053a1409e2c0105859fe6c9af02efd49206eb91 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 25 Apr 2017 17:22:55 +0100 Subject: Make state caches cache in ascii --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e89001d994..a16afa8df5 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -16,6 +16,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches import intern_string +from synapse.util.stringutils import to_ascii from synapse.storage.engines import PostgresEngine from twisted.internet import defer @@ -89,7 +90,7 @@ class StateStore(SQLBaseStore): ) return { - (r[0], r[1]): r[2] for r in txn + (intern_string(r[0]), intern_string(r[1])): to_ascii(r[2]) for r in txn } return self.runInteraction( @@ -655,7 +656,7 @@ class StateStore(SQLBaseStore): state_dict = results[group] state_dict.update( - ((intern_string(k[0]), intern_string(k[1])), v) + ((intern_string(k[0]), intern_string(k[1])), to_ascii(v)) for k, v in group_state_dict.iteritems() ) -- cgit 1.4.1 From a2c89a225c567df53cc6d29af397547f4e9a4a2f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 May 2017 10:40:31 +0100 Subject: Prefill state caches --- synapse/storage/_base.py | 8 ++++---- synapse/storage/events.py | 8 ++++++-- synapse/storage/state.py | 8 ++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index c659004e8d..58b73af7d2 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -60,12 +60,12 @@ class LoggingTransaction(object): object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - def call_after(self, callback, *args): + def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the transaction has finished. Used to invalidate the caches on the correct thread. """ - self.after_callbacks.append((callback, args)) + self.after_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -319,8 +319,8 @@ class SQLBaseStore(object): inner_func, *args, **kwargs ) finally: - for after_callback, after_args in after_callbacks: - after_callback(*after_args) + for after_callback, after_args, after_kwargs in after_callbacks: + after_callback(*after_args, **after_kwargs) defer.returnValue(result) @defer.inlineCallbacks diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a3790419dd..a8d1b93d99 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -370,6 +370,10 @@ class EventsStore(SQLBaseStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) + for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + self.get_current_state_ids.prefill( + (room_id, ), new_state + ) @defer.inlineCallbacks def _calculate_new_extremeties(self, room_id, event_contexts, latest_event_ids): @@ -529,7 +533,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert)) + defer.returnValue((to_delete, to_insert, current_state)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -682,7 +686,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert = current_state_tuple + to_delete, to_insert, _ = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a16afa8df5..1e1ce87e0e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,14 @@ class StateStore(SQLBaseStore): ], ) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=context.state_group, + value=context.current_state_ids, + full=True, + ) + self._simple_insert_many_txn( txn, table="event_to_state_groups", -- cgit 1.4.1 From 1827057acc2b8f821ec20538cbf781f798e34700 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 09:56:05 +0100 Subject: Comments --- synapse/storage/events.py | 6 +++--- synapse/storage/state.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a37b7bad5a..d946024c9b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -441,10 +441,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 2-tuple (to_delete, to_insert) where both are state dicts, i.e. - (type, state_key) -> event_id. `to_delete` are the entries to + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. + to insert. `new_state` is the full set of state. May return None if there are no changes to be applied. """ # Now we need to work out the different state sets for diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1e1ce87e0e..5d6f7dfa28 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,9 @@ class StateStore(SQLBaseStore): ], ) + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, -- cgit 1.4.1 From 2c2dcf81d03fabfe4a018b60b4b840069627b3fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 10:00:29 +0100 Subject: Update comment --- synapse/storage/state.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 5d6f7dfa28..03981f5d2b 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -229,7 +229,8 @@ class StateStore(SQLBaseStore): # Prefill the state group cache with this group. # It's fine to use the sequence like this as the state group map - # is immutable. + # is immutable. (If the map wasn't immutable then this prefill could + # race with another update) txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, -- cgit 1.4.1 From 587f07543fcf604da58903b22a7448e631d797ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 4 May 2017 15:07:27 +0100 Subject: Revert "Prefill state caches" --- synapse/storage/_base.py | 8 ++++---- synapse/storage/events.py | 16 +++++----------- synapse/storage/state.py | 12 ------------ 3 files changed, 9 insertions(+), 27 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 58b73af7d2..c659004e8d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -60,12 +60,12 @@ class LoggingTransaction(object): object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - def call_after(self, callback, *args, **kwargs): + def call_after(self, callback, *args): """Call the given callback on the main twisted thread after the transaction has finished. Used to invalidate the caches on the correct thread. """ - self.after_callbacks.append((callback, args, kwargs)) + self.after_callbacks.append((callback, args)) def __getattr__(self, name): return getattr(self.txn, name) @@ -319,8 +319,8 @@ class SQLBaseStore(object): inner_func, *args, **kwargs ) finally: - for after_callback, after_args, after_kwargs in after_callbacks: - after_callback(*after_args, **after_kwargs) + for after_callback, after_args in after_callbacks: + after_callback(*after_args) defer.returnValue(result) @defer.inlineCallbacks diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d946024c9b..98707d40ee 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -374,12 +374,6 @@ class EventsStore(SQLBaseStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) - - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): - self.get_current_state_ids.prefill( - (room_id, ), new_state - ) - for event, context in chunk: if context.app_service: origin_type = "local" @@ -441,10 +435,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to + 2-tuple (to_delete, to_insert) where both are state dicts, i.e. + (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. + to insert. May return None if there are no changes to be applied. """ # Now we need to work out the different state sets for @@ -551,7 +545,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert, current_state)) + defer.returnValue((to_delete, to_insert)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -704,7 +698,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert, _ = current_state_tuple + to_delete, to_insert = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 03981f5d2b..a16afa8df5 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,18 +227,6 @@ class StateStore(SQLBaseStore): ], ) - # Prefill the state group cache with this group. - # It's fine to use the sequence like this as the state group map - # is immutable. (If the map wasn't immutable then this prefill could - # race with another update) - txn.call_after( - self._state_group_cache.update, - self._state_group_cache.sequence, - key=context.state_group, - value=context.current_state_ids, - full=True, - ) - self._simple_insert_many_txn( txn, table="event_to_state_groups", -- cgit 1.4.1 From bfbc907cec96ce9a64730930f63ed400c1aa3b5b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 May 2017 10:40:31 +0100 Subject: Prefill state caches --- synapse/storage/_base.py | 8 ++++---- synapse/storage/events.py | 10 ++++++++-- synapse/storage/state.py | 8 ++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index c659004e8d..58b73af7d2 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -60,12 +60,12 @@ class LoggingTransaction(object): object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - def call_after(self, callback, *args): + def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the transaction has finished. Used to invalidate the caches on the correct thread. """ - self.after_callbacks.append((callback, args)) + self.after_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -319,8 +319,8 @@ class SQLBaseStore(object): inner_func, *args, **kwargs ) finally: - for after_callback, after_args in after_callbacks: - after_callback(*after_args) + for after_callback, after_args, after_kwargs in after_callbacks: + after_callback(*after_args, **after_kwargs) defer.returnValue(result) @defer.inlineCallbacks diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dbd63078c6..0dffafd90d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -374,6 +374,7 @@ class EventsStore(SQLBaseStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) + for event, context in chunk: if context.app_service: origin_type = "local" @@ -387,6 +388,11 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) + for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + self.get_current_state_ids.prefill( + (room_id, ), new_state + ) + @defer.inlineCallbacks def _calculate_new_extremeties(self, room_id, event_contexts, latest_event_ids): """Calculates the new forward extremeties for a room given events to @@ -545,7 +551,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert)) + defer.returnValue((to_delete, to_insert, current_state)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -698,7 +704,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert = current_state_tuple + to_delete, to_insert, _ = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a16afa8df5..1e1ce87e0e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,14 @@ class StateStore(SQLBaseStore): ], ) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=context.state_group, + value=context.current_state_ids, + full=True, + ) + self._simple_insert_many_txn( txn, table="event_to_state_groups", -- cgit 1.4.1 From 871605f4e20cce3f093b2eae0f3d2ad7fb43a640 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 09:56:05 +0100 Subject: Comments --- synapse/storage/events.py | 6 +++--- synapse/storage/state.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 36574f78b8..5db7ec1622 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -453,10 +453,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 2-tuple (to_delete, to_insert) where both are state dicts, i.e. - (type, state_key) -> event_id. `to_delete` are the entries to + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. + to insert. `new_state` is the full set of state. May return None if there are no changes to be applied. """ # Now we need to work out the different state sets for diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1e1ce87e0e..5d6f7dfa28 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -227,6 +227,9 @@ class StateStore(SQLBaseStore): ], ) + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, -- cgit 1.4.1 From e4435b014e50a10ad89c201d6f91b6be35a9b02f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 May 2017 10:00:29 +0100 Subject: Update comment --- synapse/storage/state.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 5d6f7dfa28..03981f5d2b 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -229,7 +229,8 @@ class StateStore(SQLBaseStore): # Prefill the state group cache with this group. # It's fine to use the sequence like this as the state group map - # is immutable. + # is immutable. (If the map wasn't immutable then this prefill could + # race with another update) txn.call_after( self._state_group_cache.update, self._state_group_cache.sequence, -- cgit 1.4.1 From 608b5a6317ce3797ff279f6d1a8a39f475b55736 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 May 2017 12:55:29 +0100 Subject: Take a copy before prefilling, as it may be a frozendict --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/state.py') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 03981f5d2b..85acf2ad1e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -235,7 +235,7 @@ class StateStore(SQLBaseStore): self._state_group_cache.update, self._state_group_cache.sequence, key=context.state_group, - value=context.current_state_ids, + value=dict(context.current_state_ids), full=True, ) -- cgit 1.4.1