From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:01:41 +0000 Subject: WIP experiment in lazyloading room members --- synapse/storage/state.py | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..da6bb685fa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -198,8 +198,15 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_state_groups_from_groups_txn(self, txn, groups, types=None): results = {group: {} for group in groups} + + include_other_types = False + if types is not None: - types = list(set(types)) # deduplicate types list + type_set = set(types) + if (None, None) in type_set: + include_other_types = True + type_set.remove((None, None)) + types = list(type_set) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -238,11 +245,21 @@ class StateGroupWorkerStore(SQLBaseStore): if types: clause_to_args = [ ( - "AND type = ? AND state_key = ?", - (etype, state_key) + "AND type = ? AND state_key = ?" if state_key is not None else "AND type = ?", + (etype, state_key) if state_key is not None else (etype) ) for etype, state_key in types ] + + if include_other_types: + # XXX: check whether this slows postgres down like a list of + # ORs does too? + clause_to_args.append( + ( + "AND type <> ? " * len(types), + [t for (t, _) in types] + ) + ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -263,6 +280,10 @@ class StateGroupWorkerStore(SQLBaseStore): where_clause = "AND (%s)" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) + if include_other_types: + where_clause += " AND (%s)" % ( + " AND ".join(["type <> ?"] * len(types)), + ) else: where_clause = "" @@ -449,17 +470,27 @@ class StateGroupWorkerStore(SQLBaseStore): group: The state group to lookup types (list): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the - `type`. + `type`. Presence of type of `None` indicates that types not + in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() + include_other_types = False + for typ, state_key in types: key = (typ, state_key) + + if typ is None: + include_other_types = True + next + if state_key is None: type_to_key[typ] = None + # XXX: why do we mark the type as missing from our cache just + # because we weren't filtering on a specific value of state_key? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -478,7 +509,7 @@ class StateGroupWorkerStore(SQLBaseStore): return True if state_key in valid_state_keys: return True - return False + return include_other_types got_all = is_all or not missing_types @@ -507,6 +538,12 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. + + XXX: is it really true that `state_key` of None in `types` matches all + state_keys? it looks like _get-some_state_from_cache does the right thing, + but _get_state_groups_from_groups_txn treats ths None is turned into + 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew + I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) -- cgit 1.5.1 From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:10:25 +0000 Subject: typos --- synapse/handlers/sync.py | 4 ++-- synapse/storage/state.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 809e9fece9..fa730ca760 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -471,13 +471,13 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: - types = ( + types = [ (EventTypes.Member, state_key) for state_key in set( event.sender # FIXME: we also care about targets etc. for event in batch.events ) - ) + ] types.append((None, None)) # don't just filter to room members # TODO: we should opportunistically deduplicate these members too diff --git a/synapse/storage/state.py b/synapse/storage/state.py index da6bb685fa..0238200286 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore): args = [next_group] if types: args.extend(i for typ in types for i in typ) + if include_other_types: + args.extend(typ for (typ, _) in types) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" -- cgit 1.5.1 From 97c0496cfa89b037d89fccd05dd03442b80e07fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 00:27:06 +0000 Subject: fix sqlite where clause --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0238200286..b796d3c995 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -277,13 +277,14 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: if types is not None: - where_clause = "AND (%s)" % ( + where_clause = "AND (%s" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) if include_other_types: - where_clause += " AND (%s)" % ( + where_clause += " OR (%s)" % ( " AND ".join(["type <> ?"] * len(types)), ) + where_clause += ")" else: where_clause = "" -- cgit 1.5.1 From fdedcd1f4ddeaa3ed5bfd3c05ab2977b4e8ed457 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 01:39:06 +0000 Subject: correctly handle None state_keys and fix include_other_types thinko --- synapse/storage/state.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b796d3c995..405e6b6770 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -276,15 +276,23 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?) OR " + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?) OR " + where_args.extend([typ[0], typ[1]]) + if include_other_types: - where_clause += " OR (%s)" % ( + where_clause += "(%s) OR " % ( " AND ".join(["type <> ?"] * len(types)), ) - where_clause += ")" + where_args.extend(t for (t, _) in types) + where_clause += "0)" # 0 to terminate the last OR else: where_clause = "" @@ -301,9 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) - if include_other_types: - args.extend(typ for (typ, _) in types) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -507,12 +513,12 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return False + return include_other_types if valid_state_keys is None: return True if state_key in valid_state_keys: return True - return include_other_types + return False got_all = is_all or not missing_types -- cgit 1.5.1 From 1b1c13777154b5b0cf8bf8cf809381f889a2a82d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 17:52:52 +0000 Subject: fix bug #2926 --- synapse/storage/state.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..783cebb351 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,10 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else + ( + "AND type = ?", + (etype) ) for etype, state_key in types ] @@ -259,10 +263,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?)" + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?)" + where_args.extend([typ[0], typ[1]]) + if typ != types[-1]: + where_clause += " OR " + where_clause += ")" else: where_clause = "" @@ -279,7 +292,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" -- cgit 1.5.1 From 52f7e23c7276b2848aa5291d8b78875b7c32a658 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:07:55 +0000 Subject: PR feedbackz --- synapse/storage/state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 783cebb351..77259a3141 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,10 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) - ) if state_key is not None else - ( + ) if state_key is not None else ( "AND type = ?", - (etype) + (etype,) ) for etype, state_key in types ] -- cgit 1.5.1 From b2aba9e43053f9f297671fce0051bfc18a8b655a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:13:44 +0000 Subject: build where_clause sanely --- synapse/storage/state.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 77259a3141..82740266bf 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -263,18 +263,16 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: where_args = [] + where_clauses = [] if types is not None: - where_clause = "AND (" for typ in types: if typ[1] is None: - where_clause += "(type = ?)" + where_clauses.append("(type = ?)") where_args.extend(typ[0]) else: - where_clause += "(type = ? AND state_key = ?)" + where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if typ != types[-1]: - where_clause += " OR " - where_clause += ")" + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" -- cgit 1.5.1 From 865377a70d9d7db27b89348d2ebbd394f701c490 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:45:36 +0000 Subject: disable optimisation for searching for state groups when type filter includes wildcards on state_key --- synapse/storage/state.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 82740266bf..39f73afaa2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -264,11 +264,13 @@ class StateGroupWorkerStore(SQLBaseStore): else: where_args = [] where_clauses = [] + wildcard_types = False if types is not None: for typ in types: if typ[1] is None: where_clauses.append("(type = ?)") where_args.extend(typ[0]) + wildcard_types = True else: where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) @@ -302,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries inthe (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( -- cgit 1.5.1 From afbf4d3dccb3d18276e4b119b0267490ca522b4b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:48:04 +0000 Subject: typoe --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 39f73afaa2..ffa4246031 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -304,7 +304,7 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the number of entries inthe (type,state_key)->event_id dict + # If the number of entries in the (type,state_key)->event_id dict # matches the number of (type,state_keys) types we were searching # for, then we must have found them all, so no need to go walk # further down the tree... UNLESS our types filter contained -- cgit 1.5.1 From f0f9a0605b1bddc1b01d1bbb6af93f00763b8496 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:12:15 +0000 Subject: remove comment now #2969 is fixed --- synapse/storage/state.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4ab16e18b2..4291cde7ab 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -561,12 +561,6 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. - - XXX: is it really true that `state_key` of None in `types` matches all - state_keys? it looks like _get-some_state_from_cache does the right thing, - but _get_state_groups_from_groups_txn treats ths None is turned into - 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew - I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) -- cgit 1.5.1 From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:31:41 +0000 Subject: make it work --- synapse/handlers/sync.py | 6 +++--- synapse/storage/state.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c754cfdeeb..c05e3d107f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -498,7 +498,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } else: @@ -511,7 +511,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4291cde7ab..9c9994c073 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? + unique_types = set([ t for (t, _) in types ]) clause_to_args.append( ( - "AND type <> ? " * len(types), - [t for (t, _) in types] + "AND type <> ? " * len(unique_types), + list(unique_types) ) ) else: @@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: + unique_types = set([ t for (t, _) in types ]) where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(types)) + ")" + "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) - where_args.extend(t for (t, _) in types) + where_args.extend(list(unique_types)) where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: -- cgit 1.5.1 From 9f77001e2747c36046e136c5d3c706c0aef54b15 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:07:47 +0000 Subject: pep8 --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 9c9994c073..55159e64d0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,7 +257,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -294,7 +294,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) -- cgit 1.5.1 From bf49d2dca8db6d82f09441a35cd3655c746b6b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c8..3c7a0ef97a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e4..eb91c0b293 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3890878170..9fc65229fd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad4..2e23dd78ba 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 -- cgit 1.5.1 From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 01:09:55 +0100 Subject: add pydoc --- synapse/handlers/sync.py | 18 ++++++++---- synapse/storage/state.py | 76 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 27 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 05bf6d46dd..8e38078332 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -423,7 +423,11 @@ class SyncHandler(object): Args: event(synapse.events.EventBase): event of interest - + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) """ @@ -440,6 +444,11 @@ class SyncHandler(object): Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) @@ -472,8 +481,6 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - lazy_load_members(bool): Whether to only return state for members - referenced in this timeline segment Returns: A deferred new event dictionary @@ -496,7 +503,7 @@ class SyncHandler(object): types = [ (EventTypes.Member, state_key) for state_key in set( - event.sender # FIXME: we also care about targets etc. + event.sender # FIXME: we also care about invite targets etc. for event in batch.events ) ] @@ -1398,7 +1405,8 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state + room_id, batch, sync_config, since_token, now_token, + full_state=full_state ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55159e64d0..63b6834202 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> (dict of (type, state_key) -> event id) + """Returns the state groups for a given set of groups, filtering on + types of state events. + + Args: + groups(list[int]): list of state group IDs to query + types(list[str|None, str|None])|None: List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all types are returned. + + Returns: + dictionary state_group -> (dict of (type, state_key) -> event id) """ results = {} @@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: type_set = set(types) if (None, None) in type_set: + # special case (None, None) to mean that other types should be + # returned - i.e. we were just filtering down the state keys + # for particular types. include_other_types = True type_set.remove((None, None)) types = list(type_set) # deduplicate types list @@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore): that are in the `types` list. Args: - event_ids (list) - types (list): List of (type, state_key) tuples which are used to - filter the state fetched. `state_key` may be None, which matches - any `state_key` + event_ids (list[string]) + types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_ids(list(str)): events whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore): missing state. Args: - group: The state group to lookup - types (list): List of 2-tuples of the form (`type`, `state_key`), - where a `state_key` of `None` matches all state_keys for the - `type`. Presence of type of `None` indicates that types not - in the list should not be filtered out. + group(int): The state group to lookup + types(list[str|None, str|None]): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + with matching types. + + Args: + groups(list[int]): list of groups whose state to query + types(list[str|None, str|None]|None): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all events are returned. + + Returns: + dict of group -> list of state events """ if types: types = frozenset(types) -- cgit 1.5.1 From 5797f5542b687b73ebdd8613f64ce0f38e637b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 12 Jul 2018 01:32:39 +0100 Subject: WIP to announce deleted devices over federation Previously we queued up the poke correctly when the device was deleted, but then the actual EDU wouldn't get sent, as the device was no longer known. Instead, we now send EDUs for deleted devices too if there's a poke for them. --- synapse/notifier.py | 2 +- synapse/storage/devices.py | 40 ++++++++++++++++++++++++++------------ synapse/storage/end_to_end_keys.py | 16 ++++++++++++++- 3 files changed, 44 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/notifier.py b/synapse/notifier.py index 51cbd66f06..e650c3e494 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -274,7 +274,7 @@ class Notifier(object): logger.exception("Error notifying application services of event") def on_new_event(self, stream_key, new_token, users=[], rooms=[]): - """ Used to inform listeners that something has happend event wise. + """ Used to inform listeners that something has happened event wise. Will wake up all listeners for the given users and rooms. """ diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index ec68e39f1e..0c797f9f3e 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -239,6 +239,7 @@ class DeviceStore(SQLBaseStore): def update_remote_device_list_cache_entry(self, user_id, device_id, content, stream_id): """Updates a single user's device in the cache. + If the content is null, delete the device from the cache. """ return self.runInteraction( "update_remote_device_list_cache_entry", @@ -248,17 +249,32 @@ class DeviceStore(SQLBaseStore): def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, content, stream_id): - self._simple_upsert_txn( - txn, - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "content": json.dumps(content), - } - ) + if content is None: + self._simple_delete_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + ) + + # Do we need this? + txn.call_after( + self.device_id_exists_cache.invalidate, (user_id, device_id,) + ) + else: + self._simple_upsert_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "content": json.dumps(content), + } + ) txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,)) txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) @@ -366,7 +382,7 @@ class DeviceStore(SQLBaseStore): now_stream_id = max(stream_id for stream_id in itervalues(query_map)) devices = self._get_e2e_device_keys_txn( - txn, query_map.keys(), include_all_devices=True + txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True ) prev_sent_id_sql = """ diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 7ae5c65482..f61553cec8 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -64,12 +64,17 @@ class EndToEndKeyStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_e2e_device_keys(self, query_list, include_all_devices=False): + def get_e2e_device_keys( + self, query_list, include_all_devices=False, + include_deleted_devices=False + ): """Fetch a list of device keys. Args: query_list(list): List of pairs of user_ids and device_ids. include_all_devices (bool): whether to include entries for devices that don't have device keys + include_deleted_devices (bool): whether to include null entries for + devices which no longer exist (but where in the query_list) Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -82,10 +87,19 @@ class EndToEndKeyStore(SQLBaseStore): query_list, include_all_devices, ) + if include_deleted_devices: + deleted_devices = set(query_list) + for user_id, device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): + if include_deleted_devices: + deleted_devices -= (user_id, device_id) device_info["keys"] = json.loads(device_info.pop("key_json")) + if include_deleted_devices: + for user_id, device_id in deleted_devices: + results.setdefault(user_id, {})[device_id] = None + defer.returnValue(results) def _get_e2e_device_keys_txn(self, txn, query_list, include_all_devices): -- cgit 1.5.1 From 12ec58301f946ced9702afbf6dfbfbc8c3dfd3dd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 12 Jul 2018 11:39:43 +0100 Subject: shift to using an explicit deleted flag on m.device_list_update EDUs and generally make it work. --- synapse/storage/devices.py | 18 ++++++++++-------- synapse/storage/end_to_end_keys.py | 27 +++++++++++++++------------ 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 0c797f9f3e..203f50f07d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -239,7 +239,6 @@ class DeviceStore(SQLBaseStore): def update_remote_device_list_cache_entry(self, user_id, device_id, content, stream_id): """Updates a single user's device in the cache. - If the content is null, delete the device from the cache. """ return self.runInteraction( "update_remote_device_list_cache_entry", @@ -249,7 +248,7 @@ class DeviceStore(SQLBaseStore): def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, content, stream_id): - if content is None: + if content.get("deleted"): self._simple_delete_txn( txn, table="device_lists_remote_cache", @@ -409,12 +408,15 @@ class DeviceStore(SQLBaseStore): prev_id = stream_id - key_json = device.get("key_json", None) - if key_json: - result["keys"] = json.loads(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name + if device is not None: + key_json = device.get("key_json", None) + if key_json: + result["keys"] = json.loads(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + else: + result["deleted"] = True results.append(result) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index f61553cec8..6c28719420 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -74,7 +74,7 @@ class EndToEndKeyStore(SQLBaseStore): include_all_devices (bool): whether to include entries for devices that don't have device keys include_deleted_devices (bool): whether to include null entries for - devices which no longer exist (but where in the query_list) + devices which no longer exist (but were in the query_list) Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -84,28 +84,25 @@ class EndToEndKeyStore(SQLBaseStore): results = yield self.runInteraction( "get_e2e_device_keys", self._get_e2e_device_keys_txn, - query_list, include_all_devices, + query_list, include_all_devices, include_deleted_devices, ) - if include_deleted_devices: - deleted_devices = set(query_list) - for user_id, device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): - if include_deleted_devices: - deleted_devices -= (user_id, device_id) device_info["keys"] = json.loads(device_info.pop("key_json")) - if include_deleted_devices: - for user_id, device_id in deleted_devices: - results.setdefault(user_id, {})[device_id] = None - defer.returnValue(results) - def _get_e2e_device_keys_txn(self, txn, query_list, include_all_devices): + def _get_e2e_device_keys_txn( + self, txn, query_list, include_all_devices=False, + include_deleted_devices=False, + ): query_clauses = [] query_params = [] + if include_deleted_devices: + deleted_devices = set(query_list) + for (user_id, device_id) in query_list: query_clause = "user_id = ?" query_params.append(user_id) @@ -133,8 +130,14 @@ class EndToEndKeyStore(SQLBaseStore): result = {} for row in rows: + if include_deleted_devices: + deleted_devices.remove((row["user_id"], row["device_id"])) result.setdefault(row["user_id"], {})[row["device_id"]] = row + if include_deleted_devices: + for user_id, device_id in deleted_devices: + result.setdefault(user_id, {})[device_id] = None + return result @defer.inlineCallbacks -- cgit 1.5.1 From 4a27000548d2b5194256152305fe121f671f0c81 Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 16 Jul 2018 13:46:49 +0200 Subject: check isort by travis --- .travis.yml | 3 +++ changelog.d/3540.misc | 1 + synapse/config/consent_config.py | 1 + synapse/config/logger.py | 1 + synapse/config/server_notices_config.py | 1 + synapse/http/client.py | 4 +++- synapse/http/site.py | 2 +- synapse/rest/__init__.py | 16 +++++++++++++--- synapse/rest/client/v1/admin.py | 4 ++-- synapse/rest/client/v1/initial_sync.py | 2 +- synapse/rest/client/v2_alpha/devices.py | 2 +- synapse/rest/media/v1/identicon_resource.py | 4 ++-- synapse/storage/schema/delta/48/group_unique_indexes.py | 1 + tox.ini | 8 ++++++-- 14 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 changelog.d/3540.misc (limited to 'synapse/storage') diff --git a/.travis.yml b/.travis.yml index a98d547978..e7fcd8b5d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,9 @@ matrix: - python: 3.6 env: TOX_ENV=py36 + - python: 3.6 + env: TOX_ENV=isort + - python: 3.6 env: TOX_ENV=check-newsfragment diff --git a/changelog.d/3540.misc b/changelog.d/3540.misc new file mode 100644 index 0000000000..99dcad8e46 --- /dev/null +++ b/changelog.d/3540.misc @@ -0,0 +1 @@ +check isort for each PR diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index e22c731aad..cec978125b 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -15,6 +15,7 @@ from ._base import Config + DEFAULT_CONFIG = """\ # User Consent configuration # diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a87b11a1df..cc1051057c 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -29,6 +29,7 @@ from synapse.util.versionstring import get_version_string from ._base import Config + DEFAULT_LOG_CONFIG = Template(""" version: 1 diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 3c39850ac6..91b08446b7 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -16,6 +16,7 @@ from synapse.types import UserID from ._base import Config + DEFAULT_CONFIG = """\ # Server Notices room configuration # diff --git a/synapse/http/client.py b/synapse/http/client.py index d6a0d75b2b..0622a69e86 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -26,9 +26,11 @@ from OpenSSL.SSL import VERIFY_NONE from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( + Agent, + BrowserLikeRedirectAgent, + ContentDecoderAgent, GzipDecoder, HTTPConnectionPool, PartialDownloadError, diff --git a/synapse/http/site.py b/synapse/http/site.py index 21e26f9c5e..41dd974cea 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ from twisted.web.server import Request, Site from synapse.http import redact_uri from synapse.http.request_metrics import RequestMetrics -from synapse.util.logcontext import LoggingContext, ContextResourceUsage +from synapse.util.logcontext import ContextResourceUsage, LoggingContext logger = logging.getLogger(__name__) diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 75c2a4ec8e..0d69d37f7e 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -15,11 +15,21 @@ from synapse.http.server import JsonResource from synapse.rest.client import versions -from synapse.rest.client.v1 import admin, directory, events, initial_sync from synapse.rest.client.v1 import login as v1_login -from synapse.rest.client.v1 import logout, presence, profile, push_rule, pusher from synapse.rest.client.v1 import register as v1_register -from synapse.rest.client.v1 import room, voip +from synapse.rest.client.v1 import ( + admin, + directory, + events, + initial_sync, + logout, + presence, + profile, + push_rule, + pusher, + room, + voip, +) from synapse.rest.client.v2_alpha import ( account, account_data, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 01c3f2eb04..2dc50e582b 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -24,9 +24,9 @@ from synapse.api.constants import Membership from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( assert_params_in_dict, - parse_json_object_from_request, parse_integer, - parse_string + parse_json_object_from_request, + parse_string, ) from synapse.types import UserID, create_requester diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py index 00a1a99feb..fd5f85b53e 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/v1/initial_sync.py @@ -15,8 +15,8 @@ from twisted.internet import defer -from synapse.streams.config import PaginationConfig from synapse.http.servlet import parse_boolean +from synapse.streams.config import PaginationConfig from .base import ClientV1RestServlet, client_path_patterns diff --git a/synapse/rest/client/v2_alpha/devices.py b/synapse/rest/client/v2_alpha/devices.py index aded2409be..9b75bb1377 100644 --- a/synapse/rest/client/v2_alpha/devices.py +++ b/synapse/rest/client/v2_alpha/devices.py @@ -19,9 +19,9 @@ from twisted.internet import defer from synapse.api import errors from synapse.http.servlet import ( + RestServlet, assert_params_in_dict, parse_json_object_from_request, - RestServlet ) from ._base import client_v2_patterns, interactive_auth_handler diff --git a/synapse/rest/media/v1/identicon_resource.py b/synapse/rest/media/v1/identicon_resource.py index b3217eff53..bdbd8d50dd 100644 --- a/synapse/rest/media/v1/identicon_resource.py +++ b/synapse/rest/media/v1/identicon_resource.py @@ -14,10 +14,10 @@ from pydenticon import Generator -from synapse.http.servlet import parse_integer - from twisted.web.resource import Resource +from synapse.http.servlet import parse_integer + FOREGROUND = [ "rgb(45,79,255)", "rgb(254,180,44)", diff --git a/synapse/storage/schema/delta/48/group_unique_indexes.py b/synapse/storage/schema/delta/48/group_unique_indexes.py index 2233af87d7..ac4ac66cd4 100644 --- a/synapse/storage/schema/delta/48/group_unique_indexes.py +++ b/synapse/storage/schema/delta/48/group_unique_indexes.py @@ -15,6 +15,7 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.prepare_database import get_statements + FIX_INDEXES = """ -- rebuild indexes as uniques DROP INDEX groups_invites_g_idx; diff --git a/tox.ini b/tox.ini index 61a20a10cb..c7491690c5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = packaging, py27, py36, pep8 +envlist = packaging, py27, py36, pep8, isort [testenv] deps = @@ -103,10 +103,14 @@ deps = flake8 commands = /bin/sh -c "flake8 synapse tests {env:PEP8SUFFIX:}" +[testenv:isort] +skip_install = True +deps = isort +commands = /bin/sh -c "isort -c -sp setup.cfg -rc synapse tests" [testenv:check-newsfragment] skip_install = True deps = towncrier>=18.6.0rc1 commands = python -m towncrier.check --compare-with=origin/develop -basepython = python3.6 \ No newline at end of file +basepython = python3.6 -- cgit 1.5.1 From 78a9ddcf9a5ddddd1ba01c58948d0c4a1ce294d9 Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 16 Jul 2018 14:23:25 +0200 Subject: rerun isort with latest version --- synapse/config/consent_config.py | 1 - synapse/config/logger.py | 1 - synapse/config/server_notices_config.py | 1 - synapse/http/client.py | 4 +--- synapse/rest/__init__.py | 16 +++------------- synapse/storage/schema/delta/48/group_unique_indexes.py | 1 - 6 files changed, 4 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index cec978125b..e22c731aad 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -15,7 +15,6 @@ from ._base import Config - DEFAULT_CONFIG = """\ # User Consent configuration # diff --git a/synapse/config/logger.py b/synapse/config/logger.py index cc1051057c..a87b11a1df 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -29,7 +29,6 @@ from synapse.util.versionstring import get_version_string from ._base import Config - DEFAULT_LOG_CONFIG = Template(""" version: 1 diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 91b08446b7..3c39850ac6 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -16,7 +16,6 @@ from synapse.types import UserID from ._base import Config - DEFAULT_CONFIG = """\ # Server Notices room configuration # diff --git a/synapse/http/client.py b/synapse/http/client.py index 0622a69e86..d6a0d75b2b 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -26,11 +26,9 @@ from OpenSSL.SSL import VERIFY_NONE from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( - Agent, - BrowserLikeRedirectAgent, - ContentDecoderAgent, GzipDecoder, HTTPConnectionPool, PartialDownloadError, diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 0d69d37f7e..75c2a4ec8e 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -15,21 +15,11 @@ from synapse.http.server import JsonResource from synapse.rest.client import versions +from synapse.rest.client.v1 import admin, directory, events, initial_sync from synapse.rest.client.v1 import login as v1_login +from synapse.rest.client.v1 import logout, presence, profile, push_rule, pusher from synapse.rest.client.v1 import register as v1_register -from synapse.rest.client.v1 import ( - admin, - directory, - events, - initial_sync, - logout, - presence, - profile, - push_rule, - pusher, - room, - voip, -) +from synapse.rest.client.v1 import room, voip from synapse.rest.client.v2_alpha import ( account, account_data, diff --git a/synapse/storage/schema/delta/48/group_unique_indexes.py b/synapse/storage/schema/delta/48/group_unique_indexes.py index ac4ac66cd4..2233af87d7 100644 --- a/synapse/storage/schema/delta/48/group_unique_indexes.py +++ b/synapse/storage/schema/delta/48/group_unique_indexes.py @@ -15,7 +15,6 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.prepare_database import get_statements - FIX_INDEXES = """ -- rebuild indexes as uniques DROP INDEX groups_invites_g_idx; -- cgit 1.5.1 From 667fba68f3ca808f48143a2a739a54665b0162c6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 18 Jul 2018 14:35:24 +0100 Subject: Run things as background processes This fixes #3518, and ensures that we get useful logs and metrics for lots of things that happen in the background. (There are certainly more things that happen in the background; these are just the common ones I've found running a single-process synapse locally). --- synapse/federation/transaction_queue.py | 15 ++++++--------- synapse/storage/background_updates.py | 10 ++++++++-- synapse/storage/client_ips.py | 15 +++++++++++---- synapse/storage/events.py | 10 ++++------ synapse/storage/events_worker.py | 10 ++++++---- synapse/util/caches/expiringcache.py | 6 +++++- synapse/util/distributor.py | 4 ++++ 7 files changed, 44 insertions(+), 26 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 5c5a73b73c..6996d6b695 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -168,7 +168,7 @@ class TransactionQueue(object): # fire off a processing loop in the background run_as_background_process( - "process_transaction_queue", + "process_event_queue_for_federation", self._process_event_queue_loop, ) @@ -434,14 +434,11 @@ class TransactionQueue(object): logger.debug("TX [%s] Starting transaction loop", destination) - # Drop the logcontext before starting the transaction. It doesn't - # really make sense to log all the outbound transactions against - # whatever path led us to this point: that's pretty arbitrary really. - # - # (this also means we can fire off _perform_transaction without - # yielding) - with logcontext.PreserveLoggingContext(): - self._transaction_transmission_loop(destination) + run_as_background_process( + "federation_transaction_transmission_loop", + self._transaction_transmission_loop, + destination, + ) @defer.inlineCallbacks def _transaction_transmission_loop(self, destination): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index dc9eca7d15..5fe1ca2de7 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -19,6 +19,8 @@ from canonicaljson import json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process + from . import engines from ._base import SQLBaseStore @@ -87,10 +89,14 @@ class BackgroundUpdateStore(SQLBaseStore): self._background_update_handlers = {} self._all_done = False - @defer.inlineCallbacks def start_doing_background_updates(self): - logger.info("Starting background schema updates") + run_as_background_process( + "background_updates", self._run_background_updates, + ) + @defer.inlineCallbacks + def _run_background_updates(self): + logger.info("Starting background schema updates") while True: yield self.hs.get_clock().sleep( self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index b78eda3413..77ae10da3d 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,6 +19,7 @@ from six import iteritems from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -93,10 +94,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) def _update_client_ips_batch(self): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + def update(): + to_update = self._batch_row_update + self._batch_row_update = {} + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, + to_update, + ) + + run_as_background_process( + "update_client_ips", update, ) def _update_client_ips_batch_txn(self, txn, to_update): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2aaab0d02c..4ff0fdc4ab 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -33,12 +33,13 @@ from synapse.api.errors import SynapseError # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.events_worker import EventsWorkerStore from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util.async import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.frozenutils import frozendict_json_encoder -from synapse.util.logcontext import PreserveLoggingContext, make_deferred_yieldable +from synapse.util.logcontext import make_deferred_yieldable from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -155,11 +156,8 @@ class _EventPeristenceQueue(object): self._event_persist_queues[room_id] = queue self._currently_persisting_rooms.discard(room_id) - # set handle_queue_loop off on the background. We don't want to - # attribute work done in it to the current request, so we drop the - # logcontext altogether. - with PreserveLoggingContext(): - handle_queue_loop() + # set handle_queue_loop off in the background + run_as_background_process("persist_events", handle_queue_loop) def _get_drainining_queue(self, room_id): queue = self._event_persist_queues.setdefault(room_id, deque()) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 67433606c6..f28239a808 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -25,6 +25,7 @@ from synapse.events import EventBase # noqa: F401 from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.logcontext import ( LoggingContext, PreserveLoggingContext, @@ -322,10 +323,11 @@ class EventsWorkerStore(SQLBaseStore): should_start = False if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) + run_as_background_process( + "fetch_events", + self.runWithConnection, + self._do_fetch, + ) logger.debug("Loading %d events", len(events)) with PreserveLoggingContext(): diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 4abca91f6d..465adc54a8 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -16,6 +16,7 @@ import logging from collections import OrderedDict +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache logger = logging.getLogger(__name__) @@ -63,7 +64,10 @@ class ExpiringCache(object): return def f(): - self._prune_cache() + run_as_background_process( + "prune_cache_%s" % self._cache_name, + self._prune_cache, + ) self._clock.looping_call(f, self._expiry_ms / 2) diff --git a/synapse/util/distributor.py b/synapse/util/distributor.py index 734331caaa..d91ae400eb 100644 --- a/synapse/util/distributor.py +++ b/synapse/util/distributor.py @@ -75,6 +75,10 @@ class Distributor(object): self.pre_registration[name].append(observer) def fire(self, name, *args, **kwargs): + """Dispatches the given signal to the registered observers. + + Runs the observers as a background process. Does not return a deferred. + """ if name not in self.signals: raise KeyError("%r does not have a signal named %s" % (self, name)) -- cgit 1.5.1 From 00bc9791379109690fb5e465adf0e0e8e76167aa Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Jul 2018 10:51:15 +0100 Subject: Disable logcontext warning Temporary workaround to #3518 while we release 0.33.0. --- synapse/storage/_base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 98dde77431..a6a0e6ec9f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -343,9 +343,10 @@ class SQLBaseStore(object): """ parent_context = LoggingContext.current_context() if parent_context == LoggingContext.sentinel: - logger.warn( - "Running db txn from sentinel context: metrics will be lost", - ) + # warning disabled for 0.33.0 release; proper fixes will land imminently. + # logger.warn( + # "Running db txn from sentinel context: metrics will be lost", + # ) parent_context = None start_time = time.time() -- cgit 1.5.1 From c0685f67c001ab156fb6922877c35f70536100dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 10:59:02 +0100 Subject: spell out that include_deleted_devices requires include_all_devices --- synapse/storage/end_to_end_keys.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 6c28719420..ffe4d7235a 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -74,7 +74,8 @@ class EndToEndKeyStore(SQLBaseStore): include_all_devices (bool): whether to include entries for devices that don't have device keys include_deleted_devices (bool): whether to include null entries for - devices which no longer exist (but were in the query_list) + devices which no longer exist (but were in the query_list). + This option only takes effect if include_all_devices is true. Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -100,6 +101,9 @@ class EndToEndKeyStore(SQLBaseStore): query_clauses = [] query_params = [] + if include_all_devices is False: + include_deleted_devices = False + if include_deleted_devices: deleted_devices = set(query_list) -- cgit 1.5.1 From f1a15ea20692e9f4b347cdddf6d714912356ba64 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Jul 2018 11:14:20 +0100 Subject: revert 00bc979 ... we've fixed the things that caused the warnings, so we should reinstate the warning. --- synapse/storage/_base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a6a0e6ec9f..1d41d8d445 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -343,10 +343,9 @@ class SQLBaseStore(object): """ parent_context = LoggingContext.current_context() if parent_context == LoggingContext.sentinel: - # warning disabled for 0.33.0 release; proper fixes will land imminently. - # logger.warn( - # "Running db txn from sentinel context: metrics will be lost", - # ) + logger.warn( + "Starting db connection from sentinel context: metrics will be lost", + ) parent_context = None start_time = time.time() -- cgit 1.5.1 From 9e40834f742d95c324183f3e71ae73aafe3c6a99 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 11:15:10 +0100 Subject: yes, we do need to invalidate the device_id_exists_cache when deleting a remote device --- synapse/storage/devices.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 203f50f07d..cc3cdf2ebc 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -258,7 +258,6 @@ class DeviceStore(SQLBaseStore): }, ) - # Do we need this? txn.call_after( self.device_id_exists_cache.invalidate, (user_id, device_id,) ) -- cgit 1.5.1 From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 18:32:02 +0100 Subject: add a filtered_types param to limit filtering to specific types --- synapse/handlers/sync.py | 65 +++++++++++++++------------ synapse/storage/state.py | 113 +++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 82 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0c21ac2c77..cb711b8758 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -417,38 +417,44 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event, types=None): + def get_state_after_event(self, event, types=None, filtered_types=None): """ Get the room state after the given event Args: event(synapse.events.EventBase): event of interest - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. + Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) + state_ids = yield self.store.get_state_ids_for_event( + event.event_id, types, filtered_types=filtered_types + ) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position, types=None): + def get_state_at(self, room_id, stream_position, types=None, filtered_types=None): """ Get the room state at a particular stream position Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. - May be None, which matches any key. + all events are returned of the given type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A Deferred map from ((type, state_key)->Event) @@ -463,7 +469,9 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event, types) + state = yield self.get_state_after_event( + last_event, types, filtered_types=filtered_types + ) else: # no events in this room - so presumably no state @@ -499,6 +507,7 @@ class SyncHandler(object): types = None member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() + filtered_types = None if lazy_load_members: # We only request state for the members needed to display the @@ -516,29 +525,25 @@ class SyncHandler(object): # to be done based on event_id, and we don't have the member # event ids until we've pulled them out of the DB. - if not types: - # an optimisation to stop needlessly trying to calculate - # member_state_ids - # - # XXX: i can't remember what this trying to do. why would - # types ever be []? --matthew - lazy_load_members = False - - types.append((None, None)) # don't just filter to room members + # only apply the filtering to room members + filtered_types = [EventTypes.Member] if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token, types=types + room_id, stream_position=now_token, types=types, + filtered_types=filtered_types ) state_ids = current_state_ids @@ -563,15 +568,18 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token, types=types + room_id, stream_position=since_token, types=types, + filtered_types=filtered_types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) if lazy_load_members: @@ -603,11 +611,10 @@ class SyncHandler(object): # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. - # strip off the (None, None) and filter to just room members - types = types[:-1] if types: state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5ff44fef7..ee531a2ce0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, filtered_types=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore): groups(list[int]): list of state group IDs to query types(list[str|None, str|None])|None: List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. If None, - all types are returned. + state_keys for the `type`. If None, all types are returned. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, filtered_types ) results.update(res) defer.returnValue(results) - def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + def _get_state_groups_from_groups_txn( + self, txn, groups, types=None, filtered_types=None + ): results = {group: {} for group in groups} - include_other_types = False + include_other_types = False if filtered_types is None else True if types is not None: - type_set = set(types) - if (None, None) in type_set: - # special case (None, None) to mean that other types should be - # returned - i.e. we were just filtering down the state keys - # for particular types. - include_other_types = True - type_set.remove((None, None)) - types = list(type_set) # deduplicate types list + types = list(set(types)) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) @@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types): + def get_state_for_events(self, event_ids, types, filtered_types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: event_ids (list[string]) - types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + types (list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) state_event_map = yield self.get_events( [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)], @@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_ids_for_events(self, event_ids, types=None): + def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None): """ Get the state dicts corresponding to a list of events Args: event_ids(list(str)): events whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) event_to_state = { event_id: group_to_state[group] @@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_for_event(self, event_id, types=None): + def get_state_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_for_events([event_id], types) + state_map = yield self.get_state_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @defer.inlineCallbacks - def get_state_ids_for_event(self, event_id, types=None): + def get_state_ids_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_ids_for_events([event_id], types) + state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @cached(max_entries=50000) @@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types): + def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). @@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore): Args: group(int): The state group to lookup - types(list[str|None, str|None]): List of 2-tuples of the form + types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. + state_keys for the `type`. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} + + # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False + include_other_types = True if filtered_types is None else False for typ, state_key in types: key = (typ, state_key) - if typ is None: - include_other_types = True - next - if state_key is None: type_to_key[typ] = None # XXX: why do we mark the type as missing from our cache just # because we weren't filtering on a specific value of state_key? + # is it because the cache doesn't handle wildcards? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types + return include_other_types and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore): return state_dict_ids, is_all @defer.inlineCallbacks - def _get_state_for_groups(self, groups, types=None): + def _get_state_for_groups(self, groups, types=None, filtered_types=None): """Gets the state at each of a list of state groups, optionally filtering by type/state_key Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None|str, None|str)]): + types (None|iterable[(None, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. Otherwise, each entry should be a `(type, state_key)` tuple to include in the response. A `state_key` of None is a wildcard - meaning that we require all state with that type. A `type` of None - indicates that types not in the list should not be filtered out. + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] @@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types, + group, types, filtered_types ) results[group] = state_dict_ids @@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, + missing_groups, types_to_fetch, filtered_types ) for group, group_state_dict in iteritems(group_to_state_dict): -- cgit 1.5.1 From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:03:50 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 44 +++++++++++++++++++++++++++----------------- synapse/storage/state.py | 7 ++++--- 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cb711b8758..b597f94cf6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -435,7 +435,7 @@ class SyncHandler(object): A Deferred map from ((type, state_key)->Event) """ state_ids = yield self.store.get_state_ids_for_event( - event.event_id, types, filtered_types=filtered_types + event.event_id, types, filtered_types=filtered_types, ) if event.is_state(): state_ids = state_ids.copy() @@ -470,7 +470,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] state = yield self.get_state_after_event( - last_event, types, filtered_types=filtered_types + last_event, types, filtered_types=filtered_types, ) else: @@ -505,7 +505,6 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None @@ -521,10 +520,6 @@ class SyncHandler(object): ) ] - # We can't remove redundant member types at this stage as it has - # to be done based on event_id, and we don't have the member - # event ids until we've pulled them out of the DB. - # only apply the filtering to room members filtered_types = [EventTypes.Member] @@ -532,27 +527,32 @@ class SyncHandler(object): if batch: current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = current_state_ids + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -569,28 +569,38 @@ class SyncHandler(object): elif batch.limited: state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_at_timeline_start = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: - # TODO: filter out redundant members based on their event_ids - # (not mxids) at this point. In practice, limited syncs are + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implement, this should filter using event_ids (not mxids). + # In practice, limited syncs are # relatively rare so it's not a total disaster to send redundant - # members down at this point. + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -614,7 +624,7 @@ class SyncHandler(object): if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee531a2ce0..75c6366e7a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore): if state_key is None: type_to_key[typ] = None - # XXX: why do we mark the type as missing from our cache just - # because we weren't filtering on a specific value of state_key? - # is it because the cache doesn't handle wildcards? + # we mark the type as missing from the cache because + # when the cache was populated it might have been done with a + # restricted set of state_keys, so the wildcard will not work + # and the cache may be incomplete. missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: -- cgit 1.5.1 From 2f558300cc648e633342746dc7b42a36fcb6b32e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:22:27 +0100 Subject: fix thinkos; unbreak tests --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 75c6366e7a..f09be7172d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -369,7 +369,7 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types, filtered_types): + def get_state_for_events(self, event_ids, types, filtered_types=None): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. @@ -538,7 +538,7 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = True if filtered_types is None else False + include_other_types = False if filtered_types is None else True for typ, state_key in types: key = (typ, state_key) -- cgit 1.5.1 From c1bf2b587eaa718e28a33f76a7b5f6e288255fca Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 09:56:23 +0100 Subject: add trailing comma --- synapse/storage/end_to_end_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index ffe4d7235a..523b4360c3 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -66,7 +66,7 @@ class EndToEndKeyStore(SQLBaseStore): @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, - include_deleted_devices=False + include_deleted_devices=False, ): """Fetch a list of device keys. Args: -- cgit 1.5.1 From e42510ba635b3e4d83215e4f5634ca51411996e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 13:00:22 +0100 Subject: Use new getters --- synapse/api/auth.py | 6 ++++-- synapse/handlers/_base.py | 3 ++- synapse/handlers/federation.py | 23 ++++++++++++++++------- synapse/handlers/message.py | 26 ++++++++++++++++---------- synapse/handlers/room_member.py | 9 ++++++--- synapse/push/bulk_push_rule_evaluator.py | 7 ++++--- synapse/storage/events.py | 2 +- synapse/storage/push_rule.py | 7 +++++-- synapse/storage/roommember.py | 7 +++++-- 9 files changed, 59 insertions(+), 31 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index bc629832d9..535bdb449d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -65,8 +65,9 @@ class Auth(object): @defer.inlineCallbacks def check_from_context(self, event, context, do_sig_check=True): + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -544,7 +545,8 @@ class Auth(object): @defer.inlineCallbacks def add_auth_events(self, builder, context): - auth_ids = yield self.compute_auth_events(builder, context.prev_state_ids) + prev_state_ids = yield context.get_prev_state_ids(self.store) + auth_ids = yield self.compute_auth_events(builder, prev_state_ids) auth_events_entries = yield self.store.add_event_hashes( auth_ids diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index b6a8b3aa3b..704181d2d3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -112,8 +112,9 @@ class BaseHandler(object): guest_access = event.content.get("guest_access", "forbidden") if guest_access != "can_join": if context: + current_state_ids = yield context.get_current_state_ids(self.store) current_state = yield self.store.get_events( - list(context.current_state_ids.values()) + list(current_state_ids.values()) ) else: current_state = yield self.state_handler.get_current_state( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a6d391c4e8..98dd4a7fd1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -486,7 +486,10 @@ class FederationHandler(BaseHandler): # joined the room. Don't bother if the user is just # changing their profile info. newly_joined = True - prev_state_id = context.prev_state_ids.get( + + prev_state_ids = yield context.get_prev_state_ids(self.store) + + prev_state_id = prev_state_ids.get( (event.type, event.state_key) ) if prev_state_id: @@ -1106,10 +1109,12 @@ class FederationHandler(BaseHandler): user = UserID.from_string(event.state_key) yield user_joined_room(self.distributor, user, event.room_id) - state_ids = list(context.prev_state_ids.values()) + prev_state_ids = yield context.get_prev_state_ids(self.store) + + state_ids = list(prev_state_ids.values()) auth_chain = yield self.store.get_auth_chain(state_ids) - state = yield self.store.get_events(list(context.prev_state_ids.values())) + state = yield self.store.get_events(list(prev_state_ids.values())) defer.returnValue({ "state": list(state.values()), @@ -1635,8 +1640,9 @@ class FederationHandler(BaseHandler): ) if not auth_events: + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -1876,9 +1882,10 @@ class FederationHandler(BaseHandler): break if do_resolution: + prev_state_ids = yield context.get_prev_state_ids(self.store) # 1. Get what we think is the auth chain. auth_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids + event, prev_state_ids ) local_auth_chain = yield self.store.get_auth_chain( auth_ids, include_given=True @@ -2222,7 +2229,8 @@ class FederationHandler(BaseHandler): event.content["third_party_invite"]["signed"]["token"] ) original_invite = None - original_invite_id = context.prev_state_ids.get(key) + prev_state_ids = yield context.get_prev_state_ids(self.store) + original_invite_id = prev_state_ids.get(key) if original_invite_id: original_invite = yield self.store.get_event( original_invite_id, allow_none=True @@ -2264,7 +2272,8 @@ class FederationHandler(BaseHandler): signed = event.content["third_party_invite"]["signed"] token = signed["token"] - invite_event_id = context.prev_state_ids.get( + prev_state_ids = yield context.get_prev_state_ids(self.store) + invite_event_id = prev_state_ids.get( (EventTypes.ThirdPartyInvite, token,) ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index abc07ea87c..c4bcd9018b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -630,7 +630,8 @@ class EventCreationHandler(object): If so, returns the version of the event in context. Otherwise, returns None. """ - prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) + prev_state_ids = yield context.get_prev_state_ids(self.store) + prev_event_id = prev_state_ids.get((event.type, event.state_key)) prev_event = yield self.store.get_event(prev_event_id, allow_none=True) if not prev_event: return @@ -752,8 +753,8 @@ class EventCreationHandler(object): event = builder.build() logger.debug( - "Created event %s with state: %s", - event.event_id, context.prev_state_ids, + "Created event %s", + event.event_id, ) defer.returnValue( @@ -884,9 +885,11 @@ class EventCreationHandler(object): e.sender == event.sender ) + current_state_ids = yield context.get_current_state_ids(self.store) + state_to_include_ids = [ e_id - for k, e_id in iteritems(context.current_state_ids) + for k, e_id in iteritems(current_state_ids) if k[0] in self.hs.config.room_invite_state_types or k == (EventTypes.Member, event.sender) ] @@ -922,8 +925,9 @@ class EventCreationHandler(object): ) if event.type == EventTypes.Redaction: + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -943,11 +947,13 @@ class EventCreationHandler(object): "You don't have permission to redact events" ) - if event.type == EventTypes.Create and context.prev_state_ids: - raise AuthError( - 403, - "Changing the room create event is forbidden", - ) + if event.type == EventTypes.Create: + prev_state_ids = yield context.get_prev_state_ids(self.store) + if prev_state_ids: + raise AuthError( + 403, + "Changing the room create event is forbidden", + ) (event_stream_id, max_stream_id) = yield self.store.persist_event( event, context=context diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 00f2e279bc..a832d91809 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -201,7 +201,9 @@ class RoomMemberHandler(object): ratelimit=ratelimit, ) - prev_member_event_id = context.prev_state_ids.get( + prev_state_ids = yield context.get_prev_state_ids(self.store) + + prev_member_event_id = prev_state_ids.get( (EventTypes.Member, target.to_string()), None ) @@ -496,9 +498,10 @@ class RoomMemberHandler(object): if prev_event is not None: return + prev_state_ids = yield context.get_prev_state_ids(self.store) if event.membership == Membership.JOIN: if requester.is_guest: - guest_can_join = yield self._can_guest_join(context.prev_state_ids) + guest_can_join = yield self._can_guest_join(prev_state_ids) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -517,7 +520,7 @@ class RoomMemberHandler(object): ratelimit=ratelimit, ) - prev_member_event_id = context.prev_state_ids.get( + prev_member_event_id = prev_state_ids.get( (EventTypes.Member, event.state_key), None ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index bb181d94ee..1d14d3639c 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -112,7 +112,8 @@ class BulkPushRuleEvaluator(object): @defer.inlineCallbacks def _get_power_levels_and_sender_level(self, event, context): - pl_event_id = context.prev_state_ids.get(POWER_KEY) + prev_state_ids = yield context.get_prev_state_ids(self.store) + pl_event_id = prev_state_ids.get(POWER_KEY) if pl_event_id: # fastpath: if there's a power level event, that's all we need, and # not having a power level event is an extreme edge case @@ -120,7 +121,7 @@ class BulkPushRuleEvaluator(object): auth_events = {POWER_KEY: pl_event} else: auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=False, + event, prev_state_ids, for_verification=False, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -304,7 +305,7 @@ class RulesForRoom(object): push_rules_delta_state_cache_metric.inc_hits() else: - current_state_ids = context.current_state_ids + current_state_ids = yield context.get_current_state_ids(self.store) push_rules_delta_state_cache_metric.inc_misses() push_rules_state_size_counter.inc(len(current_state_ids)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 4ff0fdc4ab..bf4f3ee92a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,7 +549,7 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue - state_groups_map[ctx.state_group] = ctx.current_state_ids + state_groups_map[ctx.state_group] = yield ctx.get_current_state_ids(self) # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index be655d287b..af564b1b4e 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -186,6 +186,7 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group if not state_group: @@ -195,9 +196,11 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._bulk_get_push_rules_for_room( - event.room_id, state_group, context.current_state_ids, event=event + current_state_ids = yield context.get_current_state_ids(self) + result = yield self._bulk_get_push_rules_for_room( + event.room_id, state_group, current_state_ids, event=event ) + defer.returnValue(result) @cachedInlineCallbacks(num_args=2, cache_context=True) def _bulk_get_push_rules_for_room(self, room_id, state_group, current_state_ids, diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 02a802bed9..a27702a7a0 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -232,6 +232,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): defer.returnValue(user_who_share_room) + @defer.inlineCallbacks def get_joined_users_from_context(self, event, context): state_group = context.state_group if not state_group: @@ -241,11 +242,13 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_users_from_context( - event.room_id, state_group, context.current_state_ids, + current_state_ids = yield context.get_current_state_ids(self) + result = yield self._get_joined_users_from_context( + event.room_id, state_group, current_state_ids, event=event, context=context, ) + defer.returnValue(result) def get_joined_users_from_state(self, room_id, state_entry): state_group = state_entry.state_group -- cgit 1.5.1 From dae6dc1e776cc6198dc1f71575876fc16693c170 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 17:13:34 +0100 Subject: Remove redundant checks on room forgottenness Fixes #3550 --- changelog.d/3350.misc | 1 + synapse/storage/push_rule.py | 13 ------------- synapse/storage/roommember.py | 15 --------------- synapse/visibility.py | 19 +------------------ 4 files changed, 2 insertions(+), 46 deletions(-) create mode 100644 changelog.d/3350.misc (limited to 'synapse/storage') diff --git a/changelog.d/3350.misc b/changelog.d/3350.misc new file mode 100644 index 0000000000..3713cd6d63 --- /dev/null +++ b/changelog.d/3350.misc @@ -0,0 +1 @@ +Remove redundant checks on who_forgot_in_room \ No newline at end of file diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index be655d287b..d25b39ec02 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -21,7 +21,6 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.constants import EventTypes from synapse.push.baserules import list_with_base_rules from synapse.storage.appservice import ApplicationServiceWorkerStore from synapse.storage.pusher import PusherWorkerStore @@ -247,18 +246,6 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, if uid in local_users_in_room: user_ids.add(uid) - forgotten = yield self.who_forgot_in_room( - event.room_id, on_invalidate=cache_context.invalidate, - ) - - for row in forgotten: - user_id = row["user_id"] - event_id = row["event_id"] - - mem_id = current_state_ids.get((EventTypes.Member, user_id), None) - if event_id == mem_id: - user_ids.discard(user_id) - rules_by_user = yield self.bulk_get_push_rules( user_ids, on_invalidate=cache_context.invalidate, ) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 02a802bed9..9eb5b18144 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -458,18 +458,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -578,9 +566,6 @@ class RoomMemberStore(RoomMemberWorkerStore): txn.execute(sql, (user_id, room_id)) txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.who_forgot_in_room, (room_id,) - ) return self.runInteraction("forget_membership", f) @cachedInlineCallbacks(num_args=2) diff --git a/synapse/visibility.py b/synapse/visibility.py index 9b97ea2b83..e0b2eccceb 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -23,7 +23,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events.utils import prune_event from synapse.types import get_domain_from_id -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn logger = logging.getLogger(__name__) @@ -75,19 +74,6 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, types=types, ) - forgotten = yield make_deferred_yieldable(defer.gatherResults([ - defer.maybeDeferred( - preserve_fn(store.who_forgot_in_room), - room_id, - ) - for room_id in frozenset(e.room_id for e in events) - ], consumeErrors=True)) - - # Set of membership event_ids that have been forgotten - event_id_forgotten = frozenset( - row["event_id"] for rows in forgotten for row in rows - ) - ignore_dict_content = yield store.get_global_account_data_by_type_for_user( "m.ignored_user_list", user_id, ) @@ -176,10 +162,7 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, if membership is None: membership_event = state.get((EventTypes.Member, user_id), None) if membership_event: - # XXX why do we do this? - # https://github.com/matrix-org/synapse/issues/3350 - if membership_event.event_id not in event_id_forgotten: - membership = membership_event.membership + membership = membership_event.membership # if the user was a member of the room at the time of the event, # they can see it. -- cgit 1.5.1 From 50c60e5fadbefff6785c17dda9eecf88286dba30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 17:21:40 +0100 Subject: Only get cached state from context in persist_event We don't want to bother pulling out the current state from the DB since until we know we have to. Checking the context for state is just an optimisation. --- synapse/events/snapshot.py | 13 +++++++++++++ synapse/storage/events.py | 4 +++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index a59064b416..c439b53801 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -163,6 +163,9 @@ class EventContext(object): context._prev_state_id = input["prev_state_id"] context._event_type = input["event_type"] context._event_state_key = input["event_state_key"] + + context._current_state_ids = None + context._prev_state_ids = None context._fetching_state_deferred = None context.state_group = input["state_group"] @@ -214,6 +217,16 @@ class EventContext(object): defer.returnValue(self._prev_state_ids) + def get_cached_current_state_ids(self): + """Gets the current state IDs if we have them already cached. + + Returns: + dict[(str, str), str]|None: Returns None if state_group + is None, which happens when the associated event is an outlier. + """ + + return self._current_state_ids + @defer.inlineCallbacks def _fill_out_state(self, store): """Called to populate the _current_state_ids and _prev_state_ids diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bf4f3ee92a..dc0b3c2eba 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,7 +549,9 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue - state_groups_map[ctx.state_group] = yield ctx.get_current_state_ids(self) + current_state_ids = ctx.get_cached_current_state_ids() + if current_state_ids is not None: + state_groups_map[ctx.state_group] = current_state_ids # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can -- cgit 1.5.1 From 8b9f164fff6cf821ff5bc702f3660c0f0eb320e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 17:43:01 +0100 Subject: Comments --- synapse/events/snapshot.py | 5 +++-- synapse/storage/events.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index c439b53801..189212b0fa 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -221,8 +221,9 @@ class EventContext(object): """Gets the current state IDs if we have them already cached. Returns: - dict[(str, str), str]|None: Returns None if state_group - is None, which happens when the associated event is an outlier. + dict[(str, str), str]|None: Returns None if we haven't cached the + state or if state_group is None, which happens when the associated + event is an outlier. """ return self._current_state_ids diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dc0b3c2eba..c2910094d0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,6 +549,9 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue + # We're only interested in pulling out state that has already + # been cached in the context. We'll pull stuff out of the DB later + # if necessary. current_state_ids = ctx.get_cached_current_state_ids() if current_state_ids is not None: state_groups_map[ctx.state_group] = current_state_ids -- cgit 1.5.1 From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 19:21:20 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 67 +++++++++++++++++++----------------------------- synapse/storage/state.py | 20 ++++++--------- 2 files changed, 35 insertions(+), 52 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. + state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. @@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. -- cgit 1.5.1 From efcdacad7d1b7f52f879179701c7e0d9b763511f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:41:05 +0100 Subject: handle case where types is [] on postgres correctly --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 40ca8bd2a2..f99d3871e4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -255,7 +255,7 @@ class StateGroupWorkerStore(SQLBaseStore): # Turns out that postgres doesn't like doing a list of OR's and # is about 1000x slower, so we just issue a query for each specific # type seperately. - if types: + if types is not None: clause_to_args = [ ( "AND type = ? AND state_key = ?", -- cgit 1.5.1 From 1938cffaea465375d5dd410baa94a6b9b6ded0cc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 23:48:19 +0100 Subject: Add some measure blocks to persist_events ... to help us figure out where 40% of CPU is going --- synapse/storage/events.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..d13d099d1e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -417,19 +417,23 @@ class EventsStore(EventsWorkerStore): logger.info( "Calculating state delta for room %s", room_id, ) - current_state = yield self._get_new_state_after_events( - room_id, - ev_ctx_rm, - latest_event_ids, - new_latest_event_ids, - ) + + with Measure("persist_events.get_new_state_after_events"): + current_state = yield self._get_new_state_after_events( + room_id, + ev_ctx_rm, + latest_event_ids, + new_latest_event_ids, + ) + if current_state is not None: current_state_for_room[room_id] = current_state - delta = yield self._calculate_state_delta( - room_id, current_state, - ) - if delta is not None: - state_delta_for_room[room_id] = delta + with Measure("persist_events.calculate_state_delta"): + delta = yield self._calculate_state_delta( + room_id, current_state, + ) + if delta is not None: + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", -- cgit 1.5.1 From 69fb5dbdabf50dffe34e2a0d012b5a227fc16c55 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Jul 2018 00:04:44 +0100 Subject: fix idiocy --- synapse/storage/events.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d13d099d1e..392935cacf 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -418,7 +418,10 @@ class EventsStore(EventsWorkerStore): "Calculating state delta for room %s", room_id, ) - with Measure("persist_events.get_new_state_after_events"): + with Measure( + self._clock, + "persist_events.get_new_state_after_events", + ): current_state = yield self._get_new_state_after_events( room_id, ev_ctx_rm, @@ -428,7 +431,10 @@ class EventsStore(EventsWorkerStore): if current_state is not None: current_state_for_room[room_id] = current_state - with Measure("persist_events.calculate_state_delta"): + with Measure( + self._clock, + "persist_events.calculate_state_delta", + ): delta = yield self._calculate_state_delta( room_id, current_state, ) -- cgit 1.5.1 From 8dff6e0322718ec9c446465c1e10ab331a417b8a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Jul 2018 00:37:17 +0100 Subject: Logcontext fixes Fix some random logcontext leaks. --- synapse/handlers/initial_sync.py | 28 +++++++++++++++------------- synapse/storage/events.py | 5 +++-- synapse/storage/pusher.py | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index fb11716eb8..50b13d8820 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -387,19 +387,21 @@ class InitialSyncHandler(BaseHandler): receipts = [] defer.returnValue(receipts) - presence, receipts, (messages, token) = yield defer.gatherResults( - [ - run_in_background(get_presence), - run_in_background(get_receipts), - run_in_background( - self.store.get_recent_events_for_room, - room_id, - limit=limit, - end_token=now_token.room_key, - ) - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + presence, receipts, (messages, token) = yield make_deferred_yieldable( + defer.gatherResults( + [ + run_in_background(get_presence), + run_in_background(get_receipts), + run_in_background( + self.store.get_recent_events_for_room, + room_id, + limit=limit, + end_token=now_token.room_key, + ) + ], + consumeErrors=True, + ).addErrback(unwrapFirstError), + ) messages = yield filter_events_for_client( self.store, user_id, messages, is_peeking=is_peeking, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..c06dbb3768 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -39,7 +39,7 @@ from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util.async import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.frozenutils import frozendict_json_encoder -from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.logcontext import PreserveLoggingContext, make_deferred_yieldable from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -147,7 +147,8 @@ class _EventPeristenceQueue(object): # callbacks on the deferred. try: ret = yield per_item_callback(item) - item.deferred.callback(ret) + with PreserveLoggingContext(): + item.deferred.callback(ret) except Exception: item.deferred.errback() finally: diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index cc273a57b2..8443bd4c1b 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -233,7 +233,7 @@ class PusherStore(PusherWorkerStore): ) if newly_inserted: - self.runInteraction( + yield self.runInteraction( "add_pusher", self._invalidate_cache_and_stream, self.get_if_user_has_pusher, (user_id,) -- cgit 1.5.1 From ff5426f6b8263b416f412725a6d7be2fac284824 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 10:55:11 +0100 Subject: Speed up _calculate_state_delta --- synapse/storage/events.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..1b075e6cc6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -644,21 +644,14 @@ class EventsStore(EventsWorkerStore): """ existing_state = yield self.get_current_state_ids(room_id) - existing_events = set(itervalues(existing_state)) - new_events = set(ev_id for ev_id in itervalues(current_state)) - changed_events = existing_events ^ new_events - - if not changed_events: - return - to_delete = { key: ev_id for key, ev_id in iteritems(existing_state) - if ev_id in changed_events + if ev_id != current_state.get(key) } - events_to_insert = (new_events - existing_events) + to_insert = { key: ev_id for key, ev_id in iteritems(current_state) - if ev_id in events_to_insert + if ev_id != existing_state.get(key) } defer.returnValue((to_delete, to_insert)) -- cgit 1.5.1 From 0fa73e4a63d6cd5d402ea5213eac6b766e650321 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:19:23 +0100 Subject: Remove unnecessary if --- synapse/storage/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1b075e6cc6..cc8ab5165b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -428,8 +428,7 @@ class EventsStore(EventsWorkerStore): delta = yield self._calculate_state_delta( room_id, current_state, ) - if delta is not None: - state_delta_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", -- cgit 1.5.1 From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 12:39:40 +0100 Subject: incorporate more review --- synapse/handlers/sync.py | 12 +++++++++--- synapse/storage/state.py | 36 +++++++++--------------------------- tests/storage/test_state.py | 9 +++++++++ 3 files changed, 27 insertions(+), 30 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5689ad2f58..e5a2329d73 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1526,6 +1526,9 @@ def _calculate_state( previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline + lazy_load_members (bool): whether to return members from timeline_start + or not. assumes that timeline_start has already been filtered to + include only the members the client needs to know about. Returns: dict @@ -1545,9 +1548,12 @@ def _calculate_state( p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - # track the membership events in the state as of the start of the timeline - # so we can add them back in to the state if we're lazyloading. We don't - # add them into state if they're already contained in the timeline. + # If we are lazyloading room members, we explicitly add the membership events + # for the senders in the timeline into the state block returned by /sync, + # as we may not have sent them to the client before. We find these membership + # events by filtering them out of timeline_start, which has already been filtered + # to only include membership events for the senders in the timeline. + if lazy_load_members: ll_ids = set( e for t, e in timeline_start.iteritems() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f99d3871e4..1413a6f910 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types, filtered_types=None): + def _get_state_groups_from_groups(self, groups, types): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(Iterable[str]|None): Only apply filtering via `types` to this - list of event types. Other types of events are returned unfiltered. - If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types, + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None, + self, txn, groups, types=None, ): results = {group: {} for group in groups} @@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore): ) for etype, state_key in types ] - - if filtered_types is not None: - # XXX: check whether this slows postgres down like a list of - # ORs does too? - unique_types = set(filtered_types) - clause_to_args.append( - ( - "AND type <> ? " * len(unique_types), - list(unique_types) - ) - ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if filtered_types is not None: - unique_types = set(filtered_types) - where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" - ) - where_args.extend(list(unique_types)) - where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore): # cache. Hence, if we are doing a wildcard lookup, populate the # cache fully so that we can do an efficient lookup next time. - if types and any(k is None for (t, k) in types): + if filtered_types or (types and any(k is None for (t, k) in types)): types_to_fetch = None else: types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, filtered_types + missing_groups, types_to_fetch ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore): if types: for k, v in iteritems(group_state_dict): (typ, _) = k - if k in types or (typ, None) in types: + if ( + (k in types or (typ, None) in types) or + (filtered_types and typ not in filtered_types) + ): state_dict[k] = v else: state_dict.update(group_state_dict) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8924ba9f7f..b2f314e9db 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, (e3.type, e3.state_key): e3, }, state) + + state = yield self.store.get_state_for_event( + e5.event_id, [], filtered_types=[EventTypes.Member], + ) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + }, state) -- cgit 1.5.1 From e22700c3dd929f9f0953b3d2b37e503eece82b38 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:59:07 +0100 Subject: consider non-filter_type types as wildcards, thus missing from the state-group-cache --- synapse/storage/state.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1413a6f910..86f2c2e6b1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -518,7 +518,10 @@ class StateGroupWorkerStore(SQLBaseStore): for typ, state_key in types: key = (typ, state_key) - if state_key is None: + if ( + state_key is None or + filtered_types is not None and typ not in filtered_types + ): type_to_key[typ] = None # we mark the type as missing from the cache because # when the cache was populated it might have been done with a -- cgit 1.5.1 From 223341205edf725c5583a73d5680245a24790452 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:59:16 +0100 Subject: Don't require to_delete to have event_ids --- synapse/storage/events.py | 91 ++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 36 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 19c05dc8d6..87d0b78a2d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -19,7 +19,7 @@ import logging from collections import OrderedDict, deque, namedtuple from functools import wraps -from six import iteritems, itervalues +from six import iteritems from six.moves import range from canonicaljson import json @@ -347,8 +347,9 @@ class EventsStore(EventsWorkerStore): # state in each room after adding these events current_state_for_room = {} - # map room_id->(to_delete, to_insert) where each entry is - # a map (type,key)->event_id giving the state delta in each + # map room_id->(to_delete, to_insert) where to_delete is a list + # of type/state keys to remove from current state, and to_insert + # is a map (type,key)->event_id giving the state delta in each # room state_delta_for_room = {} @@ -647,17 +648,16 @@ class EventsStore(EventsWorkerStore): Assumes that we are only persisting events for one room at a time. Returns: - 2-tuple (to_delete, to_insert) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. + tuple[list, dict] (to_delete, to_insert): where to_delete are the + type/state_keys to remove from current_state_events and `to_insert` + are the updates to current_state_events. """ existing_state = yield self.get_current_state_ids(room_id) - to_delete = { - key: ev_id for key, ev_id in iteritems(existing_state) - if ev_id != current_state.get(key) - } + to_delete = [ + key for key, ev_id in iteritems(existing_state) + if key not in current_state + ] to_insert = { key: ev_id for key, ev_id in iteritems(current_state) @@ -684,10 +684,10 @@ class EventsStore(EventsWorkerStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - state_delta_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list, dict)]): The current-state delta for each room. For each room, a tuple - (to_delete, to_insert), being a list of event ids to be removed - from the current state, and a list of event ids to be added to + (to_delete, to_insert), being a list of type/state keys to be + removed from the current state, and a state set to be added to the current state. new_forward_extremeties (dict[str, list[str]]): The new forward extremities for each room. For each room, a @@ -765,9 +765,46 @@ class EventsStore(EventsWorkerStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in iteritems(state_delta_by_room): to_delete, to_insert = current_state_tuple + + # First we add entries to the current_state_delta_stream. We + # do this before updating the current_state_events table so + # that we can use it to calculate the `prev_event_id`. (This + # allows us to not have to pull out the existing state + # unnecessarily). + sql = """ + INSERT INTO current_state_delta_stream + (stream_id, room_id, type, state_key, event_id, prev_event_id) + SELECT ?, ?, ?, ?, ?, ( + SELECT event_id FROM current_state_events + WHERE room_id = ? AND type = ? AND state_key = ? + ) + """ + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, None, + room_id, etype, state_key, + ) + for etype, state_key in to_delete + # We sanity check that we're deleting rather than updating + if (etype, state_key) not in to_insert + )) + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, ev_id, + room_id, etype, state_key, + ) + for (etype, state_key), ev_id in iteritems(to_insert) + )) + + # Now we actually update the current_state_events table + txn.executemany( - "DELETE FROM current_state_events WHERE event_id = ?", - [(ev_id,) for ev_id in itervalues(to_delete)], + "DELETE FROM current_state_events" + " WHERE room_id = ? AND type = ? AND state_key = ?", + ( + (room_id, etype, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), ) self._simple_insert_many_txn( @@ -784,25 +821,6 @@ class EventsStore(EventsWorkerStore): ], ) - state_deltas = {key: None for key in to_delete} - state_deltas.update(to_insert) - - self._simple_insert_many_txn( - txn, - table="current_state_delta_stream", - values=[ - { - "stream_id": max_stream_order, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": ev_id, - "prev_event_id": to_delete.get(key, None), - } - for key, ev_id in iteritems(state_deltas) - ] - ) - txn.call_after( self._curr_state_delta_stream_cache.entity_has_changed, room_id, max_stream_order, @@ -816,7 +834,8 @@ class EventsStore(EventsWorkerStore): # and which we have added, then we invlidate the caches for all # those users. members_changed = set( - state_key for ev_type, state_key in state_deltas + state_key + for ev_type, state_key in itertools.chain(to_delete, to_insert) if ev_type == EventTypes.Member ) -- cgit 1.5.1 From a79410e7b8dce3fb8b4b40901a66fd9fde9c47a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:13:47 +0100 Subject: Have _get_new_state_after_events return delta If we have a delta from the existing to new current state, then we can reuse that rather than manually working it out by fetching both lots of state. --- synapse/storage/events.py | 65 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 17 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 87d0b78a2d..6d2aae9c4a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -344,7 +344,9 @@ class EventsStore(EventsWorkerStore): new_forward_extremeties = {} # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events + # state in each room after adding these events. + # This is simply used to prefill the get_current_state_ids + # cache current_state_for_room = {} # map room_id->(to_delete, to_insert) where to_delete is a list @@ -419,28 +421,40 @@ class EventsStore(EventsWorkerStore): logger.info( "Calculating state delta for room %s", room_id, ) - with Measure( - self._clock, - "persist_events.get_new_state_after_events", + self._clock, + "persist_events.get_new_state_after_events", ): - current_state = yield self._get_new_state_after_events( + res = yield self._get_new_state_after_events( room_id, ev_ctx_rm, latest_event_ids, new_latest_event_ids, ) - - if current_state is not None: - current_state_for_room[room_id] = current_state + current_state, delta_ids = res + + # If either are not None then there has been a change, + # and we need to work out the delta (or use that + # given) + if delta_ids is not None: + # If there is a delta we know that we've + # only added or replaced state, never + # removed keys entirely. + state_delta_for_room[room_id] = ([], delta_ids) + elif current_state is not None: with Measure( - self._clock, - "persist_events.calculate_state_delta", + self._clock, + "persist_events.calculate_state_delta", ): delta = yield self._calculate_state_delta( room_id, current_state, ) - state_delta_for_room[room_id] = delta + state_delta_for_room[room_id] = delta + + # If we have the current_state then lets prefill + # the cache with it. + if current_state is not None: + current_state_for_room[room_id] = current_state yield self.runInteraction( "persist_events", @@ -539,9 +553,10 @@ class EventsStore(EventsWorkerStore): the new forward extremities for the room. Returns: - Deferred[dict[(str,str), str]|None]: - None if there are no changes to the room state, or - a dict of (type, state_key) -> event_id]. + Deferred[tuple[dict[(str,str), str]|None, dict[(str,str), str]|None]]: + Returns a tuple of two state maps, the first being the full new current + state and the second being the delta to the existing current state. + If both are None then there has been no change. """ if not new_latest_event_ids: @@ -549,6 +564,9 @@ class EventsStore(EventsWorkerStore): # map from state_group to ((type, key) -> event_id) state map state_groups_map = {} + + state_group_deltas = {} + for ev, ctx in events_context: if ctx.state_group is None: # I don't think this can happen, but let's double-check @@ -567,6 +585,9 @@ class EventsStore(EventsWorkerStore): if current_state_ids is not None: state_groups_map[ctx.state_group] = current_state_ids + if ctx.prev_group: + state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids + # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can # pull the state group from its context. @@ -608,7 +629,7 @@ class EventsStore(EventsWorkerStore): # If they old and new groups are the same then we don't need to do # anything. if old_state_groups == new_state_groups: - return + defer.returnValue((None, None)) # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. @@ -620,7 +641,17 @@ class EventsStore(EventsWorkerStore): if len(new_state_groups) == 1: # If there is only one state group, then we know what the current # state is. - defer.returnValue(state_groups_map[new_state_groups.pop()]) + new_state_group = new_state_groups.pop() + + delta_ids = None + if len(old_state_groups) == 1: + old_state_group = old_state_groups.pop() + + delta_ids = state_group_deltas.get( + (old_state_group, new_state_group,), None + ) + + defer.returnValue((state_groups_map[new_state_group], delta_ids)) # Ok, we need to defer to the state handler to resolve our state sets. @@ -639,7 +670,7 @@ class EventsStore(EventsWorkerStore): room_id, state_groups, events_map, get_events ) - defer.returnValue(res.state) + defer.returnValue((res.state, None)) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): -- cgit 1.5.1 From 811ac73a42db16db7211a7517ad5651a1f6aee71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 13:40:42 +0100 Subject: Don't fetch state from the database unless needed --- synapse/storage/events.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6d2aae9c4a..eb8bbe13ab 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -631,27 +631,33 @@ class EventsStore(EventsWorkerStore): if old_state_groups == new_state_groups: defer.returnValue((None, None)) - # Now that we have calculated new_state_groups we need to get - # their state IDs so we can resolve to a single state set. - missing_state = new_state_groups - set(state_groups_map) - if missing_state: - group_to_state = yield self._get_state_for_groups(missing_state) - state_groups_map.update(group_to_state) - if len(new_state_groups) == 1: # If there is only one state group, then we know what the current # state is. - new_state_group = new_state_groups.pop() - - delta_ids = None if len(old_state_groups) == 1: - old_state_group = old_state_groups.pop() + new_state_group = next(iter(new_state_groups)) + old_state_group = next(iter(old_state_groups)) delta_ids = state_group_deltas.get( (old_state_group, new_state_group,), None ) + if delta_ids is not None: + # We have a delta from the existing to new current state, + # so lets just return that. If we happen to already have + # the current state in memory then lets also return that, + # but it doesn't matter if we don't. + new_state = state_groups_map.get(new_state_group) + defer.returnValue((new_state, delta_ids)) - defer.returnValue((state_groups_map[new_state_group], delta_ids)) + # Now that we have calculated new_state_groups we need to get + # their state IDs so we can resolve to a single state set. + missing_state = new_state_groups - set(state_groups_map) + if missing_state: + group_to_state = yield self._get_state_for_groups(missing_state) + state_groups_map.update(group_to_state) + + if len(new_state_groups) == 1: + defer.returnValue((state_groups_map[new_state_groups.pop()], None)) # Ok, we need to defer to the state handler to resolve our state sets. -- cgit 1.5.1 From ed0dd68731fa1549b4ea9b2dab08c2afb6ccb64e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 14:31:38 +0100 Subject: Fixup comment (and indent) --- synapse/storage/events.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index eb8bbe13ab..76cfbc90fe 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -565,6 +565,7 @@ class EventsStore(EventsWorkerStore): # map from state_group to ((type, key) -> event_id) state map state_groups_map = {} + # Map from (prev state group, new state group) -> delta state dict state_group_deltas = {} for ev, ctx in events_context: @@ -631,23 +632,24 @@ class EventsStore(EventsWorkerStore): if old_state_groups == new_state_groups: defer.returnValue((None, None)) - if len(new_state_groups) == 1: - # If there is only one state group, then we know what the current - # state is. - if len(old_state_groups) == 1: - new_state_group = next(iter(new_state_groups)) - old_state_group = next(iter(old_state_groups)) + if len(new_state_groups) == 1 and len(old_state_groups) == 1: + # If we're going from one state group to another, lets check if + # we have a delta for that transition. If we do then we can just + # return that. - delta_ids = state_group_deltas.get( - (old_state_group, new_state_group,), None - ) - if delta_ids is not None: - # We have a delta from the existing to new current state, - # so lets just return that. If we happen to already have - # the current state in memory then lets also return that, - # but it doesn't matter if we don't. - new_state = state_groups_map.get(new_state_group) - defer.returnValue((new_state, delta_ids)) + new_state_group = next(iter(new_state_groups)) + old_state_group = next(iter(old_state_groups)) + + delta_ids = state_group_deltas.get( + (old_state_group, new_state_group,), None + ) + if delta_ids is not None: + # We have a delta from the existing to new current state, + # so lets just return that. If we happen to already have + # the current state in memory then lets also return that, + # but it doesn't matter if we don't. + new_state = state_groups_map.get(new_state_group) + defer.returnValue((new_state, delta_ids)) # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. @@ -657,6 +659,8 @@ class EventsStore(EventsWorkerStore): state_groups_map.update(group_to_state) if len(new_state_groups) == 1: + # If there is only one state group, then we know what the current + # state is. defer.returnValue((state_groups_map[new_state_groups.pop()], None)) # Ok, we need to defer to the state handler to resolve our state sets. -- cgit 1.5.1 From 8f65ab98d2ab35bbb29c23cc7978d54b12fd3db0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 15:08:01 +0100 Subject: Remove unnecessary iteritems --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 76cfbc90fe..9c94d3454f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -696,7 +696,7 @@ class EventsStore(EventsWorkerStore): existing_state = yield self.get_current_state_ids(room_id) to_delete = [ - key for key, ev_id in iteritems(existing_state) + key for key in existing_state if key not in current_state ] -- cgit 1.5.1 From 709c309b0e1756afd8b4514b95b0e901c567a0f1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 15:12:50 +0100 Subject: Expand on docstring comment about return value --- synapse/storage/events.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9c94d3454f..906a405031 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -557,6 +557,11 @@ class EventsStore(EventsWorkerStore): Returns a tuple of two state maps, the first being the full new current state and the second being the delta to the existing current state. If both are None then there has been no change. + + If there has been a change then we only return the delta if its + already been calculated. Conversely if we do know the delta then + the new current state is only returned if we've already calculated + it. """ if not new_latest_event_ids: -- cgit 1.5.1 From 3188973857d5bab5c1faf968917ad0ced42b5a0d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 16:20:14 +0100 Subject: Pull out did_forget to worker store --- synapse/app/synchrotron.py | 5 +---- synapse/storage/roommember.py | 52 ++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 29 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 26b9ec85f2..e201f18efd 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -55,7 +55,6 @@ from synapse.rest.client.v2_alpha import sync from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.storage.presence import UserPresenceState -from synapse.storage.roommember import RoomMemberStore from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background from synapse.util.manhole import manhole @@ -81,9 +80,7 @@ class SynchrotronSlavedStore( RoomStore, BaseSlavedStore, ): - did_forget = ( - RoomMemberStore.__dict__["did_forget"] - ) + pass UPDATE_SYNCING_USERS_MS = 10 * 1000 diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 01697ab2c9..027bf8c85e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -461,6 +461,30 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) + @cachedInlineCallbacks(num_args=2) + def did_forget(self, user_id, room_id): + """Returns whether user_id has elected to discard history for room_id. + + Returns False if they have since re-joined.""" + def f(txn): + sql = ( + "SELECT" + " COUNT(*)" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " forgotten = 0" + ) + txn.execute(sql, (user_id, room_id)) + rows = txn.fetchall() + return rows[0][0] + count = yield self.runInteraction("did_forget_membership", f) + defer.returnValue(count == 0) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -568,32 +592,10 @@ class RoomMemberStore(RoomMemberWorkerStore): ) txn.execute(sql, (user_id, room_id)) - txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - return self.runInteraction("forget_membership", f) - - @cachedInlineCallbacks(num_args=2) - def did_forget(self, user_id, room_id): - """Returns whether user_id has elected to discard history for room_id. - - Returns False if they have since re-joined.""" - def f(txn): - sql = ( - "SELECT" - " COUNT(*)" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " forgotten = 0" + self._invalidate_cache_and_stream( + txn, self.did_forget, (user_id, room_id,), ) - txn.execute(sql, (user_id, room_id)) - rows = txn.fetchall() - return rows[0][0] - count = yield self.runInteraction("did_forget_membership", f) - defer.returnValue(count == 0) + return self.runInteraction("forget_membership", f) @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): -- cgit 1.5.1 From cb5c37a57c387a74f6079008870cf024e674dfe5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 20:34:45 +0100 Subject: handle the edge case for _get_some_state_from_cache where types is [] --- synapse/storage/state.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 86f2c2e6b1..989977c644 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -547,7 +547,13 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - got_all = is_all or not missing_types + if types == [] and filtered_types is not None: + # special wildcard case for empty type-list but an explicit filtered_types + # which means that we'll try to return all types which aren't in the + # filtered_types list. missing_types will always be empty, so we ignore it. + got_all = is_all + else: + got_all = is_all or not missing_types return { k: v for k, v in iteritems(state_dict_ids) -- cgit 1.5.1 From ec56121b0d099824a1455260e05904f081b6d14a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 09:35:02 +0100 Subject: Correctly handle outliers during persist events We incorrectly asserted that all contexts must have a non None state group without consider outliers. This would usually be fine as the assertion would never be hit, as there is a shortcut during persistence if the forward extremities don't change. However, if the outlier is being persisted with non-outlier events, the function would be called and the assertion would be hit. Fixes #3601 --- synapse/storage/events.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..bd05f23b06 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -575,11 +575,12 @@ class EventsStore(EventsWorkerStore): for ev, ctx in events_context: if ctx.state_group is None: - # I don't think this can happen, but let's double-check - raise Exception( - "Context for new extremity event %s has no state " - "group" % (ev.event_id, ), - ) + # This should only happen for outlier events. + if not event.internal_metadata.is_outlier(): + raise Exception( + "Context for new event %s has no state " + "group" % (ev.event_id, ), + ) if ctx.state_group in state_groups_map: continue @@ -607,7 +608,7 @@ class EventsStore(EventsWorkerStore): for event_id in new_latest_event_ids: # First search in the list of new events we're adding. for ev, ctx in events_context: - if event_id == ev.event_id: + if event_id == ev.event_id and ctx.state_group is not None: event_id_to_state_group[event_id] = ctx.state_group break else: -- cgit 1.5.1 From 371da42ae4ec093f3887e9485d149cdfbedd4e58 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 09:41:12 +0100 Subject: Wrap a number of things that run in the background This will reduce the number of "Starting db connection from sentinel context" warnings, and will help with our metrics. --- synapse/app/homeserver.py | 13 ++++++++++--- synapse/groups/attestations.py | 6 +++++- synapse/replication/tcp/resource.py | 14 ++++++++------ synapse/rest/media/v1/media_repository.py | 8 +++++++- synapse/rest/media/v1/preview_url_resource.py | 8 +++++++- synapse/storage/devices.py | 8 ++++++-- synapse/storage/event_federation.py | 9 ++++++--- synapse/storage/event_push_actions.py | 13 +++++++++---- synapse/storage/transactions.py | 6 +++++- 9 files changed, 63 insertions(+), 22 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 2ad1beb8d8..b7e7718290 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -49,6 +49,7 @@ from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.module_api import ModuleApi from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, check_requirements @@ -427,6 +428,9 @@ def run(hs): # currently either 0 or 1 stats_process = [] + def start_phone_stats_home(): + run_as_background_process("phone_stats_home", phone_stats_home) + @defer.inlineCallbacks def phone_stats_home(): logger.info("Gathering stats for reporting") @@ -498,7 +502,10 @@ def run(hs): ) def generate_user_daily_visit_stats(): - hs.get_datastore().generate_user_daily_visits() + run_as_background_process( + "generate_user_daily_visits", + hs.get_datastore().generate_user_daily_visits, + ) # Rather than update on per session basis, batch up the requests. # If you increase the loop period, the accuracy of user_daily_visits @@ -507,7 +514,7 @@ def run(hs): if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") - clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) + clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000) # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process @@ -515,7 +522,7 @@ def run(hs): # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes - clock.call_later(5 * 60, phone_stats_home) + clock.call_later(5 * 60, start_phone_stats_home) if hs.config.daemonize and hs.config.print_pidfile: print (hs.config.pid_file) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 47452700a8..4216af0a27 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -43,6 +43,7 @@ from signedjson.sign import sign_json from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from synapse.util.logcontext import run_in_background @@ -129,7 +130,7 @@ class GroupAttestionRenewer(object): self.attestations = hs.get_groups_attestation_signing() self._renew_attestations_loop = self.clock.looping_call( - self._renew_attestations, 30 * 60 * 1000, + self._start_renew_attestations, 30 * 60 * 1000, ) @defer.inlineCallbacks @@ -151,6 +152,9 @@ class GroupAttestionRenewer(object): defer.returnValue({}) + def _start_renew_attestations(self): + run_as_background_process("renew_attestations", self._renew_attestations) + @defer.inlineCallbacks def _renew_attestations(self): """Called periodically to check if we need to update any of our attestations diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 611fb66e1d..fd59f1595f 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -25,6 +25,7 @@ from twisted.internet import defer from twisted.internet.protocol import Factory from synapse.metrics import LaterGauge +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.metrics import Measure, measure_func from .protocol import ServerReplicationStreamProtocol @@ -117,7 +118,6 @@ class ReplicationStreamer(object): for conn in self.connections: conn.send_error("server shutting down") - @defer.inlineCallbacks def on_notifier_poke(self): """Checks if there is actually any new data and sends it to the connections if there are. @@ -132,14 +132,16 @@ class ReplicationStreamer(object): stream.discard_updates_and_advance() return - # If we're in the process of checking for new updates, mark that fact - # and return + self.pending_updates = True + if self.is_looping: - logger.debug("Noitifier poke loop already running") - self.pending_updates = True + logger.debug("Notifier poke loop already running") return - self.pending_updates = True + run_as_background_process("replication_notifier", self._run_notifier_loop) + + @defer.inlineCallbacks + def _run_notifier_loop(self): self.is_looping = True try: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 30242c525a..5b13378caa 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.async import Linearizer from synapse.util.logcontext import make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -100,10 +101,15 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed, + self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS, ) + def _start_update_recently_accessed(self): + run_as_background_process( + "update_recently_accessed_media", self._update_recently_accessed, + ) + @defer.inlineCallbacks def _update_recently_accessed(self): remote_media = self.recently_accessed_remotes diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b70b15c4c2..4efd5339a4 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -41,6 +41,7 @@ from synapse.http.server import ( wrap_json_request_handler, ) from synapse.http.servlet import parse_integer, parse_string +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.async import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background @@ -81,7 +82,7 @@ class PreviewUrlResource(Resource): self._cache.start() self._cleaner_loop = self.clock.looping_call( - self._expire_url_cache_data, 10 * 1000 + self._start_expire_url_cache_data, 10 * 1000, ) def render_OPTIONS(self, request): @@ -371,6 +372,11 @@ class PreviewUrlResource(Resource): "etag": headers["ETag"][0] if "ETag" in headers else None, }) + def _start_expire_url_cache_data(self): + run_as_background_process( + "expire_url_cache_data", self._expire_url_cache_data, + ) + @defer.inlineCallbacks def _expire_url_cache_data(self): """Clean up expired url cache content, media and thumbnails. diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index cc3cdf2ebc..52dccb1507 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from ._base import Cache, SQLBaseStore @@ -711,6 +712,9 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - return self.runInteraction( - "_prune_old_outbound_device_pokes", _prune_txn + run_as_background_process( + "prune_old_outbound_device_pokes", + self.runInteraction, + "_prune_old_outbound_device_pokes", + _prune_txn, ) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 8d366d1b91..65f2d19e20 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -23,6 +23,7 @@ from unpaddedbase64 import encode_base64 from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore from synapse.storage.signatures import SignatureWorkerStore @@ -446,7 +447,7 @@ class EventFederationStore(EventFederationWorkerStore): ) hs.get_clock().looping_call( - self._delete_old_forward_extrem_cache, 60 * 60 * 1000 + self._delete_old_forward_extrem_cache, 60 * 60 * 1000, ) def _update_min_depth_for_room_txn(self, txn, room_id, depth): @@ -548,9 +549,11 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - return self.runInteraction( + run_as_background_process( + "delete_old_forward_extrem_cache", + self.runInteraction, "_delete_old_forward_extrem_cache", - _delete_old_forward_extrem_cache_txn + _delete_old_forward_extrem_cache_txn, ) def clean_room_for_join(self, room_id): diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 29b511ae5e..4f44b0ad47 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -22,6 +22,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction, SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -458,11 +459,12 @@ class EventPushActionsWorkerStore(SQLBaseStore): "Error removing push actions after event persistence failure", ) - @defer.inlineCallbacks def _find_stream_orderings_for_times(self): - yield self.runInteraction( + run_as_background_process( + "event_push_action_stream_orderings", + self.runInteraction, "_find_stream_orderings_for_times", - self._find_stream_orderings_for_times_txn + self._find_stream_orderings_for_times_txn, ) def _find_stream_orderings_for_times_txn(self, txn): @@ -604,7 +606,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): self._doing_notif_rotation = False self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 + self._start_rotate_notifs, 30 * 60 * 1000, ) def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts, @@ -787,6 +789,9 @@ class EventPushActionsStore(EventPushActionsWorkerStore): WHERE room_id = ? AND user_id = ? AND stream_ordering <= ? """, (room_id, user_id, stream_ordering)) + def _start_rotate_notifs(self): + run_as_background_process("rotate_notifs", self._rotate_notifs) + @defer.inlineCallbacks def _rotate_notifs(self): if self._doing_notif_rotation or self.stream_ordering_day_ago is None: diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index c3bc94f56d..b4b479d94c 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -22,6 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore @@ -57,7 +58,7 @@ class TransactionStore(SQLBaseStore): def __init__(self, db_conn, hs): super(TransactionStore, self).__init__(db_conn, hs) - self._clock.looping_call(self._cleanup_transactions, 30 * 60 * 1000) + self._clock.looping_call(self._start_cleanup_transactions, 30 * 60 * 1000) def get_received_txn_response(self, transaction_id, origin): """For an incoming transaction from a given origin, check if we have @@ -271,6 +272,9 @@ class TransactionStore(SQLBaseStore): txn.execute(query, (self._clock.time_msec(),)) return self.cursor_to_dict(txn) + def _start_cleanup_transactions(self): + run_as_background_process("cleanup_transactions", self._cleanup_transactions) + def _cleanup_transactions(self): now = self._clock.time_msec() month_ago = now - 30 * 24 * 60 * 60 * 1000 -- cgit 1.5.1 From a297ff2b16094f358cc8432f1db660ca539f8b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 09:48:01 +0100 Subject: Fix typo in conditional --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bd05f23b06..5024c4714d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -576,7 +576,7 @@ class EventsStore(EventsWorkerStore): for ev, ctx in events_context: if ctx.state_group is None: # This should only happen for outlier events. - if not event.internal_metadata.is_outlier(): + if not ev.internal_metadata.is_outlier(): raise Exception( "Context for new event %s has no state " "group" % (ev.event_id, ), -- cgit 1.5.1 From 07defd5fe6cc6f645195ddf0d679290bb214ca73 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 10:47:57 +0100 Subject: Fix another logcontext leak in _persist_events We need to run the errback in the sentinel context to avoid losing our own context. Also: add logging to runInteraction to help identify where "Starting db connection from sentinel context" warnings are coming from --- changelog.d/3606.misc | 1 + synapse/storage/_base.py | 6 ++++++ synapse/storage/events.py | 9 ++++----- 3 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 changelog.d/3606.misc (limited to 'synapse/storage') diff --git a/changelog.d/3606.misc b/changelog.d/3606.misc new file mode 100644 index 0000000000..f0137766a0 --- /dev/null +++ b/changelog.d/3606.misc @@ -0,0 +1 @@ +Fix some random logcontext leaks. \ No newline at end of file diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1d41d8d445..44f37b4c1e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -311,6 +311,12 @@ class SQLBaseStore(object): after_callbacks = [] exception_callbacks = [] + if LoggingContext.current_context() == LoggingContext.sentinel: + logger.warn( + "Starting db txn '%s' from sentinel context", + desc, + ) + try: result = yield self.runWithConnection( self._new_transaction, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..ee9e4d4b65 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -142,15 +142,14 @@ class _EventPeristenceQueue(object): try: queue = self._get_drainining_queue(room_id) for item in queue: - # handle_queue_loop runs in the sentinel logcontext, so - # there is no need to preserve_fn when running the - # callbacks on the deferred. try: ret = yield per_item_callback(item) + except Exception: + with PreserveLoggingContext(): + item.deferred.errback() + else: with PreserveLoggingContext(): item.deferred.callback(ret) - except Exception: - item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: -- cgit 1.5.1 From 1be94440d38ad6af64486ce31c650d0540d4049c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 11:01:36 +0100 Subject: Fix occasional 'tuple index out of range' error This fixes a bug in _delete_existing_rows_txn which was introduced in #3435 (though it's been on matrix-org-hotfixes for *years*). This code is only called when there is some sort of conflict the first time we try to persist an event, so it only happens rarely. Still, the exceptions are annoying. --- changelog.d/3607.bugfix | 1 + synapse/storage/events.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/3607.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/3607.bugfix b/changelog.d/3607.bugfix new file mode 100644 index 0000000000..7ad64593b8 --- /dev/null +++ b/changelog.d/3607.bugfix @@ -0,0 +1 @@ +Fix 'tuple index out of range' error \ No newline at end of file diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..f7c4226ea2 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1137,7 +1137,7 @@ class EventsStore(EventsWorkerStore): ): txn.executemany( "DELETE FROM %s WHERE room_id = ? AND event_id = ?" % (table,), - [(ev.event_id,) for ev, _ in events_and_contexts] + [(ev.room_id, ev.event_id) for ev, _ in events_and_contexts] ) def _store_event_txn(self, txn, events_and_contexts): -- cgit 1.5.1 From 7780a7b47ca0fa039886dda8f6ca84bb197e9425 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 11:13:20 +0100 Subject: Actually fix it by adding continue --- synapse/storage/events.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5024c4714d..d7fea0986b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -581,6 +581,7 @@ class EventsStore(EventsWorkerStore): "Context for new event %s has no state " "group" % (ev.event_id, ), ) + continue if ctx.state_group in state_groups_map: continue -- cgit 1.5.1 From 7d9fb88617f475ac7e064c5cccc8d78dbd78d2a3 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:15:33 +0100 Subject: incorporate more review. --- synapse/storage/state.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 989977c644..e38427bf9d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -493,12 +493,6 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` - Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). - `missing_types` is the list of types that aren't in the cache for that - group. `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. - Args: group(int): The state group to lookup types(list[str, str|None]): List of 2-tuples of the form @@ -507,6 +501,11 @@ class StateGroupWorkerStore(SQLBaseStore): filtered_types(list[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. + + Returns 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -520,7 +519,7 @@ class StateGroupWorkerStore(SQLBaseStore): if ( state_key is None or - filtered_types is not None and typ not in filtered_types + (filtered_types is not None and typ not in filtered_types) ): type_to_key[typ] = None # we mark the type as missing from the cache because @@ -547,18 +546,17 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - if types == [] and filtered_types is not None: - # special wildcard case for empty type-list but an explicit filtered_types - # which means that we'll try to return all types which aren't in the - # filtered_types list. missing_types will always be empty, so we ignore it. - got_all = is_all - else: - got_all = is_all or not missing_types + got_all = is_all + if not got_all: + # the cache is incomplete. We may still have got all the results we need, if + # we don't have any wildcards in the match list. + if not missing_types and filtered_types is None: + got_all = True return { k: v for k, v in iteritems(state_dict_ids) if include(k[0], k[1]) - }, missing_types, got_all + }, got_all def _get_all_state_from_cache(self, group): """Checks if group is in cache. See `_get_state_for_groups` @@ -603,7 +601,7 @@ class StateGroupWorkerStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict_ids, _, got_all = self._get_some_state_from_cache( + state_dict_ids, got_all = self._get_some_state_from_cache( group, types, filtered_types ) results[group] = state_dict_ids -- cgit 1.5.1 From bc7944e6d2ea0076badd0eba414e1ba7020eb1e6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 23:36:31 +0100 Subject: switch missing_types to be a bool --- synapse/storage/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e38427bf9d..b27b3ae144 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -511,8 +511,8 @@ class StateGroupWorkerStore(SQLBaseStore): type_to_key = {} - # tracks which of the requested types are missing from our cache - missing_types = set() + # tracks whether any of ourrequested types are missing from the cache + missing_types = False for typ, state_key in types: key = (typ, state_key) @@ -526,13 +526,13 @@ class StateGroupWorkerStore(SQLBaseStore): # when the cache was populated it might have been done with a # restricted set of state_keys, so the wildcard will not work # and the cache may be incomplete. - missing_types.add(key) + missing_types = True else: if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) if key not in state_dict_ids and key not in known_absent: - missing_types.add(key) + missing_types = True sentinel = object() -- cgit 1.5.1 From 03751a64203b169cbf33b636b6d940ca6d414c31 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 11:44:26 +0100 Subject: Fix some looping_call calls which were broken in #3604 It turns out that looping_call does check the deferred returned by its callback, and (at least in the case of client_ips), we were relying on this, and I broke it in #3604. Update run_as_background_process to return the deferred, and make sure we return it to clock.looping_call. --- changelog.d/3610.feature | 1 + synapse/app/homeserver.py | 4 ++-- synapse/groups/attestations.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/metrics/background_process_metrics.py | 10 ++++++++-- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/client_ips.py | 2 +- synapse/storage/devices.py | 2 +- synapse/storage/event_federation.py | 2 +- synapse/storage/event_push_actions.py | 4 ++-- synapse/storage/transactions.py | 4 +++- synapse/util/caches/expiringcache.py | 2 +- 13 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 changelog.d/3610.feature (limited to 'synapse/storage') diff --git a/changelog.d/3610.feature b/changelog.d/3610.feature new file mode 100644 index 0000000000..77a294cb9f --- /dev/null +++ b/changelog.d/3610.feature @@ -0,0 +1 @@ +Add metrics to track resource usage by background processes diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b7e7718290..57b815d777 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -429,7 +429,7 @@ def run(hs): stats_process = [] def start_phone_stats_home(): - run_as_background_process("phone_stats_home", phone_stats_home) + return run_as_background_process("phone_stats_home", phone_stats_home) @defer.inlineCallbacks def phone_stats_home(): @@ -502,7 +502,7 @@ def run(hs): ) def generate_user_daily_visit_stats(): - run_as_background_process( + return run_as_background_process( "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits, ) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 4216af0a27..b04f4234ca 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -153,7 +153,7 @@ class GroupAttestionRenewer(object): defer.returnValue({}) def _start_renew_attestations(self): - run_as_background_process("renew_attestations", self._renew_attestations) + return run_as_background_process("renew_attestations", self._renew_attestations) @defer.inlineCallbacks def _renew_attestations(self): diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 43692b83a8..cb5c6d587e 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -256,7 +256,7 @@ class ProfileHandler(BaseHandler): ) def _start_update_remote_profile_cache(self): - run_as_background_process( + return run_as_background_process( "Update remote profile", self._update_remote_profile_cache, ) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 9d820e44a6..ce678d5f75 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -151,13 +151,19 @@ def run_as_background_process(desc, func, *args, **kwargs): This should be used to wrap processes which are fired off to run in the background, instead of being associated with a particular request. + It returns a Deferred which completes when the function completes, but it doesn't + follow the synapse logcontext rules, which makes it appropriate for passing to + clock.looping_call and friends (or for firing-and-forgetting in the middle of a + normal synapse inlineCallbacks function). + Args: desc (str): a description for this background process type func: a function, which may return a Deferred args: positional args for func kwargs: keyword args for func - Returns: None + Returns: Deferred which returns the result of func, but note that it does not + follow the synapse logcontext rules. """ @defer.inlineCallbacks def run(): @@ -176,4 +182,4 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes[desc].remove(proc) with PreserveLoggingContext(): - run() + return run() diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5b13378caa..174ad20123 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -106,7 +106,7 @@ class MediaRepository(object): ) def _start_update_recently_accessed(self): - run_as_background_process( + return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed, ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4efd5339a4..27aa0def2f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -373,7 +373,7 @@ class PreviewUrlResource(Resource): }) def _start_expire_url_cache_data(self): - run_as_background_process( + return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data, ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 77ae10da3d..b8cefd43d6 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -102,7 +102,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): to_update, ) - run_as_background_process( + return run_as_background_process( "update_client_ips", update, ) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 52dccb1507..c0943ecf91 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -712,7 +712,7 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - run_as_background_process( + return run_as_background_process( "prune_old_outbound_device_pokes", self.runInteraction, "_prune_old_outbound_device_pokes", diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..f269ec6fb3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -549,7 +549,7 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - run_as_background_process( + return run_as_background_process( "delete_old_forward_extrem_cache", self.runInteraction, "_delete_old_forward_extrem_cache", diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 4f44b0ad47..6840320641 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -460,7 +460,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): ) def _find_stream_orderings_for_times(self): - run_as_background_process( + return run_as_background_process( "event_push_action_stream_orderings", self.runInteraction, "_find_stream_orderings_for_times", @@ -790,7 +790,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): """, (room_id, user_id, stream_ordering)) def _start_rotate_notifs(self): - run_as_background_process("rotate_notifs", self._rotate_notifs) + return run_as_background_process("rotate_notifs", self._rotate_notifs) @defer.inlineCallbacks def _rotate_notifs(self): diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b4b479d94c..428e7fa36e 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -273,7 +273,9 @@ class TransactionStore(SQLBaseStore): return self.cursor_to_dict(txn) def _start_cleanup_transactions(self): - run_as_background_process("cleanup_transactions", self._cleanup_transactions) + return run_as_background_process( + "cleanup_transactions", self._cleanup_transactions, + ) def _cleanup_transactions(self): now = self._clock.time_msec() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 465adc54a8..ce85b2ae11 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -64,7 +64,7 @@ class ExpiringCache(object): return def f(): - run_as_background_process( + return run_as_background_process( "prune_cache_%s" % self._cache_name, self._prune_cache, ) -- cgit 1.5.1 From 1b4d73fa520a301ec35ba417d14a4549a44f33e0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 12:53:51 +0100 Subject: comment on event_edges --- synapse/storage/schema/full_schemas/16/event_edges.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/full_schemas/16/event_edges.sql b/synapse/storage/schema/full_schemas/16/event_edges.sql index 52eec88357..6b5a5a88fa 100644 --- a/synapse/storage/schema/full_schemas/16/event_edges.sql +++ b/synapse/storage/schema/full_schemas/16/event_edges.sql @@ -37,7 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, - is_state BOOL NOT NULL, + is_state BOOL NOT NULL, -- true if this is a prev_state edge rather than a regular + -- event dag edge. UNIQUE (event_id, prev_event_id, room_id, is_state) ); -- cgit 1.5.1 From bd4b25f4d07e07a2da0382cfc59a5a262883c0fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 13:19:08 +0100 Subject: Remove some redundant joins on event_edges.room_id We've long passed the point where it's possible to have the same event_id in different tables, so these join conditions are redundant: we can just join on event_id. event_edges is of non-trivial size, and the room_id column is wasteful, so let's stop reading from it. In future, we can stop writing to it, and then drop it. --- synapse/storage/event_federation.py | 13 ++++++------- synapse/storage/events.py | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..801581dcf7 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -114,9 +114,9 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, sql = ( "SELECT b.event_id, MAX(e.depth) FROM events as e" " INNER JOIN event_edges as g" - " ON g.event_id = e.event_id AND g.room_id = e.room_id" + " ON g.event_id = e.event_id" " INNER JOIN event_backward_extremities as b" - " ON g.prev_event_id = b.event_id AND g.room_id = b.room_id" + " ON g.prev_event_id = b.event_id" " WHERE b.room_id = ? AND g.is_state is ?" " GROUP BY b.event_id" ) @@ -330,8 +330,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, "SELECT depth, prev_event_id FROM event_edges" " INNER JOIN events" " ON prev_event_id = events.event_id" - " AND event_edges.room_id = events.room_id" - " WHERE event_edges.room_id = ? AND event_edges.event_id = ?" + " WHERE event_edges.event_id = ?" " AND event_edges.is_state = ?" " LIMIT ?" ) @@ -365,7 +364,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for row in txn: @@ -402,7 +401,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, query = ( "SELECT prev_event_id FROM event_edges " - "WHERE room_id = ? AND event_id = ? AND is_state = ? " + "WHERE event_id = ? AND is_state = ? " "LIMIT ?" ) @@ -411,7 +410,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, for event_id in front: txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for e_id, in txn: diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..cb10fdedc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -520,7 +520,6 @@ class EventsStore(EventsWorkerStore): iterable=list(new_latest_event_ids), retcols=["prev_event_id"], keyvalues={ - "room_id": room_id, "is_state": False, }, desc="_calculate_new_extremeties", -- cgit 1.5.1 From 5c1d301fd9e669b6704d54caeb1e8a3a223ba053 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:12:00 +0100 Subject: Stop populating events.content This field is no longer read from, so we should stop populating it. Once we're happy that this doesn't break everything, and a rollback is unlikely, we can think about dropping the column. --- synapse/storage/events.py | 1 - .../schema/delta/50/make_event_content_nullable.py | 93 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/50/make_event_content_nullable.py (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..94515cd153 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1189,7 +1189,6 @@ class EventsStore(EventsWorkerStore): "type": event.type, "processed": True, "outlier": event.internal_metadata.is_outlier(), - "content": encode_json(event.content).decode("UTF-8"), "origin_server_ts": int(event.origin_server_ts), "received_ts": self._clock.time_msec(), "sender": event.sender, diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py new file mode 100644 index 0000000000..fa4a289514 --- /dev/null +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +We want to stop populating 'event.content', so we need to make it nullable. + +If this has to be rolled back, then the following should populate the missing data: + +Postgres: + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ); + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +SQLite: + + UPDATE events SET content=( + SELECT json_extract(json,'$.content') FROM event_json ej + WHERE ej.event_id = events.event_id + ) + WHERE + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ) + OR stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +""" + +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + cur.execute(""" + ALTER TABLE events ALTER COLUMN content DROP NOT NULL; + """) + return + + # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() + + cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") + (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") + if sql == oldsql: + raise Exception("Couldn't find null constraint to drop in %s" % oldsql) + + logger.info("Replacing definition of 'events' with: %s", sql) + + cur.execute("PRAGMA writable_schema=ON") + + cur.execute( + "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", + (sql, ), + ) + + cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) + cur.execute("PRAGMA writable_schema=OFF") + + +def run_upgrade(*args, **kwargs): + pass -- cgit 1.5.1 From 51d7df19158de0f21e659625bf43716ff0a700bc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:54:04 +0100 Subject: Create the column nullable There's no real point in ever making the column non-nullable, and doing so breaks the sytests. --- .../schema/delta/50/make_event_content_nullable.py | 15 +++++++-------- synapse/storage/schema/full_schemas/16/im.sql | 7 ++++++- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py index fa4a289514..7d27342e39 100644 --- a/synapse/storage/schema/delta/50/make_event_content_nullable.py +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -60,6 +60,10 @@ logger = logging.getLogger(__name__) def run_create(cur, database_engine, *args, **kwargs): + pass + + +def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): cur.execute(""" ALTER TABLE events ALTER COLUMN content DROP NOT NULL; @@ -67,27 +71,22 @@ def run_create(cur, database_engine, *args, **kwargs): return # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html - cur.execute("PRAGMA schema_version") - (oldver,) = cur.fetchone() cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") if sql == oldsql: raise Exception("Couldn't find null constraint to drop in %s" % oldsql) logger.info("Replacing definition of 'events' with: %s", sql) + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() cur.execute("PRAGMA writable_schema=ON") - cur.execute( "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", (sql, ), ) - cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) cur.execute("PRAGMA writable_schema=OFF") - - -def run_upgrade(*args, **kwargs): - pass diff --git a/synapse/storage/schema/full_schemas/16/im.sql b/synapse/storage/schema/full_schemas/16/im.sql index ba5346806e..5f5cb8d01d 100644 --- a/synapse/storage/schema/full_schemas/16/im.sql +++ b/synapse/storage/schema/full_schemas/16/im.sql @@ -19,7 +19,12 @@ CREATE TABLE IF NOT EXISTS events( event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, - content TEXT NOT NULL, + + -- 'content' used to be created NULLable, but as of delta 50 we drop that constraint. + -- the hack we use to drop the constraint doesn't work for an in-memory sqlite + -- database, which breaks the sytests. Hence, we no longer make it nullable. + content TEXT, + unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, -- cgit 1.5.1