From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:01:41 +0000 Subject: WIP experiment in lazyloading room members --- synapse/handlers/sync.py | 43 +++++++++++++++++++++++++++++++------------ synapse/storage/state.py | 47 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 17 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f713ce038..809e9fece9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -399,7 +399,7 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event): + def get_state_after_event(self, event, types=None): """ Get the room state after the given event @@ -409,14 +409,14 @@ class SyncHandler(object): Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id) + state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position): + def get_state_at(self, room_id, stream_position, types=None): """ Get the room state at a particular stream position Args: @@ -432,7 +432,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event) + state = yield self.get_state_after_event(last_event, types) else: # no events in this room - so presumably no state @@ -441,7 +441,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state): + full_state, filter_members): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -454,6 +454,8 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. + filter_members(bool): Whether to only return state for members + referenced in this timeline segment Returns: A deferred new event dictionary @@ -464,18 +466,35 @@ class SyncHandler(object): # TODO(mjark) Check for new redactions in the state events. with Measure(self.clock, "compute_state_delta"): + + types = None + if filter_members: + # We only request state for the members needed to display the + # timeline: + types = ( + (EventTypes.Member, state_key) + for state_key in set( + event.sender # FIXME: we also care about targets etc. + for event in batch.events + ) + ) + types.append((None, None)) # don't just filter to room members + + # TODO: we should opportunistically deduplicate these members too + # within the same sync series (based on an in-memory cache) + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token + room_id, stream_position=now_token, types=types ) state_ids = current_state_ids @@ -493,15 +512,15 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token + room_id, stream_position=since_token, types=types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) timeline_state = { @@ -1325,7 +1344,7 @@ class SyncHandler(object): state = yield self.compute_state_delta( room_id, batch, sync_config, since_token, now_token, - full_state=full_state + full_state=full_state, filter_members=True ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..da6bb685fa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -198,8 +198,15 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_state_groups_from_groups_txn(self, txn, groups, types=None): results = {group: {} for group in groups} + + include_other_types = False + if types is not None: - types = list(set(types)) # deduplicate types list + type_set = set(types) + if (None, None) in type_set: + include_other_types = True + type_set.remove((None, None)) + types = list(type_set) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -238,11 +245,21 @@ class StateGroupWorkerStore(SQLBaseStore): if types: clause_to_args = [ ( - "AND type = ? AND state_key = ?", - (etype, state_key) + "AND type = ? AND state_key = ?" if state_key is not None else "AND type = ?", + (etype, state_key) if state_key is not None else (etype) ) for etype, state_key in types ] + + if include_other_types: + # XXX: check whether this slows postgres down like a list of + # ORs does too? + clause_to_args.append( + ( + "AND type <> ? " * len(types), + [t for (t, _) in types] + ) + ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -263,6 +280,10 @@ class StateGroupWorkerStore(SQLBaseStore): where_clause = "AND (%s)" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) + if include_other_types: + where_clause += " AND (%s)" % ( + " AND ".join(["type <> ?"] * len(types)), + ) else: where_clause = "" @@ -449,17 +470,27 @@ class StateGroupWorkerStore(SQLBaseStore): group: The state group to lookup types (list): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the - `type`. + `type`. Presence of type of `None` indicates that types not + in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() + include_other_types = False + for typ, state_key in types: key = (typ, state_key) + + if typ is None: + include_other_types = True + next + if state_key is None: type_to_key[typ] = None + # XXX: why do we mark the type as missing from our cache just + # because we weren't filtering on a specific value of state_key? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -478,7 +509,7 @@ class StateGroupWorkerStore(SQLBaseStore): return True if state_key in valid_state_keys: return True - return False + return include_other_types got_all = is_all or not missing_types @@ -507,6 +538,12 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. + + XXX: is it really true that `state_key` of None in `types` matches all + state_keys? it looks like _get-some_state_from_cache does the right thing, + but _get_state_groups_from_groups_txn treats ths None is turned into + 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew + I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) -- cgit 1.4.1 From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:10:25 +0000 Subject: typos --- synapse/handlers/sync.py | 4 ++-- synapse/storage/state.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 809e9fece9..fa730ca760 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -471,13 +471,13 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: - types = ( + types = [ (EventTypes.Member, state_key) for state_key in set( event.sender # FIXME: we also care about targets etc. for event in batch.events ) - ) + ] types.append((None, None)) # don't just filter to room members # TODO: we should opportunistically deduplicate these members too diff --git a/synapse/storage/state.py b/synapse/storage/state.py index da6bb685fa..0238200286 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore): args = [next_group] if types: args.extend(i for typ in types for i in typ) + if include_other_types: + args.extend(typ for (typ, _) in types) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" -- cgit 1.4.1 From 97c0496cfa89b037d89fccd05dd03442b80e07fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 00:27:06 +0000 Subject: fix sqlite where clause --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0238200286..b796d3c995 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -277,13 +277,14 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: if types is not None: - where_clause = "AND (%s)" % ( + where_clause = "AND (%s" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) if include_other_types: - where_clause += " AND (%s)" % ( + where_clause += " OR (%s)" % ( " AND ".join(["type <> ?"] * len(types)), ) + where_clause += ")" else: where_clause = "" -- cgit 1.4.1 From fdedcd1f4ddeaa3ed5bfd3c05ab2977b4e8ed457 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 01:39:06 +0000 Subject: correctly handle None state_keys and fix include_other_types thinko --- synapse/storage/state.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b796d3c995..405e6b6770 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -276,15 +276,23 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?) OR " + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?) OR " + where_args.extend([typ[0], typ[1]]) + if include_other_types: - where_clause += " OR (%s)" % ( + where_clause += "(%s) OR " % ( " AND ".join(["type <> ?"] * len(types)), ) - where_clause += ")" + where_args.extend(t for (t, _) in types) + where_clause += "0)" # 0 to terminate the last OR else: where_clause = "" @@ -301,9 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) - if include_other_types: - args.extend(typ for (typ, _) in types) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -507,12 +513,12 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return False + return include_other_types if valid_state_keys is None: return True if state_key in valid_state_keys: return True - return include_other_types + return False got_all = is_all or not missing_types -- cgit 1.4.1 From 1b1c13777154b5b0cf8bf8cf809381f889a2a82d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 17:52:52 +0000 Subject: fix bug #2926 --- synapse/storage/state.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..783cebb351 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,10 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else + ( + "AND type = ?", + (etype) ) for etype, state_key in types ] @@ -259,10 +263,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?)" + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?)" + where_args.extend([typ[0], typ[1]]) + if typ != types[-1]: + where_clause += " OR " + where_clause += ")" else: where_clause = "" @@ -279,7 +292,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" -- cgit 1.4.1 From 52f7e23c7276b2848aa5291d8b78875b7c32a658 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:07:55 +0000 Subject: PR feedbackz --- synapse/storage/state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 783cebb351..77259a3141 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,10 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) - ) if state_key is not None else - ( + ) if state_key is not None else ( "AND type = ?", - (etype) + (etype,) ) for etype, state_key in types ] -- cgit 1.4.1 From b2aba9e43053f9f297671fce0051bfc18a8b655a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:13:44 +0000 Subject: build where_clause sanely --- synapse/storage/state.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 77259a3141..82740266bf 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -263,18 +263,16 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: where_args = [] + where_clauses = [] if types is not None: - where_clause = "AND (" for typ in types: if typ[1] is None: - where_clause += "(type = ?)" + where_clauses.append("(type = ?)") where_args.extend(typ[0]) else: - where_clause += "(type = ? AND state_key = ?)" + where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if typ != types[-1]: - where_clause += " OR " - where_clause += ")" + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" -- cgit 1.4.1 From 865377a70d9d7db27b89348d2ebbd394f701c490 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:45:36 +0000 Subject: disable optimisation for searching for state groups when type filter includes wildcards on state_key --- synapse/storage/state.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 82740266bf..39f73afaa2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -264,11 +264,13 @@ class StateGroupWorkerStore(SQLBaseStore): else: where_args = [] where_clauses = [] + wildcard_types = False if types is not None: for typ in types: if typ[1] is None: where_clauses.append("(type = ?)") where_args.extend(typ[0]) + wildcard_types = True else: where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) @@ -302,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries inthe (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( -- cgit 1.4.1 From afbf4d3dccb3d18276e4b119b0267490ca522b4b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:48:04 +0000 Subject: typoe --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 39f73afaa2..ffa4246031 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -304,7 +304,7 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the number of entries inthe (type,state_key)->event_id dict + # If the number of entries in the (type,state_key)->event_id dict # matches the number of (type,state_keys) types we were searching # for, then we must have found them all, so no need to go walk # further down the tree... UNLESS our types filter contained -- cgit 1.4.1 From 14a9d2f73d50225f190f42e270cbf9ef7447bd8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:03:42 +0000 Subject: ensure we always include the members for a given timeline block --- synapse/handlers/sync.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index fa730ca760..c754cfdeeb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -468,6 +468,8 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None + member_state_ids = {} + if filter_members: # We only request state for the members needed to display the # timeline: @@ -492,6 +494,13 @@ class SyncHandler(object): state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types ) + + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -499,6 +508,12 @@ class SyncHandler(object): state_ids = current_state_ids + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -507,6 +522,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, + timeline_start_members=member_state_ids, previous={}, current=current_state_ids, ) @@ -523,6 +539,12 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) + if filter_members: + member_state_ids = { + t: state_at_timeline_start[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -531,6 +553,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, + timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, ) @@ -1440,12 +1463,16 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, previous, current): +def _calculate_state(timeline_contains, timeline_start, timeline_start_members, + previous, current): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline + timeline_start_members (dict): state at the start of the timeline + for room members who participate in this chunk of timeline. + Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1464,11 +1491,12 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): } c_ids = set(e for e in current.values()) - tc_ids = set(e for e in timeline_contains.values()) - p_ids = set(e for e in previous.values()) ts_ids = set(e for e in timeline_start.values()) + tsm_ids = set(e for e in timeline_start_members.values()) + p_ids = set(e for e in previous.values()) + tc_ids = set(e for e in timeline_contains.values()) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids return { event_id_to_key[e]: e for e in state_ids -- cgit 1.4.1 From f0f9a0605b1bddc1b01d1bbb6af93f00763b8496 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:12:15 +0000 Subject: remove comment now #2969 is fixed --- synapse/storage/state.py | 6 ------ 1 file changed, 6 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4ab16e18b2..4291cde7ab 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -561,12 +561,6 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. - - XXX: is it really true that `state_key` of None in `types` matches all - state_keys? it looks like _get-some_state_from_cache does the right thing, - but _get_state_groups_from_groups_txn treats ths None is turned into - 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew - I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) -- cgit 1.4.1 From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:31:41 +0000 Subject: make it work --- synapse/handlers/sync.py | 6 +++--- synapse/storage/state.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c754cfdeeb..c05e3d107f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -498,7 +498,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } else: @@ -511,7 +511,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4291cde7ab..9c9994c073 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? + unique_types = set([ t for (t, _) in types ]) clause_to_args.append( ( - "AND type <> ? " * len(types), - [t for (t, _) in types] + "AND type <> ? " * len(unique_types), + list(unique_types) ) ) else: @@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: + unique_types = set([ t for (t, _) in types ]) where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(types)) + ")" + "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) - where_args.extend(t for (t, _) in types) + where_args.extend(list(unique_types)) where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: -- cgit 1.4.1 From c9d72e4571752554dfe01d755ae23f55c5f84ade Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 23:46:45 +0000 Subject: oops --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c05e3d107f..887624c431 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_at_timeline_start if t[0] == EventTypes.Member } timeline_state = { -- cgit 1.4.1 From 4d0cfef6ee023bfe83113a0378321830ebde1619 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:02:20 +0000 Subject: add copyright to nudge CI --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 887624c431..edbd2ae771 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 - 2016 OpenMarket Ltd +# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -- cgit 1.4.1 From 9f77001e2747c36046e136c5d3c706c0aef54b15 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:07:47 +0000 Subject: pep8 --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 9c9994c073..55159e64d0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,7 +257,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -294,7 +294,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) -- cgit 1.4.1 From 3bc5bd2d22e6b53ec1f89760301df1517e71b53a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 00:52:04 +0000 Subject: make incr syncs work --- synapse/handlers/sync.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index edbd2ae771..84c894ca4a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -499,7 +499,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } else: @@ -512,7 +512,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } timeline_state = { @@ -543,7 +543,8 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member + for t in state_at_timeline_start + if state_at_timeline_start[t][0] == EventTypes.Member } timeline_state = { -- cgit 1.4.1 From bf49d2dca8db6d82f09441a35cd3655c746b6b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c8..3c7a0ef97a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e4..eb91c0b293 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3890878170..9fc65229fd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad4..2e23dd78ba 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 -- cgit 1.4.1 From 5b3b3aada8952b53f82723227c9758ed47450a2e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:17:34 +0000 Subject: simplify timeline_start_members --- synapse/handlers/sync.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 84c894ca4a..ffb4f7915e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -496,12 +496,6 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } - else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -509,11 +503,13 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } + if filter_members: + logger.info("Finding members from %r", state_ids) + member_state_ids = { + e: state_ids[e] + for e in state_ids if state_ids[e][0] == EventTypes.Member + } + logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,11 +537,14 @@ class SyncHandler(object): ) if filter_members: + logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start - if state_at_timeline_start[t][0] == EventTypes.Member + e: state_at_timeline_start[e] + for e in state_at_timeline_start + if state_at_timeline_start[e][0] == EventTypes.Member } + logger.info("Found members %r", member_state_ids) + timeline_state = { (event.type, event.state_key): event.event_id -- cgit 1.4.1 From f7dcc404f216383bfd62e4611c6a28c3f13576dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:37:53 +0000 Subject: add state_ids for timeline entries --- synapse/handlers/sync.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ffb4f7915e..9b7e598e74 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -474,6 +474,7 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: + types = [ (EventTypes.Member, state_key) for state_key in set( @@ -481,11 +482,14 @@ class SyncHandler(object): for event in batch.events ) ] - types.append((None, None)) # don't just filter to room members - # TODO: we should opportunistically deduplicate these members too + # TODO: we should opportunistically deduplicate these members here # within the same sync series (based on an in-memory cache) + if not types: + filter_members = False + types.append((None, None)) # don't just filter to room members + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -545,7 +549,6 @@ class SyncHandler(object): } logger.info("Found members %r", member_state_ids) - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -559,7 +562,14 @@ class SyncHandler(object): current=current_state_ids, ) else: - state_ids = {} + if filter_members: + # strip off the (None, None) and filter to just room members + types = types[:-1] + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) + else: + state_ids = {} state = {} if state_ids: -- cgit 1.4.1 From 4f0493c850d4611e8ada42c1de54a18e8dc15a37 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:43:37 +0000 Subject: fix tsm search again --- synapse/handlers/sync.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9b7e598e74..4bf85a128f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -510,8 +510,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_ids) member_state_ids = { - e: state_ids[e] - for e in state_ids if state_ids[e][0] == EventTypes.Member + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) @@ -543,9 +543,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - e: state_at_timeline_start[e] - for e in state_at_timeline_start - if state_at_timeline_start[e][0] == EventTypes.Member + t: state_at_timeline_start[t] + for t in state_at_timeline_start if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) -- cgit 1.4.1 From fc5397fdf5acefd33bd3b808b6d8cc7c31b69b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:44:55 +0000 Subject: remove debug --- synapse/handlers/sync.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bf85a128f..b7f42bd594 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -508,12 +508,10 @@ class SyncHandler(object): state_ids = current_state_ids if filter_members: - logger.info("Finding members from %r", state_ids) member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,12 +539,10 @@ class SyncHandler(object): ) if filter_members: - logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id -- cgit 1.4.1 From 0b56290f0b14025939515d01aa72f512f0c629dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:45:49 +0000 Subject: remove stale import --- synapse/http/server.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse') diff --git a/synapse/http/server.py b/synapse/http/server.py index 3c7a0ef97a..e64aa92729 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,7 +37,6 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import ujson import simplejson logger = logging.getLogger(__name__) -- cgit 1.4.1 From 366f730bf697fe8fbb18a509ec1852987bc80410 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 18 Mar 2018 21:40:35 +0000 Subject: only get member state IDs for incremental syncs if we're filtering --- synapse/handlers/sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b7f42bd594..6b57afd97b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -557,14 +557,14 @@ class SyncHandler(object): current=current_state_ids, ) else: + state_ids = {} if filter_members: # strip off the (None, None) and filter to just room members types = types[:-1] - state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types - ) - else: - state_ids = {} + if types: + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) state = {} if state_ids: -- cgit 1.4.1 From 478af0f72005708dbbed23e30c547c3d66c07c0e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:00:12 +0000 Subject: reshuffle todo & comments --- synapse/handlers/sync.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6b57afd97b..76f5057377 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -483,11 +483,15 @@ class SyncHandler(object): ) ] - # TODO: we should opportunistically deduplicate these members here - # within the same sync series (based on an in-memory cache) + # We can't remove redundant member types at this stage as it has + # to be done based on event_id, and we don't have the member + # event ids until we've pulled them out of the DB. if not types: + # an optimisation to stop needlessly trying to calculate + # member_state_ids filter_members = False + types.append((None, None)) # don't just filter to room members if full_state: @@ -559,6 +563,10 @@ class SyncHandler(object): else: state_ids = {} if filter_members: + # TODO: filter out redundant members based on their mxids (not their + # event_ids) at this point. We know we can do it based on mxid as this + # is an non-gappy incremental sync. + # strip off the (None, None) and filter to just room members types = types[:-1] if types: -- cgit 1.4.1 From b2f22829475ccfe19e994aedddb8d04995018bf4 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:15:13 +0000 Subject: make lazy_load_members configurable in filters --- synapse/api/filtering.py | 6 ++++++ synapse/handlers/sync.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83206348e5..339e4a31d6 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -260,6 +260,9 @@ class FilterCollection(object): def ephemeral_limit(self): return self._room_ephemeral_filter.limit() + def lazy_load_members(self): + return self._room_state_filter.lazy_load_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -416,6 +419,9 @@ class Filter(object): def limit(self): return self.filter_json.get("limit", 10) + def lazy_load_members(self): + return self.filter_json.get("lazy_load_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 76f5057377..f521d22e91 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -442,7 +442,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state, filter_members): + full_state): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -455,7 +455,7 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - filter_members(bool): Whether to only return state for members + lazy_load_members(bool): Whether to only return state for members referenced in this timeline segment Returns: @@ -470,8 +470,9 @@ class SyncHandler(object): types = None member_state_ids = {} + lazy_load_members = sync_config.filter_collection.lazy_load_members() - if filter_members: + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -490,7 +491,7 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids - filter_members = False + lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -511,7 +512,7 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: + if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member @@ -542,7 +543,7 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: + if lazy_load_members: member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member @@ -562,7 +563,7 @@ class SyncHandler(object): ) else: state_ids = {} - if filter_members: + if lazy_load_members: # TODO: filter out redundant members based on their mxids (not their # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. @@ -1380,8 +1381,7 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, - full_state=full_state, filter_members=True + room_id, batch, sync_config, since_token, now_token, full_state=full_state ) if room_builder.rtype == "joined": -- cgit 1.4.1 From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 01:09:55 +0100 Subject: add pydoc --- synapse/handlers/sync.py | 18 ++++++++---- synapse/storage/state.py | 76 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 27 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 05bf6d46dd..8e38078332 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -423,7 +423,11 @@ class SyncHandler(object): Args: event(synapse.events.EventBase): event of interest - + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) """ @@ -440,6 +444,11 @@ class SyncHandler(object): Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) @@ -472,8 +481,6 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - lazy_load_members(bool): Whether to only return state for members - referenced in this timeline segment Returns: A deferred new event dictionary @@ -496,7 +503,7 @@ class SyncHandler(object): types = [ (EventTypes.Member, state_key) for state_key in set( - event.sender # FIXME: we also care about targets etc. + event.sender # FIXME: we also care about invite targets etc. for event in batch.events ) ] @@ -1398,7 +1405,8 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state + room_id, batch, sync_config, since_token, now_token, + full_state=full_state ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55159e64d0..63b6834202 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> (dict of (type, state_key) -> event id) + """Returns the state groups for a given set of groups, filtering on + types of state events. + + Args: + groups(list[int]): list of state group IDs to query + types(list[str|None, str|None])|None: List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all types are returned. + + Returns: + dictionary state_group -> (dict of (type, state_key) -> event id) """ results = {} @@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: type_set = set(types) if (None, None) in type_set: + # special case (None, None) to mean that other types should be + # returned - i.e. we were just filtering down the state keys + # for particular types. include_other_types = True type_set.remove((None, None)) types = list(type_set) # deduplicate types list @@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore): that are in the `types` list. Args: - event_ids (list) - types (list): List of (type, state_key) tuples which are used to - filter the state fetched. `state_key` may be None, which matches - any `state_key` + event_ids (list[string]) + types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_ids(list(str)): events whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore): missing state. Args: - group: The state group to lookup - types (list): List of 2-tuples of the form (`type`, `state_key`), - where a `state_key` of `None` matches all state_keys for the - `type`. Presence of type of `None` indicates that types not - in the list should not be filtered out. + group(int): The state group to lookup + types(list[str|None, str|None]): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + with matching types. + + Args: + groups(list[int]): list of groups whose state to query + types(list[str|None, str|None]|None): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all events are returned. + + Returns: + dict of group -> list of state events """ if types: types = frozenset(types) -- cgit 1.4.1 From b69ff33d9e26e9efe89c745e0fda4801db8bede0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:22:27 +0100 Subject: disable CPUMetrics if no /proc/self/stat fixes build on macOS again --- synapse/metrics/__init__.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse') diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 371c300cac..6b5b0c9471 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -34,6 +34,7 @@ all_metrics = [] all_collectors = [] all_gauges = {} +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") class RegistryProxy(object): @@ -99,6 +100,8 @@ class CPUMetrics(object): self.ticks_per_sec = ticks_per_sec def collect(self): + if not HAVE_PROC_SELF_STAT: + return with open("/proc/self/stat") as s: line = s.read() -- cgit 1.4.1 From 8df7bad839f04cbcabf066fe549df14879639ef5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:32:15 +0100 Subject: pep8 --- synapse/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse') diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 6b5b0c9471..bfdbbc9a23 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -36,6 +36,7 @@ all_gauges = {} HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") + class RegistryProxy(object): def collect(self): -- cgit 1.4.1 From 9bbb9f5556496529478753d2123526ca6894535c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 04:26:10 +0100 Subject: add lazy_load_members to the filter json schema --- synapse/api/filtering.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7158dd75e9..fd58961862 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -112,7 +112,10 @@ ROOM_EVENT_FILTER_SCHEMA = { }, "contains_url": { "type": "boolean" - } + }, + "lazy_load_members": { + "type": "boolean" + }, } } -- cgit 1.4.1 From 5f6122fe102f994e023d530cb6076730f31f619f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Jun 2018 00:08:52 +0300 Subject: more comments --- synapse/handlers/sync.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8e38078332..7ab97b24a6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -515,6 +515,9 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids + # + # XXX: i can't remember what this trying to do. why would + # types ever be []? --matthew lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -568,6 +571,10 @@ class SyncHandler(object): ) if lazy_load_members: + # TODO: filter out redundant members based on their event_ids + # (not mxids) at this point. In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member -- cgit 1.4.1 From 8503dd0047119caa5b98a3fd56ac2b14dd09af0b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 7 Jun 2018 16:03:16 +0100 Subject: Remove event re-signing hacks These "temporary fixes" have been here three and a half years, and I can't find any events in the matrix.org database where the calculated signature differs from what's in the db. It's time for them to go away. --- synapse/handlers/federation.py | 43 ------------------------------------------ 1 file changed, 43 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index fcf94befb7..60b97b140e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -938,16 +938,6 @@ class FederationHandler(BaseHandler): [auth_id for auth_id, _ in event.auth_events], include_given=True ) - - for event in auth: - event.signatures.update( - compute_event_signature( - event, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) - defer.returnValue([e for e in auth]) @log_function @@ -1405,18 +1395,6 @@ class FederationHandler(BaseHandler): del results[(event.type, event.state_key)] res = list(results.values()) - for event in res: - # We sign these again because there was a bug where we - # incorrectly signed things the first time round - if self.is_mine_id(event.event_id): - event.signatures.update( - compute_event_signature( - event, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) - defer.returnValue(res) else: defer.returnValue([]) @@ -1481,18 +1459,6 @@ class FederationHandler(BaseHandler): ) if event: - if self.is_mine_id(event.event_id): - # FIXME: This is a temporary work around where we occasionally - # return events slightly differently than when they were - # originally signed - event.signatures.update( - compute_event_signature( - event, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) - if do_auth: in_room = yield self.auth.check_host_in_room( event.room_id, @@ -1760,15 +1726,6 @@ class FederationHandler(BaseHandler): local_auth_chain, remote_auth_chain ) - for event in ret["auth_chain"]: - event.signatures.update( - compute_event_signature( - event, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) - logger.debug("on_query_auth returning: %s", ret) defer.returnValue(ret) -- cgit 1.4.1 From 36f4fd3e1edfa6032a5ace89e38ec7268e06dee6 Mon Sep 17 00:00:00 2001 From: David Baker Date: Wed, 11 Jul 2018 15:45:44 +0100 Subject: Comment dummy TURN parameters in default config This default config is parsed and used a base before the actual config is overlaid, so with these values not commented out, the code to detect when no turn params were set and refuse to generate credentials was never firing because the dummy default was always set. --- changelog.d/3511.bugfix | 1 + synapse/config/voip.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/3511.bugfix (limited to 'synapse') diff --git a/changelog.d/3511.bugfix b/changelog.d/3511.bugfix new file mode 100644 index 0000000000..460fe24ac9 --- /dev/null +++ b/changelog.d/3511.bugfix @@ -0,0 +1 @@ +Don't generate TURN credentials if no TURN config options are set diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 3a4e16fa96..d07bd24ffd 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -30,10 +30,10 @@ class VoipConfig(Config): ## Turn ## # The public URIs of the TURN server to give to clients - turn_uris: [] + #turn_uris: [] # The shared secret used to compute passwords for the TURN server - turn_shared_secret: "YOUR_SHARED_SECRET" + #turn_shared_secret: "YOUR_SHARED_SECRET" # The Username and password if the TURN server needs them and # does not use a token -- cgit 1.4.1 From 5797f5542b687b73ebdd8613f64ce0f38e637b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 12 Jul 2018 01:32:39 +0100 Subject: WIP to announce deleted devices over federation Previously we queued up the poke correctly when the device was deleted, but then the actual EDU wouldn't get sent, as the device was no longer known. Instead, we now send EDUs for deleted devices too if there's a poke for them. --- synapse/notifier.py | 2 +- synapse/storage/devices.py | 40 ++++++++++++++++++++++++++------------ synapse/storage/end_to_end_keys.py | 16 ++++++++++++++- 3 files changed, 44 insertions(+), 14 deletions(-) (limited to 'synapse') diff --git a/synapse/notifier.py b/synapse/notifier.py index 51cbd66f06..e650c3e494 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -274,7 +274,7 @@ class Notifier(object): logger.exception("Error notifying application services of event") def on_new_event(self, stream_key, new_token, users=[], rooms=[]): - """ Used to inform listeners that something has happend event wise. + """ Used to inform listeners that something has happened event wise. Will wake up all listeners for the given users and rooms. """ diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index ec68e39f1e..0c797f9f3e 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -239,6 +239,7 @@ class DeviceStore(SQLBaseStore): def update_remote_device_list_cache_entry(self, user_id, device_id, content, stream_id): """Updates a single user's device in the cache. + If the content is null, delete the device from the cache. """ return self.runInteraction( "update_remote_device_list_cache_entry", @@ -248,17 +249,32 @@ class DeviceStore(SQLBaseStore): def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, content, stream_id): - self._simple_upsert_txn( - txn, - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "content": json.dumps(content), - } - ) + if content is None: + self._simple_delete_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + ) + + # Do we need this? + txn.call_after( + self.device_id_exists_cache.invalidate, (user_id, device_id,) + ) + else: + self._simple_upsert_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "content": json.dumps(content), + } + ) txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,)) txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) @@ -366,7 +382,7 @@ class DeviceStore(SQLBaseStore): now_stream_id = max(stream_id for stream_id in itervalues(query_map)) devices = self._get_e2e_device_keys_txn( - txn, query_map.keys(), include_all_devices=True + txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True ) prev_sent_id_sql = """ diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 7ae5c65482..f61553cec8 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -64,12 +64,17 @@ class EndToEndKeyStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_e2e_device_keys(self, query_list, include_all_devices=False): + def get_e2e_device_keys( + self, query_list, include_all_devices=False, + include_deleted_devices=False + ): """Fetch a list of device keys. Args: query_list(list): List of pairs of user_ids and device_ids. include_all_devices (bool): whether to include entries for devices that don't have device keys + include_deleted_devices (bool): whether to include null entries for + devices which no longer exist (but where in the query_list) Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -82,10 +87,19 @@ class EndToEndKeyStore(SQLBaseStore): query_list, include_all_devices, ) + if include_deleted_devices: + deleted_devices = set(query_list) + for user_id, device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): + if include_deleted_devices: + deleted_devices -= (user_id, device_id) device_info["keys"] = json.loads(device_info.pop("key_json")) + if include_deleted_devices: + for user_id, device_id in deleted_devices: + results.setdefault(user_id, {})[device_id] = None + defer.returnValue(results) def _get_e2e_device_keys_txn(self, txn, query_list, include_all_devices): -- cgit 1.4.1 From 12ec58301f946ced9702afbf6dfbfbc8c3dfd3dd Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 12 Jul 2018 11:39:43 +0100 Subject: shift to using an explicit deleted flag on m.device_list_update EDUs and generally make it work. --- synapse/storage/devices.py | 18 ++++++++++-------- synapse/storage/end_to_end_keys.py | 27 +++++++++++++++------------ 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 0c797f9f3e..203f50f07d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -239,7 +239,6 @@ class DeviceStore(SQLBaseStore): def update_remote_device_list_cache_entry(self, user_id, device_id, content, stream_id): """Updates a single user's device in the cache. - If the content is null, delete the device from the cache. """ return self.runInteraction( "update_remote_device_list_cache_entry", @@ -249,7 +248,7 @@ class DeviceStore(SQLBaseStore): def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, content, stream_id): - if content is None: + if content.get("deleted"): self._simple_delete_txn( txn, table="device_lists_remote_cache", @@ -409,12 +408,15 @@ class DeviceStore(SQLBaseStore): prev_id = stream_id - key_json = device.get("key_json", None) - if key_json: - result["keys"] = json.loads(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name + if device is not None: + key_json = device.get("key_json", None) + if key_json: + result["keys"] = json.loads(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + else: + result["deleted"] = True results.append(result) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index f61553cec8..6c28719420 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -74,7 +74,7 @@ class EndToEndKeyStore(SQLBaseStore): include_all_devices (bool): whether to include entries for devices that don't have device keys include_deleted_devices (bool): whether to include null entries for - devices which no longer exist (but where in the query_list) + devices which no longer exist (but were in the query_list) Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -84,28 +84,25 @@ class EndToEndKeyStore(SQLBaseStore): results = yield self.runInteraction( "get_e2e_device_keys", self._get_e2e_device_keys_txn, - query_list, include_all_devices, + query_list, include_all_devices, include_deleted_devices, ) - if include_deleted_devices: - deleted_devices = set(query_list) - for user_id, device_keys in iteritems(results): for device_id, device_info in iteritems(device_keys): - if include_deleted_devices: - deleted_devices -= (user_id, device_id) device_info["keys"] = json.loads(device_info.pop("key_json")) - if include_deleted_devices: - for user_id, device_id in deleted_devices: - results.setdefault(user_id, {})[device_id] = None - defer.returnValue(results) - def _get_e2e_device_keys_txn(self, txn, query_list, include_all_devices): + def _get_e2e_device_keys_txn( + self, txn, query_list, include_all_devices=False, + include_deleted_devices=False, + ): query_clauses = [] query_params = [] + if include_deleted_devices: + deleted_devices = set(query_list) + for (user_id, device_id) in query_list: query_clause = "user_id = ?" query_params.append(user_id) @@ -133,8 +130,14 @@ class EndToEndKeyStore(SQLBaseStore): result = {} for row in rows: + if include_deleted_devices: + deleted_devices.remove((row["user_id"], row["device_id"])) result.setdefault(row["user_id"], {})[row["device_id"]] = row + if include_deleted_devices: + for user_id, device_id in deleted_devices: + result.setdefault(user_id, {})[device_id] = None + return result @defer.inlineCallbacks -- cgit 1.4.1 From 77b692e65dfe5da79e9f69f4a24bde1c768a998c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Jul 2018 15:26:10 +0100 Subject: Don't return unknown entities in get_entities_changed The stream cache keeps track of all entities that have changed since a particular stream position, so get_entities_changed does not need to return unknown entites when given a larger stream position. This makes it consistent with the behaviour of has_entity_changed. --- synapse/util/caches/stream_change_cache.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index a1f8ff8f10..258655349b 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,19 +74,12 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - changed_entities = { + result = { self._cache[k] for k in self._cache.islice( start=self._cache.bisect_right(stream_pos), ) } - # we need to include entities which we don't know about, as well as - # those which are known to have changed since the stream pos. - result = { - e for e in entities - if e in changed_entities or e not in self._entity_to_key - } - self.metrics.inc_hits() else: result = set(entities) -- cgit 1.4.1 From 4a27000548d2b5194256152305fe121f671f0c81 Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 16 Jul 2018 13:46:49 +0200 Subject: check isort by travis --- .travis.yml | 3 +++ changelog.d/3540.misc | 1 + synapse/config/consent_config.py | 1 + synapse/config/logger.py | 1 + synapse/config/server_notices_config.py | 1 + synapse/http/client.py | 4 +++- synapse/http/site.py | 2 +- synapse/rest/__init__.py | 16 +++++++++++++--- synapse/rest/client/v1/admin.py | 4 ++-- synapse/rest/client/v1/initial_sync.py | 2 +- synapse/rest/client/v2_alpha/devices.py | 2 +- synapse/rest/media/v1/identicon_resource.py | 4 ++-- synapse/storage/schema/delta/48/group_unique_indexes.py | 1 + tox.ini | 8 ++++++-- 14 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 changelog.d/3540.misc (limited to 'synapse') diff --git a/.travis.yml b/.travis.yml index a98d547978..e7fcd8b5d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,9 @@ matrix: - python: 3.6 env: TOX_ENV=py36 + - python: 3.6 + env: TOX_ENV=isort + - python: 3.6 env: TOX_ENV=check-newsfragment diff --git a/changelog.d/3540.misc b/changelog.d/3540.misc new file mode 100644 index 0000000000..99dcad8e46 --- /dev/null +++ b/changelog.d/3540.misc @@ -0,0 +1 @@ +check isort for each PR diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index e22c731aad..cec978125b 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -15,6 +15,7 @@ from ._base import Config + DEFAULT_CONFIG = """\ # User Consent configuration # diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a87b11a1df..cc1051057c 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -29,6 +29,7 @@ from synapse.util.versionstring import get_version_string from ._base import Config + DEFAULT_LOG_CONFIG = Template(""" version: 1 diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 3c39850ac6..91b08446b7 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -16,6 +16,7 @@ from synapse.types import UserID from ._base import Config + DEFAULT_CONFIG = """\ # Server Notices room configuration # diff --git a/synapse/http/client.py b/synapse/http/client.py index d6a0d75b2b..0622a69e86 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -26,9 +26,11 @@ from OpenSSL.SSL import VERIFY_NONE from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( + Agent, + BrowserLikeRedirectAgent, + ContentDecoderAgent, GzipDecoder, HTTPConnectionPool, PartialDownloadError, diff --git a/synapse/http/site.py b/synapse/http/site.py index 21e26f9c5e..41dd974cea 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ from twisted.web.server import Request, Site from synapse.http import redact_uri from synapse.http.request_metrics import RequestMetrics -from synapse.util.logcontext import LoggingContext, ContextResourceUsage +from synapse.util.logcontext import ContextResourceUsage, LoggingContext logger = logging.getLogger(__name__) diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 75c2a4ec8e..0d69d37f7e 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -15,11 +15,21 @@ from synapse.http.server import JsonResource from synapse.rest.client import versions -from synapse.rest.client.v1 import admin, directory, events, initial_sync from synapse.rest.client.v1 import login as v1_login -from synapse.rest.client.v1 import logout, presence, profile, push_rule, pusher from synapse.rest.client.v1 import register as v1_register -from synapse.rest.client.v1 import room, voip +from synapse.rest.client.v1 import ( + admin, + directory, + events, + initial_sync, + logout, + presence, + profile, + push_rule, + pusher, + room, + voip, +) from synapse.rest.client.v2_alpha import ( account, account_data, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 01c3f2eb04..2dc50e582b 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -24,9 +24,9 @@ from synapse.api.constants import Membership from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( assert_params_in_dict, - parse_json_object_from_request, parse_integer, - parse_string + parse_json_object_from_request, + parse_string, ) from synapse.types import UserID, create_requester diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py index 00a1a99feb..fd5f85b53e 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/v1/initial_sync.py @@ -15,8 +15,8 @@ from twisted.internet import defer -from synapse.streams.config import PaginationConfig from synapse.http.servlet import parse_boolean +from synapse.streams.config import PaginationConfig from .base import ClientV1RestServlet, client_path_patterns diff --git a/synapse/rest/client/v2_alpha/devices.py b/synapse/rest/client/v2_alpha/devices.py index aded2409be..9b75bb1377 100644 --- a/synapse/rest/client/v2_alpha/devices.py +++ b/synapse/rest/client/v2_alpha/devices.py @@ -19,9 +19,9 @@ from twisted.internet import defer from synapse.api import errors from synapse.http.servlet import ( + RestServlet, assert_params_in_dict, parse_json_object_from_request, - RestServlet ) from ._base import client_v2_patterns, interactive_auth_handler diff --git a/synapse/rest/media/v1/identicon_resource.py b/synapse/rest/media/v1/identicon_resource.py index b3217eff53..bdbd8d50dd 100644 --- a/synapse/rest/media/v1/identicon_resource.py +++ b/synapse/rest/media/v1/identicon_resource.py @@ -14,10 +14,10 @@ from pydenticon import Generator -from synapse.http.servlet import parse_integer - from twisted.web.resource import Resource +from synapse.http.servlet import parse_integer + FOREGROUND = [ "rgb(45,79,255)", "rgb(254,180,44)", diff --git a/synapse/storage/schema/delta/48/group_unique_indexes.py b/synapse/storage/schema/delta/48/group_unique_indexes.py index 2233af87d7..ac4ac66cd4 100644 --- a/synapse/storage/schema/delta/48/group_unique_indexes.py +++ b/synapse/storage/schema/delta/48/group_unique_indexes.py @@ -15,6 +15,7 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.prepare_database import get_statements + FIX_INDEXES = """ -- rebuild indexes as uniques DROP INDEX groups_invites_g_idx; diff --git a/tox.ini b/tox.ini index 61a20a10cb..c7491690c5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = packaging, py27, py36, pep8 +envlist = packaging, py27, py36, pep8, isort [testenv] deps = @@ -103,10 +103,14 @@ deps = flake8 commands = /bin/sh -c "flake8 synapse tests {env:PEP8SUFFIX:}" +[testenv:isort] +skip_install = True +deps = isort +commands = /bin/sh -c "isort -c -sp setup.cfg -rc synapse tests" [testenv:check-newsfragment] skip_install = True deps = towncrier>=18.6.0rc1 commands = python -m towncrier.check --compare-with=origin/develop -basepython = python3.6 \ No newline at end of file +basepython = python3.6 -- cgit 1.4.1 From ea69d3565110a20f5abe00f1a5c2357b483140fb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 11:38:45 +0100 Subject: Move filter_events_for_server out of FederationHandler for easier unit testing. --- synapse/handlers/federation.py | 144 ++--------------------------------------- synapse/visibility.py | 132 +++++++++++++++++++++++++++++++++++++ tests/test_visibility.py | 107 ++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 138 deletions(-) create mode 100644 tests/test_visibility.py (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d3ecebd29f..20fb46fc89 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -43,7 +43,6 @@ from synapse.crypto.event_signing import ( add_hashes_and_signatures, compute_event_signature, ) -from synapse.events.utils import prune_event from synapse.events.validator import EventValidator from synapse.state import resolve_events_with_factory from synapse.types import UserID, get_domain_from_id @@ -52,8 +51,8 @@ from synapse.util.async import Linearizer from synapse.util.distributor import user_joined_room from synapse.util.frozenutils import unfreeze from synapse.util.logutils import log_function -from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination +from synapse.visibility import filter_events_for_server from ._base import BaseHandler @@ -501,137 +500,6 @@ class FederationHandler(BaseHandler): user = UserID.from_string(event.state_key) yield user_joined_room(self.distributor, user, event.room_id) - @measure_func("_filter_events_for_server") - @defer.inlineCallbacks - def _filter_events_for_server(self, server_name, room_id, events): - """Filter the given events for the given server, redacting those the - server can't see. - - Assumes the server is currently in the room. - - Returns - list[FrozenEvent] - """ - # First lets check to see if all the events have a history visibility - # of "shared" or "world_readable". If thats the case then we don't - # need to check membership (as we know the server is in the room). - event_to_state_ids = yield self.store.get_state_ids_for_events( - frozenset(e.event_id for e in events), - types=( - (EventTypes.RoomHistoryVisibility, ""), - ) - ) - - visibility_ids = set() - for sids in event_to_state_ids.itervalues(): - hist = sids.get((EventTypes.RoomHistoryVisibility, "")) - if hist: - visibility_ids.add(hist) - - # If we failed to find any history visibility events then the default - # is "shared" visiblity. - if not visibility_ids: - defer.returnValue(events) - - event_map = yield self.store.get_events(visibility_ids) - all_open = all( - e.content.get("history_visibility") in (None, "shared", "world_readable") - for e in event_map.itervalues() - ) - - if all_open: - defer.returnValue(events) - - # Ok, so we're dealing with events that have non-trivial visibility - # rules, so we need to also get the memberships of the room. - - event_to_state_ids = yield self.store.get_state_ids_for_events( - frozenset(e.event_id for e in events), - types=( - (EventTypes.RoomHistoryVisibility, ""), - (EventTypes.Member, None), - ) - ) - - # We only want to pull out member events that correspond to the - # server's domain. - - def check_match(id): - try: - return server_name == get_domain_from_id(id) - except Exception: - return False - - # Parses mapping `event_id -> (type, state_key) -> state event_id` - # to get all state ids that we're interested in. - event_map = yield self.store.get_events([ - e_id - for key_to_eid in list(event_to_state_ids.values()) - for key, e_id in key_to_eid.items() - if key[0] != EventTypes.Member or check_match(key[1]) - ]) - - event_to_state = { - e_id: { - key: event_map[inner_e_id] - for key, inner_e_id in key_to_eid.iteritems() - if inner_e_id in event_map - } - for e_id, key_to_eid in event_to_state_ids.iteritems() - } - - erased_senders = yield self.store.are_users_erased( - e.sender for e in events, - ) - - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: - logger.info( - "Sender of %s has been erased, redacting", - event.event_id, - ) - return prune_event(event) - - if not state: - return event - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - if visibility in ["invited", "joined"]: - # We now loop through all state events looking for - # membership states for the requesting server to determine - # if the server is either in the room or has been invited - # into the room. - for ev in state.itervalues(): - if ev.type != EventTypes.Member: - continue - try: - domain = get_domain_from_id(ev.state_key) - except Exception: - continue - - if domain != server_name: - continue - - memtype = ev.membership - if memtype == Membership.JOIN: - return event - elif memtype == Membership.INVITE: - if visibility == "invited": - return event - else: - return prune_event(event) - - return event - - defer.returnValue([ - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ]) - @log_function @defer.inlineCallbacks def backfill(self, dest, room_id, limit, extremities): @@ -1558,7 +1426,7 @@ class FederationHandler(BaseHandler): limit ) - events = yield self._filter_events_for_server(origin, room_id, events) + events = yield filter_events_for_server(self.store, origin, events) defer.returnValue(events) @@ -1605,8 +1473,8 @@ class FederationHandler(BaseHandler): if not in_room: raise AuthError(403, "Host not in room.") - events = yield self._filter_events_for_server( - origin, event.room_id, [event] + events = yield filter_events_for_server( + self.store, origin, [event], ) event = events[0] defer.returnValue(event) @@ -1896,8 +1764,8 @@ class FederationHandler(BaseHandler): min_depth=min_depth, ) - missing_events = yield self._filter_events_for_server( - origin, room_id, missing_events, + missing_events = yield filter_events_for_server( + self.store, origin, missing_events, ) defer.returnValue(missing_events) diff --git a/synapse/visibility.py b/synapse/visibility.py index 015c2bab37..ddce41efaf 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -20,6 +20,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events.utils import prune_event +from synapse.types import get_domain_from_id from synapse.util.logcontext import make_deferred_yieldable, preserve_fn logger = logging.getLogger(__name__) @@ -225,3 +226,134 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, # we turn it into a list before returning it. defer.returnValue(list(filtered_events)) + + +@defer.inlineCallbacks +def filter_events_for_server(store, server_name, events): + """Filter the given events for the given server, redacting those the + server can't see. + + Assumes the server is currently in the room. + + Returns + list[FrozenEvent] + """ + # First lets check to see if all the events have a history visibility + # of "shared" or "world_readable". If thats the case then we don't + # need to check membership (as we know the server is in the room). + event_to_state_ids = yield store.get_state_ids_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + ) + ) + + visibility_ids = set() + for sids in event_to_state_ids.itervalues(): + hist = sids.get((EventTypes.RoomHistoryVisibility, "")) + if hist: + visibility_ids.add(hist) + + # If we failed to find any history visibility events then the default + # is "shared" visiblity. + if not visibility_ids: + defer.returnValue(events) + + event_map = yield store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) + + if all_open: + defer.returnValue(events) + + # Ok, so we're dealing with events that have non-trivial visibility + # rules, so we need to also get the memberships of the room. + + event_to_state_ids = yield store.get_state_ids_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, None), + ) + ) + + # We only want to pull out member events that correspond to the + # server's domain. + + def check_match(id): + try: + return server_name == get_domain_from_id(id) + except Exception: + return False + + # Parses mapping `event_id -> (type, state_key) -> state event_id` + # to get all state ids that we're interested in. + event_map = yield store.get_events([ + e_id + for key_to_eid in list(event_to_state_ids.values()) + for key, e_id in key_to_eid.items() + if key[0] != EventTypes.Member or check_match(key[1]) + ]) + + event_to_state = { + e_id: { + key: event_map[inner_e_id] + for key, inner_e_id in key_to_eid.iteritems() + if inner_e_id in event_map + } + for e_id, key_to_eid in event_to_state_ids.iteritems() + } + + erased_senders = yield store.are_users_erased( + e.sender for e in events, + ) + + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. + for ev in state.itervalues(): + if ev.type != EventTypes.Member: + continue + try: + domain = get_domain_from_id(ev.state_key) + except Exception: + continue + + if domain != server_name: + continue + + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + return prune_event(event) + + return event + + defer.returnValue([ + redact_disallowed(e, event_to_state[e.event_id]) + for e in events + ]) diff --git a/tests/test_visibility.py b/tests/test_visibility.py new file mode 100644 index 0000000000..86981958cb --- /dev/null +++ b/tests/test_visibility.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.visibility import filter_events_for_server +from tests import unittest +from tests.utils import setup_test_homeserver + +logger = logging.getLogger(__name__) + +TEST_ROOM_ID = "!TEST:ROOM" + + +class FilterEventsForServerTestCase(unittest.TestCase): + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver() + self.event_creation_handler = self.hs.get_event_creation_handler() + self.event_builder_factory = self.hs.get_event_builder_factory() + self.store = self.hs.get_datastore() + + @defer.inlineCallbacks + def test_filtering(self): + # + # The events to be filtered consist of 10 membership events (it doesn't + # really matter if they are joins or leaves, so let's make them joins). + # One of those membership events is going to be for a user on the + # server we are filtering for (so we can check the filtering is doing + # the right thing). + # + + # before we do that, we persist some other events to act as state. + self.inject_visibility("@admin:hs", "joined") + for i in range(0, 10): + yield self.inject_room_member("@resident%i:hs" % i) + + events_to_filter = [] + + for i in range(0, 10): + user = "@user%i:%s" % ( + i, "test_server" if i == 5 else "other_server" + ) + evt = yield self.inject_room_member(user, extra_content={"a": "b"}) + events_to_filter.append(evt) + + filtered = yield filter_events_for_server( + self.store, "test_server", events_to_filter, + ) + + # the result should be 5 redacted events, and 5 unredacted events. + for i in range(0, 5): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertNotIn("a", filtered[i].content) + + for i in range(5, 10): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertEqual(filtered[i].content["a"], "b") + + @defer.inlineCallbacks + def inject_visibility(self, user_id, visibility): + content = {"history_visibility": visibility} + builder = self.event_builder_factory.new({ + "type": "m.room.history_visibility", + "sender": user_id, + "state_key": "", + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) + + @defer.inlineCallbacks + def inject_room_member(self, user_id, membership="join", extra_content={}): + content = {"membership": membership} + content.update(extra_content) + builder = self.event_builder_factory.new({ + "type": "m.room.member", + "sender": user_id, + "state_key": user_id, + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) -- cgit 1.4.1 From 78a9ddcf9a5ddddd1ba01c58948d0c4a1ce294d9 Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 16 Jul 2018 14:23:25 +0200 Subject: rerun isort with latest version --- synapse/config/consent_config.py | 1 - synapse/config/logger.py | 1 - synapse/config/server_notices_config.py | 1 - synapse/http/client.py | 4 +--- synapse/rest/__init__.py | 16 +++------------- synapse/storage/schema/delta/48/group_unique_indexes.py | 1 - 6 files changed, 4 insertions(+), 20 deletions(-) (limited to 'synapse') diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index cec978125b..e22c731aad 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -15,7 +15,6 @@ from ._base import Config - DEFAULT_CONFIG = """\ # User Consent configuration # diff --git a/synapse/config/logger.py b/synapse/config/logger.py index cc1051057c..a87b11a1df 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -29,7 +29,6 @@ from synapse.util.versionstring import get_version_string from ._base import Config - DEFAULT_LOG_CONFIG = Template(""" version: 1 diff --git a/synapse/config/server_notices_config.py b/synapse/config/server_notices_config.py index 91b08446b7..3c39850ac6 100644 --- a/synapse/config/server_notices_config.py +++ b/synapse/config/server_notices_config.py @@ -16,7 +16,6 @@ from synapse.types import UserID from ._base import Config - DEFAULT_CONFIG = """\ # Server Notices room configuration # diff --git a/synapse/http/client.py b/synapse/http/client.py index 0622a69e86..d6a0d75b2b 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -26,11 +26,9 @@ from OpenSSL.SSL import VERIFY_NONE from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( - Agent, - BrowserLikeRedirectAgent, - ContentDecoderAgent, GzipDecoder, HTTPConnectionPool, PartialDownloadError, diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 0d69d37f7e..75c2a4ec8e 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -15,21 +15,11 @@ from synapse.http.server import JsonResource from synapse.rest.client import versions +from synapse.rest.client.v1 import admin, directory, events, initial_sync from synapse.rest.client.v1 import login as v1_login +from synapse.rest.client.v1 import logout, presence, profile, push_rule, pusher from synapse.rest.client.v1 import register as v1_register -from synapse.rest.client.v1 import ( - admin, - directory, - events, - initial_sync, - logout, - presence, - profile, - push_rule, - pusher, - room, - voip, -) +from synapse.rest.client.v1 import room, voip from synapse.rest.client.v2_alpha import ( account, account_data, diff --git a/synapse/storage/schema/delta/48/group_unique_indexes.py b/synapse/storage/schema/delta/48/group_unique_indexes.py index ac4ac66cd4..2233af87d7 100644 --- a/synapse/storage/schema/delta/48/group_unique_indexes.py +++ b/synapse/storage/schema/delta/48/group_unique_indexes.py @@ -15,7 +15,6 @@ from synapse.storage.engines import PostgresEngine from synapse.storage.prepare_database import get_statements - FIX_INDEXES = """ -- rebuild indexes as uniques DROP INDEX groups_invites_g_idx; -- cgit 1.4.1 From 09e29fb58b144ee629b2879e2c9fa57d7b11e3ab Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Jul 2018 16:32:46 +0100 Subject: Attempt to make _filter_events_for_server more efficient --- synapse/visibility.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index ddce41efaf..27061e4ad9 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -16,6 +16,7 @@ import itertools import logging import operator +import six from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -230,14 +231,6 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks def filter_events_for_server(store, server_name, events): - """Filter the given events for the given server, redacting those the - server can't see. - - Assumes the server is currently in the room. - - Returns - list[FrozenEvent] - """ # First lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). @@ -271,6 +264,8 @@ def filter_events_for_server(store, server_name, events): # Ok, so we're dealing with events that have non-trivial visibility # rules, so we need to also get the memberships of the room. + # first, for each event we're wanting to return, get the event_ids + # of the history vis and membership state at those events. event_to_state_ids = yield store.get_state_ids_for_events( frozenset(e.event_id for e in events), types=( @@ -281,20 +276,30 @@ def filter_events_for_server(store, server_name, events): # We only want to pull out member events that correspond to the # server's domain. + # + # event_to_state_ids contains lots of duplicates, so it turns out to be + # cheaper to build a complete set of unique + # ((type, state_key), event_id) tuples, and then filter out the ones we + # don't want. + # + state_key_to_event_id_set = { + e + for key_to_eid in six.itervalues(event_to_state_ids) + for e in key_to_eid.items() + } - def check_match(id): - try: - return server_name == get_domain_from_id(id) - except Exception: + def include(typ, state_key): + if typ != EventTypes.Member: + return True + idx = state_key.find(":") + if idx == -1: return False + return state_key[idx + 1:] == server_name - # Parses mapping `event_id -> (type, state_key) -> state event_id` - # to get all state ids that we're interested in. event_map = yield store.get_events([ e_id - for key_to_eid in list(event_to_state_ids.values()) - for key, e_id in key_to_eid.items() - if key[0] != EventTypes.Member or check_match(key[1]) + for key, e_id in state_key_to_event_id_set + if include(key[0], key[1]) ]) event_to_state = { @@ -349,6 +354,7 @@ def filter_events_for_server(store, server_name, events): if visibility == "invited": return event else: + # server has no users in the room: redact return prune_event(event) return event -- cgit 1.4.1 From 547b1355d3747b267db3e21aefd143382f49f4ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 10:27:51 +0100 Subject: Fix perf regression in PR #3530 The get_entities_changed function was changed to return all changed entities since the given stream position, rather than only those changed from a given list of entities. This resulted in the function incorrectly returning large numbers of entities that, for example, caused large increases in database usage. --- synapse/util/caches/stream_change_cache.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 258655349b..c1e76b1a0e 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,12 +74,17 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - result = { + changed_entities = { self._cache[k] for k in self._cache.islice( start=self._cache.bisect_right(stream_pos), ) } + result = { + e for e in entities + if e in changed_entities + } + self.metrics.inc_hits() else: result = set(entities) -- cgit 1.4.1 From b2aa05a8d6b1b2fb9e1efcc6fb03f1b49bc1be1d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 11:07:04 +0100 Subject: Use efficient .intersection --- synapse/util/caches/stream_change_cache.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index c1e76b1a0e..f2bde74dc5 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -80,10 +80,7 @@ class StreamChangeCache(object): ) } - result = { - e for e in entities - if e in changed_entities - } + result = changed_entities.intersection(entities) self.metrics.inc_hits() else: -- cgit 1.4.1 From 2172a3d8cb18724b2e77d74afd0789f2abb246e5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 11:13:57 +0100 Subject: add a comment --- synapse/visibility.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index 27061e4ad9..6209bc0cde 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -291,6 +291,8 @@ def filter_events_for_server(store, server_name, events): def include(typ, state_key): if typ != EventTypes.Member: return True + + # we avoid using get_domain_from_id here for efficiency. idx = state_key.find(":") if idx == -1: return False -- cgit 1.4.1 From bc006b3c9d2a9982bc834ff5d1ec1768c85f907a Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 17 Jul 2018 20:43:18 +1000 Subject: Refactor REST API tests to use explicit reactors (#3351) --- changelog.d/3351.misc | 0 synapse/http/site.py | 5 +- synapse/rest/client/v2_alpha/register.py | 2 +- tests/rest/client/v1/test_register.py | 68 +- tests/rest/client/v1/test_rooms.py | 1156 +++++++++++---------------- tests/rest/client/v1/utils.py | 115 ++- tests/rest/client/v2_alpha/__init__.py | 61 -- tests/rest/client/v2_alpha/test_filter.py | 129 +-- tests/rest/client/v2_alpha/test_register.py | 211 ++--- tests/rest/client/v2_alpha/test_sync.py | 83 ++ tests/server.py | 10 + tests/test_server.py | 22 +- tests/unittest.py | 11 + 13 files changed, 939 insertions(+), 934 deletions(-) create mode 100644 changelog.d/3351.misc create mode 100644 tests/rest/client/v2_alpha/test_sync.py (limited to 'synapse') diff --git a/changelog.d/3351.misc b/changelog.d/3351.misc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/http/site.py b/synapse/http/site.py index 41dd974cea..5fd30a4c2c 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -42,9 +42,10 @@ class SynapseRequest(Request): which is handling the request, and returns a context manager. """ - def __init__(self, site, *args, **kw): - Request.__init__(self, *args, **kw) + def __init__(self, site, channel, *args, **kw): + Request.__init__(self, channel, *args, **kw) self.site = site + self._channel = channel self.authenticated_entity = None self.start_time = 0 diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 1918897f68..d6cf915d86 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -643,7 +643,7 @@ class RegisterRestServlet(RestServlet): @defer.inlineCallbacks def _do_guest_registration(self, params): if not self.hs.config.allow_guest_access: - defer.returnValue((403, "Guest access is disabled")) + raise SynapseError(403, "Guest access is disabled") user_id, _ = yield self.registration_handler.register( generate_token=False, make_guest=True diff --git a/tests/rest/client/v1/test_register.py b/tests/rest/client/v1/test_register.py index f596acb85f..f15fb36213 100644 --- a/tests/rest/client/v1/test_register.py +++ b/tests/rest/client/v1/test_register.py @@ -17,26 +17,22 @@ import json from mock import Mock -from twisted.internet import defer +from twisted.test.proto_helpers import MemoryReactorClock -from synapse.rest.client.v1.register import CreateUserRestServlet +from synapse.http.server import JsonResource +from synapse.rest.client.v1.register import register_servlets +from synapse.util import Clock from tests import unittest -from tests.utils import mock_getRawHeaders +from tests.server import make_request, setup_test_homeserver class CreateUserServletTestCase(unittest.TestCase): + """ + Tests for CreateUserRestServlet. + """ def setUp(self): - # do the dance to hook up request data to self.request_data - self.request_data = "" - self.request = Mock( - content=Mock(read=Mock(side_effect=lambda: self.request_data)), - path='/_matrix/client/api/v1/createUser' - ) - self.request.args = {} - self.request.requestHeaders.getRawHeaders = mock_getRawHeaders() - self.registration_handler = Mock() self.appservice = Mock(sender="@as:test") @@ -44,39 +40,49 @@ class CreateUserServletTestCase(unittest.TestCase): get_app_service_by_token=Mock(return_value=self.appservice) ) - # do the dance to hook things up to the hs global - handlers = Mock( - registration_handler=self.registration_handler, + handlers = Mock(registration_handler=self.registration_handler) + self.clock = MemoryReactorClock() + self.hs_clock = Clock(self.clock) + + self.hs = self.hs = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.clock ) - self.hs = Mock() - self.hs.hostname = "superbig~testing~thing.com" self.hs.get_datastore = Mock(return_value=self.datastore) self.hs.get_handlers = Mock(return_value=handlers) - self.servlet = CreateUserRestServlet(self.hs) - @defer.inlineCallbacks def test_POST_createuser_with_valid_user(self): + + res = JsonResource(self.hs) + register_servlets(self.hs, res) + + request_data = json.dumps( + { + "localpart": "someone", + "displayname": "someone interesting", + "duration_seconds": 200, + } + ) + + url = b'/_matrix/client/api/v1/createUser?access_token=i_am_an_app_service' + user_id = "@someone:interesting" token = "my token" - self.request.args = { - "access_token": "i_am_an_app_service" - } - self.request_data = json.dumps({ - "localpart": "someone", - "displayname": "someone interesting", - "duration_seconds": 200 - }) self.registration_handler.get_or_create_user = Mock( return_value=(user_id, token) ) - (code, result) = yield self.servlet.on_POST(self.request) - self.assertEquals(code, 200) + request, channel = make_request(b"POST", url, request_data) + request.render(res) + + # Advance the clock because it waits + self.clock.advance(1) + + self.assertEquals(channel.result["code"], b"200") det_data = { "user_id": user_id, "access_token": token, - "home_server": self.hs.hostname + "home_server": self.hs.hostname, } - self.assertDictContainsSubset(det_data, result) + self.assertDictContainsSubset(det_data, json.loads(channel.result["body"])) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 895dffa095..6b5764095e 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -25,949 +25,772 @@ from twisted.internet import defer import synapse.rest.client.v1.room from synapse.api.constants import Membership +from synapse.http.server import JsonResource from synapse.types import UserID +from synapse.util import Clock -from ....utils import MockHttpResource, setup_test_homeserver -from .utils import RestTestCase +from tests import unittest +from tests.server import ( + ThreadedMemoryReactorClock, + make_request, + render, + setup_test_homeserver, +) -PATH_PREFIX = "/_matrix/client/api/v1" +from .utils import RestHelper +PATH_PREFIX = b"/_matrix/client/api/v1" -class RoomPermissionsTestCase(RestTestCase): - """ Tests room permissions. """ - user_id = "@sid1:red" - rmcreator_id = "@notme:red" - @defer.inlineCallbacks +class RoomBase(unittest.TestCase): + rmcreator_id = None + def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - hs = yield setup_test_homeserver( + self.clock = ThreadedMemoryReactorClock() + self.hs_clock = Clock(self.clock) + + self.hs = setup_test_homeserver( "red", http_client=None, + clock=self.hs_clock, + reactor=self.clock, federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = self.hs.get_ratelimiter() self.ratelimiter.send_message.return_value = (True, 0) - hs.get_handlers().federation_handler = Mock() + self.hs.get_federation_handler = Mock(return_value=Mock()) def get_user_by_access_token(token=None, allow_guest=False): return { - "user": UserID.from_string(self.auth_user_id), + "user": UserID.from_string(self.helper.auth_user_id), "token_id": 1, "is_guest": False, } - hs.get_auth().get_user_by_access_token = get_user_by_access_token + + def get_user_by_req(request, allow_guest=False, rights="access"): + return synapse.types.create_requester( + UserID.from_string(self.helper.auth_user_id), 1, False, None + ) + + self.hs.get_auth().get_user_by_req = get_user_by_req + self.hs.get_auth().get_user_by_access_token = get_user_by_access_token + self.hs.get_auth().get_access_token_from_request = Mock(return_value=b"1234") def _insert_client_ip(*args, **kwargs): return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - self.auth_user_id = self.rmcreator_id + self.hs.get_datastore().insert_client_ip = _insert_client_ip - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + self.resource = JsonResource(self.hs) + synapse.rest.client.v1.room.register_servlets(self.hs, self.resource) + self.helper = RestHelper(self.hs, self.resource, self.user_id) - self.auth = hs.get_auth() - # create some rooms under the name rmcreator_id - self.uncreated_rmid = "!aa:test" +class RoomPermissionsTestCase(RoomBase): + """ Tests room permissions. """ - self.created_rmid = yield self.create_room_as(self.rmcreator_id, - is_public=False) + user_id = b"@sid1:red" + rmcreator_id = b"@notme:red" + + def setUp(self): - self.created_public_rmid = yield self.create_room_as(self.rmcreator_id, - is_public=True) + super(RoomPermissionsTestCase, self).setUp() + + self.helper.auth_user_id = self.rmcreator_id + # create some rooms under the name rmcreator_id + self.uncreated_rmid = "!aa:test" + self.created_rmid = self.helper.create_room_as( + self.rmcreator_id, is_public=False + ) + self.created_public_rmid = self.helper.create_room_as( + self.rmcreator_id, is_public=True + ) # send a message in one of the rooms self.created_rmid_msg_path = ( - "/rooms/%s/send/m.room.message/a1" % (self.created_rmid) - ) - (code, response) = yield self.mock_resource.trigger( - "PUT", + "rooms/%s/send/m.room.message/a1" % (self.created_rmid) + ).encode('ascii') + request, channel = make_request( + b"PUT", self.created_rmid_msg_path, - '{"msgtype":"m.text","body":"test msg"}' + b'{"msgtype":"m.text","body":"test msg"}', ) - self.assertEquals(200, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(channel.result["code"], b"200", channel.result) # set topic for public room - (code, response) = yield self.mock_resource.trigger( - "PUT", - "/rooms/%s/state/m.room.topic" % self.created_public_rmid, - '{"topic":"Public Room Topic"}' + request, channel = make_request( + b"PUT", + ("rooms/%s/state/m.room.topic" % self.created_public_rmid).encode('ascii'), + b'{"topic":"Public Room Topic"}', ) - self.assertEquals(200, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(channel.result["code"], b"200", channel.result) # auth as user_id now - self.auth_user_id = self.user_id - - def tearDown(self): - pass + self.helper.auth_user_id = self.user_id - @defer.inlineCallbacks def test_send_message(self): - msg_content = '{"msgtype":"m.text","body":"hello"}' - send_msg_path = ( - "/rooms/%s/send/m.room.message/mid1" % (self.created_rmid,) - ) + msg_content = b'{"msgtype":"m.text","body":"hello"}' + + seq = iter(range(100)) + + def send_msg_path(): + return b"/rooms/%s/send/m.room.message/mid%s" % ( + self.created_rmid, + str(next(seq)).encode('ascii'), + ) # send message in uncreated room, expect 403 - (code, response) = yield self.mock_resource.trigger( - "PUT", - "/rooms/%s/send/m.room.message/mid2" % (self.uncreated_rmid,), - msg_content + request, channel = make_request( + b"PUT", + b"/rooms/%s/send/m.room.message/mid2" % (self.uncreated_rmid,), + msg_content, ) - self.assertEquals(403, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # send message in created room not joined (no state), expect 403 - (code, response) = yield self.mock_resource.trigger( - "PUT", - send_msg_path, - msg_content - ) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"PUT", send_msg_path(), msg_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # send message in created room and invited, expect 403 - yield self.invite( - room=self.created_rmid, - src=self.rmcreator_id, - targ=self.user_id - ) - (code, response) = yield self.mock_resource.trigger( - "PUT", - send_msg_path, - msg_content + self.helper.invite( + room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"PUT", send_msg_path(), msg_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # send message in created room and joined, expect 200 - yield self.join(room=self.created_rmid, user=self.user_id) - (code, response) = yield self.mock_resource.trigger( - "PUT", - send_msg_path, - msg_content - ) - self.assertEquals(200, code, msg=str(response)) + self.helper.join(room=self.created_rmid, user=self.user_id) + request, channel = make_request(b"PUT", send_msg_path(), msg_content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) # send message in created room and left, expect 403 - yield self.leave(room=self.created_rmid, user=self.user_id) - (code, response) = yield self.mock_resource.trigger( - "PUT", - send_msg_path, - msg_content - ) - self.assertEquals(403, code, msg=str(response)) + self.helper.leave(room=self.created_rmid, user=self.user_id) + request, channel = make_request(b"PUT", send_msg_path(), msg_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_topic_perms(self): - topic_content = '{"topic":"My Topic Name"}' - topic_path = "/rooms/%s/state/m.room.topic" % self.created_rmid + topic_content = b'{"topic":"My Topic Name"}' + topic_path = b"/rooms/%s/state/m.room.topic" % self.created_rmid # set/get topic in uncreated room, expect 403 - (code, response) = yield self.mock_resource.trigger( - "PUT", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid, - topic_content + request, channel = make_request( + b"PUT", b"/rooms/%s/state/m.room.topic" % self.uncreated_rmid, topic_content ) - self.assertEquals(403, code, msg=str(response)) - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/state/m.room.topic" % self.uncreated_rmid + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) + request, channel = make_request( + b"GET", "/rooms/%s/state/m.room.topic" % self.uncreated_rmid ) - self.assertEquals(403, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # set/get topic in created PRIVATE room not joined, expect 403 - (code, response) = yield self.mock_resource.trigger( - "PUT", topic_path, topic_content - ) - self.assertEquals(403, code, msg=str(response)) - (code, response) = yield self.mock_resource.trigger_get(topic_path) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"PUT", topic_path, topic_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) + request, channel = make_request(b"GET", topic_path) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # set topic in created PRIVATE room and invited, expect 403 - yield self.invite( + self.helper.invite( room=self.created_rmid, src=self.rmcreator_id, targ=self.user_id ) - (code, response) = yield self.mock_resource.trigger( - "PUT", topic_path, topic_content - ) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"PUT", topic_path, topic_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # get topic in created PRIVATE room and invited, expect 403 - (code, response) = yield self.mock_resource.trigger_get(topic_path) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"GET", topic_path) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # set/get topic in created PRIVATE room and joined, expect 200 - yield self.join(room=self.created_rmid, user=self.user_id) + self.helper.join(room=self.created_rmid, user=self.user_id) # Only room ops can set topic by default - self.auth_user_id = self.rmcreator_id - (code, response) = yield self.mock_resource.trigger( - "PUT", topic_path, topic_content - ) - self.assertEquals(200, code, msg=str(response)) - self.auth_user_id = self.user_id + self.helper.auth_user_id = self.rmcreator_id + request, channel = make_request(b"PUT", topic_path, topic_content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.helper.auth_user_id = self.user_id - (code, response) = yield self.mock_resource.trigger_get(topic_path) - self.assertEquals(200, code, msg=str(response)) - self.assert_dict(json.loads(topic_content), response) + request, channel = make_request(b"GET", topic_path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assert_dict(json.loads(topic_content), channel.json_body) # set/get topic in created PRIVATE room and left, expect 403 - yield self.leave(room=self.created_rmid, user=self.user_id) - (code, response) = yield self.mock_resource.trigger( - "PUT", topic_path, topic_content - ) - self.assertEquals(403, code, msg=str(response)) - (code, response) = yield self.mock_resource.trigger_get(topic_path) - self.assertEquals(200, code, msg=str(response)) + self.helper.leave(room=self.created_rmid, user=self.user_id) + request, channel = make_request(b"PUT", topic_path, topic_content) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) + request, channel = make_request(b"GET", topic_path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) # get topic in PUBLIC room, not joined, expect 403 - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/state/m.room.topic" % self.created_public_rmid + request, channel = make_request( + b"GET", b"/rooms/%s/state/m.room.topic" % self.created_public_rmid ) - self.assertEquals(403, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) # set topic in PUBLIC room, not joined, expect 403 - (code, response) = yield self.mock_resource.trigger( - "PUT", - "/rooms/%s/state/m.room.topic" % self.created_public_rmid, - topic_content + request, channel = make_request( + b"PUT", + b"/rooms/%s/state/m.room.topic" % self.created_public_rmid, + topic_content, ) - self.assertEquals(403, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def _test_get_membership(self, room=None, members=[], expect_code=None): for member in members: - path = "/rooms/%s/state/m.room.member/%s" % (room, member) - (code, response) = yield self.mock_resource.trigger_get(path) - self.assertEquals(expect_code, code) + path = b"/rooms/%s/state/m.room.member/%s" % (room, member) + request, channel = make_request(b"GET", path) + render(request, self.resource, self.clock) + self.assertEquals(expect_code, int(channel.result["code"])) - @defer.inlineCallbacks def test_membership_basic_room_perms(self): # === room does not exist === room = self.uncreated_rmid # get membership of self, get membership of other, uncreated room # expect all 403s - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=403) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=403 + ) # trying to invite people to this room should 403 - yield self.invite(room=room, src=self.user_id, targ=self.rmcreator_id, - expect_code=403) + self.helper.invite( + room=room, src=self.user_id, targ=self.rmcreator_id, expect_code=403 + ) # set [invite/join/left] of self, set [invite/join/left] of other, # expect all 404s because room doesn't exist on any server for usr in [self.user_id, self.rmcreator_id]: - yield self.join(room=room, user=usr, expect_code=404) - yield self.leave(room=room, user=usr, expect_code=404) + self.helper.join(room=room, user=usr, expect_code=404) + self.helper.leave(room=room, user=usr, expect_code=404) - @defer.inlineCallbacks def test_membership_private_room_perms(self): room = self.created_rmid # get membership of self, get membership of other, private room + invite # expect all 403s - yield self.invite(room=room, src=self.rmcreator_id, - targ=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=403) + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=403 + ) # get membership of self, get membership of other, private room + joined # expect all 200s - yield self.join(room=room, user=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=200) + self.helper.join(room=room, user=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=200 + ) # get membership of self, get membership of other, private room + left # expect all 200s - yield self.leave(room=room, user=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=200) + self.helper.leave(room=room, user=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=200 + ) - @defer.inlineCallbacks def test_membership_public_room_perms(self): room = self.created_public_rmid # get membership of self, get membership of other, public room + invite # expect 403 - yield self.invite(room=room, src=self.rmcreator_id, - targ=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=403) + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=403 + ) # get membership of self, get membership of other, public room + joined # expect all 200s - yield self.join(room=room, user=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=200) + self.helper.join(room=room, user=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=200 + ) # get membership of self, get membership of other, public room + left # expect 200. - yield self.leave(room=room, user=self.user_id) - yield self._test_get_membership( - members=[self.user_id, self.rmcreator_id], - room=room, expect_code=200) + self.helper.leave(room=room, user=self.user_id) + self._test_get_membership( + members=[self.user_id, self.rmcreator_id], room=room, expect_code=200 + ) - @defer.inlineCallbacks def test_invited_permissions(self): room = self.created_rmid - yield self.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) # set [invite/join/left] of other user, expect 403s - yield self.invite(room=room, src=self.user_id, targ=self.rmcreator_id, - expect_code=403) - yield self.change_membership(room=room, src=self.user_id, - targ=self.rmcreator_id, - membership=Membership.JOIN, - expect_code=403) - yield self.change_membership(room=room, src=self.user_id, - targ=self.rmcreator_id, - membership=Membership.LEAVE, - expect_code=403) - - @defer.inlineCallbacks + self.helper.invite( + room=room, src=self.user_id, targ=self.rmcreator_id, expect_code=403 + ) + self.helper.change_membership( + room=room, + src=self.user_id, + targ=self.rmcreator_id, + membership=Membership.JOIN, + expect_code=403, + ) + self.helper.change_membership( + room=room, + src=self.user_id, + targ=self.rmcreator_id, + membership=Membership.LEAVE, + expect_code=403, + ) + def test_joined_permissions(self): room = self.created_rmid - yield self.invite(room=room, src=self.rmcreator_id, targ=self.user_id) - yield self.join(room=room, user=self.user_id) + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self.helper.join(room=room, user=self.user_id) # set invited of self, expect 403 - yield self.invite(room=room, src=self.user_id, targ=self.user_id, - expect_code=403) + self.helper.invite( + room=room, src=self.user_id, targ=self.user_id, expect_code=403 + ) # set joined of self, expect 200 (NOOP) - yield self.join(room=room, user=self.user_id) + self.helper.join(room=room, user=self.user_id) other = "@burgundy:red" # set invited of other, expect 200 - yield self.invite(room=room, src=self.user_id, targ=other, - expect_code=200) + self.helper.invite(room=room, src=self.user_id, targ=other, expect_code=200) # set joined of other, expect 403 - yield self.change_membership(room=room, src=self.user_id, - targ=other, - membership=Membership.JOIN, - expect_code=403) + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.JOIN, + expect_code=403, + ) # set left of other, expect 403 - yield self.change_membership(room=room, src=self.user_id, - targ=other, - membership=Membership.LEAVE, - expect_code=403) + self.helper.change_membership( + room=room, + src=self.user_id, + targ=other, + membership=Membership.LEAVE, + expect_code=403, + ) # set left of self, expect 200 - yield self.leave(room=room, user=self.user_id) + self.helper.leave(room=room, user=self.user_id) - @defer.inlineCallbacks def test_leave_permissions(self): room = self.created_rmid - yield self.invite(room=room, src=self.rmcreator_id, targ=self.user_id) - yield self.join(room=room, user=self.user_id) - yield self.leave(room=room, user=self.user_id) + self.helper.invite(room=room, src=self.rmcreator_id, targ=self.user_id) + self.helper.join(room=room, user=self.user_id) + self.helper.leave(room=room, user=self.user_id) # set [invite/join/left] of self, set [invite/join/left] of other, # expect all 403s for usr in [self.user_id, self.rmcreator_id]: - yield self.change_membership( + self.helper.change_membership( room=room, src=self.user_id, targ=usr, membership=Membership.INVITE, - expect_code=403 + expect_code=403, ) - yield self.change_membership( + self.helper.change_membership( room=room, src=self.user_id, targ=usr, membership=Membership.JOIN, - expect_code=403 + expect_code=403, ) # It is always valid to LEAVE if you've already left (currently.) - yield self.change_membership( + self.helper.change_membership( room=room, src=self.user_id, targ=self.rmcreator_id, membership=Membership.LEAVE, - expect_code=403 + expect_code=403, ) -class RoomsMemberListTestCase(RestTestCase): +class RoomsMemberListTestCase(RoomBase): """ Tests /rooms/$room_id/members/list REST events.""" - user_id = "@sid1:red" - - @defer.inlineCallbacks - def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() - - self.auth_user_id = self.user_id - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + user_id = b"@sid1:red" - def tearDown(self): - pass - - @defer.inlineCallbacks def test_get_member_list(self): - room_id = yield self.create_room_as(self.user_id) - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/members" % room_id - ) - self.assertEquals(200, code, msg=str(response)) + room_id = self.helper.create_room_as(self.user_id) + request, channel = make_request(b"GET", b"/rooms/%s/members" % room_id) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_get_member_list_no_room(self): - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/roomdoesnotexist/members" - ) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"GET", b"/rooms/roomdoesnotexist/members") + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_get_member_list_no_permission(self): - room_id = yield self.create_room_as("@some_other_guy:red") - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/members" % room_id - ) - self.assertEquals(403, code, msg=str(response)) + room_id = self.helper.create_room_as(b"@some_other_guy:red") + request, channel = make_request(b"GET", b"/rooms/%s/members" % room_id) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_get_member_list_mixed_memberships(self): - room_creator = "@some_other_guy:red" - room_id = yield self.create_room_as(room_creator) - room_path = "/rooms/%s/members" % room_id - yield self.invite(room=room_id, src=room_creator, - targ=self.user_id) + room_creator = b"@some_other_guy:red" + room_id = self.helper.create_room_as(room_creator) + room_path = b"/rooms/%s/members" % room_id + self.helper.invite(room=room_id, src=room_creator, targ=self.user_id) # can't see list if you're just invited. - (code, response) = yield self.mock_resource.trigger_get(room_path) - self.assertEquals(403, code, msg=str(response)) + request, channel = make_request(b"GET", room_path) + render(request, self.resource, self.clock) + self.assertEquals(403, int(channel.result["code"]), msg=channel.result["body"]) - yield self.join(room=room_id, user=self.user_id) + self.helper.join(room=room_id, user=self.user_id) # can see list now joined - (code, response) = yield self.mock_resource.trigger_get(room_path) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"GET", room_path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - yield self.leave(room=room_id, user=self.user_id) + self.helper.leave(room=room_id, user=self.user_id) # can see old list once left - (code, response) = yield self.mock_resource.trigger_get(room_path) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"GET", room_path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) -class RoomsCreateTestCase(RestTestCase): +class RoomsCreateTestCase(RoomBase): """ Tests /rooms and /rooms/$room_id REST events. """ - user_id = "@sid1:red" - - @defer.inlineCallbacks - def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + user_id = b"@sid1:red" - @defer.inlineCallbacks def test_post_room_no_keys(self): # POST with no config keys, expect new room id - (code, response) = yield self.mock_resource.trigger("POST", - "/createRoom", - "{}") - self.assertEquals(200, code, response) - self.assertTrue("room_id" in response) + request, channel = make_request(b"POST", b"/createRoom", b"{}") + + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), channel.result) + self.assertTrue("room_id" in channel.json_body) - @defer.inlineCallbacks def test_post_room_visibility_key(self): # POST with visibility config key, expect new room id - (code, response) = yield self.mock_resource.trigger( - "POST", - "/createRoom", - '{"visibility":"private"}') - self.assertEquals(200, code) - self.assertTrue("room_id" in response) - - @defer.inlineCallbacks + request, channel = make_request( + b"POST", b"/createRoom", b'{"visibility":"private"}' + ) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) + self.assertTrue("room_id" in channel.json_body) + def test_post_room_custom_key(self): # POST with custom config keys, expect new room id - (code, response) = yield self.mock_resource.trigger( - "POST", - "/createRoom", - '{"custom":"stuff"}') - self.assertEquals(200, code) - self.assertTrue("room_id" in response) - - @defer.inlineCallbacks + request, channel = make_request(b"POST", b"/createRoom", b'{"custom":"stuff"}') + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) + self.assertTrue("room_id" in channel.json_body) + def test_post_room_known_and_unknown_keys(self): # POST with custom + known config keys, expect new room id - (code, response) = yield self.mock_resource.trigger( - "POST", - "/createRoom", - '{"visibility":"private","custom":"things"}') - self.assertEquals(200, code) - self.assertTrue("room_id" in response) - - @defer.inlineCallbacks + request, channel = make_request( + b"POST", b"/createRoom", b'{"visibility":"private","custom":"things"}' + ) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) + self.assertTrue("room_id" in channel.json_body) + def test_post_room_invalid_content(self): # POST with invalid content / paths, expect 400 - (code, response) = yield self.mock_resource.trigger( - "POST", - "/createRoom", - '{"visibili') - self.assertEquals(400, code) + request, channel = make_request(b"POST", b"/createRoom", b'{"visibili') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"])) - (code, response) = yield self.mock_resource.trigger( - "POST", - "/createRoom", - '["hello"]') - self.assertEquals(400, code) + request, channel = make_request(b"POST", b"/createRoom", b'["hello"]') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"])) -class RoomTopicTestCase(RestTestCase): +class RoomTopicTestCase(RoomBase): """ Tests /rooms/$room_id/topic REST events. """ - user_id = "@sid1:red" - @defer.inlineCallbacks - def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id + user_id = b"@sid1:red" - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip + def setUp(self): - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + super(RoomTopicTestCase, self).setUp() # create the room - self.room_id = yield self.create_room_as(self.user_id) - self.path = "/rooms/%s/state/m.room.topic" % (self.room_id,) - - def tearDown(self): - pass + self.room_id = self.helper.create_room_as(self.user_id) + self.path = b"/rooms/%s/state/m.room.topic" % (self.room_id,) - @defer.inlineCallbacks def test_invalid_puts(self): # missing keys or invalid json - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, '{}' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, '{}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, '{"_name":"bob"}' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, '{"_name":"bob"}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, '{"nao' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, '{"nao') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, '[{"_name":"bob"},{"_name":"jill"}]' + request, channel = make_request( + b"PUT", self.path, '[{"_name":"bob"},{"_name":"jill"}]' ) - self.assertEquals(400, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, 'text only' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, 'text only') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, '' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, '') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) # valid key, wrong type content = '{"topic":["Topic name"]}' - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, content - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, content) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_rooms_topic(self): # nothing should be there - (code, response) = yield self.mock_resource.trigger_get(self.path) - self.assertEquals(404, code, msg=str(response)) + request, channel = make_request(b"GET", self.path) + render(request, self.resource, self.clock) + self.assertEquals(404, int(channel.result["code"]), msg=channel.result["body"]) # valid put content = '{"topic":"Topic name"}' - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, content - ) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) # valid get - (code, response) = yield self.mock_resource.trigger_get(self.path) - self.assertEquals(200, code, msg=str(response)) - self.assert_dict(json.loads(content), response) + request, channel = make_request(b"GET", self.path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assert_dict(json.loads(content), channel.json_body) - @defer.inlineCallbacks def test_rooms_topic_with_extra_keys(self): # valid put with extra keys content = '{"topic":"Seasons","subtopic":"Summer"}' - (code, response) = yield self.mock_resource.trigger( - "PUT", self.path, content - ) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"PUT", self.path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) # valid get - (code, response) = yield self.mock_resource.trigger_get(self.path) - self.assertEquals(200, code, msg=str(response)) - self.assert_dict(json.loads(content), response) + request, channel = make_request(b"GET", self.path) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assert_dict(json.loads(content), channel.json_body) -class RoomMemberStateTestCase(RestTestCase): +class RoomMemberStateTestCase(RoomBase): """ Tests /rooms/$room_id/members/$user_id/state REST events. """ - user_id = "@sid1:red" - - @defer.inlineCallbacks - def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id - - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - hs.get_handlers().federation_handler = Mock() + user_id = b"@sid1:red" - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + def setUp(self): - self.room_id = yield self.create_room_as(self.user_id) + super(RoomMemberStateTestCase, self).setUp() + self.room_id = self.helper.create_room_as(self.user_id) def tearDown(self): pass - @defer.inlineCallbacks def test_invalid_puts(self): path = "/rooms/%s/state/m.room.member/%s" % (self.room_id, self.user_id) # missing keys or invalid json - (code, response) = yield self.mock_resource.trigger("PUT", path, '{}') - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '{"_name":"bob"}' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{"_name":"bob"}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '{"nao' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{"nao') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '[{"_name":"bob"},{"_name":"jill"}]' + request, channel = make_request( + b"PUT", path, b'[{"_name":"bob"},{"_name":"jill"}]' ) - self.assertEquals(400, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, 'text only' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, 'text only') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) # valid keys, wrong types - content = ('{"membership":["%s","%s","%s"]}' % ( - Membership.INVITE, Membership.JOIN, Membership.LEAVE - )) - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(400, code, msg=str(response)) + content = '{"membership":["%s","%s","%s"]}' % ( + Membership.INVITE, + Membership.JOIN, + Membership.LEAVE, + ) + request, channel = make_request(b"PUT", path, content.encode('ascii')) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_rooms_members_self(self): path = "/rooms/%s/state/m.room.member/%s" % ( - urlparse.quote(self.room_id), self.user_id + urlparse.quote(self.room_id), + self.user_id, ) # valid join message (NOOP since we made the room) content = '{"membership":"%s"}' % Membership.JOIN - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"PUT", path, content.encode('ascii')) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger("GET", path, None) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"GET", path, None) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - expected_response = { - "membership": Membership.JOIN, - } - self.assertEquals(expected_response, response) + expected_response = {"membership": Membership.JOIN} + self.assertEquals(expected_response, channel.json_body) - @defer.inlineCallbacks def test_rooms_members_other(self): self.other_id = "@zzsid1:red" path = "/rooms/%s/state/m.room.member/%s" % ( - urlparse.quote(self.room_id), self.other_id + urlparse.quote(self.room_id), + self.other_id, ) # valid invite message content = '{"membership":"%s"}' % Membership.INVITE - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"PUT", path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger("GET", path, None) - self.assertEquals(200, code, msg=str(response)) - self.assertEquals(json.loads(content), response) + request, channel = make_request(b"GET", path, None) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEquals(json.loads(content), channel.json_body) - @defer.inlineCallbacks def test_rooms_members_other_custom_keys(self): self.other_id = "@zzsid1:red" path = "/rooms/%s/state/m.room.member/%s" % ( - urlparse.quote(self.room_id), self.other_id + urlparse.quote(self.room_id), + self.other_id, ) # valid invite message with custom key - content = ('{"membership":"%s","invite_text":"%s"}' % ( - Membership.INVITE, "Join us!" - )) - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(200, code, msg=str(response)) + content = '{"membership":"%s","invite_text":"%s"}' % ( + Membership.INVITE, + "Join us!", + ) + request, channel = make_request(b"PUT", path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger("GET", path, None) - self.assertEquals(200, code, msg=str(response)) - self.assertEquals(json.loads(content), response) + request, channel = make_request(b"GET", path, None) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEquals(json.loads(content), channel.json_body) -class RoomMessagesTestCase(RestTestCase): +class RoomMessagesTestCase(RoomBase): """ Tests /rooms/$room_id/messages/$user_id/$msg_id REST events. """ + user_id = "@sid1:red" - @defer.inlineCallbacks def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id - - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() + super(RoomMessagesTestCase, self).setUp() - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip + self.room_id = self.helper.create_room_as(self.user_id) - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) - - self.room_id = yield self.create_room_as(self.user_id) - - def tearDown(self): - pass - - @defer.inlineCallbacks def test_invalid_puts(self): - path = "/rooms/%s/send/m.room.message/mid1" % ( - urlparse.quote(self.room_id)) + path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) # missing keys or invalid json - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '{}' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '{"_name":"bob"}' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{"_name":"bob"}') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '{"nao' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '{"nao') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '[{"_name":"bob"},{"_name":"jill"}]' + request, channel = make_request( + b"PUT", path, '[{"_name":"bob"},{"_name":"jill"}]' ) - self.assertEquals(400, code, msg=str(response)) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, 'text only' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, 'text only') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - (code, response) = yield self.mock_resource.trigger( - "PUT", path, '' - ) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, '') + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) - @defer.inlineCallbacks def test_rooms_messages_sent(self): - path = "/rooms/%s/send/m.room.message/mid1" % ( - urlparse.quote(self.room_id)) + path = "/rooms/%s/send/m.room.message/mid1" % (urlparse.quote(self.room_id)) content = '{"body":"test","msgtype":{"type":"a"}}' - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(400, code, msg=str(response)) + request, channel = make_request(b"PUT", path, content) + render(request, self.resource, self.clock) + self.assertEquals(400, int(channel.result["code"]), msg=channel.result["body"]) # custom message types content = '{"body":"test","msgtype":"test.custom.text"}' - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(200, code, msg=str(response)) - -# (code, response) = yield self.mock_resource.trigger("GET", path, None) -# self.assertEquals(200, code, msg=str(response)) -# self.assert_dict(json.loads(content), response) + request, channel = make_request(b"PUT", path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) # m.text message type - path = "/rooms/%s/send/m.room.message/mid2" % ( - urlparse.quote(self.room_id)) + path = "/rooms/%s/send/m.room.message/mid2" % (urlparse.quote(self.room_id)) content = '{"body":"test2","msgtype":"m.text"}' - (code, response) = yield self.mock_resource.trigger("PUT", path, content) - self.assertEquals(200, code, msg=str(response)) + request, channel = make_request(b"PUT", path, content) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) -class RoomInitialSyncTestCase(RestTestCase): +class RoomInitialSyncTestCase(RoomBase): """ Tests /rooms/$room_id/initialSync. """ + user_id = "@sid1:red" - @defer.inlineCallbacks def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id - - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=[ - "send_message", - ]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) + super(RoomInitialSyncTestCase, self).setUp() # create the room - self.room_id = yield self.create_room_as(self.user_id) + self.room_id = self.helper.create_room_as(self.user_id) - @defer.inlineCallbacks def test_initial_sync(self): - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/initialSync" % self.room_id - ) - self.assertEquals(200, code) + request, channel = make_request(b"GET", "/rooms/%s/initialSync" % self.room_id) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) - self.assertEquals(self.room_id, response["room_id"]) - self.assertEquals("join", response["membership"]) + self.assertEquals(self.room_id, channel.json_body["room_id"]) + self.assertEquals("join", channel.json_body["membership"]) # Room state is easier to assert on if we unpack it into a dict state = {} - for event in response["state"]: + for event in channel.json_body["state"]: if "state_key" not in event: continue t = event["type"] @@ -977,75 +800,48 @@ class RoomInitialSyncTestCase(RestTestCase): self.assertTrue("m.room.create" in state) - self.assertTrue("messages" in response) - self.assertTrue("chunk" in response["messages"]) - self.assertTrue("end" in response["messages"]) + self.assertTrue("messages" in channel.json_body) + self.assertTrue("chunk" in channel.json_body["messages"]) + self.assertTrue("end" in channel.json_body["messages"]) - self.assertTrue("presence" in response) + self.assertTrue("presence" in channel.json_body) presence_by_user = { - e["content"]["user_id"]: e for e in response["presence"] + e["content"]["user_id"]: e for e in channel.json_body["presence"] } self.assertTrue(self.user_id in presence_by_user) self.assertEquals("m.presence", presence_by_user[self.user_id]["type"]) -class RoomMessageListTestCase(RestTestCase): +class RoomMessageListTestCase(RoomBase): """ Tests /rooms/$room_id/messages REST events. """ + user_id = "@sid1:red" - @defer.inlineCallbacks def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - self.auth_user_id = self.user_id - - hs = yield setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) - - hs.get_handlers().federation_handler = Mock() - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.auth_user_id), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token + super(RoomMessageListTestCase, self).setUp() + self.room_id = self.helper.create_room_as(self.user_id) - def _insert_client_ip(*args, **kwargs): - return defer.succeed(None) - hs.get_datastore().insert_client_ip = _insert_client_ip - - synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) - - self.room_id = yield self.create_room_as(self.user_id) - - @defer.inlineCallbacks def test_topo_token_is_accepted(self): token = "t1-0_0_0_0_0_0_0_0_0" - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/messages?access_token=x&from=%s" % - (self.room_id, token)) - self.assertEquals(200, code) - self.assertTrue("start" in response) - self.assertEquals(token, response['start']) - self.assertTrue("chunk" in response) - self.assertTrue("end" in response) - - @defer.inlineCallbacks + request, channel = make_request( + b"GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) + ) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) + self.assertTrue("start" in channel.json_body) + self.assertEquals(token, channel.json_body['start']) + self.assertTrue("chunk" in channel.json_body) + self.assertTrue("end" in channel.json_body) + def test_stream_token_is_accepted_for_fwd_pagianation(self): token = "s0_0_0_0_0_0_0_0_0" - (code, response) = yield self.mock_resource.trigger_get( - "/rooms/%s/messages?access_token=x&from=%s" % - (self.room_id, token)) - self.assertEquals(200, code) - self.assertTrue("start" in response) - self.assertEquals(token, response['start']) - self.assertTrue("chunk" in response) - self.assertTrue("end" in response) + request, channel = make_request( + b"GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token) + ) + render(request, self.resource, self.clock) + self.assertEquals(200, int(channel.result["code"])) + self.assertTrue("start" in channel.json_body) + self.assertEquals(token, channel.json_body['start']) + self.assertTrue("chunk" in channel.json_body) + self.assertTrue("end" in channel.json_body) diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py index 54d7ba380d..41de8e0762 100644 --- a/tests/rest/client/v1/utils.py +++ b/tests/rest/client/v1/utils.py @@ -16,13 +16,14 @@ import json import time -# twisted imports +import attr + from twisted.internet import defer from synapse.api.constants import Membership -# trial imports from tests import unittest +from tests.server import make_request, wait_until_result class RestTestCase(unittest.TestCase): @@ -133,3 +134,113 @@ class RestTestCase(unittest.TestCase): for key in required: self.assertEquals(required[key], actual[key], msg="%s mismatch. %s" % (key, actual)) + + +@attr.s +class RestHelper(object): + """Contains extra helper functions to quickly and clearly perform a given + REST action, which isn't the focus of the test. + """ + + hs = attr.ib() + resource = attr.ib() + auth_user_id = attr.ib() + + def create_room_as(self, room_creator, is_public=True, tok=None): + temp_id = self.auth_user_id + self.auth_user_id = room_creator + path = b"/_matrix/client/r0/createRoom" + content = {} + if not is_public: + content["visibility"] = "private" + if tok: + path = path + b"?access_token=%s" % tok.encode('ascii') + + request, channel = make_request(b"POST", path, json.dumps(content).encode('utf8')) + request.render(self.resource) + wait_until_result(self.hs.get_reactor(), channel) + + assert channel.result["code"] == b"200", channel.result + self.auth_user_id = temp_id + return channel.json_body["room_id"] + + def invite(self, room=None, src=None, targ=None, expect_code=200, tok=None): + self.change_membership( + room=room, + src=src, + targ=targ, + tok=tok, + membership=Membership.INVITE, + expect_code=expect_code, + ) + + def join(self, room=None, user=None, expect_code=200, tok=None): + self.change_membership( + room=room, + src=user, + targ=user, + tok=tok, + membership=Membership.JOIN, + expect_code=expect_code, + ) + + def leave(self, room=None, user=None, expect_code=200, tok=None): + self.change_membership( + room=room, + src=user, + targ=user, + tok=tok, + membership=Membership.LEAVE, + expect_code=expect_code, + ) + + def change_membership(self, room, src, targ, membership, tok=None, expect_code=200): + temp_id = self.auth_user_id + self.auth_user_id = src + + path = "/_matrix/client/r0/rooms/%s/state/m.room.member/%s" % (room, targ) + if tok: + path = path + "?access_token=%s" % tok + + data = {"membership": membership} + + request, channel = make_request( + b"PUT", path.encode('ascii'), json.dumps(data).encode('utf8') + ) + + request.render(self.resource) + wait_until_result(self.hs.get_reactor(), channel) + + assert int(channel.result["code"]) == expect_code, ( + "Expected: %d, got: %d, resp: %r" + % (expect_code, int(channel.result["code"]), channel.result["body"]) + ) + + self.auth_user_id = temp_id + + @defer.inlineCallbacks + def register(self, user_id): + (code, response) = yield self.mock_resource.trigger( + "POST", + "/_matrix/client/r0/register", + json.dumps( + {"user": user_id, "password": "test", "type": "m.login.password"} + ), + ) + self.assertEquals(200, code) + defer.returnValue(response) + + @defer.inlineCallbacks + def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200): + if txn_id is None: + txn_id = "m%s" % (str(time.time())) + if body is None: + body = "body_text_here" + + path = "/_matrix/client/r0/rooms/%s/send/m.room.message/%s" % (room_id, txn_id) + content = '{"msgtype":"m.text","body":"%s"}' % body + if tok: + path = path + "?access_token=%s" % tok + + (code, response) = yield self.mock_resource.trigger("PUT", path, content) + self.assertEquals(expect_code, code, msg=str(response)) diff --git a/tests/rest/client/v2_alpha/__init__.py b/tests/rest/client/v2_alpha/__init__.py index f18a8a6027..e69de29bb2 100644 --- a/tests/rest/client/v2_alpha/__init__.py +++ b/tests/rest/client/v2_alpha/__init__.py @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from mock import Mock - -from twisted.internet import defer - -from synapse.types import UserID - -from tests import unittest - -from ....utils import MockHttpResource, setup_test_homeserver - -PATH_PREFIX = "/_matrix/client/v2_alpha" - - -class V2AlphaRestTestCase(unittest.TestCase): - # Consumer must define - # USER_ID = - # TO_REGISTER = [] - - @defer.inlineCallbacks - def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) - - hs = yield setup_test_homeserver( - datastore=self.make_datastore_mock(), - http_client=None, - resource_for_client=self.mock_resource, - resource_for_federation=self.mock_resource, - ) - - def get_user_by_access_token(token=None, allow_guest=False): - return { - "user": UserID.from_string(self.USER_ID), - "token_id": 1, - "is_guest": False, - } - hs.get_auth().get_user_by_access_token = get_user_by_access_token - - for r in self.TO_REGISTER: - r.register_servlets(hs, self.mock_resource) - - def make_datastore_mock(self): - store = Mock(spec=[ - "insert_client_ip", - ]) - store.get_app_service_by_token = Mock(return_value=None) - return store diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index bb0b2f94ea..5ea9cc825f 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -13,35 +13,33 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer - import synapse.types from synapse.api.errors import Codes +from synapse.http.server import JsonResource from synapse.rest.client.v2_alpha import filter from synapse.types import UserID +from synapse.util import Clock from tests import unittest - -from ....utils import MockHttpResource, setup_test_homeserver +from tests.server import ThreadedMemoryReactorClock as MemoryReactorClock +from tests.server import make_request, setup_test_homeserver, wait_until_result PATH_PREFIX = "/_matrix/client/v2_alpha" class FilterTestCase(unittest.TestCase): - USER_ID = "@apple:test" + USER_ID = b"@apple:test" EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}} - EXAMPLE_FILTER_JSON = '{"room": {"timeline": {"types": ["m.room.message"]}}}' + EXAMPLE_FILTER_JSON = b'{"room": {"timeline": {"types": ["m.room.message"]}}}' TO_REGISTER = [filter] - @defer.inlineCallbacks def setUp(self): - self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) + self.clock = MemoryReactorClock() + self.hs_clock = Clock(self.clock) - self.hs = yield setup_test_homeserver( - http_client=None, - resource_for_client=self.mock_resource, - resource_for_federation=self.mock_resource, + self.hs = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.clock ) self.auth = self.hs.get_auth() @@ -55,82 +53,103 @@ class FilterTestCase(unittest.TestCase): def get_user_by_req(request, allow_guest=False, rights="access"): return synapse.types.create_requester( - UserID.from_string(self.USER_ID), 1, False, None) + UserID.from_string(self.USER_ID), 1, False, None + ) self.auth.get_user_by_access_token = get_user_by_access_token self.auth.get_user_by_req = get_user_by_req self.store = self.hs.get_datastore() self.filtering = self.hs.get_filtering() + self.resource = JsonResource(self.hs) for r in self.TO_REGISTER: - r.register_servlets(self.hs, self.mock_resource) + r.register_servlets(self.hs, self.resource) - @defer.inlineCallbacks def test_add_filter(self): - (code, response) = yield self.mock_resource.trigger( - "POST", "/user/%s/filter" % (self.USER_ID), self.EXAMPLE_FILTER_JSON - ) - self.assertEquals(200, code) - self.assertEquals({"filter_id": "0"}, response) - filter = yield self.store.get_user_filter( - user_localpart='apple', - filter_id=0, + request, channel = make_request( + b"POST", + b"/_matrix/client/r0/user/%s/filter" % (self.USER_ID), + self.EXAMPLE_FILTER_JSON, ) - self.assertEquals(filter, self.EXAMPLE_FILTER) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"200") + self.assertEqual(channel.json_body, {"filter_id": "0"}) + filter = self.store.get_user_filter(user_localpart="apple", filter_id=0) + self.clock.advance(0) + self.assertEquals(filter.result, self.EXAMPLE_FILTER) - @defer.inlineCallbacks def test_add_filter_for_other_user(self): - (code, response) = yield self.mock_resource.trigger( - "POST", "/user/%s/filter" % ('@watermelon:test'), self.EXAMPLE_FILTER_JSON + request, channel = make_request( + b"POST", + b"/_matrix/client/r0/user/%s/filter" % (b"@watermelon:test"), + self.EXAMPLE_FILTER_JSON, ) - self.assertEquals(403, code) - self.assertEquals(response['errcode'], Codes.FORBIDDEN) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"403") + self.assertEquals(channel.json_body["errcode"], Codes.FORBIDDEN) - @defer.inlineCallbacks def test_add_filter_non_local_user(self): _is_mine = self.hs.is_mine self.hs.is_mine = lambda target_user: False - (code, response) = yield self.mock_resource.trigger( - "POST", "/user/%s/filter" % (self.USER_ID), self.EXAMPLE_FILTER_JSON + request, channel = make_request( + b"POST", + b"/_matrix/client/r0/user/%s/filter" % (self.USER_ID), + self.EXAMPLE_FILTER_JSON, ) + request.render(self.resource) + wait_until_result(self.clock, channel) + self.hs.is_mine = _is_mine - self.assertEquals(403, code) - self.assertEquals(response['errcode'], Codes.FORBIDDEN) + self.assertEqual(channel.result["code"], b"403") + self.assertEquals(channel.json_body["errcode"], Codes.FORBIDDEN) - @defer.inlineCallbacks def test_get_filter(self): - filter_id = yield self.filtering.add_user_filter( - user_localpart='apple', - user_filter=self.EXAMPLE_FILTER + filter_id = self.filtering.add_user_filter( + user_localpart="apple", user_filter=self.EXAMPLE_FILTER ) - (code, response) = yield self.mock_resource.trigger_get( - "/user/%s/filter/%s" % (self.USER_ID, filter_id) + self.clock.advance(1) + filter_id = filter_id.result + request, channel = make_request( + b"GET", b"/_matrix/client/r0/user/%s/filter/%s" % (self.USER_ID, filter_id) ) - self.assertEquals(200, code) - self.assertEquals(self.EXAMPLE_FILTER, response) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"200") + self.assertEquals(channel.json_body, self.EXAMPLE_FILTER) - @defer.inlineCallbacks def test_get_filter_non_existant(self): - (code, response) = yield self.mock_resource.trigger_get( - "/user/%s/filter/12382148321" % (self.USER_ID) + request, channel = make_request( + b"GET", "/_matrix/client/r0/user/%s/filter/12382148321" % (self.USER_ID) ) - self.assertEquals(400, code) - self.assertEquals(response['errcode'], Codes.NOT_FOUND) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"400") + self.assertEquals(channel.json_body["errcode"], Codes.NOT_FOUND) # Currently invalid params do not have an appropriate errcode # in errors.py - @defer.inlineCallbacks def test_get_filter_invalid_id(self): - (code, response) = yield self.mock_resource.trigger_get( - "/user/%s/filter/foobar" % (self.USER_ID) + request, channel = make_request( + b"GET", "/_matrix/client/r0/user/%s/filter/foobar" % (self.USER_ID) ) - self.assertEquals(400, code) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"400") # No ID also returns an invalid_id error - @defer.inlineCallbacks def test_get_filter_no_id(self): - (code, response) = yield self.mock_resource.trigger_get( - "/user/%s/filter/" % (self.USER_ID) + request, channel = make_request( + b"GET", "/_matrix/client/r0/user/%s/filter/" % (self.USER_ID) ) - self.assertEquals(400, code) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"400") diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 9b57a56070..e004d8fc73 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -2,165 +2,192 @@ import json from mock import Mock -from twisted.internet import defer from twisted.python import failure +from twisted.test.proto_helpers import MemoryReactorClock -from synapse.api.errors import InteractiveAuthIncompleteError, SynapseError -from synapse.rest.client.v2_alpha.register import RegisterRestServlet +from synapse.api.errors import InteractiveAuthIncompleteError +from synapse.http.server import JsonResource +from synapse.rest.client.v2_alpha.register import register_servlets +from synapse.util import Clock from tests import unittest -from tests.utils import mock_getRawHeaders +from tests.server import make_request, setup_test_homeserver, wait_until_result class RegisterRestServletTestCase(unittest.TestCase): - def setUp(self): - # do the dance to hook up request data to self.request_data - self.request_data = "" - self.request = Mock( - content=Mock(read=Mock(side_effect=lambda: self.request_data)), - path='/_matrix/api/v2_alpha/register' - ) - self.request.args = {} - self.request.requestHeaders.getRawHeaders = mock_getRawHeaders() + + self.clock = MemoryReactorClock() + self.hs_clock = Clock(self.clock) + self.url = b"/_matrix/client/r0/register" self.appservice = None - self.auth = Mock(get_appservice_by_req=Mock( - side_effect=lambda x: self.appservice) + self.auth = Mock( + get_appservice_by_req=Mock(side_effect=lambda x: self.appservice) ) self.auth_result = failure.Failure(InteractiveAuthIncompleteError(None)) self.auth_handler = Mock( check_auth=Mock(side_effect=lambda x, y, z: self.auth_result), - get_session_data=Mock(return_value=None) + get_session_data=Mock(return_value=None), ) self.registration_handler = Mock() self.identity_handler = Mock() self.login_handler = Mock() self.device_handler = Mock() + self.device_handler.check_device_registered = Mock(return_value="FAKE") + + self.datastore = Mock(return_value=Mock()) + self.datastore.get_current_state_deltas = Mock(return_value=[]) # do the dance to hook it up to the hs global self.handlers = Mock( registration_handler=self.registration_handler, identity_handler=self.identity_handler, - login_handler=self.login_handler + login_handler=self.login_handler, + ) + self.hs = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.clock ) - self.hs = Mock() - self.hs.hostname = "superbig~testing~thing.com" self.hs.get_auth = Mock(return_value=self.auth) self.hs.get_handlers = Mock(return_value=self.handlers) self.hs.get_auth_handler = Mock(return_value=self.auth_handler) self.hs.get_device_handler = Mock(return_value=self.device_handler) + self.hs.get_datastore = Mock(return_value=self.datastore) self.hs.config.enable_registration = True self.hs.config.registrations_require_3pid = [] self.hs.config.auto_join_rooms = [] - # init the thing we're testing - self.servlet = RegisterRestServlet(self.hs) + self.resource = JsonResource(self.hs) + register_servlets(self.hs, self.resource) - @defer.inlineCallbacks def test_POST_appservice_registration_valid(self): user_id = "@kermit:muppet" token = "kermits_access_token" - self.request.args = { - "access_token": "i_am_an_app_service" - } - self.request_data = json.dumps({ - "username": "kermit" - }) - self.appservice = { - "id": "1234" - } - self.registration_handler.appservice_register = Mock( - return_value=user_id - ) - self.auth_handler.get_access_token_for_user_id = Mock( - return_value=token + self.appservice = {"id": "1234"} + self.registration_handler.appservice_register = Mock(return_value=user_id) + self.auth_handler.get_access_token_for_user_id = Mock(return_value=token) + request_data = json.dumps({"username": "kermit"}) + + request, channel = make_request( + b"POST", self.url + b"?access_token=i_am_an_app_service", request_data ) + request.render(self.resource) + wait_until_result(self.clock, channel) - (code, result) = yield self.servlet.on_POST(self.request) - self.assertEquals(code, 200) + self.assertEquals(channel.result["code"], b"200", channel.result) det_data = { "user_id": user_id, "access_token": token, - "home_server": self.hs.hostname + "home_server": self.hs.hostname, } - self.assertDictContainsSubset(det_data, result) + self.assertDictContainsSubset(det_data, json.loads(channel.result["body"])) - @defer.inlineCallbacks def test_POST_appservice_registration_invalid(self): - self.request.args = { - "access_token": "i_am_an_app_service" - } - - self.request_data = json.dumps({ - "username": "kermit" - }) self.appservice = None # no application service exists - result = yield self.servlet.on_POST(self.request) - self.assertEquals(result, (401, None)) + request_data = json.dumps({"username": "kermit"}) + request, channel = make_request( + b"POST", self.url + b"?access_token=i_am_an_app_service", request_data + ) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEquals(channel.result["code"], b"401", channel.result) def test_POST_bad_password(self): - self.request_data = json.dumps({ - "username": "kermit", - "password": 666 - }) - d = self.servlet.on_POST(self.request) - return self.assertFailure(d, SynapseError) + request_data = json.dumps({"username": "kermit", "password": 666}) + request, channel = make_request(b"POST", self.url, request_data) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEquals(channel.result["code"], b"400", channel.result) + self.assertEquals( + json.loads(channel.result["body"])["error"], "Invalid password" + ) def test_POST_bad_username(self): - self.request_data = json.dumps({ - "username": 777, - "password": "monkey" - }) - d = self.servlet.on_POST(self.request) - return self.assertFailure(d, SynapseError) - - @defer.inlineCallbacks + request_data = json.dumps({"username": 777, "password": "monkey"}) + request, channel = make_request(b"POST", self.url, request_data) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEquals(channel.result["code"], b"400", channel.result) + self.assertEquals( + json.loads(channel.result["body"])["error"], "Invalid username" + ) + def test_POST_user_valid(self): user_id = "@kermit:muppet" token = "kermits_access_token" device_id = "frogfone" - self.request_data = json.dumps({ - "username": "kermit", - "password": "monkey", - "device_id": device_id, - }) + request_data = json.dumps( + {"username": "kermit", "password": "monkey", "device_id": device_id} + ) self.registration_handler.check_username = Mock(return_value=True) - self.auth_result = (None, { - "username": "kermit", - "password": "monkey" - }, None) + self.auth_result = (None, {"username": "kermit", "password": "monkey"}, None) self.registration_handler.register = Mock(return_value=(user_id, None)) - self.auth_handler.get_access_token_for_user_id = Mock( - return_value=token - ) - self.device_handler.check_device_registered = \ - Mock(return_value=device_id) + self.auth_handler.get_access_token_for_user_id = Mock(return_value=token) + self.device_handler.check_device_registered = Mock(return_value=device_id) + + request, channel = make_request(b"POST", self.url, request_data) + request.render(self.resource) + wait_until_result(self.clock, channel) - (code, result) = yield self.servlet.on_POST(self.request) - self.assertEquals(code, 200) det_data = { "user_id": user_id, "access_token": token, "home_server": self.hs.hostname, "device_id": device_id, } - self.assertDictContainsSubset(det_data, result) + self.assertEquals(channel.result["code"], b"200", channel.result) + self.assertDictContainsSubset(det_data, json.loads(channel.result["body"])) self.auth_handler.get_login_tuple_for_user_id( - user_id, device_id=device_id, initial_device_display_name=None) + user_id, device_id=device_id, initial_device_display_name=None + ) def test_POST_disabled_registration(self): self.hs.config.enable_registration = False - self.request_data = json.dumps({ - "username": "kermit", - "password": "monkey" - }) + request_data = json.dumps({"username": "kermit", "password": "monkey"}) self.registration_handler.check_username = Mock(return_value=True) - self.auth_result = (None, { - "username": "kermit", - "password": "monkey" - }, None) + self.auth_result = (None, {"username": "kermit", "password": "monkey"}, None) self.registration_handler.register = Mock(return_value=("@user:id", "t")) - d = self.servlet.on_POST(self.request) - return self.assertFailure(d, SynapseError) + + request, channel = make_request(b"POST", self.url, request_data) + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEquals(channel.result["code"], b"403", channel.result) + self.assertEquals( + json.loads(channel.result["body"])["error"], + "Registration has been disabled", + ) + + def test_POST_guest_registration(self): + user_id = "a@b" + self.hs.config.macaroon_secret_key = "test" + self.hs.config.allow_guest_access = True + self.registration_handler.register = Mock(return_value=(user_id, None)) + + request, channel = make_request(b"POST", self.url + b"?kind=guest", b"{}") + request.render(self.resource) + wait_until_result(self.clock, channel) + + det_data = { + "user_id": user_id, + "home_server": self.hs.hostname, + "device_id": "guest_device", + } + self.assertEquals(channel.result["code"], b"200", channel.result) + self.assertDictContainsSubset(det_data, json.loads(channel.result["body"])) + + def test_POST_disabled_guest_registration(self): + self.hs.config.allow_guest_access = False + + request, channel = make_request(b"POST", self.url + b"?kind=guest", b"{}") + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEquals(channel.result["code"], b"403", channel.result) + self.assertEquals( + json.loads(channel.result["body"])["error"], "Guest access is disabled" + ) diff --git a/tests/rest/client/v2_alpha/test_sync.py b/tests/rest/client/v2_alpha/test_sync.py new file mode 100644 index 0000000000..704cf97a40 --- /dev/null +++ b/tests/rest/client/v2_alpha/test_sync.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import synapse.types +from synapse.http.server import JsonResource +from synapse.rest.client.v2_alpha import sync +from synapse.types import UserID +from synapse.util import Clock + +from tests import unittest +from tests.server import ThreadedMemoryReactorClock as MemoryReactorClock +from tests.server import make_request, setup_test_homeserver, wait_until_result + +PATH_PREFIX = "/_matrix/client/v2_alpha" + + +class FilterTestCase(unittest.TestCase): + + USER_ID = b"@apple:test" + TO_REGISTER = [sync] + + def setUp(self): + self.clock = MemoryReactorClock() + self.hs_clock = Clock(self.clock) + + self.hs = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.clock + ) + + self.auth = self.hs.get_auth() + + def get_user_by_access_token(token=None, allow_guest=False): + return { + "user": UserID.from_string(self.USER_ID), + "token_id": 1, + "is_guest": False, + } + + def get_user_by_req(request, allow_guest=False, rights="access"): + return synapse.types.create_requester( + UserID.from_string(self.USER_ID), 1, False, None + ) + + self.auth.get_user_by_access_token = get_user_by_access_token + self.auth.get_user_by_req = get_user_by_req + + self.store = self.hs.get_datastore() + self.filtering = self.hs.get_filtering() + self.resource = JsonResource(self.hs) + + for r in self.TO_REGISTER: + r.register_servlets(self.hs, self.resource) + + def test_sync_argless(self): + request, channel = make_request(b"GET", b"/_matrix/client/r0/sync") + request.render(self.resource) + wait_until_result(self.clock, channel) + + self.assertEqual(channel.result["code"], b"200") + self.assertTrue( + set( + [ + "next_batch", + "rooms", + "presence", + "account_data", + "to_device", + "device_lists", + ] + ).issubset(set(channel.json_body.keys())) + ) diff --git a/tests/server.py b/tests/server.py index e93f5a7f94..c611dd6059 100644 --- a/tests/server.py +++ b/tests/server.py @@ -80,6 +80,11 @@ def make_request(method, path, content=b""): content, and return the Request and the Channel underneath. """ + # Decorate it to be the full path + if not path.startswith(b"/_matrix"): + path = b"/_matrix/client/r0/" + path + path = path.replace("//", "/") + if isinstance(content, text_type): content = content.encode('utf8') @@ -110,6 +115,11 @@ def wait_until_result(clock, channel, timeout=100): clock.advance(0.1) +def render(request, resource, clock): + request.render(resource) + wait_until_result(clock, request._channel) + + class ThreadedMemoryReactorClock(MemoryReactorClock): """ A MemoryReactorClock that supports callFromThread. diff --git a/tests/test_server.py b/tests/test_server.py index 4192013f6d..7e063c0290 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -33,9 +33,11 @@ class JsonResourceTests(unittest.TestCase): return (200, kwargs) res = JsonResource(self.homeserver) - res.register_paths("GET", [re.compile("^/foo/(?P[^/]*)$")], _callback) + res.register_paths( + "GET", [re.compile("^/_matrix/foo/(?P[^/]*)$")], _callback + ) - request, channel = make_request(b"GET", b"/foo/%E2%98%83?a=%E2%98%83") + request, channel = make_request(b"GET", b"/_matrix/foo/%E2%98%83?a=%E2%98%83") request.render(res) self.assertEqual(request.args, {b'a': [u"\N{SNOWMAN}".encode('utf8')]}) @@ -51,9 +53,9 @@ class JsonResourceTests(unittest.TestCase): raise Exception("boo") res = JsonResource(self.homeserver) - res.register_paths("GET", [re.compile("^/foo$")], _callback) + res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/foo") + request, channel = make_request(b"GET", b"/_matrix/foo") request.render(res) self.assertEqual(channel.result["code"], b'500') @@ -74,9 +76,9 @@ class JsonResourceTests(unittest.TestCase): return d res = JsonResource(self.homeserver) - res.register_paths("GET", [re.compile("^/foo$")], _callback) + res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/foo") + request, channel = make_request(b"GET", b"/_matrix/foo") request.render(res) # No error has been raised yet @@ -96,9 +98,9 @@ class JsonResourceTests(unittest.TestCase): raise SynapseError(403, "Forbidden!!one!", Codes.FORBIDDEN) res = JsonResource(self.homeserver) - res.register_paths("GET", [re.compile("^/foo$")], _callback) + res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/foo") + request, channel = make_request(b"GET", b"/_matrix/foo") request.render(res) self.assertEqual(channel.result["code"], b'403') @@ -118,9 +120,9 @@ class JsonResourceTests(unittest.TestCase): self.fail("shouldn't ever get here") res = JsonResource(self.homeserver) - res.register_paths("GET", [re.compile("^/foo$")], _callback) + res.register_paths("GET", [re.compile("^/_matrix/foo$")], _callback) - request, channel = make_request(b"GET", b"/foobar") + request, channel = make_request(b"GET", b"/_matrix/foobar") request.render(res) self.assertEqual(channel.result["code"], b'400') diff --git a/tests/unittest.py b/tests/unittest.py index b25f2db5d5..b15b06726b 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -109,6 +109,17 @@ class TestCase(unittest.TestCase): except AssertionError as e: raise (type(e))(e.message + " for '.%s'" % key) + def assert_dict(self, required, actual): + """Does a partial assert of a dict. + + Args: + required (dict): The keys and value which MUST be in 'actual'. + actual (dict): The test result. Extra keys will not be checked. + """ + for key in required: + self.assertEquals(required[key], actual[key], + msg="%s mismatch. %s" % (key, actual)) + def DEBUG(target): """A decorator to set the .loglevel attribute to logging.DEBUG. -- cgit 1.4.1 From 94440ae9946936339453925c249f81ea463da4d0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 11:51:26 +0100 Subject: fix imports --- synapse/visibility.py | 1 + tests/test_visibility.py | 1 + 2 files changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index 6209bc0cde..dc33f61d2b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -17,6 +17,7 @@ import logging import operator import six + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 1401d831d2..8436c29fe8 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -19,6 +19,7 @@ from twisted.internet.defer import succeed from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server + import tests.unittest from tests.utils import setup_test_homeserver -- cgit 1.4.1 From d897be6a98ff2073235dc1d356a517f085dbf388 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 15:22:27 +0100 Subject: Fix visibility of events from erased users over federation --- synapse/visibility.py | 123 ++++++++++++++++++++++++++--------------------- tests/test_visibility.py | 63 ++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 56 deletions(-) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index dc33f61d2b..202daa6800 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -232,7 +232,57 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks def filter_events_for_server(store, server_name, events): - # First lets check to see if all the events have a history visibility + # Whatever else we do, we need to check for senders which have requested + # erasure of their data. + erased_senders = yield store.are_users_erased( + e.sender for e in events, + ) + + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. + for ev in state.itervalues(): + if ev.type != EventTypes.Member: + continue + try: + domain = get_domain_from_id(ev.state_key) + except Exception: + continue + + if domain != server_name: + continue + + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + # server has no users in the room: redact + return prune_event(event) + + return event + + # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( @@ -251,15 +301,24 @@ def filter_events_for_server(store, server_name, events): # If we failed to find any history visibility events then the default # is "shared" visiblity. if not visibility_ids: - defer.returnValue(events) - - event_map = yield store.get_events(visibility_ids) - all_open = all( - e.content.get("history_visibility") in (None, "shared", "world_readable") - for e in event_map.itervalues() - ) + all_open = True + else: + event_map = yield store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) if all_open: + # all the history_visibility state affecting these events is open, so + # we don't need to filter by membership state. We *do* need to check + # for user erasure, though. + if erased_senders: + events = [ + redact_disallowed(e, None) + for e in events + ] + defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -314,54 +373,6 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in event_to_state_ids.iteritems() } - erased_senders = yield store.are_users_erased( - e.sender for e in events, - ) - - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: - logger.info( - "Sender of %s has been erased, redacting", - event.event_id, - ) - return prune_event(event) - - if not state: - return event - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - if visibility in ["invited", "joined"]: - # We now loop through all state events looking for - # membership states for the requesting server to determine - # if the server is either in the room or has been invited - # into the room. - for ev in state.itervalues(): - if ev.type != EventTypes.Member: - continue - try: - domain = get_domain_from_id(ev.state_key) - except Exception: - continue - - if domain != server_name: - continue - - memtype = ev.membership - if memtype == Membership.JOIN: - return event - elif memtype == Membership.INVITE: - if visibility == "invited": - return event - else: - # server has no users in the room: redact - return prune_event(event) - - return event - defer.returnValue([ redact_disallowed(e, event_to_state[e.event_id]) for e in events diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 8436c29fe8..0dc1a924d3 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -73,6 +73,51 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) self.assertEqual(filtered[i].content["a"], "b") + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_erased_user(self): + # 4 message events, from erased and unerased users, with a membership + # change in the middle of them. + events_to_filter = [] + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_room_member("@joiner:remote_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + # the erasey user gets erased + self.hs.get_datastore().mark_user_erased("@erased:local_hs") + + # ... and the filtering happens. + filtered = yield filter_events_for_server( + self.store, "test_server", events_to_filter, + ) + + for i in range(0, len(events_to_filter)): + self.assertEqual( + events_to_filter[i].event_id, filtered[i].event_id, + "Unexpected event at result position %i" % (i, ) + ) + + for i in (0, 3): + self.assertEqual( + events_to_filter[i].content["body"], filtered[i].content["body"], + "Unexpected event content at result position %i" % (i,) + ) + + for i in (1, 4): + self.assertNotIn("body", filtered[i].content) + @defer.inlineCallbacks def inject_visibility(self, user_id, visibility): content = {"history_visibility": visibility} @@ -109,6 +154,24 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): yield self.hs.get_datastore().persist_event(event, context) defer.returnValue(event) + @defer.inlineCallbacks + def inject_message(self, user_id, content=None): + if content is None: + content = {"body": "testytest"} + builder = self.event_builder_factory.new({ + "type": "m.room.message", + "sender": user_id, + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) + @defer.inlineCallbacks def test_large_room(self): # see what happens when we have a large room with hundreds of thousands -- cgit 1.4.1 From 79eb339c662f47b2a525d303de941c67f52f0e21 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 14:53:34 +0100 Subject: add a comment --- synapse/visibility.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index 202daa6800..9b97ea2b83 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -248,6 +248,8 @@ def filter_events_for_server(store, server_name, events): ) return prune_event(event) + # state will be None if we decided we didn't need to filter by + # room membership. if not state: return event -- cgit 1.4.1 From 5f3d02f6ebbce8fbbb2a71ef1ea7f5537257ff15 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 18 Jul 2018 12:52:56 +1000 Subject: bump to 0.33.0rc1 --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index 3cde33c0d7..6dc16f0808 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.32.2" +__version__ = "0.33.0rc1" -- cgit 1.4.1 From 6e3fc657b4c8db10e872ba1c7264e82b7fed5ed0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 18 Jul 2018 10:50:33 +0100 Subject: Resource tracking for background processes This introduces a mechanism for tracking resource usage by background processes, along with an example of how it will be used. This will help address #3518, but more importantly will give us better insights into things which are happening but not being shown up by the request metrics. We *could* do this with Measure blocks, but: - I think having them pulled out as a completely separate metric class will make it easier to distinguish top-level processes from those which are nested. - I want to be able to report on in-flight background processes, and I don't think we want to do this for *all* Measure blocks. --- synapse/federation/transaction_queue.py | 12 +- synapse/metrics/background_process_metrics.py | 179 ++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 synapse/metrics/background_process_metrics.py (limited to 'synapse') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 5a956ecfb3..5c5a73b73c 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -30,7 +30,8 @@ from synapse.metrics import ( sent_edus_counter, sent_transactions_counter, ) -from synapse.util import PreserveLoggingContext, logcontext +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.util import logcontext from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter @@ -165,10 +166,11 @@ class TransactionQueue(object): if self._is_processing: return - # fire off a processing loop in the background. It's likely it will - # outlast the current request, so run it in the sentinel logcontext. - with PreserveLoggingContext(): - self._process_event_queue_loop() + # fire off a processing loop in the background + run_as_background_process( + "process_transaction_queue", + self._process_event_queue_loop, + ) @defer.inlineCallbacks def _process_event_queue_loop(self): diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py new file mode 100644 index 0000000000..9d820e44a6 --- /dev/null +++ b/synapse/metrics/background_process_metrics.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six + +from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily + +from twisted.internet import defer + +from synapse.util.logcontext import LoggingContext, PreserveLoggingContext + +_background_process_start_count = Counter( + "synapse_background_process_start_count", + "Number of background processes started", + ["name"], +) + +# we set registry=None in all of these to stop them getting registered with +# the default registry. Instead we collect them all via the CustomCollector, +# which ensures that we can update them before they are collected. +# +_background_process_ru_utime = Counter( + "synapse_background_process_ru_utime_seconds", + "User CPU time used by background processes, in seconds", + ["name"], + registry=None, +) + +_background_process_ru_stime = Counter( + "synapse_background_process_ru_stime_seconds", + "System CPU time used by background processes, in seconds", + ["name"], + registry=None, +) + +_background_process_db_txn_count = Counter( + "synapse_background_process_db_txn_count", + "Number of database transactions done by background processes", + ["name"], + registry=None, +) + +_background_process_db_txn_duration = Counter( + "synapse_background_process_db_txn_duration_seconds", + ("Seconds spent by background processes waiting for database " + "transactions, excluding scheduling time"), + ["name"], + registry=None, +) + +_background_process_db_sched_duration = Counter( + "synapse_background_process_db_sched_duration_seconds", + "Seconds spent by background processes waiting for database connections", + ["name"], + registry=None, +) + +# map from description to a counter, so that we can name our logcontexts +# incrementally. (It actually duplicates _background_process_start_count, but +# it's much simpler to do so than to try to combine them.) +_background_process_counts = dict() # type: dict[str, int] + +# map from description to the currently running background processes. +# +# it's kept as a dict of sets rather than a big set so that we can keep track +# of process descriptions that no longer have any active processes. +_background_processes = dict() # type: dict[str, set[_BackgroundProcess]] + + +class _Collector(object): + """A custom metrics collector for the background process metrics. + + Ensures that all of the metrics are up-to-date with any in-flight processes + before they are returned. + """ + def collect(self): + background_process_in_flight_count = GaugeMetricFamily( + "synapse_background_process_in_flight_count", + "Number of background processes in flight", + labels=["name"], + ) + + for desc, processes in six.iteritems(_background_processes): + background_process_in_flight_count.add_metric( + (desc,), len(processes), + ) + for process in processes: + process.update_metrics() + + yield background_process_in_flight_count + + # now we need to run collect() over each of the static Counters, and + # yield each metric they return. + for m in ( + _background_process_ru_utime, + _background_process_ru_stime, + _background_process_db_txn_count, + _background_process_db_txn_duration, + _background_process_db_sched_duration, + ): + for r in m.collect(): + yield r + + +REGISTRY.register(_Collector()) + + +class _BackgroundProcess(object): + def __init__(self, desc, ctx): + self.desc = desc + self._context = ctx + self._reported_stats = None + + def update_metrics(self): + """Updates the metrics with values from this process.""" + new_stats = self._context.get_resource_usage() + if self._reported_stats is None: + diff = new_stats + else: + diff = new_stats - self._reported_stats + self._reported_stats = new_stats + + _background_process_ru_utime.labels(self.desc).inc(diff.ru_utime) + _background_process_ru_stime.labels(self.desc).inc(diff.ru_stime) + _background_process_db_txn_count.labels(self.desc).inc( + diff.db_txn_count, + ) + _background_process_db_txn_duration.labels(self.desc).inc( + diff.db_txn_duration_sec, + ) + _background_process_db_sched_duration.labels(self.desc).inc( + diff.db_sched_duration_sec, + ) + + +def run_as_background_process(desc, func, *args, **kwargs): + """Run the given function in its own logcontext, with resource metrics + + This should be used to wrap processes which are fired off to run in the + background, instead of being associated with a particular request. + + Args: + desc (str): a description for this background process type + func: a function, which may return a Deferred + args: positional args for func + kwargs: keyword args for func + + Returns: None + """ + @defer.inlineCallbacks + def run(): + count = _background_process_counts.get(desc, 0) + _background_process_counts[desc] = count + 1 + _background_process_start_count.labels(desc).inc() + + with LoggingContext(desc) as context: + context.request = "%s-%i" % (desc, count) + proc = _BackgroundProcess(desc, context) + _background_processes.setdefault(desc, set()).add(proc) + try: + yield func(*args, **kwargs) + finally: + proc.update_metrics() + _background_processes[desc].remove(proc) + + with PreserveLoggingContext(): + run() -- cgit 1.4.1 From e45a46b6e4990befe6567aaf26354320dce4337a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Jul 2018 13:59:36 +0100 Subject: Add response code to response timer metrics --- synapse/http/request_metrics.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index f24b4b949c..c029a69ee5 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -38,7 +38,7 @@ outgoing_responses_counter = Counter( ) response_timer = Histogram( - "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"] + "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag", "code"] ) response_ru_utime = Counter( @@ -171,11 +171,13 @@ class RequestMetrics(object): ) return - outgoing_responses_counter.labels(request.method, str(request.code)).inc() + response_code = str(request.code) + + outgoing_responses_counter.labels(request.method, response_code).inc() response_count.labels(request.method, self.name, tag).inc() - response_timer.labels(request.method, self.name, tag).observe( + response_timer.labels(request.method, self.name, tag, response_code).observe( time_sec - self.start ) -- cgit 1.4.1 From 5bd0a47fcdea7dada06dc5d42e4340c8748386e8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Jul 2018 14:19:00 +0100 Subject: pep8 --- synapse/http/request_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index c029a69ee5..588e280571 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -38,7 +38,8 @@ outgoing_responses_counter = Counter( ) response_timer = Histogram( - "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag", "code"] + "synapse_http_server_response_time_seconds", "sec", + ["method", "servlet", "tag", "code"], ) response_ru_utime = Counter( -- cgit 1.4.1 From 8cb8df55e9bb5de27d9e6570441560eb81db36df Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Jul 2018 15:22:02 +0100 Subject: Split MessageHandler into read only and writers This will let us call the read only parts from workers, and so be able to move some APIs off of master, e.g. the `/state` API. --- synapse/handlers/__init__.py | 2 - synapse/handlers/message.py | 281 +++++++++++++++++++++------------------- synapse/rest/client/v1/admin.py | 8 +- synapse/rest/client/v1/room.py | 20 ++- synapse/server.py | 14 +- 5 files changed, 176 insertions(+), 149 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 4b9923d8c0..0bad3e0a2e 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,6 @@ from .admin import AdminHandler from .directory import DirectoryHandler from .federation import FederationHandler from .identity import IdentityHandler -from .message import MessageHandler from .register import RegistrationHandler from .room import RoomContextHandler from .search import SearchHandler @@ -44,7 +43,6 @@ class Handlers(object): def __init__(self, hs): self.registration_handler = RegistrationHandler(hs) - self.message_handler = MessageHandler(hs) self.federation_handler = FederationHandler(hs) self.directory_handler = DirectoryHandler(hs) self.admin_handler = AdminHandler(hs) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a39b852ceb..3c6f9860d5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -75,12 +75,159 @@ class PurgeStatus(object): } -class MessageHandler(BaseHandler): +class MessageHandler(object): + """Contains some read only APIs to get state about a room + """ def __init__(self, hs): - super(MessageHandler, self).__init__(hs) - self.hs = hs + self.auth = hs.get_auth() + self.clock = hs.get_clock() self.state = hs.get_state_handler() + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def get_room_data(self, user_id=None, room_id=None, + event_type=None, state_key="", is_guest=False): + """ Get data from a room. + + Args: + event : The room path event + Returns: + The path data content. + Raises: + SynapseError if something went wrong. + """ + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id + ) + + if membership == Membership.JOIN: + data = yield self.state.get_current_state( + room_id, event_type, state_key + ) + elif membership == Membership.LEAVE: + key = (event_type, state_key) + room_state = yield self.store.get_state_for_events( + [membership_event_id], [key] + ) + data = room_state[membership_event_id].get(key) + + defer.returnValue(data) + + @defer.inlineCallbacks + def _check_in_room_or_world_readable(self, room_id, user_id): + try: + # check_user_was_in_room will return the most recent membership + # event for the user if: + # * The user is a non-guest user, and was ever in the room + # * The user is a guest user, and has joined the room + # else it will throw. + member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + defer.returnValue((member_event.membership, member_event.event_id)) + return + except AuthError: + visibility = yield self.state.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if ( + visibility and + visibility.content["history_visibility"] == "world_readable" + ): + defer.returnValue((Membership.JOIN, None)) + return + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) + + @defer.inlineCallbacks + def get_state_events(self, user_id, room_id, is_guest=False): + """Retrieve all state events for a given room. If the user is + joined to the room then return the current state. If the user has + left the room return the state events from when they left. + + Args: + user_id(str): The user requesting state events. + room_id(str): The room ID to get all state events from. + Returns: + A list of dicts representing state events. [{}, {}, {}] + """ + membership, membership_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id + ) + + if membership == Membership.JOIN: + room_state = yield self.state.get_current_state(room_id) + elif membership == Membership.LEAVE: + room_state = yield self.store.get_state_for_events( + [membership_event_id], None + ) + room_state = room_state[membership_event_id] + + now = self.clock.time_msec() + defer.returnValue( + [serialize_event(c, now) for c in room_state.values()] + ) + + @defer.inlineCallbacks + def get_joined_members(self, requester, room_id): + """Get all the joined members in the room and their profile information. + + If the user has left the room return the state events from when they left. + + Args: + requester(Requester): The user requesting state events. + room_id(str): The room ID to get all state events from. + Returns: + A dict of user_id to profile info + """ + user_id = requester.user.to_string() + if not requester.app_service: + # We check AS auth after fetching the room membership, as it + # requires us to pull out all joined members anyway. + membership, _ = yield self._check_in_room_or_world_readable( + room_id, user_id + ) + if membership != Membership.JOIN: + raise NotImplementedError( + "Getting joined members after leaving is not implemented" + ) + + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + # If this is an AS, double check that they are allowed to see the members. + # This can either be because the AS user is in the room or because there + # is a user in the room that the AS is "interested in" + if requester.app_service and user_id not in users_with_profile: + for uid in users_with_profile: + if requester.app_service.is_interested_in_user(uid): + break + else: + # Loop fell through, AS has no interested users in room + raise AuthError(403, "Appservice not in room") + + defer.returnValue({ + user_id: { + "avatar_url": profile.avatar_url, + "display_name": profile.display_name, + } + for user_id, profile in iteritems(users_with_profile) + }) + + +class PaginationHandler(MessageHandler): + """Handles pagination and purge history requests. + + These are in the same handler due to the fact we need to block clients + paginating during a purge. + + This subclasses MessageHandler to get at _check_in_room_or_world_readable + """ + + def __init__(self, hs): + super(PaginationHandler, self).__init__(hs) + + self.hs = hs + self.store = hs.get_datastore() self.clock = hs.get_clock() self.pagination_lock = ReadWriteLock() @@ -274,134 +421,6 @@ class MessageHandler(BaseHandler): defer.returnValue(chunk) - @defer.inlineCallbacks - def get_room_data(self, user_id=None, room_id=None, - event_type=None, state_key="", is_guest=False): - """ Get data from a room. - - Args: - event : The room path event - Returns: - The path data content. - Raises: - SynapseError if something went wrong. - """ - membership, membership_event_id = yield self._check_in_room_or_world_readable( - room_id, user_id - ) - - if membership == Membership.JOIN: - data = yield self.state_handler.get_current_state( - room_id, event_type, state_key - ) - elif membership == Membership.LEAVE: - key = (event_type, state_key) - room_state = yield self.store.get_state_for_events( - [membership_event_id], [key] - ) - data = room_state[membership_event_id].get(key) - - defer.returnValue(data) - - @defer.inlineCallbacks - def _check_in_room_or_world_readable(self, room_id, user_id): - try: - # check_user_was_in_room will return the most recent membership - # event for the user if: - # * The user is a non-guest user, and was ever in the room - # * The user is a guest user, and has joined the room - # else it will throw. - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) - defer.returnValue((member_event.membership, member_event.event_id)) - return - except AuthError: - visibility = yield self.state_handler.get_current_state( - room_id, EventTypes.RoomHistoryVisibility, "" - ) - if ( - visibility and - visibility.content["history_visibility"] == "world_readable" - ): - defer.returnValue((Membership.JOIN, None)) - return - raise AuthError( - 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN - ) - - @defer.inlineCallbacks - def get_state_events(self, user_id, room_id, is_guest=False): - """Retrieve all state events for a given room. If the user is - joined to the room then return the current state. If the user has - left the room return the state events from when they left. - - Args: - user_id(str): The user requesting state events. - room_id(str): The room ID to get all state events from. - Returns: - A list of dicts representing state events. [{}, {}, {}] - """ - membership, membership_event_id = yield self._check_in_room_or_world_readable( - room_id, user_id - ) - - if membership == Membership.JOIN: - room_state = yield self.state_handler.get_current_state(room_id) - elif membership == Membership.LEAVE: - room_state = yield self.store.get_state_for_events( - [membership_event_id], None - ) - room_state = room_state[membership_event_id] - - now = self.clock.time_msec() - defer.returnValue( - [serialize_event(c, now) for c in room_state.values()] - ) - - @defer.inlineCallbacks - def get_joined_members(self, requester, room_id): - """Get all the joined members in the room and their profile information. - - If the user has left the room return the state events from when they left. - - Args: - requester(Requester): The user requesting state events. - room_id(str): The room ID to get all state events from. - Returns: - A dict of user_id to profile info - """ - user_id = requester.user.to_string() - if not requester.app_service: - # We check AS auth after fetching the room membership, as it - # requires us to pull out all joined members anyway. - membership, _ = yield self._check_in_room_or_world_readable( - room_id, user_id - ) - if membership != Membership.JOIN: - raise NotImplementedError( - "Getting joined members after leaving is not implemented" - ) - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - - # If this is an AS, double check that they are allowed to see the members. - # This can either be because the AS user is in the room or because there - # is a user in the room that the AS is "interested in" - if requester.app_service and user_id not in users_with_profile: - for uid in users_with_profile: - if requester.app_service.is_interested_in_user(uid): - break - else: - # Loop fell through, AS has no interested users in room - raise AuthError(403, "Appservice not in room") - - defer.returnValue({ - user_id: { - "avatar_url": profile.avatar_url, - "display_name": profile.display_name, - } - for user_id, profile in iteritems(users_with_profile) - }) - class EventCreationHandler(object): def __init__(self, hs): diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2dc50e582b..13fd63a5b2 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -123,7 +123,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): hs (synapse.server.HomeServer) """ super(PurgeHistoryRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.pagination_handler = hs.get_pagination_handler() self.store = hs.get_datastore() @defer.inlineCallbacks @@ -198,7 +198,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.BAD_JSON, ) - purge_id = yield self.handlers.message_handler.start_purge_history( + purge_id = yield self.pagination_handler.start_purge_history( room_id, token, delete_local_events=delete_local_events, ) @@ -220,7 +220,7 @@ class PurgeHistoryStatusRestServlet(ClientV1RestServlet): hs (synapse.server.HomeServer) """ super(PurgeHistoryStatusRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.pagination_handler = hs.get_pagination_handler() @defer.inlineCallbacks def on_GET(self, request, purge_id): @@ -230,7 +230,7 @@ class PurgeHistoryStatusRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - purge_status = self.handlers.message_handler.get_purge_status(purge_id) + purge_status = self.pagination_handler.get_purge_status(purge_id) if purge_status is None: raise NotFoundError("purge id '%s' not found" % purge_id) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 3d62447854..8b6be9da96 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -90,6 +90,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() + self.message_handler = hs.get_message_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -124,7 +125,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): format = parse_string(request, "format", default="content", allowed_values=["content", "event"]) - msg_handler = self.handlers.message_handler + msg_handler = self.message_handler data = yield msg_handler.get_room_data( user_id=requester.user.to_string(), room_id=room_id, @@ -377,14 +378,13 @@ class RoomMemberListRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomMemberListRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.message_handler = hs.get_message_handler() @defer.inlineCallbacks def on_GET(self, request, room_id): # TODO support Pagination stream API (limit/tokens) requester = yield self.auth.get_user_by_req(request) - handler = self.handlers.message_handler - events = yield handler.get_state_events( + events = yield self.message_handler.get_state_events( room_id=room_id, user_id=requester.user.to_string(), ) @@ -406,7 +406,7 @@ class JoinedRoomMemberListRestServlet(ClientV1RestServlet): def __init__(self, hs): super(JoinedRoomMemberListRestServlet, self).__init__(hs) - self.message_handler = hs.get_handlers().message_handler + self.message_handler = hs.get_message_handler() @defer.inlineCallbacks def on_GET(self, request, room_id): @@ -427,7 +427,7 @@ class RoomMessageListRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomMessageListRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.pagination_handler = hs.get_pagination_handler() @defer.inlineCallbacks def on_GET(self, request, room_id): @@ -442,8 +442,7 @@ class RoomMessageListRestServlet(ClientV1RestServlet): event_filter = Filter(json.loads(filter_json)) else: event_filter = None - handler = self.handlers.message_handler - msgs = yield handler.get_messages( + msgs = yield self.pagination_handler.get_messages( room_id=room_id, requester=requester, pagin_config=pagination_config, @@ -460,14 +459,13 @@ class RoomStateRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.message_handler = hs.get_message_handler() @defer.inlineCallbacks def on_GET(self, request, room_id): requester = yield self.auth.get_user_by_req(request, allow_guest=True) - handler = self.handlers.message_handler # Get all the current state for this room - events = yield handler.get_state_events( + events = yield self.message_handler.get_state_events( room_id=room_id, user_id=requester.user.to_string(), is_guest=requester.is_guest, diff --git a/synapse/server.py b/synapse/server.py index 92bea96c5c..b93bd6d7d9 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -52,7 +52,11 @@ from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.events import EventHandler, EventStreamHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.initial_sync import InitialSyncHandler -from synapse.handlers.message import EventCreationHandler +from synapse.handlers.message import ( + EventCreationHandler, + MessageHandler, + PaginationHandler, +) from synapse.handlers.presence import PresenceHandler from synapse.handlers.profile import ProfileHandler from synapse.handlers.read_marker import ReadMarkerHandler @@ -163,6 +167,8 @@ class HomeServer(object): 'federation_registry', 'server_notices_manager', 'server_notices_sender', + 'message_handler', + 'pagination_handler', ] def __init__(self, hostname, reactor=None, **kwargs): @@ -426,6 +432,12 @@ class HomeServer(object): return WorkerServerNoticesSender(self) return ServerNoticesSender(self) + def build_message_handler(self): + return MessageHandler(self) + + def build_pagination_handler(self): + return PaginationHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) -- cgit 1.4.1 From bacdf0cbf9fdbf9bbab2420b86308830ac4e4592 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Jul 2018 15:29:45 +0100 Subject: Move RoomContextHandler out of Handlers This is in preparation for moving GET /context/ to a worker --- synapse/handlers/__init__.py | 2 -- synapse/handlers/room.py | 6 +++++- synapse/rest/client/v1/room.py | 4 ++-- synapse/server.py | 6 +++++- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 0bad3e0a2e..413425fed1 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -18,7 +18,6 @@ from .directory import DirectoryHandler from .federation import FederationHandler from .identity import IdentityHandler from .register import RegistrationHandler -from .room import RoomContextHandler from .search import SearchHandler @@ -48,4 +47,3 @@ class Handlers(object): self.admin_handler = AdminHandler(hs) self.identity_handler = IdentityHandler(hs) self.search_handler = SearchHandler(hs) - self.room_context_handler = RoomContextHandler(hs) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index f67512078b..6150b7e226 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -395,7 +395,11 @@ class RoomCreationHandler(BaseHandler): ) -class RoomContextHandler(BaseHandler): +class RoomContextHandler(object): + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + @defer.inlineCallbacks def get_event_context(self, user, room_id, event_id, limit): """Retrieves events, pagination tokens and state around a given event diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 8b6be9da96..2c9459534e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -523,7 +523,7 @@ class RoomEventContextServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomEventContextServlet, self).__init__(hs) self.clock = hs.get_clock() - self.handlers = hs.get_handlers() + self.room_context_handler = hs.get_room_context_handler() @defer.inlineCallbacks def on_GET(self, request, room_id, event_id): @@ -531,7 +531,7 @@ class RoomEventContextServlet(ClientV1RestServlet): limit = parse_integer(request, "limit", default=10) - results = yield self.handlers.room_context_handler.get_event_context( + results = yield self.room_context_handler.get_event_context( requester.user, room_id, event_id, diff --git a/synapse/server.py b/synapse/server.py index b93bd6d7d9..a24ea158df 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -61,7 +61,7 @@ from synapse.handlers.presence import PresenceHandler from synapse.handlers.profile import ProfileHandler from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.receipts import ReceiptsHandler -from synapse.handlers.room import RoomCreationHandler +from synapse.handlers.room import RoomContextHandler, RoomCreationHandler from synapse.handlers.room_list import RoomListHandler from synapse.handlers.room_member import RoomMemberMasterHandler from synapse.handlers.room_member_worker import RoomMemberWorkerHandler @@ -169,6 +169,7 @@ class HomeServer(object): 'server_notices_sender', 'message_handler', 'pagination_handler', + 'room_context_handler', ] def __init__(self, hostname, reactor=None, **kwargs): @@ -438,6 +439,9 @@ class HomeServer(object): def build_pagination_handler(self): return PaginationHandler(self) + def build_room_context_handler(self): + return RoomContextHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) -- cgit 1.4.1 From 9b596177aeb12211e86cce27aa409bc596909ed0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Jul 2018 15:30:43 +0100 Subject: Add some room read only APIs to client_reader --- synapse/app/client_reader.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index b0ea26dcb4..398bb36602 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -40,7 +40,13 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.tcp.client import ReplicationClientHandler -from synapse.rest.client.v1.room import PublicRoomListRestServlet +from synapse.rest.client.v1.room import ( + JoinedRoomMemberListRestServlet, + PublicRoomListRestServlet, + RoomEventContextServlet, + RoomMemberListRestServlet, + RoomStateRestServlet, +) from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -82,7 +88,13 @@ class ClientReaderServer(HomeServer): resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) elif name == "client": resource = JsonResource(self, canonical_json=False) + PublicRoomListRestServlet(self).register(resource) + RoomMemberListRestServlet(self).register(resource) + JoinedRoomMemberListRestServlet(self).register(resource) + RoomStateRestServlet(self).register(resource) + RoomEventContextServlet(self).register(resource) + resources.update({ "/_matrix/client/r0": resource, "/_matrix/client/unstable": resource, -- cgit 1.4.1 From 667fba68f3ca808f48143a2a739a54665b0162c6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 18 Jul 2018 14:35:24 +0100 Subject: Run things as background processes This fixes #3518, and ensures that we get useful logs and metrics for lots of things that happen in the background. (There are certainly more things that happen in the background; these are just the common ones I've found running a single-process synapse locally). --- synapse/federation/transaction_queue.py | 15 ++++++--------- synapse/storage/background_updates.py | 10 ++++++++-- synapse/storage/client_ips.py | 15 +++++++++++---- synapse/storage/events.py | 10 ++++------ synapse/storage/events_worker.py | 10 ++++++---- synapse/util/caches/expiringcache.py | 6 +++++- synapse/util/distributor.py | 4 ++++ 7 files changed, 44 insertions(+), 26 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 5c5a73b73c..6996d6b695 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -168,7 +168,7 @@ class TransactionQueue(object): # fire off a processing loop in the background run_as_background_process( - "process_transaction_queue", + "process_event_queue_for_federation", self._process_event_queue_loop, ) @@ -434,14 +434,11 @@ class TransactionQueue(object): logger.debug("TX [%s] Starting transaction loop", destination) - # Drop the logcontext before starting the transaction. It doesn't - # really make sense to log all the outbound transactions against - # whatever path led us to this point: that's pretty arbitrary really. - # - # (this also means we can fire off _perform_transaction without - # yielding) - with logcontext.PreserveLoggingContext(): - self._transaction_transmission_loop(destination) + run_as_background_process( + "federation_transaction_transmission_loop", + self._transaction_transmission_loop, + destination, + ) @defer.inlineCallbacks def _transaction_transmission_loop(self, destination): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index dc9eca7d15..5fe1ca2de7 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -19,6 +19,8 @@ from canonicaljson import json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process + from . import engines from ._base import SQLBaseStore @@ -87,10 +89,14 @@ class BackgroundUpdateStore(SQLBaseStore): self._background_update_handlers = {} self._all_done = False - @defer.inlineCallbacks def start_doing_background_updates(self): - logger.info("Starting background schema updates") + run_as_background_process( + "background_updates", self._run_background_updates, + ) + @defer.inlineCallbacks + def _run_background_updates(self): + logger.info("Starting background schema updates") while True: yield self.hs.get_clock().sleep( self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index b78eda3413..77ae10da3d 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,6 +19,7 @@ from six import iteritems from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -93,10 +94,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) def _update_client_ips_batch(self): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + def update(): + to_update = self._batch_row_update + self._batch_row_update = {} + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, + to_update, + ) + + run_as_background_process( + "update_client_ips", update, ) def _update_client_ips_batch_txn(self, txn, to_update): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2aaab0d02c..4ff0fdc4ab 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -33,12 +33,13 @@ from synapse.api.errors import SynapseError # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.events_worker import EventsWorkerStore from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util.async import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.frozenutils import frozendict_json_encoder -from synapse.util.logcontext import PreserveLoggingContext, make_deferred_yieldable +from synapse.util.logcontext import make_deferred_yieldable from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -155,11 +156,8 @@ class _EventPeristenceQueue(object): self._event_persist_queues[room_id] = queue self._currently_persisting_rooms.discard(room_id) - # set handle_queue_loop off on the background. We don't want to - # attribute work done in it to the current request, so we drop the - # logcontext altogether. - with PreserveLoggingContext(): - handle_queue_loop() + # set handle_queue_loop off in the background + run_as_background_process("persist_events", handle_queue_loop) def _get_drainining_queue(self, room_id): queue = self._event_persist_queues.setdefault(room_id, deque()) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 67433606c6..f28239a808 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -25,6 +25,7 @@ from synapse.events import EventBase # noqa: F401 from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.logcontext import ( LoggingContext, PreserveLoggingContext, @@ -322,10 +323,11 @@ class EventsWorkerStore(SQLBaseStore): should_start = False if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) + run_as_background_process( + "fetch_events", + self.runWithConnection, + self._do_fetch, + ) logger.debug("Loading %d events", len(events)) with PreserveLoggingContext(): diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 4abca91f6d..465adc54a8 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -16,6 +16,7 @@ import logging from collections import OrderedDict +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache logger = logging.getLogger(__name__) @@ -63,7 +64,10 @@ class ExpiringCache(object): return def f(): - self._prune_cache() + run_as_background_process( + "prune_cache_%s" % self._cache_name, + self._prune_cache, + ) self._clock.looping_call(f, self._expiry_ms / 2) diff --git a/synapse/util/distributor.py b/synapse/util/distributor.py index 734331caaa..d91ae400eb 100644 --- a/synapse/util/distributor.py +++ b/synapse/util/distributor.py @@ -75,6 +75,10 @@ class Distributor(object): self.pre_registration[name].append(observer) def fire(self, name, *args, **kwargs): + """Dispatches the given signal to the registered observers. + + Runs the observers as a background process. Does not return a deferred. + """ if name not in self.signals: raise KeyError("%r does not have a signal named %s" % (self, name)) -- cgit 1.4.1 From 8c69b735e3186875a8bf676241f990470fb7c592 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 18 Jul 2018 15:33:13 +0100 Subject: Make Distributor run its processes as a background process This is more involved than it might otherwise be, because the current implementation just drops its logcontexts and runs everything in the sentinel context. It turns out that we aren't actually using a bunch of the functionality here (notably suppress_failures and the fact that Distributor.fire returns a deferred), so the easiest way to fix this is actually by simplifying a bunch of code. --- synapse/util/distributor.py | 44 +++++++++++++++-------------------- tests/test_distributor.py | 56 ++++----------------------------------------- 2 files changed, 22 insertions(+), 78 deletions(-) (limited to 'synapse') diff --git a/synapse/util/distributor.py b/synapse/util/distributor.py index d91ae400eb..194da87639 100644 --- a/synapse/util/distributor.py +++ b/synapse/util/distributor.py @@ -17,20 +17,18 @@ import logging from twisted.internet import defer -from synapse.util import unwrapFirstError -from synapse.util.logcontext import PreserveLoggingContext +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.util.logcontext import make_deferred_yieldable, run_in_background logger = logging.getLogger(__name__) def user_left_room(distributor, user, room_id): - with PreserveLoggingContext(): - distributor.fire("user_left_room", user=user, room_id=room_id) + distributor.fire("user_left_room", user=user, room_id=room_id) def user_joined_room(distributor, user, room_id): - with PreserveLoggingContext(): - distributor.fire("user_joined_room", user=user, room_id=room_id) + distributor.fire("user_joined_room", user=user, room_id=room_id) class Distributor(object): @@ -44,9 +42,7 @@ class Distributor(object): model will do for today. """ - def __init__(self, suppress_failures=True): - self.suppress_failures = suppress_failures - + def __init__(self): self.signals = {} self.pre_registration = {} @@ -56,7 +52,6 @@ class Distributor(object): self.signals[name] = Signal( name, - suppress_failures=self.suppress_failures, ) if name in self.pre_registration: @@ -82,7 +77,11 @@ class Distributor(object): if name not in self.signals: raise KeyError("%r does not have a signal named %s" % (self, name)) - return self.signals[name].fire(*args, **kwargs) + run_as_background_process( + name, + self.signals[name].fire, + *args, **kwargs + ) class Signal(object): @@ -95,9 +94,8 @@ class Signal(object): method into all of the observers. """ - def __init__(self, name, suppress_failures): + def __init__(self, name): self.name = name - self.suppress_failures = suppress_failures self.observers = [] def observe(self, observer): @@ -107,7 +105,6 @@ class Signal(object): Each observer callable may return a Deferred.""" self.observers.append(observer) - @defer.inlineCallbacks def fire(self, *args, **kwargs): """Invokes every callable in the observer list, passing in the args and kwargs. Exceptions thrown by observers are logged but ignored. It is @@ -125,22 +122,17 @@ class Signal(object): failure.type, failure.value, failure.getTracebackObject())) - if not self.suppress_failures: - return failure return defer.maybeDeferred(observer, *args, **kwargs).addErrback(eb) - with PreserveLoggingContext(): - deferreds = [ - do(observer) - for observer in self.observers - ] - - res = yield defer.gatherResults( - deferreds, consumeErrors=True - ).addErrback(unwrapFirstError) + deferreds = [ + run_in_background(do, o) + for o in self.observers + ] - defer.returnValue(res) + return make_deferred_yieldable(defer.gatherResults( + deferreds, consumeErrors=True, + )) def __repr__(self): return "" % (self.name,) diff --git a/tests/test_distributor.py b/tests/test_distributor.py index 04a88056f1..71d11cda77 100644 --- a/tests/test_distributor.py +++ b/tests/test_distributor.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,8 +16,6 @@ from mock import Mock, patch -from twisted.internet import defer - from synapse.util.distributor import Distributor from . import unittest @@ -27,38 +26,15 @@ class DistributorTestCase(unittest.TestCase): def setUp(self): self.dist = Distributor() - @defer.inlineCallbacks def test_signal_dispatch(self): self.dist.declare("alert") observer = Mock() self.dist.observe("alert", observer) - d = self.dist.fire("alert", 1, 2, 3) - yield d - self.assertTrue(d.called) + self.dist.fire("alert", 1, 2, 3) observer.assert_called_with(1, 2, 3) - @defer.inlineCallbacks - def test_signal_dispatch_deferred(self): - self.dist.declare("whine") - - d_inner = defer.Deferred() - - def observer(): - return d_inner - - self.dist.observe("whine", observer) - - d_outer = self.dist.fire("whine") - - self.assertFalse(d_outer.called) - - d_inner.callback(None) - yield d_outer - self.assertTrue(d_outer.called) - - @defer.inlineCallbacks def test_signal_catch(self): self.dist.declare("alarm") @@ -71,9 +47,7 @@ class DistributorTestCase(unittest.TestCase): with patch( "synapse.util.distributor.logger", spec=["warning"] ) as mock_logger: - d = self.dist.fire("alarm", "Go") - yield d - self.assertTrue(d.called) + self.dist.fire("alarm", "Go") observers[0].assert_called_once_with("Go") observers[1].assert_called_once_with("Go") @@ -83,34 +57,12 @@ class DistributorTestCase(unittest.TestCase): mock_logger.warning.call_args[0][0], str ) - @defer.inlineCallbacks - def test_signal_catch_no_suppress(self): - # Gut-wrenching - self.dist.suppress_failures = False - - self.dist.declare("whail") - - class MyException(Exception): - pass - - @defer.inlineCallbacks - def observer(): - raise MyException("Oopsie") - - self.dist.observe("whail", observer) - - d = self.dist.fire("whail") - - yield self.assertFailure(d, MyException) - self.dist.suppress_failures = True - - @defer.inlineCallbacks def test_signal_prereg(self): observer = Mock() self.dist.observe("flare", observer) self.dist.declare("flare") - yield self.dist.fire("flare", 4, 5) + self.dist.fire("flare", 4, 5) observer.assert_called_with(4, 5) -- cgit 1.4.1 From 00bc9791379109690fb5e465adf0e0e8e76167aa Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Jul 2018 10:51:15 +0100 Subject: Disable logcontext warning Temporary workaround to #3518 while we release 0.33.0. --- synapse/storage/_base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 98dde77431..a6a0e6ec9f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -343,9 +343,10 @@ class SQLBaseStore(object): """ parent_context = LoggingContext.current_context() if parent_context == LoggingContext.sentinel: - logger.warn( - "Running db txn from sentinel context: metrics will be lost", - ) + # warning disabled for 0.33.0 release; proper fixes will land imminently. + # logger.warn( + # "Running db txn from sentinel context: metrics will be lost", + # ) parent_context = None start_time = time.time() -- cgit 1.4.1 From c0685f67c001ab156fb6922877c35f70536100dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 10:59:02 +0100 Subject: spell out that include_deleted_devices requires include_all_devices --- synapse/storage/end_to_end_keys.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 6c28719420..ffe4d7235a 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -74,7 +74,8 @@ class EndToEndKeyStore(SQLBaseStore): include_all_devices (bool): whether to include entries for devices that don't have device keys include_deleted_devices (bool): whether to include null entries for - devices which no longer exist (but were in the query_list) + devices which no longer exist (but were in the query_list). + This option only takes effect if include_all_devices is true. Returns: Dict mapping from user-id to dict mapping from device_id to dict containing "key_json", "device_display_name". @@ -100,6 +101,9 @@ class EndToEndKeyStore(SQLBaseStore): query_clauses = [] query_params = [] + if include_all_devices is False: + include_deleted_devices = False + if include_deleted_devices: deleted_devices = set(query_list) -- cgit 1.4.1 From a97c845271f9a89ebdb7186d4c9d04c099bd1beb Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 19 Jul 2018 20:03:33 +1000 Subject: Move v1-only APIs into their own module & isolate deprecated ones (#3460) --- changelog.d/3460.misc | 0 setup.cfg | 1 + synapse/http/client.py | 6 +- synapse/rest/__init__.py | 43 ++- synapse/rest/client/v1/register.py | 436 ----------------------------- synapse/rest/client/v1/room.py | 5 +- synapse/rest/client/v1_only/__init__.py | 3 + synapse/rest/client/v1_only/base.py | 39 +++ synapse/rest/client/v1_only/register.py | 437 ++++++++++++++++++++++++++++++ tests/rest/client/v1/test_events.py | 90 +----- tests/rest/client/v1/test_register.py | 5 +- tests/rest/client/v1/test_rooms.py | 2 +- tests/rest/client/v2_alpha/test_filter.py | 8 +- tests/rest/client/v2_alpha/test_sync.py | 8 +- 14 files changed, 549 insertions(+), 534 deletions(-) create mode 100644 changelog.d/3460.misc delete mode 100644 synapse/rest/client/v1/register.py create mode 100644 synapse/rest/client/v1_only/__init__.py create mode 100644 synapse/rest/client/v1_only/base.py create mode 100644 synapse/rest/client/v1_only/register.py (limited to 'synapse') diff --git a/changelog.d/3460.misc b/changelog.d/3460.misc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/setup.cfg b/setup.cfg index 3f84283a38..c2620be6c5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,3 +36,4 @@ known_compat = mock,six known_twisted=twisted,OpenSSL multi_line_output=3 include_trailing_comma=true +combine_as_imports=true diff --git a/synapse/http/client.py b/synapse/http/client.py index d6a0d75b2b..25b6307884 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -26,9 +26,11 @@ from OpenSSL.SSL import VERIFY_NONE from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent -from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( + Agent, + BrowserLikeRedirectAgent, + ContentDecoderAgent, + FileBodyProducer as TwistedFileBodyProducer, GzipDecoder, HTTPConnectionPool, PartialDownloadError, diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 75c2a4ec8e..3418f06fd6 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +14,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +from six import PY3 + from synapse.http.server import JsonResource from synapse.rest.client import versions -from synapse.rest.client.v1 import admin, directory, events, initial_sync -from synapse.rest.client.v1 import login as v1_login -from synapse.rest.client.v1 import logout, presence, profile, push_rule, pusher -from synapse.rest.client.v1 import register as v1_register -from synapse.rest.client.v1 import room, voip +from synapse.rest.client.v1 import ( + admin, + directory, + events, + initial_sync, + login as v1_login, + logout, + presence, + profile, + push_rule, + pusher, + room, + voip, +) from synapse.rest.client.v2_alpha import ( account, account_data, @@ -42,6 +54,11 @@ from synapse.rest.client.v2_alpha import ( user_directory, ) +if not PY3: + from synapse.rest.client.v1_only import ( + register as v1_register, + ) + class ClientRestResource(JsonResource): """A resource for version 1 of the matrix client API.""" @@ -54,14 +71,22 @@ class ClientRestResource(JsonResource): def register_servlets(client_resource, hs): versions.register_servlets(client_resource) - # "v1" - room.register_servlets(hs, client_resource) + if not PY3: + # "v1" (Python 2 only) + v1_register.register_servlets(hs, client_resource) + + # Deprecated in r0 + initial_sync.register_servlets(hs, client_resource) + room.register_deprecated_servlets(hs, client_resource) + + # Partially deprecated in r0 events.register_servlets(hs, client_resource) - v1_register.register_servlets(hs, client_resource) + + # "v1" + "r0" + room.register_servlets(hs, client_resource) v1_login.register_servlets(hs, client_resource) profile.register_servlets(hs, client_resource) presence.register_servlets(hs, client_resource) - initial_sync.register_servlets(hs, client_resource) directory.register_servlets(hs, client_resource) voip.register_servlets(hs, client_resource) admin.register_servlets(hs, client_resource) diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py deleted file mode 100644 index 25a143af8d..0000000000 --- a/synapse/rest/client/v1/register.py +++ /dev/null @@ -1,436 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This module contains REST servlets to do with registration: /register""" -import hmac -import logging -from hashlib import sha1 - -from twisted.internet import defer - -import synapse.util.stringutils as stringutils -from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError -from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request -from synapse.types import create_requester - -from .base import ClientV1RestServlet, client_path_patterns - -logger = logging.getLogger(__name__) - - -# We ought to be using hmac.compare_digest() but on older pythons it doesn't -# exist. It's a _really minor_ security flaw to use plain string comparison -# because the timing attack is so obscured by all the other code here it's -# unlikely to make much difference -if hasattr(hmac, "compare_digest"): - compare_digest = hmac.compare_digest -else: - def compare_digest(a, b): - return a == b - - -class RegisterRestServlet(ClientV1RestServlet): - """Handles registration with the home server. - - This servlet is in control of the registration flow; the registration - handler doesn't have a concept of multi-stages or sessions. - """ - - PATTERNS = client_path_patterns("/register$", releases=(), include_in_unstable=False) - - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ - super(RegisterRestServlet, self).__init__(hs) - # sessions are stored as: - # self.sessions = { - # "session_id" : { __session_dict__ } - # } - # TODO: persistent storage - self.sessions = {} - self.enable_registration = hs.config.enable_registration - self.auth = hs.get_auth() - self.auth_handler = hs.get_auth_handler() - self.handlers = hs.get_handlers() - - def on_GET(self, request): - - require_email = 'email' in self.hs.config.registrations_require_3pid - require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid - - flows = [] - if self.hs.config.enable_registration_captcha: - # only support the email-only flow if we don't require MSISDN 3PIDs - if not require_msisdn: - flows.extend([ - { - "type": LoginType.RECAPTCHA, - "stages": [ - LoginType.RECAPTCHA, - LoginType.EMAIL_IDENTITY, - LoginType.PASSWORD - ] - }, - ]) - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([ - { - "type": LoginType.RECAPTCHA, - "stages": [LoginType.RECAPTCHA, LoginType.PASSWORD] - } - ]) - else: - # only support the email-only flow if we don't require MSISDN 3PIDs - if require_email or not require_msisdn: - flows.extend([ - { - "type": LoginType.EMAIL_IDENTITY, - "stages": [ - LoginType.EMAIL_IDENTITY, LoginType.PASSWORD - ] - } - ]) - # only support 3PIDless registration if no 3PIDs are required - if not require_email and not require_msisdn: - flows.extend([ - { - "type": LoginType.PASSWORD - } - ]) - return (200, {"flows": flows}) - - @defer.inlineCallbacks - def on_POST(self, request): - register_json = parse_json_object_from_request(request) - - session = (register_json["session"] - if "session" in register_json else None) - login_type = None - assert_params_in_dict(register_json, ["type"]) - - try: - login_type = register_json["type"] - - is_application_server = login_type == LoginType.APPLICATION_SERVICE - is_using_shared_secret = login_type == LoginType.SHARED_SECRET - - can_register = ( - self.enable_registration - or is_application_server - or is_using_shared_secret - ) - if not can_register: - raise SynapseError(403, "Registration has been disabled") - - stages = { - LoginType.RECAPTCHA: self._do_recaptcha, - LoginType.PASSWORD: self._do_password, - LoginType.EMAIL_IDENTITY: self._do_email_identity, - LoginType.APPLICATION_SERVICE: self._do_app_service, - LoginType.SHARED_SECRET: self._do_shared_secret, - } - - session_info = self._get_session_info(request, session) - logger.debug("%s : session info %s request info %s", - login_type, session_info, register_json) - response = yield stages[login_type]( - request, - register_json, - session_info - ) - - if "access_token" not in response: - # isn't a final response - response["session"] = session_info["id"] - - defer.returnValue((200, response)) - except KeyError as e: - logger.exception(e) - raise SynapseError(400, "Missing JSON keys for login type %s." % ( - login_type, - )) - - def on_OPTIONS(self, request): - return (200, {}) - - def _get_session_info(self, request, session_id): - if not session_id: - # create a new session - while session_id is None or session_id in self.sessions: - session_id = stringutils.random_string(24) - self.sessions[session_id] = { - "id": session_id, - LoginType.EMAIL_IDENTITY: False, - LoginType.RECAPTCHA: False - } - - return self.sessions[session_id] - - def _save_session(self, session): - # TODO: Persistent storage - logger.debug("Saving session %s", session) - self.sessions[session["id"]] = session - - def _remove_session(self, session): - logger.debug("Removing session %s", session) - self.sessions.pop(session["id"]) - - @defer.inlineCallbacks - def _do_recaptcha(self, request, register_json, session): - if not self.hs.config.enable_registration_captcha: - raise SynapseError(400, "Captcha not required.") - - yield self._check_recaptcha(request, register_json, session) - - session[LoginType.RECAPTCHA] = True # mark captcha as done - self._save_session(session) - defer.returnValue({ - "next": [LoginType.PASSWORD, LoginType.EMAIL_IDENTITY] - }) - - @defer.inlineCallbacks - def _check_recaptcha(self, request, register_json, session): - if ("captcha_bypass_hmac" in register_json and - self.hs.config.captcha_bypass_secret): - if "user" not in register_json: - raise SynapseError(400, "Captcha bypass needs 'user'") - - want = hmac.new( - key=self.hs.config.captcha_bypass_secret, - msg=register_json["user"], - digestmod=sha1, - ).hexdigest() - - # str() because otherwise hmac complains that 'unicode' does not - # have the buffer interface - got = str(register_json["captcha_bypass_hmac"]) - - if compare_digest(want, got): - session["user"] = register_json["user"] - defer.returnValue(None) - else: - raise SynapseError( - 400, "Captcha bypass HMAC incorrect", - errcode=Codes.CAPTCHA_NEEDED - ) - - challenge = None - user_response = None - try: - challenge = register_json["challenge"] - user_response = register_json["response"] - except KeyError: - raise SynapseError(400, "Captcha response is required", - errcode=Codes.CAPTCHA_NEEDED) - - ip_addr = self.hs.get_ip_from_request(request) - - handler = self.handlers.registration_handler - yield handler.check_recaptcha( - ip_addr, - self.hs.config.recaptcha_private_key, - challenge, - user_response - ) - - @defer.inlineCallbacks - def _do_email_identity(self, request, register_json, session): - if (self.hs.config.enable_registration_captcha and - not session[LoginType.RECAPTCHA]): - raise SynapseError(400, "Captcha is required.") - - threepidCreds = register_json['threepidCreds'] - handler = self.handlers.registration_handler - logger.debug("Registering email. threepidcreds: %s" % (threepidCreds)) - yield handler.register_email(threepidCreds) - session["threepidCreds"] = threepidCreds # store creds for next stage - session[LoginType.EMAIL_IDENTITY] = True # mark email as done - self._save_session(session) - defer.returnValue({ - "next": LoginType.PASSWORD - }) - - @defer.inlineCallbacks - def _do_password(self, request, register_json, session): - if (self.hs.config.enable_registration_captcha and - not session[LoginType.RECAPTCHA]): - # captcha should've been done by this stage! - raise SynapseError(400, "Captcha is required.") - - if ("user" in session and "user" in register_json and - session["user"] != register_json["user"]): - raise SynapseError( - 400, "Cannot change user ID during registration" - ) - - password = register_json["password"].encode("utf-8") - desired_user_id = ( - register_json["user"].encode("utf-8") - if "user" in register_json else None - ) - - handler = self.handlers.registration_handler - (user_id, token) = yield handler.register( - localpart=desired_user_id, - password=password - ) - - if session[LoginType.EMAIL_IDENTITY]: - logger.debug("Binding emails %s to %s" % ( - session["threepidCreds"], user_id) - ) - yield handler.bind_emails(user_id, session["threepidCreds"]) - - result = { - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - } - self._remove_session(session) - defer.returnValue(result) - - @defer.inlineCallbacks - def _do_app_service(self, request, register_json, session): - as_token = self.auth.get_access_token_from_request(request) - - assert_params_in_dict(register_json, ["user"]) - user_localpart = register_json["user"].encode("utf-8") - - handler = self.handlers.registration_handler - user_id = yield handler.appservice_register( - user_localpart, as_token - ) - token = yield self.auth_handler.issue_access_token(user_id) - self._remove_session(session) - defer.returnValue({ - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - }) - - @defer.inlineCallbacks - def _do_shared_secret(self, request, register_json, session): - assert_params_in_dict(register_json, ["mac", "user", "password"]) - - if not self.hs.config.registration_shared_secret: - raise SynapseError(400, "Shared secret registration is not enabled") - - user = register_json["user"].encode("utf-8") - password = register_json["password"].encode("utf-8") - admin = register_json.get("admin", None) - - # Its important to check as we use null bytes as HMAC field separators - if b"\x00" in user: - raise SynapseError(400, "Invalid user") - if b"\x00" in password: - raise SynapseError(400, "Invalid password") - - # str() because otherwise hmac complains that 'unicode' does not - # have the buffer interface - got_mac = str(register_json["mac"]) - - want_mac = hmac.new( - key=self.hs.config.registration_shared_secret.encode(), - digestmod=sha1, - ) - want_mac.update(user) - want_mac.update(b"\x00") - want_mac.update(password) - want_mac.update(b"\x00") - want_mac.update(b"admin" if admin else b"notadmin") - want_mac = want_mac.hexdigest() - - if compare_digest(want_mac, got_mac): - handler = self.handlers.registration_handler - user_id, token = yield handler.register( - localpart=user.lower(), - password=password, - admin=bool(admin), - ) - self._remove_session(session) - defer.returnValue({ - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - }) - else: - raise SynapseError( - 403, "HMAC incorrect", - ) - - -class CreateUserRestServlet(ClientV1RestServlet): - """Handles user creation via a server-to-server interface - """ - - PATTERNS = client_path_patterns("/createUser$", releases=()) - - def __init__(self, hs): - super(CreateUserRestServlet, self).__init__(hs) - self.store = hs.get_datastore() - self.handlers = hs.get_handlers() - - @defer.inlineCallbacks - def on_POST(self, request): - user_json = parse_json_object_from_request(request) - - access_token = self.auth.get_access_token_from_request(request) - app_service = self.store.get_app_service_by_token( - access_token - ) - if not app_service: - raise SynapseError(403, "Invalid application service token.") - - requester = create_requester(app_service.sender) - - logger.debug("creating user: %s", user_json) - response = yield self._do_create(requester, user_json) - - defer.returnValue((200, response)) - - def on_OPTIONS(self, request): - return 403, {} - - @defer.inlineCallbacks - def _do_create(self, requester, user_json): - assert_params_in_dict(user_json, ["localpart", "displayname"]) - - localpart = user_json["localpart"].encode("utf-8") - displayname = user_json["displayname"].encode("utf-8") - password_hash = user_json["password_hash"].encode("utf-8") \ - if user_json.get("password_hash") else None - - handler = self.handlers.registration_handler - user_id, token = yield handler.get_or_create_user( - requester=requester, - localpart=localpart, - displayname=displayname, - password_hash=password_hash - ) - - defer.returnValue({ - "user_id": user_id, - "access_token": token, - "home_server": self.hs.hostname, - }) - - -def register_servlets(hs, http_server): - RegisterRestServlet(hs).register(http_server) - CreateUserRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 3d62447854..b9512a2b61 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -832,10 +832,13 @@ def register_servlets(hs, http_server): RoomSendEventRestServlet(hs).register(http_server) PublicRoomListRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) - RoomInitialSyncRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) RoomTypingRestServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) JoinedRoomsRestServlet(hs).register(http_server) RoomEventServlet(hs).register(http_server) RoomEventContextServlet(hs).register(http_server) + + +def register_deprecated_servlets(hs, http_server): + RoomInitialSyncRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/v1_only/__init__.py b/synapse/rest/client/v1_only/__init__.py new file mode 100644 index 0000000000..936f902ace --- /dev/null +++ b/synapse/rest/client/v1_only/__init__.py @@ -0,0 +1,3 @@ +""" +REST APIs that are only used in v1 (the legacy API). +""" diff --git a/synapse/rest/client/v1_only/base.py b/synapse/rest/client/v1_only/base.py new file mode 100644 index 0000000000..9d4db7437c --- /dev/null +++ b/synapse/rest/client/v1_only/base.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains base REST classes for constructing client v1 servlets. +""" + +import re + +from synapse.api.urls import CLIENT_PREFIX + + +def v1_only_client_path_patterns(path_regex, include_in_unstable=True): + """Creates a regex compiled client path with the correct client path + prefix. + + Args: + path_regex (str): The regex string to match. This should NOT have a ^ + as this will be prefixed. + Returns: + list of SRE_Pattern + """ + patterns = [re.compile("^" + CLIENT_PREFIX + path_regex)] + if include_in_unstable: + unstable_prefix = CLIENT_PREFIX.replace("/api/v1", "/unstable") + patterns.append(re.compile("^" + unstable_prefix + path_regex)) + return patterns diff --git a/synapse/rest/client/v1_only/register.py b/synapse/rest/client/v1_only/register.py new file mode 100644 index 0000000000..3439c3c6d4 --- /dev/null +++ b/synapse/rest/client/v1_only/register.py @@ -0,0 +1,437 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains REST servlets to do with registration: /register""" +import hmac +import logging +from hashlib import sha1 + +from twisted.internet import defer + +import synapse.util.stringutils as stringutils +from synapse.api.constants import LoginType +from synapse.api.errors import Codes, SynapseError +from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_request +from synapse.rest.client.v1.base import ClientV1RestServlet +from synapse.types import create_requester + +from .base import v1_only_client_path_patterns + +logger = logging.getLogger(__name__) + + +# We ought to be using hmac.compare_digest() but on older pythons it doesn't +# exist. It's a _really minor_ security flaw to use plain string comparison +# because the timing attack is so obscured by all the other code here it's +# unlikely to make much difference +if hasattr(hmac, "compare_digest"): + compare_digest = hmac.compare_digest +else: + def compare_digest(a, b): + return a == b + + +class RegisterRestServlet(ClientV1RestServlet): + """Handles registration with the home server. + + This servlet is in control of the registration flow; the registration + handler doesn't have a concept of multi-stages or sessions. + """ + + PATTERNS = v1_only_client_path_patterns("/register$", include_in_unstable=False) + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(RegisterRestServlet, self).__init__(hs) + # sessions are stored as: + # self.sessions = { + # "session_id" : { __session_dict__ } + # } + # TODO: persistent storage + self.sessions = {} + self.enable_registration = hs.config.enable_registration + self.auth = hs.get_auth() + self.auth_handler = hs.get_auth_handler() + self.handlers = hs.get_handlers() + + def on_GET(self, request): + + require_email = 'email' in self.hs.config.registrations_require_3pid + require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid + + flows = [] + if self.hs.config.enable_registration_captcha: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: + flows.extend([ + { + "type": LoginType.RECAPTCHA, + "stages": [ + LoginType.RECAPTCHA, + LoginType.EMAIL_IDENTITY, + LoginType.PASSWORD + ] + }, + ]) + # only support 3PIDless registration if no 3PIDs are required + if not require_email and not require_msisdn: + flows.extend([ + { + "type": LoginType.RECAPTCHA, + "stages": [LoginType.RECAPTCHA, LoginType.PASSWORD] + } + ]) + else: + # only support the email-only flow if we don't require MSISDN 3PIDs + if require_email or not require_msisdn: + flows.extend([ + { + "type": LoginType.EMAIL_IDENTITY, + "stages": [ + LoginType.EMAIL_IDENTITY, LoginType.PASSWORD + ] + } + ]) + # only support 3PIDless registration if no 3PIDs are required + if not require_email and not require_msisdn: + flows.extend([ + { + "type": LoginType.PASSWORD + } + ]) + return (200, {"flows": flows}) + + @defer.inlineCallbacks + def on_POST(self, request): + register_json = parse_json_object_from_request(request) + + session = (register_json["session"] + if "session" in register_json else None) + login_type = None + assert_params_in_dict(register_json, ["type"]) + + try: + login_type = register_json["type"] + + is_application_server = login_type == LoginType.APPLICATION_SERVICE + is_using_shared_secret = login_type == LoginType.SHARED_SECRET + + can_register = ( + self.enable_registration + or is_application_server + or is_using_shared_secret + ) + if not can_register: + raise SynapseError(403, "Registration has been disabled") + + stages = { + LoginType.RECAPTCHA: self._do_recaptcha, + LoginType.PASSWORD: self._do_password, + LoginType.EMAIL_IDENTITY: self._do_email_identity, + LoginType.APPLICATION_SERVICE: self._do_app_service, + LoginType.SHARED_SECRET: self._do_shared_secret, + } + + session_info = self._get_session_info(request, session) + logger.debug("%s : session info %s request info %s", + login_type, session_info, register_json) + response = yield stages[login_type]( + request, + register_json, + session_info + ) + + if "access_token" not in response: + # isn't a final response + response["session"] = session_info["id"] + + defer.returnValue((200, response)) + except KeyError as e: + logger.exception(e) + raise SynapseError(400, "Missing JSON keys for login type %s." % ( + login_type, + )) + + def on_OPTIONS(self, request): + return (200, {}) + + def _get_session_info(self, request, session_id): + if not session_id: + # create a new session + while session_id is None or session_id in self.sessions: + session_id = stringutils.random_string(24) + self.sessions[session_id] = { + "id": session_id, + LoginType.EMAIL_IDENTITY: False, + LoginType.RECAPTCHA: False + } + + return self.sessions[session_id] + + def _save_session(self, session): + # TODO: Persistent storage + logger.debug("Saving session %s", session) + self.sessions[session["id"]] = session + + def _remove_session(self, session): + logger.debug("Removing session %s", session) + self.sessions.pop(session["id"]) + + @defer.inlineCallbacks + def _do_recaptcha(self, request, register_json, session): + if not self.hs.config.enable_registration_captcha: + raise SynapseError(400, "Captcha not required.") + + yield self._check_recaptcha(request, register_json, session) + + session[LoginType.RECAPTCHA] = True # mark captcha as done + self._save_session(session) + defer.returnValue({ + "next": [LoginType.PASSWORD, LoginType.EMAIL_IDENTITY] + }) + + @defer.inlineCallbacks + def _check_recaptcha(self, request, register_json, session): + if ("captcha_bypass_hmac" in register_json and + self.hs.config.captcha_bypass_secret): + if "user" not in register_json: + raise SynapseError(400, "Captcha bypass needs 'user'") + + want = hmac.new( + key=self.hs.config.captcha_bypass_secret, + msg=register_json["user"], + digestmod=sha1, + ).hexdigest() + + # str() because otherwise hmac complains that 'unicode' does not + # have the buffer interface + got = str(register_json["captcha_bypass_hmac"]) + + if compare_digest(want, got): + session["user"] = register_json["user"] + defer.returnValue(None) + else: + raise SynapseError( + 400, "Captcha bypass HMAC incorrect", + errcode=Codes.CAPTCHA_NEEDED + ) + + challenge = None + user_response = None + try: + challenge = register_json["challenge"] + user_response = register_json["response"] + except KeyError: + raise SynapseError(400, "Captcha response is required", + errcode=Codes.CAPTCHA_NEEDED) + + ip_addr = self.hs.get_ip_from_request(request) + + handler = self.handlers.registration_handler + yield handler.check_recaptcha( + ip_addr, + self.hs.config.recaptcha_private_key, + challenge, + user_response + ) + + @defer.inlineCallbacks + def _do_email_identity(self, request, register_json, session): + if (self.hs.config.enable_registration_captcha and + not session[LoginType.RECAPTCHA]): + raise SynapseError(400, "Captcha is required.") + + threepidCreds = register_json['threepidCreds'] + handler = self.handlers.registration_handler + logger.debug("Registering email. threepidcreds: %s" % (threepidCreds)) + yield handler.register_email(threepidCreds) + session["threepidCreds"] = threepidCreds # store creds for next stage + session[LoginType.EMAIL_IDENTITY] = True # mark email as done + self._save_session(session) + defer.returnValue({ + "next": LoginType.PASSWORD + }) + + @defer.inlineCallbacks + def _do_password(self, request, register_json, session): + if (self.hs.config.enable_registration_captcha and + not session[LoginType.RECAPTCHA]): + # captcha should've been done by this stage! + raise SynapseError(400, "Captcha is required.") + + if ("user" in session and "user" in register_json and + session["user"] != register_json["user"]): + raise SynapseError( + 400, "Cannot change user ID during registration" + ) + + password = register_json["password"].encode("utf-8") + desired_user_id = ( + register_json["user"].encode("utf-8") + if "user" in register_json else None + ) + + handler = self.handlers.registration_handler + (user_id, token) = yield handler.register( + localpart=desired_user_id, + password=password + ) + + if session[LoginType.EMAIL_IDENTITY]: + logger.debug("Binding emails %s to %s" % ( + session["threepidCreds"], user_id) + ) + yield handler.bind_emails(user_id, session["threepidCreds"]) + + result = { + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + } + self._remove_session(session) + defer.returnValue(result) + + @defer.inlineCallbacks + def _do_app_service(self, request, register_json, session): + as_token = self.auth.get_access_token_from_request(request) + + assert_params_in_dict(register_json, ["user"]) + user_localpart = register_json["user"].encode("utf-8") + + handler = self.handlers.registration_handler + user_id = yield handler.appservice_register( + user_localpart, as_token + ) + token = yield self.auth_handler.issue_access_token(user_id) + self._remove_session(session) + defer.returnValue({ + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + }) + + @defer.inlineCallbacks + def _do_shared_secret(self, request, register_json, session): + assert_params_in_dict(register_json, ["mac", "user", "password"]) + + if not self.hs.config.registration_shared_secret: + raise SynapseError(400, "Shared secret registration is not enabled") + + user = register_json["user"].encode("utf-8") + password = register_json["password"].encode("utf-8") + admin = register_json.get("admin", None) + + # Its important to check as we use null bytes as HMAC field separators + if b"\x00" in user: + raise SynapseError(400, "Invalid user") + if b"\x00" in password: + raise SynapseError(400, "Invalid password") + + # str() because otherwise hmac complains that 'unicode' does not + # have the buffer interface + got_mac = str(register_json["mac"]) + + want_mac = hmac.new( + key=self.hs.config.registration_shared_secret.encode(), + digestmod=sha1, + ) + want_mac.update(user) + want_mac.update(b"\x00") + want_mac.update(password) + want_mac.update(b"\x00") + want_mac.update(b"admin" if admin else b"notadmin") + want_mac = want_mac.hexdigest() + + if compare_digest(want_mac, got_mac): + handler = self.handlers.registration_handler + user_id, token = yield handler.register( + localpart=user.lower(), + password=password, + admin=bool(admin), + ) + self._remove_session(session) + defer.returnValue({ + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + }) + else: + raise SynapseError( + 403, "HMAC incorrect", + ) + + +class CreateUserRestServlet(ClientV1RestServlet): + """Handles user creation via a server-to-server interface + """ + + PATTERNS = v1_only_client_path_patterns("/createUser$") + + def __init__(self, hs): + super(CreateUserRestServlet, self).__init__(hs) + self.store = hs.get_datastore() + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_POST(self, request): + user_json = parse_json_object_from_request(request) + + access_token = self.auth.get_access_token_from_request(request) + app_service = self.store.get_app_service_by_token( + access_token + ) + if not app_service: + raise SynapseError(403, "Invalid application service token.") + + requester = create_requester(app_service.sender) + + logger.debug("creating user: %s", user_json) + response = yield self._do_create(requester, user_json) + + defer.returnValue((200, response)) + + def on_OPTIONS(self, request): + return 403, {} + + @defer.inlineCallbacks + def _do_create(self, requester, user_json): + assert_params_in_dict(user_json, ["localpart", "displayname"]) + + localpart = user_json["localpart"].encode("utf-8") + displayname = user_json["displayname"].encode("utf-8") + password_hash = user_json["password_hash"].encode("utf-8") \ + if user_json.get("password_hash") else None + + handler = self.handlers.registration_handler + user_id, token = yield handler.get_or_create_user( + requester=requester, + localpart=localpart, + displayname=displayname, + password_hash=password_hash + ) + + defer.returnValue({ + "user_id": user_id, + "access_token": token, + "home_server": self.hs.hostname, + }) + + +def register_servlets(hs, http_server): + RegisterRestServlet(hs).register(http_server) + CreateUserRestServlet(hs).register(http_server) diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index a5af36a99c..50418153fa 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -14,100 +14,30 @@ # limitations under the License. """ Tests REST events for /events paths.""" + from mock import Mock, NonCallableMock +from six import PY3 -# twisted imports from twisted.internet import defer -import synapse.rest.client.v1.events -import synapse.rest.client.v1.register -import synapse.rest.client.v1.room - -from tests import unittest - from ....utils import MockHttpResource, setup_test_homeserver from .utils import RestTestCase PATH_PREFIX = "/_matrix/client/api/v1" -class EventStreamPaginationApiTestCase(unittest.TestCase): - """ Tests event streaming query parameters and start/end keys used in the - Pagination stream API. """ - user_id = "sid1" - - def setUp(self): - # configure stream and inject items - pass - - def tearDown(self): - pass - - def TODO_test_long_poll(self): - # stream from 'end' key, send (self+other) message, expect message. - - # stream from 'END', send (self+other) message, expect message. - - # stream from 'end' key, send (self+other) topic, expect topic. - - # stream from 'END', send (self+other) topic, expect topic. - - # stream from 'end' key, send (self+other) invite, expect invite. - - # stream from 'END', send (self+other) invite, expect invite. - - pass - - def TODO_test_stream_forward(self): - # stream from START, expect injected items - - # stream from 'start' key, expect same content - - # stream from 'end' key, expect nothing - - # stream from 'END', expect nothing - - # The following is needed for cases where content is removed e.g. you - # left a room, so the token you're streaming from is > the one that - # would be returned naturally from START>END. - # stream from very new token (higher than end key), expect same token - # returned as end key - pass - - def TODO_test_limits(self): - # stream from a key, expect limit_num items - - # stream from START, expect limit_num items - - pass - - def TODO_test_range(self): - # stream from key to key, expect X items - - # stream from key to END, expect X items - - # stream from START to key, expect X items - - # stream from START to END, expect all items - pass - - def TODO_test_direction(self): - # stream from END to START and fwds, expect newest first - - # stream from END to START and bwds, expect oldest first - - # stream from START to END and fwds, expect oldest first - - # stream from START to END and bwds, expect newest first - - pass - - class EventStreamPermissionsTestCase(RestTestCase): """ Tests event streaming (GET /events). """ + if PY3: + skip = "Skip on Py3 until ported to use not V1 only register." + @defer.inlineCallbacks def setUp(self): + import synapse.rest.client.v1.events + import synapse.rest.client.v1_only.register + import synapse.rest.client.v1.room + self.mock_resource = MockHttpResource(prefix=PATH_PREFIX) hs = yield setup_test_homeserver( @@ -125,7 +55,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs.get_handlers().federation_handler = Mock() - synapse.rest.client.v1.register.register_servlets(hs, self.mock_resource) + synapse.rest.client.v1_only.register.register_servlets(hs, self.mock_resource) synapse.rest.client.v1.events.register_servlets(hs, self.mock_resource) synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) diff --git a/tests/rest/client/v1/test_register.py b/tests/rest/client/v1/test_register.py index f15fb36213..83a23cd8fe 100644 --- a/tests/rest/client/v1/test_register.py +++ b/tests/rest/client/v1/test_register.py @@ -16,11 +16,12 @@ import json from mock import Mock +from six import PY3 from twisted.test.proto_helpers import MemoryReactorClock from synapse.http.server import JsonResource -from synapse.rest.client.v1.register import register_servlets +from synapse.rest.client.v1_only.register import register_servlets from synapse.util import Clock from tests import unittest @@ -31,6 +32,8 @@ class CreateUserServletTestCase(unittest.TestCase): """ Tests for CreateUserRestServlet. """ + if PY3: + skip = "Not ported to Python 3." def setUp(self): self.registration_handler = Mock() diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 6b5764095e..00fc796787 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -20,7 +20,6 @@ import json from mock import Mock, NonCallableMock from six.moves.urllib import parse as urlparse -# twisted imports from twisted.internet import defer import synapse.rest.client.v1.room @@ -86,6 +85,7 @@ class RoomBase(unittest.TestCase): self.resource = JsonResource(self.hs) synapse.rest.client.v1.room.register_servlets(self.hs, self.resource) + synapse.rest.client.v1.room.register_deprecated_servlets(self.hs, self.resource) self.helper = RestHelper(self.hs, self.resource, self.user_id) diff --git a/tests/rest/client/v2_alpha/test_filter.py b/tests/rest/client/v2_alpha/test_filter.py index 5ea9cc825f..e890f0feac 100644 --- a/tests/rest/client/v2_alpha/test_filter.py +++ b/tests/rest/client/v2_alpha/test_filter.py @@ -21,8 +21,12 @@ from synapse.types import UserID from synapse.util import Clock from tests import unittest -from tests.server import ThreadedMemoryReactorClock as MemoryReactorClock -from tests.server import make_request, setup_test_homeserver, wait_until_result +from tests.server import ( + ThreadedMemoryReactorClock as MemoryReactorClock, + make_request, + setup_test_homeserver, + wait_until_result, +) PATH_PREFIX = "/_matrix/client/v2_alpha" diff --git a/tests/rest/client/v2_alpha/test_sync.py b/tests/rest/client/v2_alpha/test_sync.py index 704cf97a40..03ec3993b2 100644 --- a/tests/rest/client/v2_alpha/test_sync.py +++ b/tests/rest/client/v2_alpha/test_sync.py @@ -20,8 +20,12 @@ from synapse.types import UserID from synapse.util import Clock from tests import unittest -from tests.server import ThreadedMemoryReactorClock as MemoryReactorClock -from tests.server import make_request, setup_test_homeserver, wait_until_result +from tests.server import ( + ThreadedMemoryReactorClock as MemoryReactorClock, + make_request, + setup_test_homeserver, + wait_until_result, +) PATH_PREFIX = "/_matrix/client/v2_alpha" -- cgit 1.4.1 From f1a15ea20692e9f4b347cdddf6d714912356ba64 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 19 Jul 2018 11:14:20 +0100 Subject: revert 00bc979 ... we've fixed the things that caused the warnings, so we should reinstate the warning. --- synapse/storage/_base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a6a0e6ec9f..1d41d8d445 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -343,10 +343,9 @@ class SQLBaseStore(object): """ parent_context = LoggingContext.current_context() if parent_context == LoggingContext.sentinel: - # warning disabled for 0.33.0 release; proper fixes will land imminently. - # logger.warn( - # "Running db txn from sentinel context: metrics will be lost", - # ) + logger.warn( + "Starting db connection from sentinel context: metrics will be lost", + ) parent_context = None start_time = time.time() -- cgit 1.4.1 From 9e40834f742d95c324183f3e71ae73aafe3c6a99 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 11:15:10 +0100 Subject: yes, we do need to invalidate the device_id_exists_cache when deleting a remote device --- synapse/storage/devices.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 203f50f07d..cc3cdf2ebc 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -258,7 +258,6 @@ class DeviceStore(SQLBaseStore): }, ) - # Do we need this? txn.call_after( self.device_id_exists_cache.invalidate, (user_id, device_id,) ) -- cgit 1.4.1 From 95ccb6e2ec57f2150a697ea9cde030e8f78d6db9 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 19 Jul 2018 20:58:18 +1000 Subject: Don't spew errors because we can't save metrics (#3563) --- changelog.d/3548.bugfix | 1 + synapse/util/logcontext.py | 11 +++++++++++ synapse/util/metrics.py | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 changelog.d/3548.bugfix (limited to 'synapse') diff --git a/changelog.d/3548.bugfix b/changelog.d/3548.bugfix new file mode 100644 index 0000000000..38dc3b1232 --- /dev/null +++ b/changelog.d/3548.bugfix @@ -0,0 +1 @@ +Catch failures saving metrics captured by Measure, and instead log the faulty metrics information for further analysis. diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index f6c7175f74..8dcae50b39 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -99,6 +99,17 @@ class ContextResourceUsage(object): self.db_sched_duration_sec = 0 self.evt_db_fetch_count = 0 + def __repr__(self): + return ("") % ( + self.ru_stime, + self.ru_utime, + self.db_txn_count, + self.db_txn_duration_sec, + self.db_sched_duration_sec, + self.evt_db_fetch_count,) + def __iadd__(self, other): """Add another ContextResourceUsage's stats to this one's. diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 6ba7107896..97f1267380 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -104,12 +104,19 @@ class Measure(object): logger.warn("Expected context. (%r)", self.name) return - usage = context.get_resource_usage() - self.start_usage - block_ru_utime.labels(self.name).inc(usage.ru_utime) - block_ru_stime.labels(self.name).inc(usage.ru_stime) - block_db_txn_count.labels(self.name).inc(usage.db_txn_count) - block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec) - block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec) + current = context.get_resource_usage() + usage = current - self.start_usage + try: + block_ru_utime.labels(self.name).inc(usage.ru_utime) + block_ru_stime.labels(self.name).inc(usage.ru_stime) + block_db_txn_count.labels(self.name).inc(usage.db_txn_count) + block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec) + block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec) + except ValueError: + logger.warn( + "Failed to save metrics! OLD: %r, NEW: %r", + self.start_usage, current + ) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) -- cgit 1.4.1 From 38f53399a28a75016255509c8e8939d2815cd807 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 19 Jul 2018 21:11:40 +1000 Subject: 0.33 final --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/__init__.py b/synapse/__init__.py index 6dc16f0808..5c0f2f83aa 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.33.0rc1" +__version__ = "0.33.0" -- cgit 1.4.1 From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 18:32:02 +0100 Subject: add a filtered_types param to limit filtering to specific types --- synapse/handlers/sync.py | 65 +++++++++++++++------------ synapse/storage/state.py | 113 +++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 82 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0c21ac2c77..cb711b8758 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -417,38 +417,44 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event, types=None): + def get_state_after_event(self, event, types=None, filtered_types=None): """ Get the room state after the given event Args: event(synapse.events.EventBase): event of interest - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. + Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) + state_ids = yield self.store.get_state_ids_for_event( + event.event_id, types, filtered_types=filtered_types + ) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position, types=None): + def get_state_at(self, room_id, stream_position, types=None, filtered_types=None): """ Get the room state at a particular stream position Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. - May be None, which matches any key. + all events are returned of the given type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A Deferred map from ((type, state_key)->Event) @@ -463,7 +469,9 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event, types) + state = yield self.get_state_after_event( + last_event, types, filtered_types=filtered_types + ) else: # no events in this room - so presumably no state @@ -499,6 +507,7 @@ class SyncHandler(object): types = None member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() + filtered_types = None if lazy_load_members: # We only request state for the members needed to display the @@ -516,29 +525,25 @@ class SyncHandler(object): # to be done based on event_id, and we don't have the member # event ids until we've pulled them out of the DB. - if not types: - # an optimisation to stop needlessly trying to calculate - # member_state_ids - # - # XXX: i can't remember what this trying to do. why would - # types ever be []? --matthew - lazy_load_members = False - - types.append((None, None)) # don't just filter to room members + # only apply the filtering to room members + filtered_types = [EventTypes.Member] if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token, types=types + room_id, stream_position=now_token, types=types, + filtered_types=filtered_types ) state_ids = current_state_ids @@ -563,15 +568,18 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token, types=types + room_id, stream_position=since_token, types=types, + filtered_types=filtered_types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) if lazy_load_members: @@ -603,11 +611,10 @@ class SyncHandler(object): # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. - # strip off the (None, None) and filter to just room members - types = types[:-1] if types: state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5ff44fef7..ee531a2ce0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, filtered_types=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore): groups(list[int]): list of state group IDs to query types(list[str|None, str|None])|None: List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. If None, - all types are returned. + state_keys for the `type`. If None, all types are returned. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, filtered_types ) results.update(res) defer.returnValue(results) - def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + def _get_state_groups_from_groups_txn( + self, txn, groups, types=None, filtered_types=None + ): results = {group: {} for group in groups} - include_other_types = False + include_other_types = False if filtered_types is None else True if types is not None: - type_set = set(types) - if (None, None) in type_set: - # special case (None, None) to mean that other types should be - # returned - i.e. we were just filtering down the state keys - # for particular types. - include_other_types = True - type_set.remove((None, None)) - types = list(type_set) # deduplicate types list + types = list(set(types)) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) @@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types): + def get_state_for_events(self, event_ids, types, filtered_types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: event_ids (list[string]) - types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + types (list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) state_event_map = yield self.get_events( [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)], @@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_ids_for_events(self, event_ids, types=None): + def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None): """ Get the state dicts corresponding to a list of events Args: event_ids(list(str)): events whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) event_to_state = { event_id: group_to_state[group] @@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_for_event(self, event_id, types=None): + def get_state_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_for_events([event_id], types) + state_map = yield self.get_state_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @defer.inlineCallbacks - def get_state_ids_for_event(self, event_id, types=None): + def get_state_ids_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_ids_for_events([event_id], types) + state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @cached(max_entries=50000) @@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types): + def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). @@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore): Args: group(int): The state group to lookup - types(list[str|None, str|None]): List of 2-tuples of the form + types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. + state_keys for the `type`. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} + + # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False + include_other_types = True if filtered_types is None else False for typ, state_key in types: key = (typ, state_key) - if typ is None: - include_other_types = True - next - if state_key is None: type_to_key[typ] = None # XXX: why do we mark the type as missing from our cache just # because we weren't filtering on a specific value of state_key? + # is it because the cache doesn't handle wildcards? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types + return include_other_types and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore): return state_dict_ids, is_all @defer.inlineCallbacks - def _get_state_for_groups(self, groups, types=None): + def _get_state_for_groups(self, groups, types=None, filtered_types=None): """Gets the state at each of a list of state groups, optionally filtering by type/state_key Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None|str, None|str)]): + types (None|iterable[(None, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. Otherwise, each entry should be a `(type, state_key)` tuple to include in the response. A `state_key` of None is a wildcard - meaning that we require all state with that type. A `type` of None - indicates that types not in the list should not be filtered out. + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] @@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types, + group, types, filtered_types ) results[group] = state_dict_ids @@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, + missing_groups, types_to_fetch, filtered_types ) for group, group_state_dict in iteritems(group_to_state_dict): -- cgit 1.4.1 From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:03:50 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 44 +++++++++++++++++++++++++++----------------- synapse/storage/state.py | 7 ++++--- 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cb711b8758..b597f94cf6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -435,7 +435,7 @@ class SyncHandler(object): A Deferred map from ((type, state_key)->Event) """ state_ids = yield self.store.get_state_ids_for_event( - event.event_id, types, filtered_types=filtered_types + event.event_id, types, filtered_types=filtered_types, ) if event.is_state(): state_ids = state_ids.copy() @@ -470,7 +470,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] state = yield self.get_state_after_event( - last_event, types, filtered_types=filtered_types + last_event, types, filtered_types=filtered_types, ) else: @@ -505,7 +505,6 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None @@ -521,10 +520,6 @@ class SyncHandler(object): ) ] - # We can't remove redundant member types at this stage as it has - # to be done based on event_id, and we don't have the member - # event ids until we've pulled them out of the DB. - # only apply the filtering to room members filtered_types = [EventTypes.Member] @@ -532,27 +527,32 @@ class SyncHandler(object): if batch: current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = current_state_ids + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -569,28 +569,38 @@ class SyncHandler(object): elif batch.limited: state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_at_timeline_start = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: - # TODO: filter out redundant members based on their event_ids - # (not mxids) at this point. In practice, limited syncs are + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implement, this should filter using event_ids (not mxids). + # In practice, limited syncs are # relatively rare so it's not a total disaster to send redundant - # members down at this point. + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -614,7 +624,7 @@ class SyncHandler(object): if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee531a2ce0..75c6366e7a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore): if state_key is None: type_to_key[typ] = None - # XXX: why do we mark the type as missing from our cache just - # because we weren't filtering on a specific value of state_key? - # is it because the cache doesn't handle wildcards? + # we mark the type as missing from the cache because + # when the cache was populated it might have been done with a + # restricted set of state_keys, so the wildcard will not work + # and the cache may be incomplete. missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: -- cgit 1.4.1 From 2f558300cc648e633342746dc7b42a36fcb6b32e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:22:27 +0100 Subject: fix thinkos; unbreak tests --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 75c6366e7a..f09be7172d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -369,7 +369,7 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types, filtered_types): + def get_state_for_events(self, event_ids, types, filtered_types=None): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. @@ -538,7 +538,7 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = True if filtered_types is None else False + include_other_types = False if filtered_types is None else True for typ, state_key in types: key = (typ, state_key) -- cgit 1.4.1 From d7275eecf3abcd5f7c645e5da47c7e7310350333 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Jul 2018 12:37:12 +0100 Subject: Add a sleep to the Limiter to fix stack overflows. Fixes #3570 --- synapse/util/async.py | 23 ++++++++++++++++++++--- tests/util/test_limiter.py | 8 ++++---- 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/util/async.py b/synapse/util/async.py index 5d0fb39130..7d5acecb1c 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -248,11 +248,15 @@ class Limiter(object): # do some work. """ - def __init__(self, max_count): + def __init__(self, max_count, clock=None): """ Args: max_count(int): The maximum number of concurrent access """ + if not clock: + from twisted.internet import reactor + clock = Clock(reactor) + self._clock = clock self.max_count = max_count # key_to_defer is a map from the key to a 2 element list where @@ -277,10 +281,23 @@ class Limiter(object): with PreserveLoggingContext(): yield new_defer logger.info("Acquired limiter lock for key %r", key) + entry[0] += 1 + + # if the code holding the lock completes synchronously, then it + # will recursively run the next claimant on the list. That can + # relatively rapidly lead to stack exhaustion. This is essentially + # the same problem as http://twistedmatrix.com/trac/ticket/9304. + # + # In order to break the cycle, we add a cheeky sleep(0) here to + # ensure that we fall back to the reactor between each iteration. + # + # (This needs to happen while we hold the lock, and the context manager's exit + # code must be synchronous, so this is the only sensible place.) + yield self._clock.sleep(0) + else: logger.info("Acquired uncontended limiter lock for key %r", key) - - entry[0] += 1 + entry[0] += 1 @contextmanager def _ctx_manager(): diff --git a/tests/util/test_limiter.py b/tests/util/test_limiter.py index a5a767b1ff..f7b942f5c1 100644 --- a/tests/util/test_limiter.py +++ b/tests/util/test_limiter.py @@ -48,21 +48,21 @@ class LimiterTestCase(unittest.TestCase): self.assertFalse(d4.called) self.assertFalse(d5.called) - self.assertTrue(d4.called) + cm4 = yield d4 self.assertFalse(d5.called) with cm3: self.assertFalse(d5.called) - self.assertTrue(d5.called) + cm5 = yield d5 with cm2: pass - with (yield d4): + with cm4: pass - with (yield d5): + with cm5: pass d6 = limiter.queue(key) -- cgit 1.4.1 From 8462c26485fb4f19fc52accc05870c0ea4c8eb6a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Jul 2018 12:43:23 +0100 Subject: Improvements to the Limiter * give them names, to improve logging * use a deque rather than a list for efficiency --- synapse/handlers/message.py | 2 +- synapse/util/async.py | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a39b852ceb..8c12c6990f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -427,7 +427,7 @@ class EventCreationHandler(object): # We arbitrarily limit concurrent event creation for a room to 5. # This is to stop us from diverging history *too* much. - self.limiter = Limiter(max_count=5) + self.limiter = Limiter(max_count=5, name="room_event_creation_limit") self.action_generator = hs.get_action_generator() diff --git a/synapse/util/async.py b/synapse/util/async.py index 7d5acecb1c..22071ddef7 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import collections import logging from contextlib import contextmanager @@ -248,11 +249,16 @@ class Limiter(object): # do some work. """ - def __init__(self, max_count, clock=None): + def __init__(self, max_count=1, name=None, clock=None): """ Args: - max_count(int): The maximum number of concurrent access + max_count(int): The maximum number of concurrent accesses """ + if name is None: + self.name = id(self) + else: + self.name = name + if not clock: from twisted.internet import reactor clock = Clock(reactor) @@ -260,14 +266,14 @@ class Limiter(object): self.max_count = max_count # key_to_defer is a map from the key to a 2 element list where - # the first element is the number of things executing - # the second element is a list of deferreds for the things blocked from + # the first element is the number of things executing, and + # the second element is a deque of deferreds for the things blocked from # executing. self.key_to_defer = {} @defer.inlineCallbacks def queue(self, key): - entry = self.key_to_defer.setdefault(key, [0, []]) + entry = self.key_to_defer.setdefault(key, [0, collections.deque()]) # If the number of things executing is greater than the maximum # then add a deferred to the list of blocked items @@ -277,10 +283,10 @@ class Limiter(object): new_defer = defer.Deferred() entry[1].append(new_defer) - logger.info("Waiting to acquire limiter lock for key %r", key) - with PreserveLoggingContext(): - yield new_defer - logger.info("Acquired limiter lock for key %r", key) + logger.info("Waiting to acquire limiter lock %r for key %r", self.name, key) + yield make_deferred_yieldable(new_defer) + + logger.info("Acquired limiter lock %r for key %r", self.name, key) entry[0] += 1 # if the code holding the lock completes synchronously, then it @@ -296,7 +302,7 @@ class Limiter(object): yield self._clock.sleep(0) else: - logger.info("Acquired uncontended limiter lock for key %r", key) + logger.info("Acquired uncontended limiter lock %r for key %r", self.name, key) entry[0] += 1 @contextmanager @@ -304,15 +310,16 @@ class Limiter(object): try: yield finally: - logger.info("Releasing limiter lock for key %r", key) + logger.info("Releasing limiter lock %r for key %r", self.name, key) # We've finished executing so check if there are any things # blocked waiting to execute and start one of them entry[0] -= 1 if entry[1]: - next_def = entry[1].pop(0) + next_def = entry[1].popleft() + # we need to run the next thing in the sentinel context. with PreserveLoggingContext(): next_def.callback(None) elif entry[0] == 0: -- cgit 1.4.1 From 7c712f95bbc7f405355d5714c92d65551f64fec2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Jul 2018 13:11:43 +0100 Subject: Combine Limiter and Linearizer Linearizer was effectively a Limiter with max_count=1, so rather than maintaining two sets of code, let's combine them. --- synapse/handlers/message.py | 4 +- synapse/util/async.py | 99 +++++-------------------------------------- tests/util/test_limiter.py | 70 ------------------------------ tests/util/test_linearizer.py | 47 ++++++++++++++++++++ 4 files changed, 59 insertions(+), 161 deletions(-) delete mode 100644 tests/util/test_limiter.py (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8c12c6990f..abc07ea87c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -33,7 +33,7 @@ from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.replication.http.send_event import send_event_to_master from synapse.types import RoomAlias, RoomStreamToken, UserID -from synapse.util.async import Limiter, ReadWriteLock +from synapse.util.async import Linearizer, ReadWriteLock from synapse.util.frozenutils import frozendict_json_encoder from synapse.util.logcontext import run_in_background from synapse.util.metrics import measure_func @@ -427,7 +427,7 @@ class EventCreationHandler(object): # We arbitrarily limit concurrent event creation for a room to 5. # This is to stop us from diverging history *too* much. - self.limiter = Limiter(max_count=5, name="room_event_creation_limit") + self.limiter = Linearizer(max_count=5, name="room_event_creation_limit") self.action_generator = hs.get_action_generator() diff --git a/synapse/util/async.py b/synapse/util/async.py index 22071ddef7..5a50d9700f 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -157,91 +157,8 @@ def concurrently_execute(func, args, limit): class Linearizer(object): - """Linearizes access to resources based on a key. Useful to ensure only one - thing is happening at a time on a given resource. - - Example: - - with (yield linearizer.queue("test_key")): - # do some work. - - """ - def __init__(self, name=None, clock=None): - if name is None: - self.name = id(self) - else: - self.name = name - self.key_to_defer = {} - - if not clock: - from twisted.internet import reactor - clock = Clock(reactor) - self._clock = clock - - @defer.inlineCallbacks - def queue(self, key): - # If there is already a deferred in the queue, we pull it out so that - # we can wait on it later. - # Then we replace it with a deferred that we resolve *after* the - # context manager has exited. - # We only return the context manager after the previous deferred has - # resolved. - # This all has the net effect of creating a chain of deferreds that - # wait for the previous deferred before starting their work. - current_defer = self.key_to_defer.get(key) - - new_defer = defer.Deferred() - self.key_to_defer[key] = new_defer - - if current_defer: - logger.info( - "Waiting to acquire linearizer lock %r for key %r", self.name, key - ) - try: - with PreserveLoggingContext(): - yield current_defer - except Exception: - logger.exception("Unexpected exception in Linearizer") - - logger.info("Acquired linearizer lock %r for key %r", self.name, - key) - - # if the code holding the lock completes synchronously, then it - # will recursively run the next claimant on the list. That can - # relatively rapidly lead to stack exhaustion. This is essentially - # the same problem as http://twistedmatrix.com/trac/ticket/9304. - # - # In order to break the cycle, we add a cheeky sleep(0) here to - # ensure that we fall back to the reactor between each iteration. - # - # (There's no particular need for it to happen before we return - # the context manager, but it needs to happen while we hold the - # lock, and the context manager's exit code must be synchronous, - # so actually this is the only sensible place. - yield self._clock.sleep(0) - - else: - logger.info("Acquired uncontended linearizer lock %r for key %r", - self.name, key) - - @contextmanager - def _ctx_manager(): - try: - yield - finally: - logger.info("Releasing linearizer lock %r for key %r", self.name, key) - with PreserveLoggingContext(): - new_defer.callback(None) - current_d = self.key_to_defer.get(key) - if current_d is new_defer: - self.key_to_defer.pop(key, None) - - defer.returnValue(_ctx_manager()) - - -class Limiter(object): """Limits concurrent access to resources based on a key. Useful to ensure - only a few thing happen at a time on a given resource. + only a few things happen at a time on a given resource. Example: @@ -249,7 +166,7 @@ class Limiter(object): # do some work. """ - def __init__(self, max_count=1, name=None, clock=None): + def __init__(self, name=None, max_count=1, clock=None): """ Args: max_count(int): The maximum number of concurrent accesses @@ -283,10 +200,12 @@ class Limiter(object): new_defer = defer.Deferred() entry[1].append(new_defer) - logger.info("Waiting to acquire limiter lock %r for key %r", self.name, key) + logger.info( + "Waiting to acquire linearizer lock %r for key %r", self.name, key, + ) yield make_deferred_yieldable(new_defer) - logger.info("Acquired limiter lock %r for key %r", self.name, key) + logger.info("Acquired linearizer lock %r for key %r", self.name, key) entry[0] += 1 # if the code holding the lock completes synchronously, then it @@ -302,7 +221,9 @@ class Limiter(object): yield self._clock.sleep(0) else: - logger.info("Acquired uncontended limiter lock %r for key %r", self.name, key) + logger.info( + "Acquired uncontended linearizer lock %r for key %r", self.name, key, + ) entry[0] += 1 @contextmanager @@ -310,7 +231,7 @@ class Limiter(object): try: yield finally: - logger.info("Releasing limiter lock %r for key %r", self.name, key) + logger.info("Releasing linearizer lock %r for key %r", self.name, key) # We've finished executing so check if there are any things # blocked waiting to execute and start one of them diff --git a/tests/util/test_limiter.py b/tests/util/test_limiter.py deleted file mode 100644 index f7b942f5c1..0000000000 --- a/tests/util/test_limiter.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from twisted.internet import defer - -from synapse.util.async import Limiter - -from tests import unittest - - -class LimiterTestCase(unittest.TestCase): - - @defer.inlineCallbacks - def test_limiter(self): - limiter = Limiter(3) - - key = object() - - d1 = limiter.queue(key) - cm1 = yield d1 - - d2 = limiter.queue(key) - cm2 = yield d2 - - d3 = limiter.queue(key) - cm3 = yield d3 - - d4 = limiter.queue(key) - self.assertFalse(d4.called) - - d5 = limiter.queue(key) - self.assertFalse(d5.called) - - with cm1: - self.assertFalse(d4.called) - self.assertFalse(d5.called) - - cm4 = yield d4 - self.assertFalse(d5.called) - - with cm3: - self.assertFalse(d5.called) - - cm5 = yield d5 - - with cm2: - pass - - with cm4: - pass - - with cm5: - pass - - d6 = limiter.queue(key) - with (yield d6): - pass diff --git a/tests/util/test_linearizer.py b/tests/util/test_linearizer.py index c95907b32c..c9563124f9 100644 --- a/tests/util/test_linearizer.py +++ b/tests/util/test_linearizer.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -65,3 +66,49 @@ class LinearizerTestCase(unittest.TestCase): func(i) return func(1000) + + @defer.inlineCallbacks + def test_multiple_entries(self): + limiter = Linearizer(max_count=3) + + key = object() + + d1 = limiter.queue(key) + cm1 = yield d1 + + d2 = limiter.queue(key) + cm2 = yield d2 + + d3 = limiter.queue(key) + cm3 = yield d3 + + d4 = limiter.queue(key) + self.assertFalse(d4.called) + + d5 = limiter.queue(key) + self.assertFalse(d5.called) + + with cm1: + self.assertFalse(d4.called) + self.assertFalse(d5.called) + + cm4 = yield d4 + self.assertFalse(d5.called) + + with cm3: + self.assertFalse(d5.called) + + cm5 = yield d5 + + with cm2: + pass + + with cm4: + pass + + with cm5: + pass + + d6 = limiter.queue(key) + with (yield d6): + pass -- cgit 1.4.1 From e1a237eaabf0ba37f242897700f9bf00729976b8 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 20 Jul 2018 22:41:13 +1000 Subject: Admin API for creating new users (#3415) --- changelog.d/3415.misc | 0 docs/admin_api/register_api.rst | 63 ++++++++ scripts/register_new_matrix_user | 32 +++- synapse/rest/client/v1/admin.py | 122 +++++++++++++++ synapse/secrets.py | 42 +++++ synapse/server.py | 5 + tests/rest/client/v1/test_admin.py | 305 +++++++++++++++++++++++++++++++++++++ tests/utils.py | 3 + 8 files changed, 569 insertions(+), 3 deletions(-) create mode 100644 changelog.d/3415.misc create mode 100644 docs/admin_api/register_api.rst create mode 100644 synapse/secrets.py create mode 100644 tests/rest/client/v1/test_admin.py (limited to 'synapse') diff --git a/changelog.d/3415.misc b/changelog.d/3415.misc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/admin_api/register_api.rst b/docs/admin_api/register_api.rst new file mode 100644 index 0000000000..209cd140fd --- /dev/null +++ b/docs/admin_api/register_api.rst @@ -0,0 +1,63 @@ +Shared-Secret Registration +========================== + +This API allows for the creation of users in an administrative and +non-interactive way. This is generally used for bootstrapping a Synapse +instance with administrator accounts. + +To authenticate yourself to the server, you will need both the shared secret +(``registration_shared_secret`` in the homeserver configuration), and a +one-time nonce. If the registration shared secret is not configured, this API +is not enabled. + +To fetch the nonce, you need to request one from the API:: + + > GET /_matrix/client/r0/admin/register + + < {"nonce": "thisisanonce"} + +Once you have the nonce, you can make a ``POST`` to the same URL with a JSON +body containing the nonce, username, password, whether they are an admin +(optional, False by default), and a HMAC digest of the content. + +As an example:: + + > POST /_matrix/client/r0/admin/register + > { + "nonce": "thisisanonce", + "username": "pepper_roni", + "password": "pizza", + "admin": true, + "mac": "mac_digest_here" + } + + < { + "access_token": "token_here", + "user_id": "@pepper_roni@test", + "home_server": "test", + "device_id": "device_id_here" + } + +The MAC is the hex digest output of the HMAC-SHA1 algorithm, with the key being +the shared secret and the content being the nonce, user, password, and either +the string "admin" or "notadmin", each separated by NULs. For an example of +generation in Python:: + + import hmac, hashlib + + def generate_mac(nonce, user, password, admin=False): + + mac = hmac.new( + key=shared_secret, + digestmod=hashlib.sha1, + ) + + mac.update(nonce.encode('utf8')) + mac.update(b"\x00") + mac.update(user.encode('utf8')) + mac.update(b"\x00") + mac.update(password.encode('utf8')) + mac.update(b"\x00") + mac.update(b"admin" if admin else b"notadmin") + + return mac.hexdigest() diff --git a/scripts/register_new_matrix_user b/scripts/register_new_matrix_user index 12ed20d623..8c3d429351 100755 --- a/scripts/register_new_matrix_user +++ b/scripts/register_new_matrix_user @@ -26,11 +26,37 @@ import yaml def request_registration(user, password, server_location, shared_secret, admin=False): + req = urllib2.Request( + "%s/_matrix/client/r0/admin/register" % (server_location,), + headers={'Content-Type': 'application/json'} + ) + + try: + if sys.version_info[:3] >= (2, 7, 9): + # As of version 2.7.9, urllib2 now checks SSL certs + import ssl + f = urllib2.urlopen(req, context=ssl.SSLContext(ssl.PROTOCOL_SSLv23)) + else: + f = urllib2.urlopen(req) + body = f.read() + f.close() + nonce = json.loads(body)["nonce"] + except urllib2.HTTPError as e: + print "ERROR! Received %d %s" % (e.code, e.reason,) + if 400 <= e.code < 500: + if e.info().type == "application/json": + resp = json.load(e) + if "error" in resp: + print resp["error"] + sys.exit(1) + mac = hmac.new( key=shared_secret, digestmod=hashlib.sha1, ) + mac.update(nonce) + mac.update("\x00") mac.update(user) mac.update("\x00") mac.update(password) @@ -40,10 +66,10 @@ def request_registration(user, password, server_location, shared_secret, admin=F mac = mac.hexdigest() data = { - "user": user, + "nonce": nonce, + "username": user, "password": password, "mac": mac, - "type": "org.matrix.login.shared_secret", "admin": admin, } @@ -52,7 +78,7 @@ def request_registration(user, password, server_location, shared_secret, admin=F print "Sending registration request..." req = urllib2.Request( - "%s/_matrix/client/api/v1/register" % (server_location,), + "%s/_matrix/client/r0/admin/register" % (server_location,), data=json.dumps(data), headers={'Content-Type': 'application/json'} ) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2dc50e582b..9e9c175970 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import hashlib +import hmac import logging from six.moves import http_client @@ -63,6 +65,125 @@ class UsersRestServlet(ClientV1RestServlet): defer.returnValue((200, ret)) +class UserRegisterServlet(ClientV1RestServlet): + """ + Attributes: + NONCE_TIMEOUT (int): Seconds until a generated nonce won't be accepted + nonces (dict[str, int]): The nonces that we will accept. A dict of + nonce to the time it was generated, in int seconds. + """ + PATTERNS = client_path_patterns("/admin/register") + NONCE_TIMEOUT = 60 + + def __init__(self, hs): + super(UserRegisterServlet, self).__init__(hs) + self.handlers = hs.get_handlers() + self.reactor = hs.get_reactor() + self.nonces = {} + self.hs = hs + + def _clear_old_nonces(self): + """ + Clear out old nonces that are older than NONCE_TIMEOUT. + """ + now = int(self.reactor.seconds()) + + for k, v in list(self.nonces.items()): + if now - v > self.NONCE_TIMEOUT: + del self.nonces[k] + + def on_GET(self, request): + """ + Generate a new nonce. + """ + self._clear_old_nonces() + + nonce = self.hs.get_secrets().token_hex(64) + self.nonces[nonce] = int(self.reactor.seconds()) + return (200, {"nonce": nonce.encode('ascii')}) + + @defer.inlineCallbacks + def on_POST(self, request): + self._clear_old_nonces() + + if not self.hs.config.registration_shared_secret: + raise SynapseError(400, "Shared secret registration is not enabled") + + body = parse_json_object_from_request(request) + + if "nonce" not in body: + raise SynapseError( + 400, "nonce must be specified", errcode=Codes.BAD_JSON, + ) + + nonce = body["nonce"] + + if nonce not in self.nonces: + raise SynapseError( + 400, "unrecognised nonce", + ) + + # Delete the nonce, so it can't be reused, even if it's invalid + del self.nonces[nonce] + + if "username" not in body: + raise SynapseError( + 400, "username must be specified", errcode=Codes.BAD_JSON, + ) + else: + if (not isinstance(body['username'], str) or len(body['username']) > 512): + raise SynapseError(400, "Invalid username") + + username = body["username"].encode("utf-8") + if b"\x00" in username: + raise SynapseError(400, "Invalid username") + + if "password" not in body: + raise SynapseError( + 400, "password must be specified", errcode=Codes.BAD_JSON, + ) + else: + if (not isinstance(body['password'], str) or len(body['password']) > 512): + raise SynapseError(400, "Invalid password") + + password = body["password"].encode("utf-8") + if b"\x00" in password: + raise SynapseError(400, "Invalid password") + + admin = body.get("admin", None) + got_mac = body["mac"] + + want_mac = hmac.new( + key=self.hs.config.registration_shared_secret.encode(), + digestmod=hashlib.sha1, + ) + want_mac.update(nonce) + want_mac.update(b"\x00") + want_mac.update(username) + want_mac.update(b"\x00") + want_mac.update(password) + want_mac.update(b"\x00") + want_mac.update(b"admin" if admin else b"notadmin") + want_mac = want_mac.hexdigest() + + if not hmac.compare_digest(want_mac, got_mac): + raise SynapseError( + 403, "HMAC incorrect", + ) + + # Reuse the parts of RegisterRestServlet to reduce code duplication + from synapse.rest.client.v2_alpha.register import RegisterRestServlet + register = RegisterRestServlet(self.hs) + + (user_id, _) = yield register.registration_handler.register( + localpart=username.lower(), password=password, admin=bool(admin), + generate_token=False, + ) + + result = yield register._create_registration_details(user_id, body) + defer.returnValue((200, result)) + + class WhoisRestServlet(ClientV1RestServlet): PATTERNS = client_path_patterns("/admin/whois/(?P[^/]*)") @@ -614,3 +735,4 @@ def register_servlets(hs, http_server): ShutdownRoomRestServlet(hs).register(http_server) QuarantineMediaInRoom(hs).register(http_server) ListMediaInRoom(hs).register(http_server) + UserRegisterServlet(hs).register(http_server) diff --git a/synapse/secrets.py b/synapse/secrets.py new file mode 100644 index 0000000000..f397daaa5e --- /dev/null +++ b/synapse/secrets.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Injectable secrets module for Synapse. + +See https://docs.python.org/3/library/secrets.html#module-secrets for the API +used in Python 3.6, and the API emulated in Python 2.7. +""" + +import six + +if six.PY3: + import secrets + + def Secrets(): + return secrets + + +else: + + import os + import binascii + + class Secrets(object): + def token_bytes(self, nbytes=32): + return os.urandom(nbytes) + + def token_hex(self, nbytes=32): + return binascii.hexlify(self.token_bytes(nbytes)) diff --git a/synapse/server.py b/synapse/server.py index 92bea96c5c..fd4f992258 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -74,6 +74,7 @@ from synapse.rest.media.v1.media_repository import ( MediaRepository, MediaRepositoryResource, ) +from synapse.secrets import Secrets from synapse.server_notices.server_notices_manager import ServerNoticesManager from synapse.server_notices.server_notices_sender import ServerNoticesSender from synapse.server_notices.worker_server_notices_sender import WorkerServerNoticesSender @@ -158,6 +159,7 @@ class HomeServer(object): 'groups_server_handler', 'groups_attestation_signing', 'groups_attestation_renewer', + 'secrets', 'spam_checker', 'room_member_handler', 'federation_registry', @@ -405,6 +407,9 @@ class HomeServer(object): def build_groups_attestation_renewer(self): return GroupAttestionRenewer(self) + def build_secrets(self): + return Secrets() + def build_spam_checker(self): return SpamChecker(self) diff --git a/tests/rest/client/v1/test_admin.py b/tests/rest/client/v1/test_admin.py new file mode 100644 index 0000000000..8c90145601 --- /dev/null +++ b/tests/rest/client/v1/test_admin.py @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import hmac +import json + +from mock import Mock + +from synapse.http.server import JsonResource +from synapse.rest.client.v1.admin import register_servlets +from synapse.util import Clock + +from tests import unittest +from tests.server import ( + ThreadedMemoryReactorClock, + make_request, + render, + setup_test_homeserver, +) + + +class UserRegisterTestCase(unittest.TestCase): + def setUp(self): + + self.clock = ThreadedMemoryReactorClock() + self.hs_clock = Clock(self.clock) + self.url = "/_matrix/client/r0/admin/register" + + self.registration_handler = Mock() + self.identity_handler = Mock() + self.login_handler = Mock() + self.device_handler = Mock() + self.device_handler.check_device_registered = Mock(return_value="FAKE") + + self.datastore = Mock(return_value=Mock()) + self.datastore.get_current_state_deltas = Mock(return_value=[]) + + self.secrets = Mock() + + self.hs = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.clock + ) + + self.hs.config.registration_shared_secret = u"shared" + + self.hs.get_media_repository = Mock() + self.hs.get_deactivate_account_handler = Mock() + + self.resource = JsonResource(self.hs) + register_servlets(self.hs, self.resource) + + def test_disabled(self): + """ + If there is no shared secret, registration through this method will be + prevented. + """ + self.hs.config.registration_shared_secret = None + + request, channel = make_request("POST", self.url, b'{}') + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual( + 'Shared secret registration is not enabled', channel.json_body["error"] + ) + + def test_get_nonce(self): + """ + Calling GET on the endpoint will return a randomised nonce, using the + homeserver's secrets provider. + """ + secrets = Mock() + secrets.token_hex = Mock(return_value="abcd") + + self.hs.get_secrets = Mock(return_value=secrets) + + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + + self.assertEqual(channel.json_body, {"nonce": "abcd"}) + + def test_expired_nonce(self): + """ + Calling GET on the endpoint will return a randomised nonce, which will + only last for SALT_TIMEOUT (60s). + """ + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + nonce = channel.json_body["nonce"] + + # 59 seconds + self.clock.advance(59) + + body = json.dumps({"nonce": nonce}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('username must be specified', channel.json_body["error"]) + + # 61 seconds + self.clock.advance(2) + + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('unrecognised nonce', channel.json_body["error"]) + + def test_register_incorrect_nonce(self): + """ + Only the provided nonce can be used, as it's checked in the MAC. + """ + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(b"notthenonce\x00bob\x00abc123\x00admin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + ).encode('utf8') + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("HMAC incorrect", channel.json_body["error"]) + + def test_register_correct_nonce(self): + """ + When the correct nonce is provided, and the right key is provided, the + user is registered. + """ + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + ).encode('utf8') + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob:test", channel.json_body["user_id"]) + + def test_nonce_reuse(self): + """ + A valid unrecognised nonce. + """ + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + ).encode('utf8') + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob:test", channel.json_body["user_id"]) + + # Now, try and reuse it + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('unrecognised nonce', channel.json_body["error"]) + + def test_missing_parts(self): + """ + Synapse will complain if you don't give nonce, username, password, and + mac. Admin is optional. Additional checks are done for length and + type. + """ + def nonce(): + request, channel = make_request("GET", self.url) + render(request, self.resource, self.clock) + return channel.json_body["nonce"] + + # + # Nonce check + # + + # Must be present + body = json.dumps({}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('nonce must be specified', channel.json_body["error"]) + + # + # Username checks + # + + # Must be present + body = json.dumps({"nonce": nonce()}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('username must be specified', channel.json_body["error"]) + + # Must be a string + body = json.dumps({"nonce": nonce(), "username": 1234}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid username', channel.json_body["error"]) + + # Must not have null bytes + body = json.dumps({"nonce": nonce(), "username": b"abcd\x00"}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid username', channel.json_body["error"]) + + # Must not have null bytes + body = json.dumps({"nonce": nonce(), "username": "a" * 1000}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid username', channel.json_body["error"]) + + # + # Username checks + # + + # Must be present + body = json.dumps({"nonce": nonce(), "username": "a"}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('password must be specified', channel.json_body["error"]) + + # Must be a string + body = json.dumps({"nonce": nonce(), "username": "a", "password": 1234}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid password', channel.json_body["error"]) + + # Must not have null bytes + body = json.dumps({"nonce": nonce(), "username": "a", "password": b"abcd\x00"}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid password', channel.json_body["error"]) + + # Super long + body = json.dumps({"nonce": nonce(), "username": "a", "password": "A" * 1000}) + request, channel = make_request("POST", self.url, body.encode('utf8')) + render(request, self.resource, self.clock) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid password', channel.json_body["error"]) diff --git a/tests/utils.py b/tests/utils.py index e488238bb3..c3dbff8507 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -71,6 +71,8 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None config.user_directory_search_all_users = False config.user_consent_server_notice_content = None config.block_events_without_consent_error = None + config.media_storage_providers = [] + config.auto_join_rooms = [] # disable user directory updates, because they get done in the # background, which upsets the test runner. @@ -136,6 +138,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None database_engine=db_engine, room_list_handler=object(), tls_server_context_factory=Mock(), + reactor=reactor, **kargs ) -- cgit 1.4.1 From 3d6df846580ce6ef8769945e6990af2f44251e40 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Jul 2018 13:59:55 +0100 Subject: Test and fix support for cancellation in Linearizer --- synapse/util/async.py | 28 ++++++++++++++++++++++------ tests/util/test_linearizer.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/util/async.py b/synapse/util/async.py index 5a50d9700f..a7094e2fb4 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -184,13 +184,13 @@ class Linearizer(object): # key_to_defer is a map from the key to a 2 element list where # the first element is the number of things executing, and - # the second element is a deque of deferreds for the things blocked from - # executing. + # the second element is an OrderedDict, where the keys are deferreds for the + # things blocked from executing. self.key_to_defer = {} @defer.inlineCallbacks def queue(self, key): - entry = self.key_to_defer.setdefault(key, [0, collections.deque()]) + entry = self.key_to_defer.setdefault(key, [0, collections.OrderedDict()]) # If the number of things executing is greater than the maximum # then add a deferred to the list of blocked items @@ -198,12 +198,28 @@ class Linearizer(object): # this item so that it can continue executing. if entry[0] >= self.max_count: new_defer = defer.Deferred() - entry[1].append(new_defer) + entry[1][new_defer] = 1 logger.info( "Waiting to acquire linearizer lock %r for key %r", self.name, key, ) - yield make_deferred_yieldable(new_defer) + try: + yield make_deferred_yieldable(new_defer) + except Exception as e: + if isinstance(e, CancelledError): + logger.info( + "Cancelling wait for linearizer lock %r for key %r", + self.name, key, + ) + else: + logger.warn( + "Unexpected exception waiting for linearizer lock %r for key %r", + self.name, key, + ) + + # we just have to take ourselves back out of the queue. + del entry[1][new_defer] + raise logger.info("Acquired linearizer lock %r for key %r", self.name, key) entry[0] += 1 @@ -238,7 +254,7 @@ class Linearizer(object): entry[0] -= 1 if entry[1]: - next_def = entry[1].popleft() + (next_def, _) = entry[1].popitem(last=False) # we need to run the next thing in the sentinel context. with PreserveLoggingContext(): diff --git a/tests/util/test_linearizer.py b/tests/util/test_linearizer.py index c9563124f9..4729bd5a0a 100644 --- a/tests/util/test_linearizer.py +++ b/tests/util/test_linearizer.py @@ -17,6 +17,7 @@ from six.moves import range from twisted.internet import defer, reactor +from twisted.internet.defer import CancelledError from synapse.util import Clock, logcontext from synapse.util.async import Linearizer @@ -112,3 +113,33 @@ class LinearizerTestCase(unittest.TestCase): d6 = limiter.queue(key) with (yield d6): pass + + @defer.inlineCallbacks + def test_cancellation(self): + linearizer = Linearizer() + + key = object() + + d1 = linearizer.queue(key) + cm1 = yield d1 + + d2 = linearizer.queue(key) + self.assertFalse(d2.called) + + d3 = linearizer.queue(key) + self.assertFalse(d3.called) + + d2.cancel() + + with cm1: + pass + + self.assertTrue(d2.called) + try: + yield d2 + self.fail("Expected d2 to raise CancelledError") + except CancelledError: + pass + + with (yield d3): + pass -- cgit 1.4.1 From 0ecf68aedc09f4037208b613b692a8a98c78b3ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 Jul 2018 15:30:59 +0100 Subject: Move check_in_room_or_world_readable to Auth --- synapse/api/auth.py | 34 ++++++++++++++++++++++++++++++++++ synapse/handlers/message.py | 40 ++++++---------------------------------- 2 files changed, 40 insertions(+), 34 deletions(-) (limited to 'synapse') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index bc629832d9..bf9efb170a 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -737,3 +737,37 @@ class Auth(object): ) return query_params[0] + + @defer.inlineCallbacks + def check_in_room_or_world_readable(self, room_id, user_id): + """Checks that the user is or was in the room or the room is world + readable. If it isn't then an exception is raised. + + Returns: + Deferred[tuple[str, str|None]]: Resolves to the current membership of + the user in the room and the membership event ID of the user. If + the user is not in the room and never has been, then + `(Membership.JOIN, None)` is returned. + """ + + try: + # check_user_was_in_room will return the most recent membership + # event for the user if: + # * The user is a non-guest user, and was ever in the room + # * The user is a guest user, and has joined the room + # else it will throw. + member_event = yield self.check_user_was_in_room(room_id, user_id) + defer.returnValue((member_event.membership, member_event.event_id)) + except AuthError: + visibility = yield self.state.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if ( + visibility and + visibility.content["history_visibility"] == "world_readable" + ): + defer.returnValue((Membership.JOIN, None)) + return + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3c6f9860d5..c1489cd066 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -97,7 +97,7 @@ class MessageHandler(object): Raises: SynapseError if something went wrong. """ - membership, membership_event_id = yield self._check_in_room_or_world_readable( + membership, membership_event_id = yield self.auth.check_in_room_or_world_readable( room_id, user_id ) @@ -114,31 +114,6 @@ class MessageHandler(object): defer.returnValue(data) - @defer.inlineCallbacks - def _check_in_room_or_world_readable(self, room_id, user_id): - try: - # check_user_was_in_room will return the most recent membership - # event for the user if: - # * The user is a non-guest user, and was ever in the room - # * The user is a guest user, and has joined the room - # else it will throw. - member_event = yield self.auth.check_user_was_in_room(room_id, user_id) - defer.returnValue((member_event.membership, member_event.event_id)) - return - except AuthError: - visibility = yield self.state.get_current_state( - room_id, EventTypes.RoomHistoryVisibility, "" - ) - if ( - visibility and - visibility.content["history_visibility"] == "world_readable" - ): - defer.returnValue((Membership.JOIN, None)) - return - raise AuthError( - 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN - ) - @defer.inlineCallbacks def get_state_events(self, user_id, room_id, is_guest=False): """Retrieve all state events for a given room. If the user is @@ -151,7 +126,7 @@ class MessageHandler(object): Returns: A list of dicts representing state events. [{}, {}, {}] """ - membership, membership_event_id = yield self._check_in_room_or_world_readable( + membership, membership_event_id = yield self.auth.check_in_room_or_world_readable( room_id, user_id ) @@ -184,7 +159,7 @@ class MessageHandler(object): if not requester.app_service: # We check AS auth after fetching the room membership, as it # requires us to pull out all joined members anyway. - membership, _ = yield self._check_in_room_or_world_readable( + membership, _ = yield self.auth.check_in_room_or_world_readable( room_id, user_id ) if membership != Membership.JOIN: @@ -214,19 +189,16 @@ class MessageHandler(object): }) -class PaginationHandler(MessageHandler): +class PaginationHandler(object): """Handles pagination and purge history requests. These are in the same handler due to the fact we need to block clients paginating during a purge. - - This subclasses MessageHandler to get at _check_in_room_or_world_readable """ def __init__(self, hs): - super(PaginationHandler, self).__init__(hs) - self.hs = hs + self.auth = hs.get_auth() self.store = hs.get_datastore() self.clock = hs.get_clock() @@ -349,7 +321,7 @@ class PaginationHandler(MessageHandler): source_config = pagin_config.get_source_config("room") with (yield self.pagination_lock.read(room_id)): - membership, member_event_id = yield self._check_in_room_or_world_readable( + membership, member_event_id = yield self.auth.check_in_room_or_world_readable( room_id, user_id ) -- cgit 1.4.1 From 5c88bb722f57af1c77f34d77152689425ab95eba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 Jul 2018 15:32:23 +0100 Subject: Move PaginationHandler to its own file --- synapse/handlers/message.py | 242 +------------------------------------ synapse/handlers/pagination.py | 265 +++++++++++++++++++++++++++++++++++++++++ synapse/server.py | 7 +- 3 files changed, 269 insertions(+), 245 deletions(-) create mode 100644 synapse/handlers/pagination.py (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c1489cd066..ba3c4642bc 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,7 +23,6 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer from twisted.internet.defer import succeed -from twisted.python.failure import Failure from synapse.api.constants import MAX_DEPTH, EventTypes, Membership from synapse.api.errors import AuthError, Codes, ConsentNotGivenError, SynapseError @@ -32,49 +31,17 @@ from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.replication.http.send_event import send_event_to_master -from synapse.types import RoomAlias, RoomStreamToken, UserID -from synapse.util.async import Limiter, ReadWriteLock +from synapse.types import RoomAlias, UserID +from synapse.util.async import Limiter from synapse.util.frozenutils import frozendict_json_encoder from synapse.util.logcontext import run_in_background from synapse.util.metrics import measure_func -from synapse.util.stringutils import random_string -from synapse.visibility import filter_events_for_client from ._base import BaseHandler logger = logging.getLogger(__name__) -class PurgeStatus(object): - """Object tracking the status of a purge request - - This class contains information on the progress of a purge request, for - return by get_purge_status. - - Attributes: - status (int): Tracks whether this request has completed. One of - STATUS_{ACTIVE,COMPLETE,FAILED} - """ - - STATUS_ACTIVE = 0 - STATUS_COMPLETE = 1 - STATUS_FAILED = 2 - - STATUS_TEXT = { - STATUS_ACTIVE: "active", - STATUS_COMPLETE: "complete", - STATUS_FAILED: "failed", - } - - def __init__(self): - self.status = PurgeStatus.STATUS_ACTIVE - - def asdict(self): - return { - "status": PurgeStatus.STATUS_TEXT[self.status] - } - - class MessageHandler(object): """Contains some read only APIs to get state about a room """ @@ -189,211 +156,6 @@ class MessageHandler(object): }) -class PaginationHandler(object): - """Handles pagination and purge history requests. - - These are in the same handler due to the fact we need to block clients - paginating during a purge. - """ - - def __init__(self, hs): - self.hs = hs - self.auth = hs.get_auth() - self.store = hs.get_datastore() - self.clock = hs.get_clock() - - self.pagination_lock = ReadWriteLock() - self._purges_in_progress_by_room = set() - # map from purge id to PurgeStatus - self._purges_by_id = {} - - def start_purge_history(self, room_id, token, - delete_local_events=False): - """Start off a history purge on a room. - - Args: - room_id (str): The room to purge from - - token (str): topological token to delete events before - delete_local_events (bool): True to delete local events as well as - remote ones - - Returns: - str: unique ID for this purge transaction. - """ - if room_id in self._purges_in_progress_by_room: - raise SynapseError( - 400, - "History purge already in progress for %s" % (room_id, ), - ) - - purge_id = random_string(16) - - # we log the purge_id here so that it can be tied back to the - # request id in the log lines. - logger.info("[purge] starting purge_id %s", purge_id) - - self._purges_by_id[purge_id] = PurgeStatus() - run_in_background( - self._purge_history, - purge_id, room_id, token, delete_local_events, - ) - return purge_id - - @defer.inlineCallbacks - def _purge_history(self, purge_id, room_id, token, - delete_local_events): - """Carry out a history purge on a room. - - Args: - purge_id (str): The id for this purge - room_id (str): The room to purge from - token (str): topological token to delete events before - delete_local_events (bool): True to delete local events as well as - remote ones - - Returns: - Deferred - """ - self._purges_in_progress_by_room.add(room_id) - try: - with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history( - room_id, token, delete_local_events, - ) - logger.info("[purge] complete") - self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE - except Exception: - logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) - self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED - finally: - self._purges_in_progress_by_room.discard(room_id) - - # remove the purge from the list 24 hours after it completes - def clear_purge(): - del self._purges_by_id[purge_id] - self.hs.get_reactor().callLater(24 * 3600, clear_purge) - - def get_purge_status(self, purge_id): - """Get the current status of an active purge - - Args: - purge_id (str): purge_id returned by start_purge_history - - Returns: - PurgeStatus|None - """ - return self._purges_by_id.get(purge_id) - - @defer.inlineCallbacks - def get_messages(self, requester, room_id=None, pagin_config=None, - as_client_event=True, event_filter=None): - """Get messages in a room. - - Args: - requester (Requester): The user requesting messages. - room_id (str): The room they want messages from. - pagin_config (synapse.api.streams.PaginationConfig): The pagination - config rules to apply, if any. - as_client_event (bool): True to get events in client-server format. - event_filter (Filter): Filter to apply to results or None - Returns: - dict: Pagination API results - """ - user_id = requester.user.to_string() - - if pagin_config.from_token: - room_token = pagin_config.from_token.room_key - else: - pagin_config.from_token = ( - yield self.hs.get_event_sources().get_current_token_for_room( - room_id=room_id - ) - ) - room_token = pagin_config.from_token.room_key - - room_token = RoomStreamToken.parse(room_token) - - pagin_config.from_token = pagin_config.from_token.copy_and_replace( - "room_key", str(room_token) - ) - - source_config = pagin_config.get_source_config("room") - - with (yield self.pagination_lock.read(room_id)): - membership, member_event_id = yield self.auth.check_in_room_or_world_readable( - room_id, user_id - ) - - if source_config.direction == 'b': - # if we're going backwards, we might need to backfill. This - # requires that we have a topo token. - if room_token.topological: - max_topo = room_token.topological - else: - max_topo = yield self.store.get_max_topological_token( - room_id, room_token.stream - ) - - if membership == Membership.LEAVE: - # If they have left the room then clamp the token to be before - # they left the room, to save the effort of loading from the - # database. - leave_token = yield self.store.get_topological_token_for_event( - member_event_id - ) - leave_token = RoomStreamToken.parse(leave_token) - if leave_token.topological < max_topo: - source_config.from_key = str(leave_token) - - yield self.hs.get_handlers().federation_handler.maybe_backfill( - room_id, max_topo - ) - - events, next_key = yield self.store.paginate_room_events( - room_id=room_id, - from_key=source_config.from_key, - to_key=source_config.to_key, - direction=source_config.direction, - limit=source_config.limit, - event_filter=event_filter, - ) - - next_token = pagin_config.from_token.copy_and_replace( - "room_key", next_key - ) - - if not events: - defer.returnValue({ - "chunk": [], - "start": pagin_config.from_token.to_string(), - "end": next_token.to_string(), - }) - - if event_filter: - events = event_filter.filter(events) - - events = yield filter_events_for_client( - self.store, - user_id, - events, - is_peeking=(member_event_id is None), - ) - - time_now = self.clock.time_msec() - - chunk = { - "chunk": [ - serialize_event(e, time_now, as_client_event) - for e in events - ], - "start": pagin_config.from_token.to_string(), - "end": next_token.to_string(), - } - - defer.returnValue(chunk) - - class EventCreationHandler(object): def __init__(self, hs): self.hs = hs diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py new file mode 100644 index 0000000000..b2849783ed --- /dev/null +++ b/synapse/handlers/pagination.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2017 - 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer +from twisted.python.failure import Failure + +from synapse.api.constants import Membership +from synapse.api.errors import SynapseError +from synapse.events.utils import serialize_event +from synapse.types import RoomStreamToken +from synapse.util.async import ReadWriteLock +from synapse.util.logcontext import run_in_background +from synapse.util.stringutils import random_string +from synapse.visibility import filter_events_for_client + +logger = logging.getLogger(__name__) + + +class PurgeStatus(object): + """Object tracking the status of a purge request + + This class contains information on the progress of a purge request, for + return by get_purge_status. + + Attributes: + status (int): Tracks whether this request has completed. One of + STATUS_{ACTIVE,COMPLETE,FAILED} + """ + + STATUS_ACTIVE = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + + STATUS_TEXT = { + STATUS_ACTIVE: "active", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + } + + def __init__(self): + self.status = PurgeStatus.STATUS_ACTIVE + + def asdict(self): + return { + "status": PurgeStatus.STATUS_TEXT[self.status] + } + + +class PaginationHandler(object): + """Handles pagination and purge history requests. + + These are in the same handler due to the fact we need to block clients + paginating during a purge. + """ + + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + self.pagination_lock = ReadWriteLock() + self._purges_in_progress_by_room = set() + # map from purge id to PurgeStatus + self._purges_by_id = {} + + def start_purge_history(self, room_id, token, + delete_local_events=False): + """Start off a history purge on a room. + + Args: + room_id (str): The room to purge from + + token (str): topological token to delete events before + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + str: unique ID for this purge transaction. + """ + if room_id in self._purges_in_progress_by_room: + raise SynapseError( + 400, + "History purge already in progress for %s" % (room_id, ), + ) + + purge_id = random_string(16) + + # we log the purge_id here so that it can be tied back to the + # request id in the log lines. + logger.info("[purge] starting purge_id %s", purge_id) + + self._purges_by_id[purge_id] = PurgeStatus() + run_in_background( + self._purge_history, + purge_id, room_id, token, delete_local_events, + ) + return purge_id + + @defer.inlineCallbacks + def _purge_history(self, purge_id, room_id, token, + delete_local_events): + """Carry out a history purge on a room. + + Args: + purge_id (str): The id for this purge + room_id (str): The room to purge from + token (str): topological token to delete events before + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + Deferred + """ + self._purges_in_progress_by_room.add(room_id) + try: + with (yield self.pagination_lock.write(room_id)): + yield self.store.purge_history( + room_id, token, delete_local_events, + ) + logger.info("[purge] complete") + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + except Exception: + logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED + finally: + self._purges_in_progress_by_room.discard(room_id) + + # remove the purge from the list 24 hours after it completes + def clear_purge(): + del self._purges_by_id[purge_id] + self.hs.get_reactor().callLater(24 * 3600, clear_purge) + + def get_purge_status(self, purge_id): + """Get the current status of an active purge + + Args: + purge_id (str): purge_id returned by start_purge_history + + Returns: + PurgeStatus|None + """ + return self._purges_by_id.get(purge_id) + + @defer.inlineCallbacks + def get_messages(self, requester, room_id=None, pagin_config=None, + as_client_event=True, event_filter=None): + """Get messages in a room. + + Args: + requester (Requester): The user requesting messages. + room_id (str): The room they want messages from. + pagin_config (synapse.api.streams.PaginationConfig): The pagination + config rules to apply, if any. + as_client_event (bool): True to get events in client-server format. + event_filter (Filter): Filter to apply to results or None + Returns: + dict: Pagination API results + """ + user_id = requester.user.to_string() + + if pagin_config.from_token: + room_token = pagin_config.from_token.room_key + else: + pagin_config.from_token = ( + yield self.hs.get_event_sources().get_current_token_for_room( + room_id=room_id + ) + ) + room_token = pagin_config.from_token.room_key + + room_token = RoomStreamToken.parse(room_token) + + pagin_config.from_token = pagin_config.from_token.copy_and_replace( + "room_key", str(room_token) + ) + + source_config = pagin_config.get_source_config("room") + + with (yield self.pagination_lock.read(room_id)): + membership, member_event_id = yield self.auth.check_in_room_or_world_readable( + room_id, user_id + ) + + if source_config.direction == 'b': + # if we're going backwards, we might need to backfill. This + # requires that we have a topo token. + if room_token.topological: + max_topo = room_token.topological + else: + max_topo = yield self.store.get_max_topological_token( + room_id, room_token.stream + ) + + if membership == Membership.LEAVE: + # If they have left the room then clamp the token to be before + # they left the room, to save the effort of loading from the + # database. + leave_token = yield self.store.get_topological_token_for_event( + member_event_id + ) + leave_token = RoomStreamToken.parse(leave_token) + if leave_token.topological < max_topo: + source_config.from_key = str(leave_token) + + yield self.hs.get_handlers().federation_handler.maybe_backfill( + room_id, max_topo + ) + + events, next_key = yield self.store.paginate_room_events( + room_id=room_id, + from_key=source_config.from_key, + to_key=source_config.to_key, + direction=source_config.direction, + limit=source_config.limit, + event_filter=event_filter, + ) + + next_token = pagin_config.from_token.copy_and_replace( + "room_key", next_key + ) + + if not events: + defer.returnValue({ + "chunk": [], + "start": pagin_config.from_token.to_string(), + "end": next_token.to_string(), + }) + + if event_filter: + events = event_filter.filter(events) + + events = yield filter_events_for_client( + self.store, + user_id, + events, + is_peeking=(member_event_id is None), + ) + + time_now = self.clock.time_msec() + + chunk = { + "chunk": [ + serialize_event(e, time_now, as_client_event) + for e in events + ], + "start": pagin_config.from_token.to_string(), + "end": next_token.to_string(), + } + + defer.returnValue(chunk) diff --git a/synapse/server.py b/synapse/server.py index a24ea158df..83eacccc29 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -52,11 +52,8 @@ from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.events import EventHandler, EventStreamHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.initial_sync import InitialSyncHandler -from synapse.handlers.message import ( - EventCreationHandler, - MessageHandler, - PaginationHandler, -) +from synapse.handlers.message import EventCreationHandler, MessageHandler +from synapse.handlers.pagination import PaginationHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.profile import ProfileHandler from synapse.handlers.read_marker import ReadMarkerHandler -- cgit 1.4.1 From 3132b89f12f0386558045683ad198f090b0e2c90 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 21 Jul 2018 15:47:18 +1000 Subject: Make the rest of the .iterwhatever go away (#3562) --- changelog.d/3562.misc | 0 synapse/app/homeserver.py | 6 ++++-- synapse/app/synctl.py | 4 +++- synapse/events/snapshot.py | 4 +++- synapse/handlers/federation.py | 18 +++++++++--------- synapse/state.py | 6 +++--- synapse/visibility.py | 19 ++++++++++--------- tests/test_federation.py | 3 +-- 8 files changed, 33 insertions(+), 27 deletions(-) create mode 100644 changelog.d/3562.misc (limited to 'synapse') diff --git a/changelog.d/3562.misc b/changelog.d/3562.misc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 14e6dca522..2ad1beb8d8 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -18,6 +18,8 @@ import logging import os import sys +from six import iteritems + from twisted.application import service from twisted.internet import defer, reactor from twisted.web.resource import EncodingResourceWrapper, NoResource @@ -442,7 +444,7 @@ def run(hs): stats["total_nonbridged_users"] = total_nonbridged_users daily_user_type_results = yield hs.get_datastore().count_daily_user_type() - for name, count in daily_user_type_results.iteritems(): + for name, count in iteritems(daily_user_type_results): stats["daily_user_type_" + name] = count room_count = yield hs.get_datastore().get_room_count() @@ -453,7 +455,7 @@ def run(hs): stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() r30_results = yield hs.get_datastore().count_r30_users() - for name, count in r30_results.iteritems(): + for name, count in iteritems(r30_results): stats["r30_users_" + name] = count daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 68acc15a9a..d658f967ba 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -25,6 +25,8 @@ import subprocess import sys import time +from six import iteritems + import yaml SYNAPSE = [sys.executable, "-B", "-m", "synapse.app.homeserver"] @@ -173,7 +175,7 @@ def main(): os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor) cache_factors = config.get("synctl_cache_factors", {}) - for cache_name, factor in cache_factors.iteritems(): + for cache_name, factor in iteritems(cache_factors): os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor) worker_configfiles = [] diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index bcd9bb5946..f83a1581a6 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from six import iteritems + from frozendict import frozendict from twisted.internet import defer @@ -159,7 +161,7 @@ def _encode_state_dict(state_dict): return [ (etype, state_key, v) - for (etype, state_key), v in state_dict.iteritems() + for (etype, state_key), v in iteritems(state_dict) ] diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 65f6041b10..a6d391c4e8 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -21,8 +21,8 @@ import logging import sys import six -from six import iteritems -from six.moves import http_client +from six import iteritems, itervalues +from six.moves import http_client, zip from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json @@ -731,7 +731,7 @@ class FederationHandler(BaseHandler): """ joined_users = [ (state_key, int(event.depth)) - for (e_type, state_key), event in state.iteritems() + for (e_type, state_key), event in iteritems(state) if e_type == EventTypes.Member and event.membership == Membership.JOIN ] @@ -748,7 +748,7 @@ class FederationHandler(BaseHandler): except Exception: pass - return sorted(joined_domains.iteritems(), key=lambda d: d[1]) + return sorted(joined_domains.items(), key=lambda d: d[1]) curr_domains = get_domains_from_state(curr_state) @@ -811,7 +811,7 @@ class FederationHandler(BaseHandler): tried_domains = set(likely_domains) tried_domains.add(self.server_name) - event_ids = list(extremities.iterkeys()) + event_ids = list(extremities.keys()) logger.debug("calling resolve_state_groups in _maybe_backfill") resolve = logcontext.preserve_fn( @@ -827,15 +827,15 @@ class FederationHandler(BaseHandler): states = dict(zip(event_ids, [s.state for s in states])) state_map = yield self.store.get_events( - [e_id for ids in states.itervalues() for e_id in ids.itervalues()], + [e_id for ids in itervalues(states) for e_id in itervalues(ids)], get_prev_content=False ) states = { key: { k: state_map[e_id] - for k, e_id in state_dict.iteritems() + for k, e_id in iteritems(state_dict) if e_id in state_map - } for key, state_dict in states.iteritems() + } for key, state_dict in iteritems(states) } for e_id, _ in sorted_extremeties_tuple: @@ -1515,7 +1515,7 @@ class FederationHandler(BaseHandler): yield self.store.persist_events( [ (ev_info["event"], context) - for ev_info, context in itertools.izip(event_infos, contexts) + for ev_info, context in zip(event_infos, contexts) ], backfilled=backfilled, ) diff --git a/synapse/state.py b/synapse/state.py index 15a593d41c..504caae2f7 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -18,7 +18,7 @@ import hashlib import logging from collections import namedtuple -from six import iteritems, itervalues +from six import iteritems, iterkeys, itervalues from frozendict import frozendict @@ -647,7 +647,7 @@ def resolve_events_with_factory(state_sets, event_map, state_map_factory): for event_id in event_ids ) if event_map is not None: - needed_events -= set(event_map.iterkeys()) + needed_events -= set(iterkeys(event_map)) logger.info("Asking for %d conflicted events", len(needed_events)) @@ -668,7 +668,7 @@ def resolve_events_with_factory(state_sets, event_map, state_map_factory): new_needed_events = set(itervalues(auth_events)) new_needed_events -= needed_events if event_map is not None: - new_needed_events -= set(event_map.iterkeys()) + new_needed_events -= set(iterkeys(event_map)) logger.info("Asking for %d auth events", len(new_needed_events)) diff --git a/synapse/visibility.py b/synapse/visibility.py index 9b97ea2b83..ba0499a022 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -12,11 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import itertools + import logging import operator -import six +from six import iteritems, itervalues +from six.moves import map from twisted.internet import defer @@ -221,7 +222,7 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, return event # check each event: gives an iterable[None|EventBase] - filtered_events = itertools.imap(allowed, events) + filtered_events = map(allowed, events) # remove the None entries filtered_events = filter(operator.truth, filtered_events) @@ -261,7 +262,7 @@ def filter_events_for_server(store, server_name, events): # membership states for the requesting server to determine # if the server is either in the room or has been invited # into the room. - for ev in state.itervalues(): + for ev in itervalues(state): if ev.type != EventTypes.Member: continue try: @@ -295,7 +296,7 @@ def filter_events_for_server(store, server_name, events): ) visibility_ids = set() - for sids in event_to_state_ids.itervalues(): + for sids in itervalues(event_to_state_ids): hist = sids.get((EventTypes.RoomHistoryVisibility, "")) if hist: visibility_ids.add(hist) @@ -308,7 +309,7 @@ def filter_events_for_server(store, server_name, events): event_map = yield store.get_events(visibility_ids) all_open = all( e.content.get("history_visibility") in (None, "shared", "world_readable") - for e in event_map.itervalues() + for e in itervalues(event_map) ) if all_open: @@ -346,7 +347,7 @@ def filter_events_for_server(store, server_name, events): # state_key_to_event_id_set = { e - for key_to_eid in six.itervalues(event_to_state_ids) + for key_to_eid in itervalues(event_to_state_ids) for e in key_to_eid.items() } @@ -369,10 +370,10 @@ def filter_events_for_server(store, server_name, events): event_to_state = { e_id: { key: event_map[inner_e_id] - for key, inner_e_id in key_to_eid.iteritems() + for key, inner_e_id in iteritems(key_to_eid) if inner_e_id in event_map } - for e_id, key_to_eid in event_to_state_ids.iteritems() + for e_id, key_to_eid in iteritems(event_to_state_ids) } defer.returnValue([ diff --git a/tests/test_federation.py b/tests/test_federation.py index 159a136971..f40ff29b52 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -137,7 +137,6 @@ class MessageAcceptTests(unittest.TestCase): ) self.assertEqual(self.successResultOf(extrem)[0], "$join:test.serv") - @unittest.DEBUG def test_cant_hide_past_history(self): """ If you send a message, you must be able to provide the direct @@ -178,7 +177,7 @@ class MessageAcceptTests(unittest.TestCase): for x, y in d.items() if x == ("m.room.member", "@us:test") ], - "auth_chain_ids": d.values(), + "auth_chain_ids": list(d.values()), } ) -- cgit 1.4.1 From c1bf2b587eaa718e28a33f76a7b5f6e288255fca Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 09:56:23 +0100 Subject: add trailing comma --- synapse/storage/end_to_end_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index ffe4d7235a..523b4360c3 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -66,7 +66,7 @@ class EndToEndKeyStore(SQLBaseStore): @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, - include_deleted_devices=False + include_deleted_devices=False, ): """Fetch a list of device keys. Args: -- cgit 1.4.1 From acbfdc3442f706150c6312e534ca4ecec8548582 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 12:17:16 +0100 Subject: Refcator EventContext to accept state during init --- synapse/events/snapshot.py | 48 +++++++++++++---------- synapse/state.py | 97 ++++++++++++++++++++++++++-------------------- 2 files changed, 82 insertions(+), 63 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f83a1581a6..fbbe8dd490 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -60,22 +60,22 @@ class EventContext(object): "app_service", ] - def __init__(self): + def __init__(self, state_group, current_state_ids, prev_state_ids, + prev_group=None, delta_ids=None): # The current state including the current event - self.current_state_ids = None + self.current_state_ids = current_state_ids # The current state excluding the current event - self.prev_state_ids = None - self.state_group = None - - self.rejected = False + self.prev_state_ids = prev_state_ids + self.state_group = state_group # A previously persisted state group and a delta between that # and this state. - self.prev_group = None - self.delta_ids = None + self.prev_group = prev_group + self.delta_ids = delta_ids - self.prev_state_events = None + self.prev_state_events = [] + self.rejected = False self.app_service = None def serialize(self, event): @@ -123,27 +123,33 @@ class EventContext(object): Returns: EventContext """ - context = EventContext() - context.state_group = input["state_group"] - context.rejected = input["rejected"] - context.prev_group = input["prev_group"] - context.delta_ids = _decode_state_dict(input["delta_ids"]) - context.prev_state_events = input["prev_state_events"] - # We use the state_group and prev_state_id stuff to pull the # current_state_ids out of the DB and construct prev_state_ids. prev_state_id = input["prev_state_id"] event_type = input["event_type"] event_state_key = input["event_state_key"] - context.current_state_ids = yield store.get_state_ids_for_group( - context.state_group, + state_group = input["state_group"] + + current_state_ids = yield store.get_state_ids_for_group( + state_group, ) if prev_state_id and event_state_key: - context.prev_state_ids = dict(context.current_state_ids) - context.prev_state_ids[(event_type, event_state_key)] = prev_state_id + prev_state_ids = dict(current_state_ids) + prev_state_ids[(event_type, event_state_key)] = prev_state_id else: - context.prev_state_ids = context.current_state_ids + prev_state_ids = current_state_ids + + context = EventContext( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + prev_group=input["prev_group"], + delta_ids = _decode_state_dict(input["delta_ids"]), + ) + + context.rejected = input["rejected"] + context.prev_state_events = input["prev_state_events"] app_service_id = input["app_service_id"] if app_service_id: diff --git a/synapse/state.py b/synapse/state.py index 504caae2f7..a708695006 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -203,25 +203,27 @@ class StateHandler(object): # If this is an outlier, then we know it shouldn't have any current # state. Certainly store.get_current_state won't return any, and # persisting the event won't store the state group. - context = EventContext() if old_state: - context.prev_state_ids = { + prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } if event.is_state(): - context.current_state_ids = dict(context.prev_state_ids) + current_state_ids = dict(prev_state_ids) key = (event.type, event.state_key) - context.current_state_ids[key] = event.event_id + current_state_ids[key] = event.event_id else: - context.current_state_ids = context.prev_state_ids + current_state_ids = prev_state_ids else: - context.current_state_ids = {} - context.prev_state_ids = {} - context.prev_state_events = [] + current_state_ids = {} + prev_state_ids = {} # We don't store state for outliers, so we don't generate a state - # froup for it. - context.state_group = None + # group for it. + context = EventContext( + state_group=None, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + ) defer.returnValue(context) @@ -230,31 +232,35 @@ class StateHandler(object): # Let's just correctly fill out the context and create a # new state group for it. - context = EventContext() - context.prev_state_ids = { + prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } if event.is_state(): key = (event.type, event.state_key) - if key in context.prev_state_ids: - replaces = context.prev_state_ids[key] + if key in prev_state_ids: + replaces = prev_state_ids[key] if replaces != event.event_id: # Paranoia check event.unsigned["replaces_state"] = replaces - context.current_state_ids = dict(context.prev_state_ids) - context.current_state_ids[key] = event.event_id + current_state_ids = dict(prev_state_ids) + current_state_ids[key] = event.event_id else: - context.current_state_ids = context.prev_state_ids + current_state_ids = prev_state_ids - context.state_group = yield self.store.store_state_group( + state_group = yield self.store.store_state_group( event.event_id, event.room_id, prev_group=None, delta_ids=None, - current_state_ids=context.current_state_ids, + current_state_ids=current_state_ids, + ) + + context = EventContext( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, ) - context.prev_state_events = [] defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") @@ -262,47 +268,47 @@ class StateHandler(object): event.room_id, [e for e, _ in event.prev_events], ) - curr_state = entry.state + prev_state_ids = entry.state + prev_group = None + delta_ids = None - context = EventContext() - context.prev_state_ids = curr_state if event.is_state(): # If this is a state event then we need to create a new state # group for the state after this event. key = (event.type, event.state_key) - if key in context.prev_state_ids: - replaces = context.prev_state_ids[key] + if key in prev_state_ids: + replaces = prev_state_ids[key] event.unsigned["replaces_state"] = replaces - context.current_state_ids = dict(context.prev_state_ids) - context.current_state_ids[key] = event.event_id + current_state_ids = dict(prev_state_ids) + current_state_ids[key] = event.event_id if entry.state_group: # If the state at the event has a state group assigned then # we can use that as the prev group - context.prev_group = entry.state_group - context.delta_ids = { + prev_group = entry.state_group + delta_ids = { key: event.event_id } elif entry.prev_group: # If the state at the event only has a prev group, then we can # use that as a prev group too. - context.prev_group = entry.prev_group - context.delta_ids = dict(entry.delta_ids) - context.delta_ids[key] = event.event_id + prev_group = entry.prev_group + delta_ids = dict(entry.delta_ids) + delta_ids[key] = event.event_id - context.state_group = yield self.store.store_state_group( + state_group = yield self.store.store_state_group( event.event_id, event.room_id, - prev_group=context.prev_group, - delta_ids=context.delta_ids, - current_state_ids=context.current_state_ids, + prev_group=prev_group, + delta_ids=delta_ids, + current_state_ids=current_state_ids, ) else: - context.current_state_ids = context.prev_state_ids - context.prev_group = entry.prev_group - context.delta_ids = entry.delta_ids + current_state_ids = prev_state_ids + prev_group = entry.prev_group + delta_ids = entry.delta_ids if entry.state_group is None: entry.state_group = yield self.store.store_state_group( @@ -310,13 +316,20 @@ class StateHandler(object): event.room_id, prev_group=entry.prev_group, delta_ids=entry.delta_ids, - current_state_ids=context.current_state_ids, + current_state_ids=current_state_ids, ) entry.state_id = entry.state_group - context.state_group = entry.state_group + state_group = entry.state_group + + context = EventContext( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + prev_group=prev_group, + delta_ids=delta_ids, + ) - context.prev_state_events = [] defer.returnValue(context) @defer.inlineCallbacks -- cgit 1.4.1 From 842cdece42e59a4181a496761447f0cb00053c05 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 12:31:35 +0100 Subject: pep8 --- synapse/events/snapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index fbbe8dd490..5e02ef1a5c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -145,7 +145,7 @@ class EventContext(object): current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, prev_group=input["prev_group"], - delta_ids = _decode_state_dict(input["delta_ids"]), + delta_ids=_decode_state_dict(input["delta_ids"]), ) context.rejected = input["rejected"] -- cgit 1.4.1 From 440b8845b531db05e7c4f646e48dee7635cf1f0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 12:38:46 +0100 Subject: Make EventContext lazy load state --- synapse/events/snapshot.py | 153 +++++++++++++++++++++++++++++++++------------ synapse/state.py | 6 +- 2 files changed, 115 insertions(+), 44 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 5e02ef1a5c..f9568638a1 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -19,18 +19,12 @@ from frozendict import frozendict from twisted.internet import defer +from synapse.util.logcontext import make_deferred_yieldable, run_in_background + class EventContext(object): """ Attributes: - current_state_ids (dict[(str, str), str]): - The current state map including the current event. - (type, state_key) -> event_id - - prev_state_ids (dict[(str, str), str]): - The current state map excluding the current event. - (type, state_key) -> event_id - state_group (int|None): state group id, if the state has been stored as a state group. This is usually only None if e.g. the event is an outlier. @@ -47,36 +41,71 @@ class EventContext(object): prev_state_events (?): XXX: is this ever set to anything other than the empty list? + + _current_state_ids (dict[(str, str), str]|None): + The current state map including the current event. None if outlier + or we haven't fetched the state from DB yet. + (type, state_key) -> event_id + + _prev_state_ids (dict[(str, str), str]|None): + The current state map excluding the current event. None if outlier + or we haven't fetched the state from DB yet. + (type, state_key) -> event_id + + _fetching_state_deferred (Deferred|None): Resolves when *_state_ids have + been calculated. None if we haven't started calculating yet + + _prev_state_id (str|None): If set then the event associated with the + context overrode the _prev_state_id + + _event_type (str): The type of the event the context is associated with + + _event_state_key (str|None): The state_key of the event the context is + associated with """ __slots__ = [ - "current_state_ids", - "prev_state_ids", "state_group", "rejected", "prev_group", "delta_ids", "prev_state_events", "app_service", + "_current_state_ids", + "_prev_state_ids", + "_prev_state_id", + "_event_type", + "_event_state_key", + "_fetching_state_deferred", ] - def __init__(self, state_group, current_state_ids, prev_state_ids, - prev_group=None, delta_ids=None): + @staticmethod + def with_state(state_group, current_state_ids, prev_state_ids, + prev_group=None, delta_ids=None): + context = EventContext() + # The current state including the current event - self.current_state_ids = current_state_ids + context._current_state_ids = current_state_ids # The current state excluding the current event - self.prev_state_ids = prev_state_ids - self.state_group = state_group + context._prev_state_ids = prev_state_ids + context.state_group = state_group + + context._prev_state_id = None + context._event_type = None + context._event_state_key = None + context._fetching_state_deferred = defer.succeed(None) # A previously persisted state group and a delta between that # and this state. - self.prev_group = prev_group - self.delta_ids = delta_ids + context.prev_group = prev_group + context.delta_ids = delta_ids + + context.prev_state_events = [] - self.prev_state_events = [] + context.rejected = False + context.app_service = None - self.rejected = False - self.app_service = None + return context def serialize(self, event): """Converts self to a type that can be serialized as JSON, and then @@ -123,30 +152,17 @@ class EventContext(object): Returns: EventContext """ + context = EventContext() + # We use the state_group and prev_state_id stuff to pull the # current_state_ids out of the DB and construct prev_state_ids. - prev_state_id = input["prev_state_id"] - event_type = input["event_type"] - event_state_key = input["event_state_key"] + context._prev_state_id = input["prev_state_id"] + context._event_type = input["event_type"] + context._event_state_key = input["event_state_key"] - state_group = input["state_group"] - - current_state_ids = yield store.get_state_ids_for_group( - state_group, - ) - if prev_state_id and event_state_key: - prev_state_ids = dict(current_state_ids) - prev_state_ids[(event_type, event_state_key)] = prev_state_id - else: - prev_state_ids = current_state_ids - - context = EventContext( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - prev_group=input["prev_group"], - delta_ids=_decode_state_dict(input["delta_ids"]), - ) + context.state_group = input["state_group"] + context.prev_group = input["prev_group"] + context.delta_ids = _decode_state_dict(input["delta_ids"]) context.rejected = input["rejected"] context.prev_state_events = input["prev_state_events"] @@ -157,6 +173,61 @@ class EventContext(object): defer.returnValue(context) + @defer.inlineCallbacks + def get_current_state_ids(self, store): + """Gets the current state IDs + + Returns: + Deferred[dict[(str, str), str]|None]: Returns None if state_group + is None, which happens when the associated event is an outlier. + """ + + if not self._fetching_state_deferred: + self._fetching_state_deferred = run_in_background( + self._fill_out_state, store, + ) + + yield make_deferred_yieldable(self._fetching_state_deferred) + + defer.returnValue(self._current_state_ids) + + @defer.inlineCallbacks + def get_prev_state_ids(self, store): + """Gets the prev state IDs + + Returns: + Deferred[dict[(str, str), str]|None]: Returns None if state_group + is None, which happens when the associated event is an outlier. + """ + + if not self._fetching_state_deferred: + self._fetching_state_deferred = run_in_background( + self._fill_out_state, store, + ) + + yield make_deferred_yieldable(self._fetching_state_deferred) + + defer.returnValue(self._prev_state_ids) + + @defer.inlineCallbacks + def _fill_out_state(self, store): + """Called to populate the _current_state_ids and _prev_state_ids + attributes by loading from the database. + """ + if self.state_group is None: + return + + self._current_state_ids = yield store.get_state_ids_for_group( + self.state_group, + ) + if self._prev_state_id and self._event_state_key is not None: + self._prev_state_ids = dict(self._current_state_ids) + + key = (self._event_type, self._event_state_key) + self._prev_state_ids[key] = self._prev_state_id + else: + self._prev_state_ids = self._current_state_ids + def _encode_state_dict(state_dict): """Since dicts of (type, state_key) -> event_id cannot be serialized in diff --git a/synapse/state.py b/synapse/state.py index a708695006..32125c95df 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -219,7 +219,7 @@ class StateHandler(object): # We don't store state for outliers, so we don't generate a state # group for it. - context = EventContext( + context = EventContext.with_state( state_group=None, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, @@ -255,7 +255,7 @@ class StateHandler(object): current_state_ids=current_state_ids, ) - context = EventContext( + context = EventContext.with_state( state_group=state_group, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, @@ -322,7 +322,7 @@ class StateHandler(object): state_group = entry.state_group - context = EventContext( + context = EventContext.with_state( state_group=state_group, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, -- cgit 1.4.1 From e42510ba635b3e4d83215e4f5634ca51411996e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 13:00:22 +0100 Subject: Use new getters --- synapse/api/auth.py | 6 ++++-- synapse/handlers/_base.py | 3 ++- synapse/handlers/federation.py | 23 ++++++++++++++++------- synapse/handlers/message.py | 26 ++++++++++++++++---------- synapse/handlers/room_member.py | 9 ++++++--- synapse/push/bulk_push_rule_evaluator.py | 7 ++++--- synapse/storage/events.py | 2 +- synapse/storage/push_rule.py | 7 +++++-- synapse/storage/roommember.py | 7 +++++-- 9 files changed, 59 insertions(+), 31 deletions(-) (limited to 'synapse') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index bc629832d9..535bdb449d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -65,8 +65,9 @@ class Auth(object): @defer.inlineCallbacks def check_from_context(self, event, context, do_sig_check=True): + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -544,7 +545,8 @@ class Auth(object): @defer.inlineCallbacks def add_auth_events(self, builder, context): - auth_ids = yield self.compute_auth_events(builder, context.prev_state_ids) + prev_state_ids = yield context.get_prev_state_ids(self.store) + auth_ids = yield self.compute_auth_events(builder, prev_state_ids) auth_events_entries = yield self.store.add_event_hashes( auth_ids diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index b6a8b3aa3b..704181d2d3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -112,8 +112,9 @@ class BaseHandler(object): guest_access = event.content.get("guest_access", "forbidden") if guest_access != "can_join": if context: + current_state_ids = yield context.get_current_state_ids(self.store) current_state = yield self.store.get_events( - list(context.current_state_ids.values()) + list(current_state_ids.values()) ) else: current_state = yield self.state_handler.get_current_state( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a6d391c4e8..98dd4a7fd1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -486,7 +486,10 @@ class FederationHandler(BaseHandler): # joined the room. Don't bother if the user is just # changing their profile info. newly_joined = True - prev_state_id = context.prev_state_ids.get( + + prev_state_ids = yield context.get_prev_state_ids(self.store) + + prev_state_id = prev_state_ids.get( (event.type, event.state_key) ) if prev_state_id: @@ -1106,10 +1109,12 @@ class FederationHandler(BaseHandler): user = UserID.from_string(event.state_key) yield user_joined_room(self.distributor, user, event.room_id) - state_ids = list(context.prev_state_ids.values()) + prev_state_ids = yield context.get_prev_state_ids(self.store) + + state_ids = list(prev_state_ids.values()) auth_chain = yield self.store.get_auth_chain(state_ids) - state = yield self.store.get_events(list(context.prev_state_ids.values())) + state = yield self.store.get_events(list(prev_state_ids.values())) defer.returnValue({ "state": list(state.values()), @@ -1635,8 +1640,9 @@ class FederationHandler(BaseHandler): ) if not auth_events: + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -1876,9 +1882,10 @@ class FederationHandler(BaseHandler): break if do_resolution: + prev_state_ids = yield context.get_prev_state_ids(self.store) # 1. Get what we think is the auth chain. auth_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids + event, prev_state_ids ) local_auth_chain = yield self.store.get_auth_chain( auth_ids, include_given=True @@ -2222,7 +2229,8 @@ class FederationHandler(BaseHandler): event.content["third_party_invite"]["signed"]["token"] ) original_invite = None - original_invite_id = context.prev_state_ids.get(key) + prev_state_ids = yield context.get_prev_state_ids(self.store) + original_invite_id = prev_state_ids.get(key) if original_invite_id: original_invite = yield self.store.get_event( original_invite_id, allow_none=True @@ -2264,7 +2272,8 @@ class FederationHandler(BaseHandler): signed = event.content["third_party_invite"]["signed"] token = signed["token"] - invite_event_id = context.prev_state_ids.get( + prev_state_ids = yield context.get_prev_state_ids(self.store) + invite_event_id = prev_state_ids.get( (EventTypes.ThirdPartyInvite, token,) ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index abc07ea87c..c4bcd9018b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -630,7 +630,8 @@ class EventCreationHandler(object): If so, returns the version of the event in context. Otherwise, returns None. """ - prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) + prev_state_ids = yield context.get_prev_state_ids(self.store) + prev_event_id = prev_state_ids.get((event.type, event.state_key)) prev_event = yield self.store.get_event(prev_event_id, allow_none=True) if not prev_event: return @@ -752,8 +753,8 @@ class EventCreationHandler(object): event = builder.build() logger.debug( - "Created event %s with state: %s", - event.event_id, context.prev_state_ids, + "Created event %s", + event.event_id, ) defer.returnValue( @@ -884,9 +885,11 @@ class EventCreationHandler(object): e.sender == event.sender ) + current_state_ids = yield context.get_current_state_ids(self.store) + state_to_include_ids = [ e_id - for k, e_id in iteritems(context.current_state_ids) + for k, e_id in iteritems(current_state_ids) if k[0] in self.hs.config.room_invite_state_types or k == (EventTypes.Member, event.sender) ] @@ -922,8 +925,9 @@ class EventCreationHandler(object): ) if event.type == EventTypes.Redaction: + prev_state_ids = yield context.get_prev_state_ids(self.store) auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=True, + event, prev_state_ids, for_verification=True, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -943,11 +947,13 @@ class EventCreationHandler(object): "You don't have permission to redact events" ) - if event.type == EventTypes.Create and context.prev_state_ids: - raise AuthError( - 403, - "Changing the room create event is forbidden", - ) + if event.type == EventTypes.Create: + prev_state_ids = yield context.get_prev_state_ids(self.store) + if prev_state_ids: + raise AuthError( + 403, + "Changing the room create event is forbidden", + ) (event_stream_id, max_stream_id) = yield self.store.persist_event( event, context=context diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 00f2e279bc..a832d91809 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -201,7 +201,9 @@ class RoomMemberHandler(object): ratelimit=ratelimit, ) - prev_member_event_id = context.prev_state_ids.get( + prev_state_ids = yield context.get_prev_state_ids(self.store) + + prev_member_event_id = prev_state_ids.get( (EventTypes.Member, target.to_string()), None ) @@ -496,9 +498,10 @@ class RoomMemberHandler(object): if prev_event is not None: return + prev_state_ids = yield context.get_prev_state_ids(self.store) if event.membership == Membership.JOIN: if requester.is_guest: - guest_can_join = yield self._can_guest_join(context.prev_state_ids) + guest_can_join = yield self._can_guest_join(prev_state_ids) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -517,7 +520,7 @@ class RoomMemberHandler(object): ratelimit=ratelimit, ) - prev_member_event_id = context.prev_state_ids.get( + prev_member_event_id = prev_state_ids.get( (EventTypes.Member, event.state_key), None ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index bb181d94ee..1d14d3639c 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -112,7 +112,8 @@ class BulkPushRuleEvaluator(object): @defer.inlineCallbacks def _get_power_levels_and_sender_level(self, event, context): - pl_event_id = context.prev_state_ids.get(POWER_KEY) + prev_state_ids = yield context.get_prev_state_ids(self.store) + pl_event_id = prev_state_ids.get(POWER_KEY) if pl_event_id: # fastpath: if there's a power level event, that's all we need, and # not having a power level event is an extreme edge case @@ -120,7 +121,7 @@ class BulkPushRuleEvaluator(object): auth_events = {POWER_KEY: pl_event} else: auth_events_ids = yield self.auth.compute_auth_events( - event, context.prev_state_ids, for_verification=False, + event, prev_state_ids, for_verification=False, ) auth_events = yield self.store.get_events(auth_events_ids) auth_events = { @@ -304,7 +305,7 @@ class RulesForRoom(object): push_rules_delta_state_cache_metric.inc_hits() else: - current_state_ids = context.current_state_ids + current_state_ids = yield context.get_current_state_ids(self.store) push_rules_delta_state_cache_metric.inc_misses() push_rules_state_size_counter.inc(len(current_state_ids)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 4ff0fdc4ab..bf4f3ee92a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,7 +549,7 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue - state_groups_map[ctx.state_group] = ctx.current_state_ids + state_groups_map[ctx.state_group] = yield ctx.get_current_state_ids(self) # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index be655d287b..af564b1b4e 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -186,6 +186,7 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group if not state_group: @@ -195,9 +196,11 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._bulk_get_push_rules_for_room( - event.room_id, state_group, context.current_state_ids, event=event + current_state_ids = yield context.get_current_state_ids(self) + result = yield self._bulk_get_push_rules_for_room( + event.room_id, state_group, current_state_ids, event=event ) + defer.returnValue(result) @cachedInlineCallbacks(num_args=2, cache_context=True) def _bulk_get_push_rules_for_room(self, room_id, state_group, current_state_ids, diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 02a802bed9..a27702a7a0 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -232,6 +232,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): defer.returnValue(user_who_share_room) + @defer.inlineCallbacks def get_joined_users_from_context(self, event, context): state_group = context.state_group if not state_group: @@ -241,11 +242,13 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() - return self._get_joined_users_from_context( - event.room_id, state_group, context.current_state_ids, + current_state_ids = yield context.get_current_state_ids(self) + result = yield self._get_joined_users_from_context( + event.room_id, state_group, current_state_ids, event=event, context=context, ) + defer.returnValue(result) def get_joined_users_from_state(self, room_id, state_entry): state_group = state_entry.state_group -- cgit 1.4.1 From 027bc01a1bc254fe08140c6e91a9fb945b08486f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 13:02:09 +0100 Subject: Add support for updating state --- synapse/events/snapshot.py | 19 +++++++++++++++++++ synapse/handlers/federation.py | 32 +++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f9568638a1..b090751bf1 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -228,6 +228,25 @@ class EventContext(object): else: self._prev_state_ids = self._current_state_ids + @defer.inlineCallbacks + def update_state(self, state_group, prev_state_ids, current_state_ids, + delta_ids): + """Replace the state in the context + """ + + # We need to make sure we wait for any ongoing fetching of state + # to complete so that the updated state doesn't get clobbered + if self._fetching_state_deferred: + yield make_deferred_yieldable(self._fetching_state_deferred) + + self.state_group = state_group + self._prev_state_ids = prev_state_ids + self._current_state_ids = current_state_ids + self.delta_ids = delta_ids + + # We need to ensure that that we've marked as having fetched the state + self._fetching_state_deferred = defer.succeed(None) + def _encode_state_dict(state_dict): """Since dicts of (type, state_key) -> event_id cannot be serialized in diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 98dd4a7fd1..14654d59f1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1975,21 +1975,35 @@ class FederationHandler(BaseHandler): k: a.event_id for k, a in iteritems(auth_events) if k != event_key } - context.current_state_ids = dict(context.current_state_ids) - context.current_state_ids.update(state_updates) + current_state_ids = yield context.get_current_state_ids(self.store) + current_state_ids = dict(current_state_ids) + + current_state_ids.update(state_updates) + if context.delta_ids is not None: - context.delta_ids = dict(context.delta_ids) - context.delta_ids.update(state_updates) - context.prev_state_ids = dict(context.prev_state_ids) - context.prev_state_ids.update({ + delta_ids = dict(context.delta_ids) + delta_ids.update(state_updates) + + prev_state_ids = yield context.get_prev_state_ids(self.store) + prev_state_ids = dict(prev_state_ids) + + prev_state_ids.update({ k: a.event_id for k, a in iteritems(auth_events) }) - context.state_group = yield self.store.store_state_group( + + state_group = yield self.store.store_state_group( event.event_id, event.room_id, prev_group=context.prev_group, - delta_ids=context.delta_ids, - current_state_ids=context.current_state_ids, + delta_ids=delta_ids, + current_state_ids=current_state_ids, + ) + + yield context.update_state( + state_group=state_group, + current_state_ids=current_state_ids, + prev_state_ids=prev_state_ids, + delta_ids=delta_ids, ) @defer.inlineCallbacks -- cgit 1.4.1 From 4797ed000e612b68c418ce8c342bdd7ecc16b198 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 15:05:40 +0100 Subject: Update docstrings to make sense --- synapse/events/snapshot.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index b090751bf1..a6d7bf5700 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -55,13 +55,16 @@ class EventContext(object): _fetching_state_deferred (Deferred|None): Resolves when *_state_ids have been calculated. None if we haven't started calculating yet - _prev_state_id (str|None): If set then the event associated with the - context overrode the _prev_state_id - - _event_type (str): The type of the event the context is associated with + _event_type (str): The type of the event the context is associated with. + Only set when state has not been fetched yet. _event_state_key (str|None): The state_key of the event the context is - associated with + associated with. Only set when state has not been fetched yet. + + _prev_state_id (str|None): If the event associated with the context is + a state event, then `_prev_state_id` is the event_id of the state + that was replaced. + Only set when state has not been fetched yet. """ __slots__ = [ -- cgit 1.4.1 From 999bcf9d016fb7fd9ad5a9daf4f0ec6d25a10717 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 15:24:21 +0100 Subject: Fix EventContext when using workers We were: 1. Not correctly setting all attributes 2. Using defer.inlineCallbacks in a non-generator --- synapse/events/snapshot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index a6d7bf5700..e31eceb921 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -82,6 +82,11 @@ class EventContext(object): "_fetching_state_deferred", ] + def __init__(self): + self.prev_state_events = [] + self.rejected = False + self.app_service = None + @staticmethod def with_state(state_group, current_state_ids, prev_state_ids, prev_group=None, delta_ids=None): @@ -103,11 +108,6 @@ class EventContext(object): context.prev_group = prev_group context.delta_ids = delta_ids - context.prev_state_events = [] - - context.rejected = False - context.app_service = None - return context def serialize(self, event): @@ -143,7 +143,6 @@ class EventContext(object): } @staticmethod - @defer.inlineCallbacks def deserialize(store, input): """Converts a dict that was produced by `serialize` back into a EventContext. @@ -162,6 +161,7 @@ class EventContext(object): context._prev_state_id = input["prev_state_id"] context._event_type = input["event_type"] context._event_state_key = input["event_state_key"] + context._fetching_state_deferred = None context.state_group = input["state_group"] context.prev_group = input["prev_group"] @@ -174,7 +174,7 @@ class EventContext(object): if app_service_id: context.app_service = store.get_app_service_by_id(app_service_id) - defer.returnValue(context) + return context @defer.inlineCallbacks def get_current_state_ids(self, store): -- cgit 1.4.1 From 0faa3223cdf996aa18376a7420a43061a6691638 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 16:28:00 +0100 Subject: Fix missing attributes on workers. This was missed during the transition from attribute to getter for getting state from context. --- synapse/events/snapshot.py | 10 ++++++---- synapse/handlers/message.py | 5 +++-- synapse/replication/http/send_event.py | 7 +++++-- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e31eceb921..a59064b416 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -110,7 +110,8 @@ class EventContext(object): return context - def serialize(self, event): + @defer.inlineCallbacks + def serialize(self, event, store): """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` @@ -126,11 +127,12 @@ class EventContext(object): # the prev_state_ids, so if we're a state event we include the event # id that we replaced in the state. if event.is_state(): - prev_state_id = self.prev_state_ids.get((event.type, event.state_key)) + prev_state_ids = yield self.get_prev_state_ids(store) + prev_state_id = prev_state_ids.get((event.type, event.state_key)) else: prev_state_id = None - return { + defer.returnValue({ "prev_state_id": prev_state_id, "event_type": event.type, "event_state_key": event.state_key if event.is_state() else None, @@ -140,7 +142,7 @@ class EventContext(object): "delta_ids": _encode_state_dict(self.delta_ids), "prev_state_events": self.prev_state_events, "app_service_id": self.app_service.id if self.app_service else None - } + }) @staticmethod def deserialize(store, input): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c4bcd9018b..7571975c22 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -807,8 +807,9 @@ class EventCreationHandler(object): # If we're a worker we need to hit out to the master. if self.config.worker_app: yield send_event_to_master( - self.hs.get_clock(), - self.http_client, + clock=self.hs.get_clock(), + store=self.store, + client=self.http_client, host=self.config.worker_replication_host, port=self.config.worker_replication_http_port, requester=requester, diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 2eede54792..5227bc333d 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -34,12 +34,13 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def send_event_to_master(clock, client, host, port, requester, event, context, +def send_event_to_master(clock, store, client, host, port, requester, event, context, ratelimit, extra_users): """Send event to be handled on the master Args: clock (synapse.util.Clock) + store (DataStore) client (SimpleHttpClient) host (str): host of master port (int): port on master listening for HTTP replication @@ -53,11 +54,13 @@ def send_event_to_master(clock, client, host, port, requester, event, context, host, port, event.event_id, ) + serialized_context = yield context.serialize(event, store) + payload = { "event": event.get_pdu_json(), "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, - "context": context.serialize(event), + "context": serialized_context, "requester": requester.serialize(), "ratelimit": ratelimit, "extra_users": [u.to_string() for u in extra_users], -- cgit 1.4.1 From dae6dc1e776cc6198dc1f71575876fc16693c170 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 17:13:34 +0100 Subject: Remove redundant checks on room forgottenness Fixes #3550 --- changelog.d/3350.misc | 1 + synapse/storage/push_rule.py | 13 ------------- synapse/storage/roommember.py | 15 --------------- synapse/visibility.py | 19 +------------------ 4 files changed, 2 insertions(+), 46 deletions(-) create mode 100644 changelog.d/3350.misc (limited to 'synapse') diff --git a/changelog.d/3350.misc b/changelog.d/3350.misc new file mode 100644 index 0000000000..3713cd6d63 --- /dev/null +++ b/changelog.d/3350.misc @@ -0,0 +1 @@ +Remove redundant checks on who_forgot_in_room \ No newline at end of file diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index be655d287b..d25b39ec02 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -21,7 +21,6 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.constants import EventTypes from synapse.push.baserules import list_with_base_rules from synapse.storage.appservice import ApplicationServiceWorkerStore from synapse.storage.pusher import PusherWorkerStore @@ -247,18 +246,6 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, if uid in local_users_in_room: user_ids.add(uid) - forgotten = yield self.who_forgot_in_room( - event.room_id, on_invalidate=cache_context.invalidate, - ) - - for row in forgotten: - user_id = row["user_id"] - event_id = row["event_id"] - - mem_id = current_state_ids.get((EventTypes.Member, user_id), None) - if event_id == mem_id: - user_ids.discard(user_id) - rules_by_user = yield self.bulk_get_push_rules( user_ids, on_invalidate=cache_context.invalidate, ) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 02a802bed9..9eb5b18144 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -458,18 +458,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -578,9 +566,6 @@ class RoomMemberStore(RoomMemberWorkerStore): txn.execute(sql, (user_id, room_id)) txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.who_forgot_in_room, (room_id,) - ) return self.runInteraction("forget_membership", f) @cachedInlineCallbacks(num_args=2) diff --git a/synapse/visibility.py b/synapse/visibility.py index 9b97ea2b83..e0b2eccceb 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -23,7 +23,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events.utils import prune_event from synapse.types import get_domain_from_id -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn logger = logging.getLogger(__name__) @@ -75,19 +74,6 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, types=types, ) - forgotten = yield make_deferred_yieldable(defer.gatherResults([ - defer.maybeDeferred( - preserve_fn(store.who_forgot_in_room), - room_id, - ) - for room_id in frozenset(e.room_id for e in events) - ], consumeErrors=True)) - - # Set of membership event_ids that have been forgotten - event_id_forgotten = frozenset( - row["event_id"] for rows in forgotten for row in rows - ) - ignore_dict_content = yield store.get_global_account_data_by_type_for_user( "m.ignored_user_list", user_id, ) @@ -176,10 +162,7 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, if membership is None: membership_event = state.get((EventTypes.Member, user_id), None) if membership_event: - # XXX why do we do this? - # https://github.com/matrix-org/synapse/issues/3350 - if membership_event.event_id not in event_id_forgotten: - membership = membership_event.membership + membership = membership_event.membership # if the user was a member of the room at the time of the event, # they can see it. -- cgit 1.4.1 From 50c60e5fadbefff6785c17dda9eecf88286dba30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 17:21:40 +0100 Subject: Only get cached state from context in persist_event We don't want to bother pulling out the current state from the DB since until we know we have to. Checking the context for state is just an optimisation. --- synapse/events/snapshot.py | 13 +++++++++++++ synapse/storage/events.py | 4 +++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index a59064b416..c439b53801 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -163,6 +163,9 @@ class EventContext(object): context._prev_state_id = input["prev_state_id"] context._event_type = input["event_type"] context._event_state_key = input["event_state_key"] + + context._current_state_ids = None + context._prev_state_ids = None context._fetching_state_deferred = None context.state_group = input["state_group"] @@ -214,6 +217,16 @@ class EventContext(object): defer.returnValue(self._prev_state_ids) + def get_cached_current_state_ids(self): + """Gets the current state IDs if we have them already cached. + + Returns: + dict[(str, str), str]|None: Returns None if state_group + is None, which happens when the associated event is an outlier. + """ + + return self._current_state_ids + @defer.inlineCallbacks def _fill_out_state(self, store): """Called to populate the _current_state_ids and _prev_state_ids diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bf4f3ee92a..dc0b3c2eba 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,7 +549,9 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue - state_groups_map[ctx.state_group] = yield ctx.get_current_state_ids(self) + current_state_ids = ctx.get_cached_current_state_ids() + if current_state_ids is not None: + state_groups_map[ctx.state_group] = current_state_ids # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can -- cgit 1.4.1 From 8b9f164fff6cf821ff5bc702f3660c0f0eb320e7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Jul 2018 17:43:01 +0100 Subject: Comments --- synapse/events/snapshot.py | 5 +++-- synapse/storage/events.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index c439b53801..189212b0fa 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -221,8 +221,9 @@ class EventContext(object): """Gets the current state IDs if we have them already cached. Returns: - dict[(str, str), str]|None: Returns None if state_group - is None, which happens when the associated event is an outlier. + dict[(str, str), str]|None: Returns None if we haven't cached the + state or if state_group is None, which happens when the associated + event is an outlier. """ return self._current_state_ids diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dc0b3c2eba..c2910094d0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -549,6 +549,9 @@ class EventsStore(EventsWorkerStore): if ctx.state_group in state_groups_map: continue + # We're only interested in pulling out state that has already + # been cached in the context. We'll pull stuff out of the DB later + # if necessary. current_state_ids = ctx.get_cached_current_state_ids() if current_state_ids is not None: state_groups_map[ctx.state_group] = current_state_ids -- cgit 1.4.1 From 5c705f70c9489427a7985ea10ec60552965b9a1c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 19:00:16 +0100 Subject: Fixes and optimisations for resolve_state_groups First of all, fix the logic which looks for identical input state groups so that we actually use them. This turned out to be most easily done by factoring the relevant code out to a separate function so that we could do an early return. Secondly, avoid building the whole `conflicted_state` dict (which was only ever used as a boolean flag). Thirdly, replace the construction of the `state` dict (which mapped from keys to events that set them), with an optimistic construction of the resolution result assuming there will be no conflicts. This should be no slower than building the old `state` dict, and: - in the conflicted case, we'll short-cut it, saving part of the work - in the unconflicted case, it saves rebuilding the resolution from the `state` dict. Finally, do a couple of s/values/itervalues/. --- synapse/state.py | 143 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 54 deletions(-) (limited to 'synapse') diff --git a/synapse/state.py b/synapse/state.py index 32125c95df..033f55d967 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -471,69 +471,39 @@ class StateResolutionHandler(object): "Resolving state for %s with %d groups", room_id, len(state_groups_ids) ) - # build a map from state key to the event_ids which set that state. - # dict[(str, str), set[str]) - state = {} + # start by assuming we won't have any conflicted state, and build up the new + # state map by iterating through the state groups. If we discover a conflict, + # we give up and instead use `resolve_events_with_factory`. + # + # XXX: is this actually worthwhile, or should we just let + # resolve_events_with_factory do it? + new_state = {} + conflicted_state = False for st in itervalues(state_groups_ids): for key, e_id in iteritems(st): - state.setdefault(key, set()).add(e_id) - - # build a map from state key to the event_ids which set that state, - # including only those where there are state keys in conflict. - conflicted_state = { - k: list(v) - for k, v in iteritems(state) - if len(v) > 1 - } + if key in new_state: + conflicted_state = True + break + new_state[key] = e_id + if conflicted_state: + break if conflicted_state: logger.info("Resolving conflicted state for %r", room_id) with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( - list(state_groups_ids.values()), + list(itervalues(state_groups_ids)), event_map=event_map, state_map_factory=state_map_factory, ) - else: - new_state = { - key: e_ids.pop() for key, e_ids in iteritems(state) - } - with Measure(self.clock, "state.create_group_ids"): - # if the new state matches any of the input state groups, we can - # use that state group again. Otherwise we will generate a state_id - # which will be used as a cache key for future resolutions, but - # not get persisted. - state_group = None - new_state_event_ids = frozenset(itervalues(new_state)) - for sg, events in iteritems(state_groups_ids): - if new_state_event_ids == frozenset(e_id for e_id in events): - state_group = sg - break + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. - # TODO: We want to create a state group for this set of events, to - # increase cache hits, but we need to make sure that it doesn't - # end up as a prev_group without being added to the database - - prev_group = None - delta_ids = None - for old_group, old_ids in iteritems(state_groups_ids): - if not set(new_state) - set(old_ids): - n_delta_ids = { - k: v - for k, v in iteritems(new_state) - if old_ids.get(k) != v - } - if not delta_ids or len(n_delta_ids) < len(delta_ids): - prev_group = old_group - delta_ids = n_delta_ids - - cache = _StateCacheEntry( - state=new_state, - state_group=state_group, - prev_group=prev_group, - delta_ids=delta_ids, - ) + with Measure(self.clock, "state.create_group_ids"): + cache = _make_state_cache_entry(new_state, state_groups_ids) if self._state_cache is not None: self._state_cache[group_names] = cache @@ -541,6 +511,70 @@ class StateResolutionHandler(object): defer.returnValue(cache) +def _make_state_cache_entry( + new_state, + state_groups_ids, +): + """Given a resolved state, and a set of input state groups, pick one to base + a new state group on (if any), and return an appropriately-constructed + _StateCacheEntry. + + Args: + new_state (dict[(str, str), str]): resolved state map (mapping from + (type, state_key) to event_id) + + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + Returns: + _StateCacheEntry + """ + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. + + # first look for exact matches + new_state_event_ids = set(itervalues(new_state)) + for sg, state in iteritems(state_groups_ids): + if len(new_state_event_ids) != len(state): + continue + + old_state_event_ids = set(itervalues(state)) + if new_state_event_ids == old_state_event_ids: + # got an exact match. + return _StateCacheEntry( + state=new_state, + state_group=sg, + ) + + # TODO: We want to create a state group for this set of events, to + # increase cache hits, but we need to make sure that it doesn't + # end up as a prev_group without being added to the database + + # failing that, look for the closest match. + prev_group = None + delta_ids = None + + for old_group, old_state in iteritems(state_groups_ids): + n_delta_ids = { + k: v + for k, v in iteritems(new_state) + if old_state.get(k) != v + } + if not delta_ids or len(n_delta_ids) < len(delta_ids): + prev_group = old_group + delta_ids = n_delta_ids + + return _StateCacheEntry( + state=new_state, + state_group=None, + prev_group=prev_group, + delta_ids=delta_ids, + ) + + def _ordered_events(events): def key_func(e): return -int(e.depth), hashlib.sha1(e.event_id.encode()).hexdigest() @@ -582,7 +616,7 @@ def _seperate(state_sets): with them in different state sets. Args: - state_sets(list[dict[(str, str), str]]): + state_sets(iterable[dict[(str, str), str]]): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. @@ -596,10 +630,11 @@ def _seperate(state_sets): conflicted_state is a dict mapping (type, state_key) to a set of event ids for conflicted state keys. """ - unconflicted_state = dict(state_sets[0]) + state_set_iterator = iter(state_sets) + unconflicted_state = dict(next(state_set_iterator)) conflicted_state = {} - for state_set in state_sets[1:]: + for state_set in state_set_iterator: for key, value in iteritems(state_set): # Check if there is an unconflicted entry for the state key. unconflicted_value = unconflicted_state.get(key) -- cgit 1.4.1 From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 19:21:20 +0100 Subject: incorporate review --- synapse/handlers/sync.py | 67 +++++++++++++++++++----------------------------- synapse/storage/state.py | 20 ++++++--------- 2 files changed, 35 insertions(+), 52 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. + state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. @@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. -- cgit 1.4.1 From c1f80effbe17b1572161cc50838e60b495fb45a4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 22:06:50 +0100 Subject: Handle delta_ids being None in _update_context_for_auth_events it's easier to create the new state group as a delta from the existing one. (There's an outside chance this will help with https://github.com/matrix-org/synapse/issues/3364) --- synapse/events/snapshot.py | 3 ++- synapse/handlers/federation.py | 13 ++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 189212b0fa..368b5f6ae4 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -249,7 +249,7 @@ class EventContext(object): @defer.inlineCallbacks def update_state(self, state_group, prev_state_ids, current_state_ids, - delta_ids): + prev_group, delta_ids): """Replace the state in the context """ @@ -260,6 +260,7 @@ class EventContext(object): self.state_group = state_group self._prev_state_ids = prev_state_ids + self.prev_group = prev_group self._current_state_ids = current_state_ids self.delta_ids = delta_ids diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 14654d59f1..145c1a21d4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1980,10 +1980,6 @@ class FederationHandler(BaseHandler): current_state_ids.update(state_updates) - if context.delta_ids is not None: - delta_ids = dict(context.delta_ids) - delta_ids.update(state_updates) - prev_state_ids = yield context.get_prev_state_ids(self.store) prev_state_ids = dict(prev_state_ids) @@ -1991,11 +1987,13 @@ class FederationHandler(BaseHandler): k: a.event_id for k, a in iteritems(auth_events) }) + # create a new state group as a delta from the existing one. + prev_group = context.state_group state_group = yield self.store.store_state_group( event.event_id, event.room_id, - prev_group=context.prev_group, - delta_ids=delta_ids, + prev_group=prev_group, + delta_ids=state_updates, current_state_ids=current_state_ids, ) @@ -2003,7 +2001,8 @@ class FederationHandler(BaseHandler): state_group=state_group, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, - delta_ids=delta_ids, + prev_group=prev_group, + delta_ids=state_updates, ) @defer.inlineCallbacks -- cgit 1.4.1 From ce0c18dec5d04626018b8e401273c8f30e88b0c7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 22:13:19 +0100 Subject: Improve logging for exceptions handling PDUs when we get an exception handling a federation PDU, log the whole stacktrace. --- synapse/federation/federation_server.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 48f26db67c..e501251b6e 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -24,6 +24,7 @@ from prometheus_client import Counter from twisted.internet import defer from twisted.internet.abstract import isIPAddress +from twisted.python import failure from synapse.api.constants import EventTypes from synapse.api.errors import AuthError, FederationError, NotFoundError, SynapseError @@ -186,8 +187,12 @@ class FederationServer(FederationBase): logger.warn("Error handling PDU %s: %s", event_id, e) pdu_results[event_id] = {"error": str(e)} except Exception as e: + f = failure.Failure() pdu_results[event_id] = {"error": str(e)} - logger.exception("Failed to handle PDU %s", event_id) + logger.error( + "Failed to handle PDU %s: %s", + event_id, f.getTraceback().rstrip(), + ) yield async.concurrently_execute( process_pdus_for_room, pdus_by_room.keys(), @@ -203,8 +208,8 @@ class FederationServer(FederationBase): ) pdu_failures = getattr(transaction, "pdu_failures", []) - for failure in pdu_failures: - logger.info("Got failure %r", failure) + for fail in pdu_failures: + logger.info("Got failure %r", fail) response = { "pdus": pdu_results, -- cgit 1.4.1 From efcdacad7d1b7f52f879179701c7e0d9b763511f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:41:05 +0100 Subject: handle case where types is [] on postgres correctly --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 40ca8bd2a2..f99d3871e4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -255,7 +255,7 @@ class StateGroupWorkerStore(SQLBaseStore): # Turns out that postgres doesn't like doing a list of OR's and # is about 1000x slower, so we just issue a query for each specific # type seperately. - if types: + if types is not None: clause_to_args = [ ( "AND type = ? AND state_key = ?", -- cgit 1.4.1 From 1938cffaea465375d5dd410baa94a6b9b6ded0cc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 23 Jul 2018 23:48:19 +0100 Subject: Add some measure blocks to persist_events ... to help us figure out where 40% of CPU is going --- synapse/storage/events.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..d13d099d1e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -417,19 +417,23 @@ class EventsStore(EventsWorkerStore): logger.info( "Calculating state delta for room %s", room_id, ) - current_state = yield self._get_new_state_after_events( - room_id, - ev_ctx_rm, - latest_event_ids, - new_latest_event_ids, - ) + + with Measure("persist_events.get_new_state_after_events"): + current_state = yield self._get_new_state_after_events( + room_id, + ev_ctx_rm, + latest_event_ids, + new_latest_event_ids, + ) + if current_state is not None: current_state_for_room[room_id] = current_state - delta = yield self._calculate_state_delta( - room_id, current_state, - ) - if delta is not None: - state_delta_for_room[room_id] = delta + with Measure("persist_events.calculate_state_delta"): + delta = yield self._calculate_state_delta( + room_id, current_state, + ) + if delta is not None: + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", -- cgit 1.4.1 From 69fb5dbdabf50dffe34e2a0d012b5a227fc16c55 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Jul 2018 00:04:44 +0100 Subject: fix idiocy --- synapse/storage/events.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d13d099d1e..392935cacf 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -418,7 +418,10 @@ class EventsStore(EventsWorkerStore): "Calculating state delta for room %s", room_id, ) - with Measure("persist_events.get_new_state_after_events"): + with Measure( + self._clock, + "persist_events.get_new_state_after_events", + ): current_state = yield self._get_new_state_after_events( room_id, ev_ctx_rm, @@ -428,7 +431,10 @@ class EventsStore(EventsWorkerStore): if current_state is not None: current_state_for_room[room_id] = current_state - with Measure("persist_events.calculate_state_delta"): + with Measure( + self._clock, + "persist_events.calculate_state_delta", + ): delta = yield self._calculate_state_delta( room_id, current_state, ) -- cgit 1.4.1 From 8dff6e0322718ec9c446465c1e10ab331a417b8a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Jul 2018 00:37:17 +0100 Subject: Logcontext fixes Fix some random logcontext leaks. --- synapse/handlers/initial_sync.py | 28 +++++++++++++++------------- synapse/storage/events.py | 5 +++-- synapse/storage/pusher.py | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index fb11716eb8..50b13d8820 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -387,19 +387,21 @@ class InitialSyncHandler(BaseHandler): receipts = [] defer.returnValue(receipts) - presence, receipts, (messages, token) = yield defer.gatherResults( - [ - run_in_background(get_presence), - run_in_background(get_receipts), - run_in_background( - self.store.get_recent_events_for_room, - room_id, - limit=limit, - end_token=now_token.room_key, - ) - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + presence, receipts, (messages, token) = yield make_deferred_yieldable( + defer.gatherResults( + [ + run_in_background(get_presence), + run_in_background(get_receipts), + run_in_background( + self.store.get_recent_events_for_room, + room_id, + limit=limit, + end_token=now_token.room_key, + ) + ], + consumeErrors=True, + ).addErrback(unwrapFirstError), + ) messages = yield filter_events_for_client( self.store, user_id, messages, is_peeking=is_peeking, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..c06dbb3768 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -39,7 +39,7 @@ from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util.async import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.frozenutils import frozendict_json_encoder -from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.logcontext import PreserveLoggingContext, make_deferred_yieldable from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -147,7 +147,8 @@ class _EventPeristenceQueue(object): # callbacks on the deferred. try: ret = yield per_item_callback(item) - item.deferred.callback(ret) + with PreserveLoggingContext(): + item.deferred.callback(ret) except Exception: item.deferred.errback() finally: diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index cc273a57b2..8443bd4c1b 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -233,7 +233,7 @@ class PusherStore(PusherWorkerStore): ) if newly_inserted: - self.runInteraction( + yield self.runInteraction( "add_pusher", self._invalidate_cache_and_stream, self.get_if_user_has_pusher, (user_id,) -- cgit 1.4.1 From cf2d15c6a953d42207fb2c8fe5dc57ee7fdae7ce Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 24 Jul 2018 00:57:48 +0100 Subject: another couple of logcontext leaks --- synapse/handlers/appservice.py | 5 ++++- synapse/handlers/initial_sync.py | 10 ++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index ec9fe01a5a..ee41aed69e 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -23,6 +23,7 @@ from twisted.internet import defer import synapse from synapse.api.constants import EventTypes +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.logcontext import make_deferred_yieldable, run_in_background from synapse.util.metrics import Measure @@ -106,7 +107,9 @@ class ApplicationServicesHandler(object): yield self._check_user_exists(event.state_key) if not self.started_scheduler: - self.scheduler.start().addErrback(log_failure) + def start_scheduler(): + return self.scheduler.start().addErrback(log_failure) + run_as_background_process("as_scheduler", start_scheduler) self.started_scheduler = True # Fork off pushes to these services diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 50b13d8820..40e7580a61 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -148,13 +148,15 @@ class InitialSyncHandler(BaseHandler): try: if event.membership == Membership.JOIN: room_end_token = now_token.room_key - deferred_room_state = self.state_handler.get_current_state( - event.room_id + deferred_room_state = run_in_background( + self.state_handler.get_current_state, + event.room_id, ) elif event.membership == Membership.LEAVE: room_end_token = "s%d" % (event.stream_ordering,) - deferred_room_state = self.store.get_state_for_events( - [event.event_id], None + deferred_room_state = run_in_background( + self.store.get_state_for_events, + [event.event_id], None, ) deferred_room_state.addCallback( lambda states: states[event.event_id] -- cgit 1.4.1 From ff5426f6b8263b416f412725a6d7be2fac284824 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 10:55:11 +0100 Subject: Speed up _calculate_state_delta --- synapse/storage/events.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c2910094d0..1b075e6cc6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -644,21 +644,14 @@ class EventsStore(EventsWorkerStore): """ existing_state = yield self.get_current_state_ids(room_id) - existing_events = set(itervalues(existing_state)) - new_events = set(ev_id for ev_id in itervalues(current_state)) - changed_events = existing_events ^ new_events - - if not changed_events: - return - to_delete = { key: ev_id for key, ev_id in iteritems(existing_state) - if ev_id in changed_events + if ev_id != current_state.get(key) } - events_to_insert = (new_events - existing_events) + to_insert = { key: ev_id for key, ev_id in iteritems(current_state) - if ev_id in events_to_insert + if ev_id != existing_state.get(key) } defer.returnValue((to_delete, to_insert)) -- cgit 1.4.1 From 0fa73e4a63d6cd5d402ea5213eac6b766e650321 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:19:23 +0100 Subject: Remove unnecessary if --- synapse/storage/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1b075e6cc6..cc8ab5165b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -428,8 +428,7 @@ class EventsStore(EventsWorkerStore): delta = yield self._calculate_state_delta( room_id, current_state, ) - if delta is not None: - state_delta_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", -- cgit 1.4.1 From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 12:39:40 +0100 Subject: incorporate more review --- synapse/handlers/sync.py | 12 +++++++++--- synapse/storage/state.py | 36 +++++++++--------------------------- tests/storage/test_state.py | 9 +++++++++ 3 files changed, 27 insertions(+), 30 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5689ad2f58..e5a2329d73 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1526,6 +1526,9 @@ def _calculate_state( previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline + lazy_load_members (bool): whether to return members from timeline_start + or not. assumes that timeline_start has already been filtered to + include only the members the client needs to know about. Returns: dict @@ -1545,9 +1548,12 @@ def _calculate_state( p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - # track the membership events in the state as of the start of the timeline - # so we can add them back in to the state if we're lazyloading. We don't - # add them into state if they're already contained in the timeline. + # If we are lazyloading room members, we explicitly add the membership events + # for the senders in the timeline into the state block returned by /sync, + # as we may not have sent them to the client before. We find these membership + # events by filtering them out of timeline_start, which has already been filtered + # to only include membership events for the senders in the timeline. + if lazy_load_members: ll_ids = set( e for t, e in timeline_start.iteritems() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f99d3871e4..1413a6f910 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types, filtered_types=None): + def _get_state_groups_from_groups(self, groups, types): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(Iterable[str]|None): Only apply filtering via `types` to this - list of event types. Other types of events are returned unfiltered. - If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types, + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None, + self, txn, groups, types=None, ): results = {group: {} for group in groups} @@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore): ) for etype, state_key in types ] - - if filtered_types is not None: - # XXX: check whether this slows postgres down like a list of - # ORs does too? - unique_types = set(filtered_types) - clause_to_args.append( - ( - "AND type <> ? " * len(unique_types), - list(unique_types) - ) - ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if filtered_types is not None: - unique_types = set(filtered_types) - where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" - ) - where_args.extend(list(unique_types)) - where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore): # cache. Hence, if we are doing a wildcard lookup, populate the # cache fully so that we can do an efficient lookup next time. - if types and any(k is None for (t, k) in types): + if filtered_types or (types and any(k is None for (t, k) in types)): types_to_fetch = None else: types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, filtered_types + missing_groups, types_to_fetch ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore): if types: for k, v in iteritems(group_state_dict): (typ, _) = k - if k in types or (typ, None) in types: + if ( + (k in types or (typ, None) in types) or + (filtered_types and typ not in filtered_types) + ): state_dict[k] = v else: state_dict.update(group_state_dict) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8924ba9f7f..b2f314e9db 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, (e3.type, e3.state_key): e3, }, state) + + state = yield self.store.get_state_for_event( + e5.event_id, [], filtered_types=[EventTypes.Member], + ) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + }, state) -- cgit 1.4.1 From eb1d911ab743e85154f7c4b2db8a954d152020dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:40:49 +0100 Subject: rather than adding ll_ids, remove them from p_ids --- synapse/handlers/sync.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e5a2329d73..1422843af8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,16 +1553,17 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. + # In practice, we can do this by removing them from the p_ids list. + # see https://github.com/matrix-org/synapse/pull/2970 + # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - ll_ids = set( + p_ids.difference_update( e for t, e in timeline_start.iteritems() - if t[0] == EventTypes.Member and e not in tc_ids + if t[0] == EventTypes.Member ) - else: - ll_ids = set() - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids + state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids return { event_id_to_key[e]: e for e in state_ids -- cgit 1.4.1 From e22700c3dd929f9f0953b3d2b37e503eece82b38 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:59:07 +0100 Subject: consider non-filter_type types as wildcards, thus missing from the state-group-cache --- synapse/storage/state.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1413a6f910..86f2c2e6b1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -518,7 +518,10 @@ class StateGroupWorkerStore(SQLBaseStore): for typ, state_key in types: key = (typ, state_key) - if state_key is None: + if ( + state_key is None or + filtered_types is not None and typ not in filtered_types + ): type_to_key[typ] = None # we mark the type as missing from the cache because # when the cache was populated it might have been done with a -- cgit 1.4.1 From 223341205edf725c5583a73d5680245a24790452 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:59:16 +0100 Subject: Don't require to_delete to have event_ids --- synapse/storage/events.py | 91 ++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 36 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 19c05dc8d6..87d0b78a2d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -19,7 +19,7 @@ import logging from collections import OrderedDict, deque, namedtuple from functools import wraps -from six import iteritems, itervalues +from six import iteritems from six.moves import range from canonicaljson import json @@ -347,8 +347,9 @@ class EventsStore(EventsWorkerStore): # state in each room after adding these events current_state_for_room = {} - # map room_id->(to_delete, to_insert) where each entry is - # a map (type,key)->event_id giving the state delta in each + # map room_id->(to_delete, to_insert) where to_delete is a list + # of type/state keys to remove from current state, and to_insert + # is a map (type,key)->event_id giving the state delta in each # room state_delta_for_room = {} @@ -647,17 +648,16 @@ class EventsStore(EventsWorkerStore): Assumes that we are only persisting events for one room at a time. Returns: - 2-tuple (to_delete, to_insert) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. + tuple[list, dict] (to_delete, to_insert): where to_delete are the + type/state_keys to remove from current_state_events and `to_insert` + are the updates to current_state_events. """ existing_state = yield self.get_current_state_ids(room_id) - to_delete = { - key: ev_id for key, ev_id in iteritems(existing_state) - if ev_id != current_state.get(key) - } + to_delete = [ + key for key, ev_id in iteritems(existing_state) + if key not in current_state + ] to_insert = { key: ev_id for key, ev_id in iteritems(current_state) @@ -684,10 +684,10 @@ class EventsStore(EventsWorkerStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - state_delta_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list, dict)]): The current-state delta for each room. For each room, a tuple - (to_delete, to_insert), being a list of event ids to be removed - from the current state, and a list of event ids to be added to + (to_delete, to_insert), being a list of type/state keys to be + removed from the current state, and a state set to be added to the current state. new_forward_extremeties (dict[str, list[str]]): The new forward extremities for each room. For each room, a @@ -765,9 +765,46 @@ class EventsStore(EventsWorkerStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in iteritems(state_delta_by_room): to_delete, to_insert = current_state_tuple + + # First we add entries to the current_state_delta_stream. We + # do this before updating the current_state_events table so + # that we can use it to calculate the `prev_event_id`. (This + # allows us to not have to pull out the existing state + # unnecessarily). + sql = """ + INSERT INTO current_state_delta_stream + (stream_id, room_id, type, state_key, event_id, prev_event_id) + SELECT ?, ?, ?, ?, ?, ( + SELECT event_id FROM current_state_events + WHERE room_id = ? AND type = ? AND state_key = ? + ) + """ + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, None, + room_id, etype, state_key, + ) + for etype, state_key in to_delete + # We sanity check that we're deleting rather than updating + if (etype, state_key) not in to_insert + )) + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, ev_id, + room_id, etype, state_key, + ) + for (etype, state_key), ev_id in iteritems(to_insert) + )) + + # Now we actually update the current_state_events table + txn.executemany( - "DELETE FROM current_state_events WHERE event_id = ?", - [(ev_id,) for ev_id in itervalues(to_delete)], + "DELETE FROM current_state_events" + " WHERE room_id = ? AND type = ? AND state_key = ?", + ( + (room_id, etype, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), ) self._simple_insert_many_txn( @@ -784,25 +821,6 @@ class EventsStore(EventsWorkerStore): ], ) - state_deltas = {key: None for key in to_delete} - state_deltas.update(to_insert) - - self._simple_insert_many_txn( - txn, - table="current_state_delta_stream", - values=[ - { - "stream_id": max_stream_order, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": ev_id, - "prev_event_id": to_delete.get(key, None), - } - for key, ev_id in iteritems(state_deltas) - ] - ) - txn.call_after( self._curr_state_delta_stream_cache.entity_has_changed, room_id, max_stream_order, @@ -816,7 +834,8 @@ class EventsStore(EventsWorkerStore): # and which we have added, then we invlidate the caches for all # those users. members_changed = set( - state_key for ev_type, state_key in state_deltas + state_key + for ev_type, state_key in itertools.chain(to_delete, to_insert) if ev_type == EventTypes.Member ) -- cgit 1.4.1 From 1a01a5b964d3ea373355684a91b9f7fd95726fbc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 14:03:15 +0100 Subject: clarify comment on p_ids --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1422843af8..4ced3144c8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,7 +1553,8 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list. + # In practice, we can do this by removing them from the p_ids list, + # which is the list of relevant state we know we have already sent to the client. # see https://github.com/matrix-org/synapse/pull/2970 # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 -- cgit 1.4.1 From a79410e7b8dce3fb8b4b40901a66fd9fde9c47a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 11:13:47 +0100 Subject: Have _get_new_state_after_events return delta If we have a delta from the existing to new current state, then we can reuse that rather than manually working it out by fetching both lots of state. --- synapse/storage/events.py | 65 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 17 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 87d0b78a2d..6d2aae9c4a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -344,7 +344,9 @@ class EventsStore(EventsWorkerStore): new_forward_extremeties = {} # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events + # state in each room after adding these events. + # This is simply used to prefill the get_current_state_ids + # cache current_state_for_room = {} # map room_id->(to_delete, to_insert) where to_delete is a list @@ -419,28 +421,40 @@ class EventsStore(EventsWorkerStore): logger.info( "Calculating state delta for room %s", room_id, ) - with Measure( - self._clock, - "persist_events.get_new_state_after_events", + self._clock, + "persist_events.get_new_state_after_events", ): - current_state = yield self._get_new_state_after_events( + res = yield self._get_new_state_after_events( room_id, ev_ctx_rm, latest_event_ids, new_latest_event_ids, ) - - if current_state is not None: - current_state_for_room[room_id] = current_state + current_state, delta_ids = res + + # If either are not None then there has been a change, + # and we need to work out the delta (or use that + # given) + if delta_ids is not None: + # If there is a delta we know that we've + # only added or replaced state, never + # removed keys entirely. + state_delta_for_room[room_id] = ([], delta_ids) + elif current_state is not None: with Measure( - self._clock, - "persist_events.calculate_state_delta", + self._clock, + "persist_events.calculate_state_delta", ): delta = yield self._calculate_state_delta( room_id, current_state, ) - state_delta_for_room[room_id] = delta + state_delta_for_room[room_id] = delta + + # If we have the current_state then lets prefill + # the cache with it. + if current_state is not None: + current_state_for_room[room_id] = current_state yield self.runInteraction( "persist_events", @@ -539,9 +553,10 @@ class EventsStore(EventsWorkerStore): the new forward extremities for the room. Returns: - Deferred[dict[(str,str), str]|None]: - None if there are no changes to the room state, or - a dict of (type, state_key) -> event_id]. + Deferred[tuple[dict[(str,str), str]|None, dict[(str,str), str]|None]]: + Returns a tuple of two state maps, the first being the full new current + state and the second being the delta to the existing current state. + If both are None then there has been no change. """ if not new_latest_event_ids: @@ -549,6 +564,9 @@ class EventsStore(EventsWorkerStore): # map from state_group to ((type, key) -> event_id) state map state_groups_map = {} + + state_group_deltas = {} + for ev, ctx in events_context: if ctx.state_group is None: # I don't think this can happen, but let's double-check @@ -567,6 +585,9 @@ class EventsStore(EventsWorkerStore): if current_state_ids is not None: state_groups_map[ctx.state_group] = current_state_ids + if ctx.prev_group: + state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids + # We need to map the event_ids to their state groups. First, let's # check if the event is one we're persisting, in which case we can # pull the state group from its context. @@ -608,7 +629,7 @@ class EventsStore(EventsWorkerStore): # If they old and new groups are the same then we don't need to do # anything. if old_state_groups == new_state_groups: - return + defer.returnValue((None, None)) # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. @@ -620,7 +641,17 @@ class EventsStore(EventsWorkerStore): if len(new_state_groups) == 1: # If there is only one state group, then we know what the current # state is. - defer.returnValue(state_groups_map[new_state_groups.pop()]) + new_state_group = new_state_groups.pop() + + delta_ids = None + if len(old_state_groups) == 1: + old_state_group = old_state_groups.pop() + + delta_ids = state_group_deltas.get( + (old_state_group, new_state_group,), None + ) + + defer.returnValue((state_groups_map[new_state_group], delta_ids)) # Ok, we need to defer to the state handler to resolve our state sets. @@ -639,7 +670,7 @@ class EventsStore(EventsWorkerStore): room_id, state_groups, events_map, get_events ) - defer.returnValue(res.state) + defer.returnValue((res.state, None)) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): -- cgit 1.4.1 From 811ac73a42db16db7211a7517ad5651a1f6aee71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 13:40:42 +0100 Subject: Don't fetch state from the database unless needed --- synapse/storage/events.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6d2aae9c4a..eb8bbe13ab 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -631,27 +631,33 @@ class EventsStore(EventsWorkerStore): if old_state_groups == new_state_groups: defer.returnValue((None, None)) - # Now that we have calculated new_state_groups we need to get - # their state IDs so we can resolve to a single state set. - missing_state = new_state_groups - set(state_groups_map) - if missing_state: - group_to_state = yield self._get_state_for_groups(missing_state) - state_groups_map.update(group_to_state) - if len(new_state_groups) == 1: # If there is only one state group, then we know what the current # state is. - new_state_group = new_state_groups.pop() - - delta_ids = None if len(old_state_groups) == 1: - old_state_group = old_state_groups.pop() + new_state_group = next(iter(new_state_groups)) + old_state_group = next(iter(old_state_groups)) delta_ids = state_group_deltas.get( (old_state_group, new_state_group,), None ) + if delta_ids is not None: + # We have a delta from the existing to new current state, + # so lets just return that. If we happen to already have + # the current state in memory then lets also return that, + # but it doesn't matter if we don't. + new_state = state_groups_map.get(new_state_group) + defer.returnValue((new_state, delta_ids)) - defer.returnValue((state_groups_map[new_state_group], delta_ids)) + # Now that we have calculated new_state_groups we need to get + # their state IDs so we can resolve to a single state set. + missing_state = new_state_groups - set(state_groups_map) + if missing_state: + group_to_state = yield self._get_state_for_groups(missing_state) + state_groups_map.update(group_to_state) + + if len(new_state_groups) == 1: + defer.returnValue((state_groups_map[new_state_groups.pop()], None)) # Ok, we need to defer to the state handler to resolve our state sets. -- cgit 1.4.1 From ed0dd68731fa1549b4ea9b2dab08c2afb6ccb64e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 14:31:38 +0100 Subject: Fixup comment (and indent) --- synapse/storage/events.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index eb8bbe13ab..76cfbc90fe 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -565,6 +565,7 @@ class EventsStore(EventsWorkerStore): # map from state_group to ((type, key) -> event_id) state map state_groups_map = {} + # Map from (prev state group, new state group) -> delta state dict state_group_deltas = {} for ev, ctx in events_context: @@ -631,23 +632,24 @@ class EventsStore(EventsWorkerStore): if old_state_groups == new_state_groups: defer.returnValue((None, None)) - if len(new_state_groups) == 1: - # If there is only one state group, then we know what the current - # state is. - if len(old_state_groups) == 1: - new_state_group = next(iter(new_state_groups)) - old_state_group = next(iter(old_state_groups)) + if len(new_state_groups) == 1 and len(old_state_groups) == 1: + # If we're going from one state group to another, lets check if + # we have a delta for that transition. If we do then we can just + # return that. - delta_ids = state_group_deltas.get( - (old_state_group, new_state_group,), None - ) - if delta_ids is not None: - # We have a delta from the existing to new current state, - # so lets just return that. If we happen to already have - # the current state in memory then lets also return that, - # but it doesn't matter if we don't. - new_state = state_groups_map.get(new_state_group) - defer.returnValue((new_state, delta_ids)) + new_state_group = next(iter(new_state_groups)) + old_state_group = next(iter(old_state_groups)) + + delta_ids = state_group_deltas.get( + (old_state_group, new_state_group,), None + ) + if delta_ids is not None: + # We have a delta from the existing to new current state, + # so lets just return that. If we happen to already have + # the current state in memory then lets also return that, + # but it doesn't matter if we don't. + new_state = state_groups_map.get(new_state_group) + defer.returnValue((new_state, delta_ids)) # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. @@ -657,6 +659,8 @@ class EventsStore(EventsWorkerStore): state_groups_map.update(group_to_state) if len(new_state_groups) == 1: + # If there is only one state group, then we know what the current + # state is. defer.returnValue((state_groups_map[new_state_groups.pop()], None)) # Ok, we need to defer to the state handler to resolve our state sets. -- cgit 1.4.1 From 8f65ab98d2ab35bbb29c23cc7978d54b12fd3db0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 15:08:01 +0100 Subject: Remove unnecessary iteritems --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 76cfbc90fe..9c94d3454f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -696,7 +696,7 @@ class EventsStore(EventsWorkerStore): existing_state = yield self.get_current_state_ids(room_id) to_delete = [ - key for key, ev_id in iteritems(existing_state) + key for key in existing_state if key not in current_state ] -- cgit 1.4.1 From 709c309b0e1756afd8b4514b95b0e901c567a0f1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 15:12:50 +0100 Subject: Expand on docstring comment about return value --- synapse/storage/events.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9c94d3454f..906a405031 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -557,6 +557,11 @@ class EventsStore(EventsWorkerStore): Returns a tuple of two state maps, the first being the full new current state and the second being the delta to the existing current state. If both are None then there has been no change. + + If there has been a change then we only return the delta if its + already been calculated. Conversely if we do know the delta then + the new current state is only returned if we've already calculated + it. """ if not new_latest_event_ids: -- cgit 1.4.1 From 3188973857d5bab5c1faf968917ad0ced42b5a0d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 16:20:14 +0100 Subject: Pull out did_forget to worker store --- synapse/app/synchrotron.py | 5 +---- synapse/storage/roommember.py | 52 ++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 29 deletions(-) (limited to 'synapse') diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 26b9ec85f2..e201f18efd 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -55,7 +55,6 @@ from synapse.rest.client.v2_alpha import sync from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.storage.presence import UserPresenceState -from synapse.storage.roommember import RoomMemberStore from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background from synapse.util.manhole import manhole @@ -81,9 +80,7 @@ class SynchrotronSlavedStore( RoomStore, BaseSlavedStore, ): - did_forget = ( - RoomMemberStore.__dict__["did_forget"] - ) + pass UPDATE_SYNCING_USERS_MS = 10 * 1000 diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 01697ab2c9..027bf8c85e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -461,6 +461,30 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) + @cachedInlineCallbacks(num_args=2) + def did_forget(self, user_id, room_id): + """Returns whether user_id has elected to discard history for room_id. + + Returns False if they have since re-joined.""" + def f(txn): + sql = ( + "SELECT" + " COUNT(*)" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " forgotten = 0" + ) + txn.execute(sql, (user_id, room_id)) + rows = txn.fetchall() + return rows[0][0] + count = yield self.runInteraction("did_forget_membership", f) + defer.returnValue(count == 0) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -568,32 +592,10 @@ class RoomMemberStore(RoomMemberWorkerStore): ) txn.execute(sql, (user_id, room_id)) - txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - return self.runInteraction("forget_membership", f) - - @cachedInlineCallbacks(num_args=2) - def did_forget(self, user_id, room_id): - """Returns whether user_id has elected to discard history for room_id. - - Returns False if they have since re-joined.""" - def f(txn): - sql = ( - "SELECT" - " COUNT(*)" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " forgotten = 0" + self._invalidate_cache_and_stream( + txn, self.did_forget, (user_id, room_id,), ) - txn.execute(sql, (user_id, room_id)) - rows = txn.fetchall() - return rows[0][0] - count = yield self.runInteraction("did_forget_membership", f) - defer.returnValue(count == 0) + return self.runInteraction("forget_membership", f) @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): -- cgit 1.4.1 From 8b8c4f34a336376610bf353f6aa5d71c5ef69980 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Jul 2018 16:46:30 +0100 Subject: Replace usage of get_current_toke with StreamToken.START This allows us to handle /context/ requests on the client_reader worker without having to pull in all the various stream handlers (e.g. precence, typing, pushers etc). The only thing the token gets used for is pagination, and that ignores everything but the room portion of the token. --- synapse/app/client_reader.py | 2 ++ synapse/handlers/room.py | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 398bb36602..e2c91123db 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -31,6 +31,7 @@ from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.directory import DirectoryStore @@ -58,6 +59,7 @@ logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( + SlavedAccountDataStore, SlavedEventStore, SlavedKeyStore, RoomStore, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6150b7e226..003b848c00 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -24,7 +24,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset from synapse.api.errors import AuthError, Codes, StoreError, SynapseError -from synapse.types import RoomAlias, RoomID, RoomStreamToken, UserID +from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils from synapse.visibility import filter_events_for_client @@ -418,8 +418,6 @@ class RoomContextHandler(object): before_limit = math.floor(limit / 2.) after_limit = limit - before_limit - now_token = yield self.hs.get_event_sources().get_current_token() - users = yield self.store.get_users_in_room(room_id) is_peeking = user.to_string() not in users @@ -462,11 +460,15 @@ class RoomContextHandler(object): ) results["state"] = list(state[last_event_id].values()) - results["start"] = now_token.copy_and_replace( + # We use a dummy token here as we only care about the room portion of + # the token, which we replace. + token = StreamToken.START + + results["start"] = token.copy_and_replace( "room_key", results["start"] ).to_string() - results["end"] = now_token.copy_and_replace( + results["end"] = token.copy_and_replace( "room_key", results["end"] ).to_string() -- cgit 1.4.1 From cb5c37a57c387a74f6079008870cf024e674dfe5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 20:34:45 +0100 Subject: handle the edge case for _get_some_state_from_cache where types is [] --- synapse/storage/state.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 86f2c2e6b1..989977c644 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -547,7 +547,13 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - got_all = is_all or not missing_types + if types == [] and filtered_types is not None: + # special wildcard case for empty type-list but an explicit filtered_types + # which means that we'll try to return all types which aren't in the + # filtered_types list. missing_types will always be empty, so we ignore it. + got_all = is_all + else: + got_all = is_all or not missing_types return { k: v for k, v in iteritems(state_dict_ids) -- cgit 1.4.1 From ec56121b0d099824a1455260e05904f081b6d14a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 09:35:02 +0100 Subject: Correctly handle outliers during persist events We incorrectly asserted that all contexts must have a non None state group without consider outliers. This would usually be fine as the assertion would never be hit, as there is a shortcut during persistence if the forward extremities don't change. However, if the outlier is being persisted with non-outlier events, the function would be called and the assertion would be hit. Fixes #3601 --- synapse/storage/events.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..bd05f23b06 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -575,11 +575,12 @@ class EventsStore(EventsWorkerStore): for ev, ctx in events_context: if ctx.state_group is None: - # I don't think this can happen, but let's double-check - raise Exception( - "Context for new extremity event %s has no state " - "group" % (ev.event_id, ), - ) + # This should only happen for outlier events. + if not event.internal_metadata.is_outlier(): + raise Exception( + "Context for new event %s has no state " + "group" % (ev.event_id, ), + ) if ctx.state_group in state_groups_map: continue @@ -607,7 +608,7 @@ class EventsStore(EventsWorkerStore): for event_id in new_latest_event_ids: # First search in the list of new events we're adding. for ev, ctx in events_context: - if event_id == ev.event_id: + if event_id == ev.event_id and ctx.state_group is not None: event_id_to_state_group[event_id] = ctx.state_group break else: -- cgit 1.4.1 From 371da42ae4ec093f3887e9485d149cdfbedd4e58 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 09:41:12 +0100 Subject: Wrap a number of things that run in the background This will reduce the number of "Starting db connection from sentinel context" warnings, and will help with our metrics. --- synapse/app/homeserver.py | 13 ++++++++++--- synapse/groups/attestations.py | 6 +++++- synapse/replication/tcp/resource.py | 14 ++++++++------ synapse/rest/media/v1/media_repository.py | 8 +++++++- synapse/rest/media/v1/preview_url_resource.py | 8 +++++++- synapse/storage/devices.py | 8 ++++++-- synapse/storage/event_federation.py | 9 ++++++--- synapse/storage/event_push_actions.py | 13 +++++++++---- synapse/storage/transactions.py | 6 +++++- 9 files changed, 63 insertions(+), 22 deletions(-) (limited to 'synapse') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 2ad1beb8d8..b7e7718290 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -49,6 +49,7 @@ from synapse.http.additional_resource import AdditionalResource from synapse.http.server import RootRedirect from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.module_api import ModuleApi from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, check_requirements @@ -427,6 +428,9 @@ def run(hs): # currently either 0 or 1 stats_process = [] + def start_phone_stats_home(): + run_as_background_process("phone_stats_home", phone_stats_home) + @defer.inlineCallbacks def phone_stats_home(): logger.info("Gathering stats for reporting") @@ -498,7 +502,10 @@ def run(hs): ) def generate_user_daily_visit_stats(): - hs.get_datastore().generate_user_daily_visits() + run_as_background_process( + "generate_user_daily_visits", + hs.get_datastore().generate_user_daily_visits, + ) # Rather than update on per session basis, batch up the requests. # If you increase the loop period, the accuracy of user_daily_visits @@ -507,7 +514,7 @@ def run(hs): if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") - clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) + clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000) # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process @@ -515,7 +522,7 @@ def run(hs): # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes - clock.call_later(5 * 60, phone_stats_home) + clock.call_later(5 * 60, start_phone_stats_home) if hs.config.daemonize and hs.config.print_pidfile: print (hs.config.pid_file) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 47452700a8..4216af0a27 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -43,6 +43,7 @@ from signedjson.sign import sign_json from twisted.internet import defer from synapse.api.errors import SynapseError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from synapse.util.logcontext import run_in_background @@ -129,7 +130,7 @@ class GroupAttestionRenewer(object): self.attestations = hs.get_groups_attestation_signing() self._renew_attestations_loop = self.clock.looping_call( - self._renew_attestations, 30 * 60 * 1000, + self._start_renew_attestations, 30 * 60 * 1000, ) @defer.inlineCallbacks @@ -151,6 +152,9 @@ class GroupAttestionRenewer(object): defer.returnValue({}) + def _start_renew_attestations(self): + run_as_background_process("renew_attestations", self._renew_attestations) + @defer.inlineCallbacks def _renew_attestations(self): """Called periodically to check if we need to update any of our attestations diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 611fb66e1d..fd59f1595f 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -25,6 +25,7 @@ from twisted.internet import defer from twisted.internet.protocol import Factory from synapse.metrics import LaterGauge +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.metrics import Measure, measure_func from .protocol import ServerReplicationStreamProtocol @@ -117,7 +118,6 @@ class ReplicationStreamer(object): for conn in self.connections: conn.send_error("server shutting down") - @defer.inlineCallbacks def on_notifier_poke(self): """Checks if there is actually any new data and sends it to the connections if there are. @@ -132,14 +132,16 @@ class ReplicationStreamer(object): stream.discard_updates_and_advance() return - # If we're in the process of checking for new updates, mark that fact - # and return + self.pending_updates = True + if self.is_looping: - logger.debug("Noitifier poke loop already running") - self.pending_updates = True + logger.debug("Notifier poke loop already running") return - self.pending_updates = True + run_as_background_process("replication_notifier", self._run_notifier_loop) + + @defer.inlineCallbacks + def _run_notifier_loop(self): self.is_looping = True try: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 30242c525a..5b13378caa 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.async import Linearizer from synapse.util.logcontext import make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -100,10 +101,15 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed, + self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS, ) + def _start_update_recently_accessed(self): + run_as_background_process( + "update_recently_accessed_media", self._update_recently_accessed, + ) + @defer.inlineCallbacks def _update_recently_accessed(self): remote_media = self.recently_accessed_remotes diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b70b15c4c2..4efd5339a4 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -41,6 +41,7 @@ from synapse.http.server import ( wrap_json_request_handler, ) from synapse.http.servlet import parse_integer, parse_string +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.async import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background @@ -81,7 +82,7 @@ class PreviewUrlResource(Resource): self._cache.start() self._cleaner_loop = self.clock.looping_call( - self._expire_url_cache_data, 10 * 1000 + self._start_expire_url_cache_data, 10 * 1000, ) def render_OPTIONS(self, request): @@ -371,6 +372,11 @@ class PreviewUrlResource(Resource): "etag": headers["ETag"][0] if "ETag" in headers else None, }) + def _start_expire_url_cache_data(self): + run_as_background_process( + "expire_url_cache_data", self._expire_url_cache_data, + ) + @defer.inlineCallbacks def _expire_url_cache_data(self): """Clean up expired url cache content, media and thumbnails. diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index cc3cdf2ebc..52dccb1507 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList from ._base import Cache, SQLBaseStore @@ -711,6 +712,9 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - return self.runInteraction( - "_prune_old_outbound_device_pokes", _prune_txn + run_as_background_process( + "prune_old_outbound_device_pokes", + self.runInteraction, + "_prune_old_outbound_device_pokes", + _prune_txn, ) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 8d366d1b91..65f2d19e20 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -23,6 +23,7 @@ from unpaddedbase64 import encode_base64 from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore from synapse.storage.signatures import SignatureWorkerStore @@ -446,7 +447,7 @@ class EventFederationStore(EventFederationWorkerStore): ) hs.get_clock().looping_call( - self._delete_old_forward_extrem_cache, 60 * 60 * 1000 + self._delete_old_forward_extrem_cache, 60 * 60 * 1000, ) def _update_min_depth_for_room_txn(self, txn, room_id, depth): @@ -548,9 +549,11 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - return self.runInteraction( + run_as_background_process( + "delete_old_forward_extrem_cache", + self.runInteraction, "_delete_old_forward_extrem_cache", - _delete_old_forward_extrem_cache_txn + _delete_old_forward_extrem_cache_txn, ) def clean_room_for_join(self, room_id): diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 29b511ae5e..4f44b0ad47 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -22,6 +22,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import LoggingTransaction, SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -458,11 +459,12 @@ class EventPushActionsWorkerStore(SQLBaseStore): "Error removing push actions after event persistence failure", ) - @defer.inlineCallbacks def _find_stream_orderings_for_times(self): - yield self.runInteraction( + run_as_background_process( + "event_push_action_stream_orderings", + self.runInteraction, "_find_stream_orderings_for_times", - self._find_stream_orderings_for_times_txn + self._find_stream_orderings_for_times_txn, ) def _find_stream_orderings_for_times_txn(self, txn): @@ -604,7 +606,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): self._doing_notif_rotation = False self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 + self._start_rotate_notifs, 30 * 60 * 1000, ) def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts, @@ -787,6 +789,9 @@ class EventPushActionsStore(EventPushActionsWorkerStore): WHERE room_id = ? AND user_id = ? AND stream_ordering <= ? """, (room_id, user_id, stream_ordering)) + def _start_rotate_notifs(self): + run_as_background_process("rotate_notifs", self._rotate_notifs) + @defer.inlineCallbacks def _rotate_notifs(self): if self._doing_notif_rotation or self.stream_ordering_day_ago is None: diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index c3bc94f56d..b4b479d94c 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -22,6 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore @@ -57,7 +58,7 @@ class TransactionStore(SQLBaseStore): def __init__(self, db_conn, hs): super(TransactionStore, self).__init__(db_conn, hs) - self._clock.looping_call(self._cleanup_transactions, 30 * 60 * 1000) + self._clock.looping_call(self._start_cleanup_transactions, 30 * 60 * 1000) def get_received_txn_response(self, transaction_id, origin): """For an incoming transaction from a given origin, check if we have @@ -271,6 +272,9 @@ class TransactionStore(SQLBaseStore): txn.execute(query, (self._clock.time_msec(),)) return self.cursor_to_dict(txn) + def _start_cleanup_transactions(self): + run_as_background_process("cleanup_transactions", self._cleanup_transactions) + def _cleanup_transactions(self): now = self._clock.time_msec() month_ago = now - 30 * 24 * 60 * 60 * 1000 -- cgit 1.4.1 From a297ff2b16094f358cc8432f1db660ca539f8b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 09:48:01 +0100 Subject: Fix typo in conditional --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bd05f23b06..5024c4714d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -576,7 +576,7 @@ class EventsStore(EventsWorkerStore): for ev, ctx in events_context: if ctx.state_group is None: # This should only happen for outlier events. - if not event.internal_metadata.is_outlier(): + if not ev.internal_metadata.is_outlier(): raise Exception( "Context for new event %s has no state " "group" % (ev.event_id, ), -- cgit 1.4.1 From f59be4eb0e4c96d6744b4a479a04991bd53a50a6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 10:30:36 +0100 Subject: Fix unit tests on_notifier_poke no longer runs synchonously, so we have to do a different hack to make sure that the replication data has been sent. Let's actually listen for its arrival. --- synapse/replication/tcp/client.py | 2 +- tests/replication/slave/storage/_base.py | 37 ++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e592ab57bf..970e94313e 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -192,7 +192,7 @@ class ReplicationClientHandler(object): """Returns a deferred that is resolved when we receive a SYNC command with given data. - Used by tests. + [Not currently] used by tests. """ return self.awaiting_syncs.setdefault(data, defer.Deferred()) diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 8708c8a196..a103e7be80 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -11,23 +11,44 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import tempfile from mock import Mock, NonCallableMock from twisted.internet import defer, reactor +from twisted.internet.defer import Deferred from synapse.replication.tcp.client import ( ReplicationClientFactory, ReplicationClientHandler, ) from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory +from synapse.util.logcontext import PreserveLoggingContext, make_deferred_yieldable from tests import unittest from tests.utils import setup_test_homeserver +class TestReplicationClientHandler(ReplicationClientHandler): + """Overrides on_rdata so that we can wait for it to happen""" + def __init__(self, store): + super(TestReplicationClientHandler, self).__init__(store) + self._rdata_awaiters = [] + + def await_replication(self): + d = Deferred() + self._rdata_awaiters.append(d) + return make_deferred_yieldable(d) + + def on_rdata(self, stream_name, token, rows): + awaiters = self._rdata_awaiters + self._rdata_awaiters = [] + super(TestReplicationClientHandler, self).on_rdata(stream_name, token, rows) + with PreserveLoggingContext(): + for a in awaiters: + a.callback(None) + + class BaseSlavedStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): @@ -52,7 +73,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.addCleanup(listener.stopListening) self.streamer = server_factory.streamer - self.replication_handler = ReplicationClientHandler(self.slaved_store) + self.replication_handler = TestReplicationClientHandler(self.slaved_store) client_factory = ReplicationClientFactory( self.hs, "client_name", self.replication_handler ) @@ -60,12 +81,14 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.addCleanup(client_factory.stopTrying) self.addCleanup(client_connector.disconnect) - @defer.inlineCallbacks def replicate(self): - yield self.streamer.on_notifier_poke() - d = self.replication_handler.await_sync("replication_test") - self.streamer.send_sync_to_all_connections("replication_test") - yield d + """Tell the master side of replication that something has happened, and then + wait for the replication to occur. + """ + # xxx: should we be more specific in what we wait for? + d = self.replication_handler.await_replication() + self.streamer.on_notifier_poke() + return d @defer.inlineCallbacks def check(self, method, args, expected_result=None): -- cgit 1.4.1 From 55acd6856cd86feb34edcbb5d9ce30e55a04e27f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 10:34:48 +0100 Subject: Fix updating of cached remote profiles _update_remote_profile_cache was missing its `defer.inlineCallbacks`, so when it was called, would just return a generator object, without actually running any of the method body. --- synapse/handlers/profile.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 859f6d2b2e..43692b83a8 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -18,6 +18,7 @@ import logging from twisted.internet import defer from synapse.api.errors import AuthError, CodeMessageException, SynapseError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID, get_domain_from_id from ._base import BaseHandler @@ -41,7 +42,7 @@ class ProfileHandler(BaseHandler): if hs.config.worker_app is None: self.clock.looping_call( - self._update_remote_profile_cache, self.PROFILE_UPDATE_MS, + self._start_update_remote_profile_cache, self.PROFILE_UPDATE_MS, ) @defer.inlineCallbacks @@ -254,6 +255,12 @@ class ProfileHandler(BaseHandler): room_id, str(e.message) ) + def _start_update_remote_profile_cache(self): + run_as_background_process( + "Update remote profile", self._update_remote_profile_cache, + ) + + @defer.inlineCallbacks def _update_remote_profile_cache(self): """Called periodically to check profiles of remote users we haven't checked in a while. -- cgit 1.4.1 From 07defd5fe6cc6f645195ddf0d679290bb214ca73 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 10:47:57 +0100 Subject: Fix another logcontext leak in _persist_events We need to run the errback in the sentinel context to avoid losing our own context. Also: add logging to runInteraction to help identify where "Starting db connection from sentinel context" warnings are coming from --- changelog.d/3606.misc | 1 + synapse/storage/_base.py | 6 ++++++ synapse/storage/events.py | 9 ++++----- 3 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 changelog.d/3606.misc (limited to 'synapse') diff --git a/changelog.d/3606.misc b/changelog.d/3606.misc new file mode 100644 index 0000000000..f0137766a0 --- /dev/null +++ b/changelog.d/3606.misc @@ -0,0 +1 @@ +Fix some random logcontext leaks. \ No newline at end of file diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1d41d8d445..44f37b4c1e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -311,6 +311,12 @@ class SQLBaseStore(object): after_callbacks = [] exception_callbacks = [] + if LoggingContext.current_context() == LoggingContext.sentinel: + logger.warn( + "Starting db txn '%s' from sentinel context", + desc, + ) + try: result = yield self.runWithConnection( self._new_transaction, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..ee9e4d4b65 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -142,15 +142,14 @@ class _EventPeristenceQueue(object): try: queue = self._get_drainining_queue(room_id) for item in queue: - # handle_queue_loop runs in the sentinel logcontext, so - # there is no need to preserve_fn when running the - # callbacks on the deferred. try: ret = yield per_item_callback(item) + except Exception: + with PreserveLoggingContext(): + item.deferred.errback() + else: with PreserveLoggingContext(): item.deferred.callback(ret) - except Exception: - item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: -- cgit 1.4.1 From 1be94440d38ad6af64486ce31c650d0540d4049c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 25 Jul 2018 11:01:36 +0100 Subject: Fix occasional 'tuple index out of range' error This fixes a bug in _delete_existing_rows_txn which was introduced in #3435 (though it's been on matrix-org-hotfixes for *years*). This code is only called when there is some sort of conflict the first time we try to persist an event, so it only happens rarely. Still, the exceptions are annoying. --- changelog.d/3607.bugfix | 1 + synapse/storage/events.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/3607.bugfix (limited to 'synapse') diff --git a/changelog.d/3607.bugfix b/changelog.d/3607.bugfix new file mode 100644 index 0000000000..7ad64593b8 --- /dev/null +++ b/changelog.d/3607.bugfix @@ -0,0 +1 @@ +Fix 'tuple index out of range' error \ No newline at end of file diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 906a405031..f7c4226ea2 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1137,7 +1137,7 @@ class EventsStore(EventsWorkerStore): ): txn.executemany( "DELETE FROM %s WHERE room_id = ? AND event_id = ?" % (table,), - [(ev.event_id,) for ev, _ in events_and_contexts] + [(ev.room_id, ev.event_id) for ev, _ in events_and_contexts] ) def _store_event_txn(self, txn, events_and_contexts): -- cgit 1.4.1 From 7780a7b47ca0fa039886dda8f6ca84bb197e9425 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Jul 2018 11:13:20 +0100 Subject: Actually fix it by adding continue --- synapse/storage/events.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5024c4714d..d7fea0986b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -581,6 +581,7 @@ class EventsStore(EventsWorkerStore): "Context for new event %s has no state " "group" % (ev.event_id, ), ) + continue if ctx.state_group in state_groups_map: continue -- cgit 1.4.1 From 7d9fb88617f475ac7e064c5cccc8d78dbd78d2a3 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:15:33 +0100 Subject: incorporate more review. --- synapse/storage/state.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 989977c644..e38427bf9d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -493,12 +493,6 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` - Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). - `missing_types` is the list of types that aren't in the cache for that - group. `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. - Args: group(int): The state group to lookup types(list[str, str|None]): List of 2-tuples of the form @@ -507,6 +501,11 @@ class StateGroupWorkerStore(SQLBaseStore): filtered_types(list[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. + + Returns 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -520,7 +519,7 @@ class StateGroupWorkerStore(SQLBaseStore): if ( state_key is None or - filtered_types is not None and typ not in filtered_types + (filtered_types is not None and typ not in filtered_types) ): type_to_key[typ] = None # we mark the type as missing from the cache because @@ -547,18 +546,17 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - if types == [] and filtered_types is not None: - # special wildcard case for empty type-list but an explicit filtered_types - # which means that we'll try to return all types which aren't in the - # filtered_types list. missing_types will always be empty, so we ignore it. - got_all = is_all - else: - got_all = is_all or not missing_types + got_all = is_all + if not got_all: + # the cache is incomplete. We may still have got all the results we need, if + # we don't have any wildcards in the match list. + if not missing_types and filtered_types is None: + got_all = True return { k: v for k, v in iteritems(state_dict_ids) if include(k[0], k[1]) - }, missing_types, got_all + }, got_all def _get_all_state_from_cache(self, group): """Checks if group is in cache. See `_get_state_for_groups` @@ -603,7 +601,7 @@ class StateGroupWorkerStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict_ids, _, got_all = self._get_some_state_from_cache( + state_dict_ids, got_all = self._get_some_state_from_cache( group, types, filtered_types ) results[group] = state_dict_ids -- cgit 1.4.1 From d8e65ed7e111243c08c0b87c9a49e7537c355074 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 25 Jul 2018 15:44:41 -0600 Subject: Fix a minor documentation typo in on_make_leave --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 145c1a21d4..49068c06d9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1279,7 +1279,7 @@ class FederationHandler(BaseHandler): @log_function def on_make_leave_request(self, room_id, user_id): """ We've received a /make_leave/ request, so we create a partial - join event for the room and return that. We do *not* persist or + leave event for the room and return that. We do *not* persist or process it until the other server has signed it and sent it back. """ builder = self.event_builder_factory.new({ -- cgit 1.4.1 From bc7944e6d2ea0076badd0eba414e1ba7020eb1e6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 23:36:31 +0100 Subject: switch missing_types to be a bool --- synapse/storage/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e38427bf9d..b27b3ae144 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -511,8 +511,8 @@ class StateGroupWorkerStore(SQLBaseStore): type_to_key = {} - # tracks which of the requested types are missing from our cache - missing_types = set() + # tracks whether any of ourrequested types are missing from the cache + missing_types = False for typ, state_key in types: key = (typ, state_key) @@ -526,13 +526,13 @@ class StateGroupWorkerStore(SQLBaseStore): # when the cache was populated it might have been done with a # restricted set of state_keys, so the wildcard will not work # and the cache may be incomplete. - missing_types.add(key) + missing_types = True else: if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) if key not in state_dict_ids and key not in known_absent: - missing_types.add(key) + missing_types = True sentinel = object() -- cgit 1.4.1 From 03751a64203b169cbf33b636b6d940ca6d414c31 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 11:44:26 +0100 Subject: Fix some looping_call calls which were broken in #3604 It turns out that looping_call does check the deferred returned by its callback, and (at least in the case of client_ips), we were relying on this, and I broke it in #3604. Update run_as_background_process to return the deferred, and make sure we return it to clock.looping_call. --- changelog.d/3610.feature | 1 + synapse/app/homeserver.py | 4 ++-- synapse/groups/attestations.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/metrics/background_process_metrics.py | 10 ++++++++-- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/client_ips.py | 2 +- synapse/storage/devices.py | 2 +- synapse/storage/event_federation.py | 2 +- synapse/storage/event_push_actions.py | 4 ++-- synapse/storage/transactions.py | 4 +++- synapse/util/caches/expiringcache.py | 2 +- 13 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 changelog.d/3610.feature (limited to 'synapse') diff --git a/changelog.d/3610.feature b/changelog.d/3610.feature new file mode 100644 index 0000000000..77a294cb9f --- /dev/null +++ b/changelog.d/3610.feature @@ -0,0 +1 @@ +Add metrics to track resource usage by background processes diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b7e7718290..57b815d777 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -429,7 +429,7 @@ def run(hs): stats_process = [] def start_phone_stats_home(): - run_as_background_process("phone_stats_home", phone_stats_home) + return run_as_background_process("phone_stats_home", phone_stats_home) @defer.inlineCallbacks def phone_stats_home(): @@ -502,7 +502,7 @@ def run(hs): ) def generate_user_daily_visit_stats(): - run_as_background_process( + return run_as_background_process( "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits, ) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 4216af0a27..b04f4234ca 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -153,7 +153,7 @@ class GroupAttestionRenewer(object): defer.returnValue({}) def _start_renew_attestations(self): - run_as_background_process("renew_attestations", self._renew_attestations) + return run_as_background_process("renew_attestations", self._renew_attestations) @defer.inlineCallbacks def _renew_attestations(self): diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 43692b83a8..cb5c6d587e 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -256,7 +256,7 @@ class ProfileHandler(BaseHandler): ) def _start_update_remote_profile_cache(self): - run_as_background_process( + return run_as_background_process( "Update remote profile", self._update_remote_profile_cache, ) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 9d820e44a6..ce678d5f75 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -151,13 +151,19 @@ def run_as_background_process(desc, func, *args, **kwargs): This should be used to wrap processes which are fired off to run in the background, instead of being associated with a particular request. + It returns a Deferred which completes when the function completes, but it doesn't + follow the synapse logcontext rules, which makes it appropriate for passing to + clock.looping_call and friends (or for firing-and-forgetting in the middle of a + normal synapse inlineCallbacks function). + Args: desc (str): a description for this background process type func: a function, which may return a Deferred args: positional args for func kwargs: keyword args for func - Returns: None + Returns: Deferred which returns the result of func, but note that it does not + follow the synapse logcontext rules. """ @defer.inlineCallbacks def run(): @@ -176,4 +182,4 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes[desc].remove(proc) with PreserveLoggingContext(): - run() + return run() diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5b13378caa..174ad20123 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -106,7 +106,7 @@ class MediaRepository(object): ) def _start_update_recently_accessed(self): - run_as_background_process( + return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed, ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4efd5339a4..27aa0def2f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -373,7 +373,7 @@ class PreviewUrlResource(Resource): }) def _start_expire_url_cache_data(self): - run_as_background_process( + return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data, ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 77ae10da3d..b8cefd43d6 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -102,7 +102,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): to_update, ) - run_as_background_process( + return run_as_background_process( "update_client_ips", update, ) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 52dccb1507..c0943ecf91 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -712,7 +712,7 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - run_as_background_process( + return run_as_background_process( "prune_old_outbound_device_pokes", self.runInteraction, "_prune_old_outbound_device_pokes", diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..f269ec6fb3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -549,7 +549,7 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - run_as_background_process( + return run_as_background_process( "delete_old_forward_extrem_cache", self.runInteraction, "_delete_old_forward_extrem_cache", diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 4f44b0ad47..6840320641 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -460,7 +460,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): ) def _find_stream_orderings_for_times(self): - run_as_background_process( + return run_as_background_process( "event_push_action_stream_orderings", self.runInteraction, "_find_stream_orderings_for_times", @@ -790,7 +790,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): """, (room_id, user_id, stream_ordering)) def _start_rotate_notifs(self): - run_as_background_process("rotate_notifs", self._rotate_notifs) + return run_as_background_process("rotate_notifs", self._rotate_notifs) @defer.inlineCallbacks def _rotate_notifs(self): diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b4b479d94c..428e7fa36e 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -273,7 +273,9 @@ class TransactionStore(SQLBaseStore): return self.cursor_to_dict(txn) def _start_cleanup_transactions(self): - run_as_background_process("cleanup_transactions", self._cleanup_transactions) + return run_as_background_process( + "cleanup_transactions", self._cleanup_transactions, + ) def _cleanup_transactions(self): now = self._clock.time_msec() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 465adc54a8..ce85b2ae11 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -64,7 +64,7 @@ class ExpiringCache(object): return def f(): - run_as_background_process( + return run_as_background_process( "prune_cache_%s" % self._cache_name, self._prune_cache, ) -- cgit 1.4.1 From 1b4d73fa520a301ec35ba417d14a4549a44f33e0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 12:53:51 +0100 Subject: comment on event_edges --- synapse/storage/schema/full_schemas/16/event_edges.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/schema/full_schemas/16/event_edges.sql b/synapse/storage/schema/full_schemas/16/event_edges.sql index 52eec88357..6b5a5a88fa 100644 --- a/synapse/storage/schema/full_schemas/16/event_edges.sql +++ b/synapse/storage/schema/full_schemas/16/event_edges.sql @@ -37,7 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, - is_state BOOL NOT NULL, + is_state BOOL NOT NULL, -- true if this is a prev_state edge rather than a regular + -- event dag edge. UNIQUE (event_id, prev_event_id, room_id, is_state) ); -- cgit 1.4.1 From bd4b25f4d07e07a2da0382cfc59a5a262883c0fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 13:19:08 +0100 Subject: Remove some redundant joins on event_edges.room_id We've long passed the point where it's possible to have the same event_id in different tables, so these join conditions are redundant: we can just join on event_id. event_edges is of non-trivial size, and the room_id column is wasteful, so let's stop reading from it. In future, we can stop writing to it, and then drop it. --- synapse/storage/event_federation.py | 13 ++++++------- synapse/storage/events.py | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..801581dcf7 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -114,9 +114,9 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, sql = ( "SELECT b.event_id, MAX(e.depth) FROM events as e" " INNER JOIN event_edges as g" - " ON g.event_id = e.event_id AND g.room_id = e.room_id" + " ON g.event_id = e.event_id" " INNER JOIN event_backward_extremities as b" - " ON g.prev_event_id = b.event_id AND g.room_id = b.room_id" + " ON g.prev_event_id = b.event_id" " WHERE b.room_id = ? AND g.is_state is ?" " GROUP BY b.event_id" ) @@ -330,8 +330,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, "SELECT depth, prev_event_id FROM event_edges" " INNER JOIN events" " ON prev_event_id = events.event_id" - " AND event_edges.room_id = events.room_id" - " WHERE event_edges.room_id = ? AND event_edges.event_id = ?" + " WHERE event_edges.event_id = ?" " AND event_edges.is_state = ?" " LIMIT ?" ) @@ -365,7 +364,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for row in txn: @@ -402,7 +401,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, query = ( "SELECT prev_event_id FROM event_edges " - "WHERE room_id = ? AND event_id = ? AND is_state = ? " + "WHERE event_id = ? AND is_state = ? " "LIMIT ?" ) @@ -411,7 +410,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, for event_id in front: txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for e_id, in txn: diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..cb10fdedc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -520,7 +520,6 @@ class EventsStore(EventsWorkerStore): iterable=list(new_latest_event_ids), retcols=["prev_event_id"], keyvalues={ - "room_id": room_id, "is_state": False, }, desc="_calculate_new_extremeties", -- cgit 1.4.1 From 5c1d301fd9e669b6704d54caeb1e8a3a223ba053 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:12:00 +0100 Subject: Stop populating events.content This field is no longer read from, so we should stop populating it. Once we're happy that this doesn't break everything, and a rollback is unlikely, we can think about dropping the column. --- synapse/storage/events.py | 1 - .../schema/delta/50/make_event_content_nullable.py | 93 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/50/make_event_content_nullable.py (limited to 'synapse') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..94515cd153 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1189,7 +1189,6 @@ class EventsStore(EventsWorkerStore): "type": event.type, "processed": True, "outlier": event.internal_metadata.is_outlier(), - "content": encode_json(event.content).decode("UTF-8"), "origin_server_ts": int(event.origin_server_ts), "received_ts": self._clock.time_msec(), "sender": event.sender, diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py new file mode 100644 index 0000000000..fa4a289514 --- /dev/null +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +We want to stop populating 'event.content', so we need to make it nullable. + +If this has to be rolled back, then the following should populate the missing data: + +Postgres: + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ); + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +SQLite: + + UPDATE events SET content=( + SELECT json_extract(json,'$.content') FROM event_json ej + WHERE ej.event_id = events.event_id + ) + WHERE + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ) + OR stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +""" + +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + cur.execute(""" + ALTER TABLE events ALTER COLUMN content DROP NOT NULL; + """) + return + + # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() + + cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") + (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") + if sql == oldsql: + raise Exception("Couldn't find null constraint to drop in %s" % oldsql) + + logger.info("Replacing definition of 'events' with: %s", sql) + + cur.execute("PRAGMA writable_schema=ON") + + cur.execute( + "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", + (sql, ), + ) + + cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) + cur.execute("PRAGMA writable_schema=OFF") + + +def run_upgrade(*args, **kwargs): + pass -- cgit 1.4.1 From 51d7df19158de0f21e659625bf43716ff0a700bc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:54:04 +0100 Subject: Create the column nullable There's no real point in ever making the column non-nullable, and doing so breaks the sytests. --- .../schema/delta/50/make_event_content_nullable.py | 15 +++++++-------- synapse/storage/schema/full_schemas/16/im.sql | 7 ++++++- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py index fa4a289514..7d27342e39 100644 --- a/synapse/storage/schema/delta/50/make_event_content_nullable.py +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -60,6 +60,10 @@ logger = logging.getLogger(__name__) def run_create(cur, database_engine, *args, **kwargs): + pass + + +def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): cur.execute(""" ALTER TABLE events ALTER COLUMN content DROP NOT NULL; @@ -67,27 +71,22 @@ def run_create(cur, database_engine, *args, **kwargs): return # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html - cur.execute("PRAGMA schema_version") - (oldver,) = cur.fetchone() cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") if sql == oldsql: raise Exception("Couldn't find null constraint to drop in %s" % oldsql) logger.info("Replacing definition of 'events' with: %s", sql) + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() cur.execute("PRAGMA writable_schema=ON") - cur.execute( "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", (sql, ), ) - cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) cur.execute("PRAGMA writable_schema=OFF") - - -def run_upgrade(*args, **kwargs): - pass diff --git a/synapse/storage/schema/full_schemas/16/im.sql b/synapse/storage/schema/full_schemas/16/im.sql index ba5346806e..5f5cb8d01d 100644 --- a/synapse/storage/schema/full_schemas/16/im.sql +++ b/synapse/storage/schema/full_schemas/16/im.sql @@ -19,7 +19,12 @@ CREATE TABLE IF NOT EXISTS events( event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, - content TEXT NOT NULL, + + -- 'content' used to be created NULLable, but as of delta 50 we drop that constraint. + -- the hack we use to drop the constraint doesn't work for an in-memory sqlite + -- database, which breaks the sytests. Hence, we no longer make it nullable. + content TEXT, + unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, -- cgit 1.4.1 From 7d32f0d745da6cbc2ec4bb28318488dfcab7b930 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 26 Jul 2018 14:41:59 -0600 Subject: Update the send_leave path to be an event_id It's still not used, however the parameter is an event ID not a transaction ID. --- synapse/federation/transport/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index c9beca27c2..8574898f0c 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -404,10 +404,10 @@ class FederationMakeLeaveServlet(BaseFederationServlet): class FederationSendLeaveServlet(BaseFederationServlet): - PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" + PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks - def on_PUT(self, origin, content, query, room_id, txid): + def on_PUT(self, origin, content, query, room_id, event_id): content = yield self.handler.on_send_leave_request(origin, content) defer.returnValue((200, content)) -- cgit 1.4.1 From a75231b507e025eaaa4f06d8932c04fa4e942d48 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 26 Jul 2018 22:51:30 +0100 Subject: Deduplicate redundant lazy-loaded members (#3331) * attempt at deduplicating lazy-loaded members as per the proposal; we can deduplicate redundant lazy-loaded members which are sent in the same sync sequence. we do this heuristically rather than requiring the client to somehow tell us which members it has chosen to cache, by instead caching the last N members sent to a client, and not sending them again. For now we hardcode N to 100. Each cache for a given (user,device) tuple is in turn cached for up to X minutes (to avoid the caches building up). For now we hardcode X to 30. * add include_redundant_members filter option & make it work * remove stale todo * add tests for _get_some_state_from_cache * incorporate review --- changelog.d/3331.feature | 1 + synapse/api/filtering.py | 9 +++++ synapse/handlers/sync.py | 87 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 changelog.d/3331.feature (limited to 'synapse') diff --git a/changelog.d/3331.feature b/changelog.d/3331.feature new file mode 100644 index 0000000000..e574b9bcc3 --- /dev/null +++ b/changelog.d/3331.feature @@ -0,0 +1 @@ +add support for the include_redundant_members filter param as per MSC1227 diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7e767b9bf5..186831e118 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -117,6 +117,9 @@ ROOM_EVENT_FILTER_SCHEMA = { "lazy_load_members": { "type": "boolean" }, + "include_redundant_members": { + "type": "boolean" + }, } } @@ -267,6 +270,9 @@ class FilterCollection(object): def lazy_load_members(self): return self._room_state_filter.lazy_load_members() + def include_redundant_members(self): + return self._room_state_filter.include_redundant_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -426,6 +432,9 @@ class Filter(object): def lazy_load_members(self): return self.filter_json.get("lazy_load_members", False) + def include_redundant_members(self): + return self.filter_json.get("include_redundant_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4ced3144c8..dff1f67dcb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -26,6 +26,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.push.clientformat import format_push_rules_for_user from synapse.types import RoomStreamToken from synapse.util.async import concurrently_execute +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure, measure_func @@ -33,6 +35,14 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Store the cache that tracks which lazy-loaded members have been sent to a given +# client for no more than 30 minutes. +LAZY_LOADED_MEMBERS_CACHE_MAX_AGE = 30 * 60 * 1000 + +# Remember the last 100 members we sent to a client for the purposes of +# avoiding redundantly sending the same lazy-loaded members to the client +LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100 + SyncConfig = collections.namedtuple("SyncConfig", [ "user", @@ -182,6 +192,12 @@ class SyncHandler(object): self.response_cache = ResponseCache(hs, "sync") self.state = hs.get_state_handler() + # ExpiringCache((User, Device)) -> LruCache(state_key => event_id) + self.lazy_loaded_members_cache = ExpiringCache( + "lazy_loaded_members_cache", self.clock, + max_len=0, expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, + ) + def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0, full_state=False): """Get the sync for a client if we have new data for it now. Otherwise @@ -505,9 +521,13 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None + lazy_load_members = sync_config.filter_collection.lazy_load_members() + include_redundant_members = ( + sync_config.filter_collection.include_redundant_members() + ) + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -523,6 +543,11 @@ class SyncHandler(object): # only apply the filtering to room members filtered_types = [EventTypes.Member] + timeline_state = { + (event.type, event.state_key): event.event_id + for event in batch.events if event.is_state() + } + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -543,11 +568,6 @@ class SyncHandler(object): state_ids = current_state_ids - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, @@ -571,21 +591,6 @@ class SyncHandler(object): filtered_types=filtered_types, ) - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implemented, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, @@ -596,16 +601,48 @@ class SyncHandler(object): else: state_ids = {} if lazy_load_members: - # TODO: filter out redundant members based on their mxids (not their - # event_ids) at this point. We know we can do it based on mxid as this - # is an non-gappy incremental sync. - if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, filtered_types=filtered_types, ) + if lazy_load_members and not include_redundant_members: + cache_key = (sync_config.user.to_string(), sync_config.device_id) + cache = self.lazy_loaded_members_cache.get(cache_key) + if cache is None: + logger.debug("creating LruCache for %r", cache_key) + cache = LruCache(LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE) + self.lazy_loaded_members_cache[cache_key] = cache + else: + logger.debug("found LruCache for %r", cache_key) + + # if it's a new sync sequence, then assume the client has had + # amnesia and doesn't want any recent lazy-loaded members + # de-duplicated. + if since_token is None: + logger.debug("clearing LruCache for %r", cache_key) + cache.clear() + else: + # only send members which aren't in our LruCache (either + # because they're new to this client or have been pushed out + # of the cache) + logger.debug("filtering state from %r...", state_ids) + state_ids = { + t: event_id + for t, event_id in state_ids.iteritems() + if cache.get(t[1]) != event_id + } + logger.debug("...to %r", state_ids) + + # add any member IDs we are about to send into our LruCache + for t, event_id in itertools.chain( + state_ids.items(), + timeline_state.items(), + ): + if t[0] == EventTypes.Member: + cache.set(t[1], event_id) + state = {} if state_ids: state = yield self.store.get_events(list(state_ids.values())) -- cgit 1.4.1