From 17d585753f1df2b2c2b13ddb8171e174cef97aac Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 4 Oct 2018 15:18:52 +0100
Subject: Delete unreferenced state groups during purge

---
 synapse/storage/events.py | 33 +++++++++++++++++++++++++------
 synapse/storage/state.py  | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 6 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index e7487311ce..0fb190530a 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -2025,6 +2025,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         logger.info("[purge] finding state groups which depend on redundant"
                     " state groups")
         remaining_state_groups = []
+        unreferenced_state_groups = 0
         for i in range(0, len(state_rows), 100):
             chunk = [sg for sg, in state_rows[i:i + 100]]
             # look for state groups whose prev_state_group is one we are about
@@ -2037,13 +2038,33 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                 retcols=["state_group"],
                 keyvalues={},
             )
-            remaining_state_groups.extend(
-                row["state_group"] for row in rows
-                # exclude state groups we are about to delete: no point in
-                # updating them
-                if row["state_group"] not in state_groups_to_delete
-            )
+            for row in rows:
+                sg = row["state_group"]
+
+                if sg in state_groups_to_delete:
+                    # exclude state groups we are about to delete: no point in
+                    # updating them
+                    continue
+
+                if not self._is_state_group_referenced(txn, sg):
+                    # Let's also delete unreferenced state groups while we're
+                    # here, since otherwise we'd need to de-delta them
+                    state_groups_to_delete.add(sg)
+                    unreferenced_state_groups += 1
+                    continue
+
+                remaining_state_groups.append(sg)
+
+        logger.info(
+            "[purge] found %i extra unreferenced state groups to delete",
+            unreferenced_state_groups,
+        )
+
+        logger.info(
+            "[purge] de-delta-ing %i remaining state groups",
+            len(remaining_state_groups),
+        )
 
         # Now we turn the state groups that reference to-be-deleted state
         # groups to non delta versions.
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 3f4cbd61c4..b88c7dc091 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1041,6 +1041,56 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         return count
 
+    def _is_state_group_referenced(self, txn, state_group):
+        """Checks if a given state group is referenced, or is safe to delete.
+
+        A state groups is referenced if it or any of its descendants are
+        pointed at by an event. (A descendant is a group which has the given
+        state_group as a prev group)
+        """
+
+        # We check this by doing a depth first search to look for any
+        # descendant referenced by `event_to_state_groups`.
+
+        # State groups we need to check, contains state groups that are
+        # descendants of `state_group`
+        state_groups_to_search = [state_group]
+
+        # Set of state groups we've already checked
+        state_groups_searched = set()
+
+        while state_groups_to_search:
+            state_group = state_groups_to_search.pop()  # Next state group to check
+
+            is_referenced = self._simple_select_one_onecol_txn(
+                txn,
+                table="event_to_state_groups",
+                keyvalues={"state_group": state_group},
+                retcol="event_id",
+                allow_none=True,
+            )
+            if is_referenced:
+                # A descendant is referenced by event_to_state_groups, so
+                # original state group is referenced.
+ return True + + state_groups_searched.add(state_group) + + # Find all children of current state group and add to search + references = self._simple_select_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"prev_state_group": state_group}, + retcol="state_group", + ) + state_groups_to_search.extend(references) + + # Lets be paranoid and check for cycles + if state_groups_searched.intersection(references): + raise Exception("State group %s has cyclic dependency", state_group) + + return False + class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. -- cgit 1.4.1 From 4917ff55234718c0e650c6dc2a1117304465b9be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 4 Oct 2018 15:24:01 +0100 Subject: Add state_group index to event_to_state_groups This is needed to efficiently check for unreferenced state groups during purge. --- synapse/storage/prepare_database.py | 2 +- .../delta/52/add_event_to_state_group_index.sql | 19 +++++++++++++++++++ synapse/storage/state.py | 7 +++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/52/add_event_to_state_group_index.sql (limited to 'synapse/storage') diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b364719312..bd740e1e45 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 51 +SCHEMA_VERSION = 52 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql new file mode 100644 index 0000000000..91e03d13e1 --- /dev/null +++ b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This is needed to efficiently check for unreferenced state groups during +-- purge. 
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('event_to_state_groups_sg_index', '{}');
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index b88c7dc091..3f08f447a3 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1114,6 +1114,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
     STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
     STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
     CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
+    EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
 
     def __init__(self, db_conn, hs):
         super(StateStore, self).__init__(db_conn, hs)
@@ -1132,6 +1133,12 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
             columns=["state_key"],
             where_clause="type='m.room.member'",
         )
+        self.register_background_index_update(
+            self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME,
+            index_name="event_to_state_groups_sg_index",
+            table="event_to_state_groups",
+            columns=["state_group"],
+        )
 
     def _store_event_state_mappings_txn(self, txn, events_and_contexts):
         state_groups = {}
-- 
cgit 1.4.1


From 67a1e315cc7f8b270ec87e0ab6e58aa2adaee32d Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 12 Oct 2018 13:49:48 +0100
Subject: Fix up comments

---
 synapse/storage/state.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 3f08f447a3..f7cf5c86c9 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1044,9 +1044,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
     def _is_state_group_referenced(self, txn, state_group):
         """Checks if a given state group is referenced, or is safe to delete.
 
-        A state groups is referenced if it or any of its descendants are
-        pointed at by an event. (A descendant is a group which has the given
-        state_group as a prev group)
+        A state group is referenced if it or any of its descendants are
+        pointed at by an event. (A descendant is a state_group whose chain of
+        prev_groups includes the given state_group.)
""" # We check this by doing a depth first search to look for any -- cgit 1.4.1 From 47a9da28caa6a4f27d2df31f043971a2c9c7b555 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Oct 2018 20:43:18 +0100 Subject: Batch process handling state groups --- synapse/storage/events.py | 81 +++++++++------------------------ synapse/storage/state.py | 112 +++++++++++++++++++++++++++++----------------- 2 files changed, 92 insertions(+), 101 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 0fb190530a..e4d0f8b1a9 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -37,6 +37,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.events_worker import EventsWorkerStore +from synapse.storage.state import StateGroupWorkerStore from synapse.types import RoomStreamToken, get_domain_from_id from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -203,7 +204,8 @@ def _retry_on_integrity_error(func): # inherits from EventFederationStore so that we can call _update_backward_extremities # and _handle_mult_prev_events (though arguably those could both be moved in here) -class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore): +class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore, + BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -1995,70 +1997,29 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] finding redundant state groups") - # Get all state groups that are only referenced by events that are - # to be deleted. - # This works by first getting state groups that we may want to delete, - # joining against event_to_state_groups to get events that use that - # state group, then left joining against events_to_purge again. Any - # state group where the left join produce *no nulls* are referenced - # only by events that are going to be purged. + # Get all state groups that are referenced by events that are to be + # deleted. We then go and check if they are referenced by other events + # or state groups, and if not we delete them. 
txn.execute(""" - SELECT state_group FROM - ( - SELECT DISTINCT state_group FROM events_to_purge - INNER JOIN event_to_state_groups USING (event_id) - ) AS sp - INNER JOIN event_to_state_groups USING (state_group) - LEFT JOIN events_to_purge AS ep USING (event_id) - GROUP BY state_group - HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0 + SELECT DISTINCT state_group FROM events_to_purge + INNER JOIN event_to_state_groups USING (event_id) """) - state_rows = txn.fetchall() - logger.info("[purge] found %i redundant state groups", len(state_rows)) - - # make a set of the redundant state groups, so that we can look them up - # efficiently - state_groups_to_delete = set([sg for sg, in state_rows]) - - # Now we get all the state groups that rely on these state groups - logger.info("[purge] finding state groups which depend on redundant" - " state groups") - remaining_state_groups = [] - unreferenced_state_groups = 0 - for i in range(0, len(state_rows), 100): - chunk = [sg for sg, in state_rows[i:i + 100]] - # look for state groups whose prev_state_group is one we are about - # to delete - rows = self._simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=chunk, - retcols=["state_group"], - keyvalues={}, - ) - - for row in rows: - sg = row["state_group"] - - if sg in state_groups_to_delete: - # exclude state groups we are about to delete: no point in - # updating them - continue + referenced_state_groups = set(sg for sg, in txn) + logger.info( + "[purge] found %i referenced state groups", + len(referenced_state_groups), + ) - if not self._is_state_group_referenced(txn, sg): - # Let's also delete unreferenced state groups while we're - # here, since otherwise we'd need to de-delta them - state_groups_to_delete.add(sg) - unreferenced_state_groups += 1 - continue + logger.info("[purge] finding state groups that can be deleted") - remaining_state_groups.append(sg) + state_groups_to_delete, remaining_state_groups = self._find_unreferenced_groups( + txn, referenced_state_groups, + ) logger.info( - "[purge] found %i extra unreferenced state groups to delete", - unreferenced_state_groups, + "[purge] found %i state groups to delete", + len(state_groups_to_delete), ) logger.info( @@ -2109,11 +2070,11 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) txn.executemany( "DELETE FROM state_groups WHERE id = ?", - state_rows + ((sg,) for sg in state_groups_to_delete), ) logger.info("[purge] removing events from event_to_state_groups") diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f7cf5c86c9..0f86311ed4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1041,55 +1041,85 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return count - def _is_state_group_referenced(self, txn, state_group): - """Checks if a given state group is referenced, or is safe to delete. + def _find_unreferenced_groups(self, txn, state_groups): + """Used when purging history to figure out which state groups can be + deleted and which need to be de-delta'ed (due to one of its prev groups + being scheduled for deletion). - A state group is referenced if it or any of its descendants are - pointed at by an event. (A descendant is a state_group whose chain of - prev_groups includes the given state_group.) 
- """ - - # We check this by doing a depth first search to look for any - # descendant referenced by `event_to_state_groups`. - - # State groups we need to check, contains state groups that are - # descendants of `state_group` - state_groups_to_search = [state_group] - - # Set of state groups we've already checked - state_groups_searched = set() - - while state_groups_to_search: - state_group = state_groups_to_search.pop() # Next state group to check + Args: + txn + state_groups (set[int]): Set of state groups referenced by events + that are going to be deleted. - is_referenced = self._simple_select_one_onecol_txn( + Returns: + tuple[set[int], set[int]]: The set of state groups that can be + deleted and the set of state groups that need to be de-delta'ed + """ + # Graph of state group -> previous group + graph = {} + + # Set of events that we have found to be referenced by events + referenced_groups = set() + + # Set of state groups we've already seen + state_groups_seen = set(state_groups) + + # Set of state groups to handle next. + next_to_search = set(state_groups) + while next_to_search: + # We bound size of groups we're looking up at once, to stop the + # SQL query getting too big + if len(next_to_search) < 100: + current_search = next_to_search + next_to_search = set() + else: + lst = list(next_to_search) + current_search = set(lst[:100]) + next_to_search = set(lst[100:]) + + # Check if state groups are referenced + sql = """ + SELECT state_group, count(*) FROM event_to_state_groups + LEFT JOIN events_to_purge AS ep USING (event_id) + WHERE state_group IN (%s) AND ep.event_id IS NULL + GROUP BY state_group + """ % (",".join("?" for _ in current_search),) + txn.execute(sql, list(current_search)) + + referenced = set(sg for sg, cnt in txn if cnt > 0) + referenced_groups |= referenced + + # We don't continue iterating up the state group graphs for state + # groups that are referenced. + current_search -= referenced + + rows = self._simple_select_many_txn( txn, - table="event_to_state_groups", - keyvalues={"state_group": state_group}, - retcol="event_id", - allow_none=True, + table="state_group_edges", + column="prev_state_group", + iterable=current_search, + keyvalues={}, + retcols=("prev_state_group", "state_group",), ) - if is_referenced: - # A descendant is referenced by event_to_state_groups, so - # original state group is referenced. 
- return True - state_groups_searched.add(state_group) + next_to_search.update(row["state_group"] for row in rows) + # We don't bother re-handling groups we've already seen + next_to_search -= state_groups_seen + state_groups_seen |= next_to_search - # Find all children of current state group and add to search - references = self._simple_select_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"prev_state_group": state_group}, - retcol="state_group", - ) - state_groups_to_search.extend(references) + for row in rows: + # Note: Each state group can have at most one prev group + graph[row["state_group"]] = row["prev_state_group"] + + to_delete = state_groups_seen - referenced_groups - # Lets be paranoid and check for cycles - if state_groups_searched.intersection(references): - raise Exception("State group %s has cyclic dependency", state_group) + to_dedelta = set() + for sg in referenced_groups: + prev_sg = graph.get(sg) + if prev_sg and prev_sg in to_delete: + to_dedelta.add(sg) - return False + return to_delete, to_dedelta class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): -- cgit 1.4.1 From 67f7b9cb50c246226b94027e3c1d4b958ff9f840 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 19 Oct 2018 16:06:59 +0100 Subject: pep8 --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index af822fb69d..379b5c514f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2041,7 +2041,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore INNER JOIN event_to_state_groups USING (event_id) """) - referenced_state_groups = set(sg for sg, in txn) + referenced_state_groups = set(sg for sg, in txn) logger.info( "[purge] found %i referenced state groups", len(referenced_state_groups), -- cgit 1.4.1 From 4cda300058ba68f97c032923ebf429f437eddd8e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Oct 2018 11:13:40 +0100 Subject: preserve room visibility --- synapse/handlers/room.py | 8 +++++--- synapse/storage/room.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 3cce6f6150..2f9eb8ef4c 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -32,7 +32,7 @@ from synapse.api.constants import ( JoinRules, RoomCreationPreset, ) -from synapse.api.errors import AuthError, Codes, StoreError, SynapseError +from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError from synapse.storage.state import StateFilter from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID from synapse.util import stringutils @@ -97,9 +97,11 @@ class RoomCreationHandler(BaseHandler): with (yield self._upgrade_linearizer.queue(old_room_id)): # start by allocating a new room id - is_public = False # XXX fixme + r = yield self.store.get_room(old_room_id) + if r is None: + raise NotFoundError("Unknown room id %s" % (old_room_id,)) new_room_id = yield self._generate_room_id( - creator_id=user_id, is_public=is_public, + creator_id=user_id, is_public=r["is_public"], ) # we create and auth the tombstone event before properly creating the new diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 61013b8919..41c65e112a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -47,7 +47,7 @@ class RoomWorkerStore(SQLBaseStore): Args: 
             room_id (str): The ID of the room to retrieve.
         Returns:
-            A namedtuple containing the room information, or an empty list.
+            A dict containing the room information, or None if the room is unknown.
         """
         return self._simple_select_one(
             table="rooms",
-- 
cgit 1.4.1


From b2399f6281d7cd11e7762b683bdd5a4f0c24927e Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 29 Oct 2018 14:01:11 +0000
Subject: Make SQL a bit cleaner

---
 synapse/storage/state.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 59a50a5df9..45afd42b3f 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1272,14 +1272,13 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
             # Check if state groups are referenced
             sql = """
-                SELECT state_group, count(*) FROM event_to_state_groups
+                SELECT DISTINCT state_group FROM event_to_state_groups
                 LEFT JOIN events_to_purge AS ep USING (event_id)
                 WHERE state_group IN (%s) AND ep.event_id IS NULL
-                GROUP BY state_group
             """ % (",".join("?" for _ in current_search),)
             txn.execute(sql, list(current_search))
 
-            referenced = set(sg for sg, cnt in txn if cnt > 0)
+            referenced = set(sg for sg, in txn)
             referenced_groups |= referenced
 
             # We don't continue iterating up the state group graphs for state
-- 
cgit 1.4.1


From f4f223aa4455bea3aa642c23ae957932b1168ba3 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 29 Oct 2018 14:01:49 +0000
Subject: Don't make temporary list

---
 synapse/storage/state.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 45afd42b3f..947d3fc177 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1266,9 +1266,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
                 current_search = next_to_search
                 next_to_search = set()
             else:
-                lst = list(next_to_search)
-                current_search = set(lst[:100])
-                next_to_search = set(lst[100:])
+                current_search = set(islice(next_to_search, 100))
+                next_to_search -= current_search
 
             # Check if state groups are referenced
             sql = """
-- 
cgit 1.4.1


From 664b192a3b4aa57597d9832361b025bc078ea87a Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 29 Oct 2018 14:21:43 +0000
Subject: Fix set operations thinko

---
 synapse/storage/state.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 947d3fc177..dfec57c045 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1293,10 +1293,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
                 retcols=("prev_state_group", "state_group",),
             )
 
-            next_to_search.update(row["state_group"] for row in rows)
-            # We don't bother re-handling groups we've already seen
-            next_to_search -= state_groups_seen
-            state_groups_seen |= next_to_search
+            prevs = set(row["state_group"] for row in rows)
+            # We don't bother re-handling groups we've already seen
+            prevs -= state_groups_seen
+            next_to_search |= prevs
+            state_groups_seen |= prevs
 
             for row in rows:
                 # Note: Each state group can have at most one prev group
-- 
cgit 1.4.1


From ad88460e0d2e0a7c2cf39ec5539d5c4ff030bbbd Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 29 Oct 2018 14:23:34 +0000
Subject: Move _find_unreferenced_groups

---
 synapse/storage/events.py | 85 +++++++++++++++++++++++++++++++++++++++++++++--
 synapse/storage/state.py  | 79 ------------------------------------------
 2 files changed, 83 insertions(+), 81 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index e791922733..919e855f3b 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -2052,8 +2052,10 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
 
         logger.info("[purge] finding state groups that can be deleted")
 
-        state_groups_to_delete, remaining_state_groups = self._find_unreferenced_groups(
-            txn, referenced_state_groups,
+        state_groups_to_delete, remaining_state_groups = (
+            self._find_unreferenced_groups_during_purge(
+                txn, referenced_state_groups,
+            )
         )
 
         logger.info(
@@ -2209,6 +2211,85 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
 
         logger.info("[purge] done")
 
+    def _find_unreferenced_groups_during_purge(self, txn, state_groups):
+        """Used when purging history to figure out which state groups can be
+        deleted and which need to be de-delta'ed (due to one of its prev groups
+        being scheduled for deletion).
+
+        Args:
+            txn
+            state_groups (set[int]): Set of state groups referenced by events
+                that are going to be deleted.
+
+        Returns:
+            tuple[set[int], set[int]]: The set of state groups that can be
+            deleted and the set of state groups that need to be de-delta'ed
+        """
+        # Graph of state group -> previous group
+        graph = {}
+
+        # Set of events that we have found to be referenced by events
+        referenced_groups = set()
+
+        # Set of state groups we've already seen
+        state_groups_seen = set(state_groups)
+
+        # Set of state groups to handle next.
+        next_to_search = set(state_groups)
+        while next_to_search:
+            # We bound size of groups we're looking up at once, to stop the
+            # SQL query getting too big
+            if len(next_to_search) < 100:
+                current_search = next_to_search
+                next_to_search = set()
+            else:
+                current_search = set(itertools.islice(next_to_search, 100))
+                next_to_search -= current_search
+
+            # Check if state groups are referenced
+            sql = """
+                SELECT DISTINCT state_group FROM event_to_state_groups
+                LEFT JOIN events_to_purge AS ep USING (event_id)
+                WHERE state_group IN (%s) AND ep.event_id IS NULL
+            """ % (",".join("?" for _ in current_search),)
+            txn.execute(sql, list(current_search))
+
+            referenced = set(sg for sg, in txn)
+            referenced_groups |= referenced
+
+            # We don't continue iterating up the state group graphs for state
+            # groups that are referenced.
+ current_search -= referenced + + rows = self._simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=current_search, + keyvalues={}, + retcols=("prev_state_group", "state_group",), + ) + + prevs = set(row["state_group"] for row in rows) + # We don't bother re-handling groups we've already seen + prevs -= state_groups_seen + next_to_search |= prevs + state_groups_seen |= prevs + + for row in rows: + # Note: Each state group can have at most one prev group + graph[row["state_group"]] = row["prev_state_group"] + + to_delete = state_groups_seen - referenced_groups + + to_dedelta = set() + for sg in referenced_groups: + prev_sg = graph.get(sg) + if prev_sg and prev_sg in to_delete: + to_dedelta.add(sg) + + return to_delete, to_dedelta + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream diff --git a/synapse/storage/state.py b/synapse/storage/state.py index dfec57c045..d737bd6778 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1234,85 +1234,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): return count - def _find_unreferenced_groups(self, txn, state_groups): - """Used when purging history to figure out which state groups can be - deleted and which need to be de-delta'ed (due to one of its prev groups - being scheduled for deletion). - - Args: - txn - state_groups (set[int]): Set of state groups referenced by events - that are going to be deleted. - - Returns: - tuple[set[int], set[int]]: The set of state groups that can be - deleted and the set of state groups that need to be de-delta'ed - """ - # Graph of state group -> previous group - graph = {} - - # Set of events that we have found to be referenced by events - referenced_groups = set() - - # Set of state groups we've already seen - state_groups_seen = set(state_groups) - - # Set of state groups to handle next. - next_to_search = set(state_groups) - while next_to_search: - # We bound size of groups we're looking up at once, to stop the - # SQL query getting too big - if len(next_to_search) < 100: - current_search = next_to_search - next_to_search = set() - else: - current_search = set(islice(next_to_search, 100)) - next_to_search -= current_search - - # Check if state groups are referenced - sql = """ - SELECT DISTINCT state_group FROM event_to_state_groups - LEFT JOIN events_to_purge AS ep USING (event_id) - WHERE state_group IN (%s) AND ep.event_id IS NULL - """ % (",".join("?" for _ in current_search),) - txn.execute(sql, list(current_search)) - - referenced = set(sg for sg, in txn) - referenced_groups |= referenced - - # We don't continue iterating up the state group graphs for state - # groups that are referenced. 
-            current_search -= referenced
-
-            rows = self._simple_select_many_txn(
-                txn,
-                table="state_group_edges",
-                column="prev_state_group",
-                iterable=current_search,
-                keyvalues={},
-                retcols=("prev_state_group", "state_group",),
-            )
-
-            prevs = set(row["state_group"] for row in rows)
-            # We don't bother re-handling groups we've already seen
-            prevs -= state_groups_seen
-            next_to_search |= prevs
-            state_groups_seen |= prevs
-
-            for row in rows:
-                # Note: Each state group can have at most one prev group
-                graph[row["state_group"]] = row["prev_state_group"]
-
-        to_delete = state_groups_seen - referenced_groups
-
-        to_dedelta = set()
-        for sg in referenced_groups:
-            prev_sg = graph.get(sg)
-            if prev_sg and prev_sg in to_delete:
-                to_dedelta.add(sg)
-
-        return to_delete, to_dedelta
-
 
 class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
     """ Keeps track of the state at a given event.
-- 
cgit 1.4.1


From 88e5ffe6fe816e54a5471728e93fde63353d9a70 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 29 Oct 2018 17:34:34 +0000
Subject: Deduplicate device updates sent over replication

We currently send several kHz of device list updates over replication
occasionally, which often causes the replication streams to lag and
then get dropped.

A lot of those updates will actually be duplicates, since we don't send
e.g. device_ids across replication, so let's deduplicate them when we
pull them out of the database.

---
 synapse/storage/devices.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py
index d10ff9e4b9..62497ab63f 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/devices.py
@@ -589,10 +589,14 @@ class DeviceStore(SQLBaseStore):
             combined list of changes to devices, and which destinations need to be
             poked. `destination` may be None if no destinations need to be poked.
         """
+        # We do a group by here as there can be a large number of duplicate
+        # entries, since we throw away device IDs.
         sql = """
-            SELECT stream_id, user_id, destination FROM device_lists_stream
+            SELECT MAX(stream_id) AS stream_id, user_id, destination
+            FROM device_lists_stream
             LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id)
             WHERE ? < stream_id AND stream_id <= ?
+            GROUP BY user_id, destination
         """
         return self._execute(
             "get_all_device_list_changes_for_remotes", None,
-- 
cgit 1.4.1


From 563f9b61b1439e7a6a8a250fd068659092583dbf Mon Sep 17 00:00:00 2001
From: David Baker
Date: Mon, 29 Oct 2018 21:01:22 +0000
Subject: Make e2e backup versions numeric in the DB

We were doing max(version) which does not do what we wanted
on a column of type TEXT.
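(For illustration only, not part of the patch: a minimal sketch of the failure mode, using SQLite; the table names and values here are made up. TEXT comparison is lexicographic, so MAX() picks '9' over '10', whereas a numeric column compares as expected.)

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE versions_text (version TEXT)")
    conn.execute("CREATE TABLE versions_int (version BIGINT)")
    conn.executemany("INSERT INTO versions_text VALUES (?)", [("9",), ("10",)])
    conn.executemany("INSERT INTO versions_int VALUES (?)", [(9,), (10,)])

    # Lexicographic MAX on TEXT: '9' sorts above '10', which is not what we want.
    print(conn.execute("SELECT MAX(version) FROM versions_text").fetchone()[0])  # '9'
    # Numeric MAX on BIGINT returns the real maximum.
    print(conn.execute("SELECT MAX(version) FROM versions_int").fetchone()[0])   # 10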
--- synapse/handlers/e2e_room_keys.py | 2 +- synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/52/e2e_room_keys.sql | 53 +++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/schema/delta/52/e2e_room_keys.sql (limited to 'synapse/storage') diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 5edb3cfe04..b6d302cd6b 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -138,7 +138,7 @@ class E2eRoomKeysHandler(object): else: raise - if version_info['version'] != version: + if str(version_info['version']) != version: # Check that the version we're trying to upload actually exists try: version_info = yield self.store.get_e2e_room_keys_version_info( diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b364719312..bd740e1e45 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 51 +SCHEMA_VERSION = 52 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/52/e2e_room_keys.sql b/synapse/storage/schema/delta/52/e2e_room_keys.sql new file mode 100644 index 0000000000..754985187e --- /dev/null +++ b/synapse/storage/schema/delta/52/e2e_room_keys.sql @@ -0,0 +1,53 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/* Change version column to an integer so we can do MAX() sensibly
+ */
+CREATE TABLE e2e_room_keys_versions_new (
+    user_id TEXT NOT NULL,
+    version BIGINT NOT NULL,
+    algorithm TEXT NOT NULL,
+    auth_data TEXT NOT NULL,
+    deleted SMALLINT DEFAULT 0 NOT NULL
+);
+
+INSERT INTO e2e_room_keys_versions_new
+    SELECT user_id, version, algorithm, auth_data, deleted FROM e2e_room_keys_versions;
+
+DROP TABLE e2e_room_keys_versions;
+ALTER TABLE e2e_room_keys_versions_new RENAME TO e2e_room_keys_versions;
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
+
+/* Change e2e_room_keys to match
+ */
+CREATE TABLE e2e_room_keys_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    session_id TEXT NOT NULL,
+    version BIGINT NOT NULL,
+    first_message_index INT,
+    forwarded_count INT,
+    is_verified BOOLEAN,
+    session_data TEXT NOT NULL
+);
+
+INSERT INTO e2e_room_keys_new
+    SELECT user_id, room_id, session_id, version, first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys;
+
+DROP TABLE e2e_room_keys;
+ALTER TABLE e2e_room_keys_new RENAME TO e2e_room_keys;
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
-- 
cgit 1.4.1


From 64fa557f80edc99318aa6152c3b84c76dd455c8a Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 30 Oct 2018 09:51:04 +0000
Subject: Try & make it work on postgres

---
 synapse/storage/schema/delta/52/e2e_room_keys.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/schema/delta/52/e2e_room_keys.sql b/synapse/storage/schema/delta/52/e2e_room_keys.sql
index 754985187e..db687cccae 100644
--- a/synapse/storage/schema/delta/52/e2e_room_keys.sql
+++ b/synapse/storage/schema/delta/52/e2e_room_keys.sql
@@ -24,7 +24,7 @@ CREATE TABLE e2e_room_keys_versions_new (
 );
 
 INSERT INTO e2e_room_keys_versions_new
-    SELECT user_id, version, algorithm, auth_data, deleted FROM e2e_room_keys_versions;
+    SELECT user_id, CAST(version as BIGINT), algorithm, auth_data, deleted FROM e2e_room_keys_versions;
 
 DROP TABLE e2e_room_keys_versions;
 ALTER TABLE e2e_room_keys_versions_new RENAME TO e2e_room_keys_versions;
@@ -45,7 +45,7 @@ CREATE TABLE e2e_room_keys_new (
 );
 
 INSERT INTO e2e_room_keys_new
-    SELECT user_id, room_id, session_id, version, first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys;
+    SELECT user_id, room_id, session_id, CAST(version as BIGINT), first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys;
 
 DROP TABLE e2e_room_keys;
 ALTER TABLE e2e_room_keys_new RENAME TO e2e_room_keys;
-- 
cgit 1.4.1


From 2f0f911c52f6de48e47fbb5624a3e3beee0dd6a4 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 30 Oct 2018 10:35:18 +0000
Subject: Convert version back to a string

---
 synapse/storage/e2e_room_keys.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'synapse/storage')

diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py
index f25ded2295..9f826b292c 100644
--- a/synapse/storage/e2e_room_keys.py
+++ b/synapse/storage/e2e_room_keys.py
@@ -236,6 +236,7 @@ class EndToEndRoomKeyStore(SQLBaseStore):
                 ),
             )
             result["auth_data"] = json.loads(result["auth_data"])
+            result["version"] = str(result["version"])
             return result
 
         return self.runInteraction(
-- 
cgit 1.4.1


From 12941f5f8b1f38f273e301104203149b10e9e214 Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 30 Oct 2018 11:01:07 +0000
Subject: Cast backup version to int when querying

---
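(For illustration only, not part of the patch: a sketch of the lookup pattern this commit adopts. `parse_backup_version` is a hypothetical helper; the actual change inlines the try/except around int(version) and raises a 404 StoreError instead of returning None.)

    def parse_backup_version(version):
        """Return the numeric backup version, or None if `version` can't name one."""
        try:
            return int(version)
        except ValueError:
            # Versions in the DB are all ints now, so a non-numeric string
            # cannot match any existing backup.
            return None

    assert parse_backup_version("2") == 2
    assert parse_backup_version("bogus") is None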
 synapse/storage/e2e_room_keys.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py
index 9f826b292c..2a012e9487 100644
--- a/synapse/storage/e2e_room_keys.py
+++ b/synapse/storage/e2e_room_keys.py
@@ -219,7 +219,12 @@ class EndToEndRoomKeyStore(SQLBaseStore):
             if version is None:
                 this_version = self._get_current_version(txn, user_id)
             else:
-                this_version = version
+                try:
+                    this_version = int(version)
+                except ValueError:
+                    # Our versions are all ints so if we can't convert it to an integer,
+                    # it isn't there.
+                    raise StoreError(404, "No row found")
 
             result = self._simple_select_one_txn(
                 txn,
-- 
cgit 1.4.1


From e0934acdbbd497ee1330efe42feccb11382639ec Mon Sep 17 00:00:00 2001
From: David Baker
Date: Tue, 30 Oct 2018 11:12:23 +0000
Subject: Cast to int here too

---
 synapse/storage/e2e_room_keys.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'synapse/storage')

diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py
index 2a012e9487..a1d203fd61 100644
--- a/synapse/storage/e2e_room_keys.py
+++ b/synapse/storage/e2e_room_keys.py
@@ -118,6 +118,11 @@ class EndToEndRoomKeyStore(SQLBaseStore):
             these room keys.
         """
 
+        try:
+            version = int(version)
+        except ValueError:
+            defer.returnValue({'rooms': {}})
+
         keyvalues = {
             "user_id": user_id,
             "version": version,
-- 
cgit 1.4.1


From 50e328d1e7a61268dbf274f10798ea5994c6c25a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff
Date: Thu, 1 Nov 2018 19:01:29 +0000
Subject: Remove redundant database locks for device list updates

We can rely on the application-level per-user linearizer.

---
 synapse/storage/devices.py | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py
index 62497ab63f..bc3f575b1e 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/devices.py
@@ -239,7 +239,19 @@ class DeviceStore(SQLBaseStore):
 
     def update_remote_device_list_cache_entry(self, user_id, device_id, content,
                                               stream_id):
-        """Updates a single user's device in the cache.
+        """Updates a single device in the cache of a remote user's device list.
+
+        Note: assumes that we are the only thread that can be updating this user's
+        device list.
+
+        Args:
+            user_id (str): User to update device list for
+            device_id (str): ID of device being updated
+            content (dict): new data on this device
+            stream_id (int): the version of the device list
+
+        Returns:
+            Deferred[None]
+        """
         return self.runInteraction(
             "update_remote_device_list_cache_entry",
@@ -272,7 +284,11 @@ class DeviceStore(SQLBaseStore):
             },
             values={
                 "content": json.dumps(content),
-            }
+            },
+
+            # we don't need to lock, because we assume we are the only thread
+            # updating this user's devices.
+            lock=False,
         )
 
         txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,))
@@ -289,11 +305,26 @@ class DeviceStore(SQLBaseStore):
             },
             values={
                 "stream_id": stream_id,
-            }
+            },
+
+            # again, we can assume we are the only thread updating this user's
+            # extremity.
+            lock=False,
         )
 
     def update_remote_device_list_cache(self, user_id, devices, stream_id):
-        """Replace the cache of the remote user's devices.
+        """Replace the entire cache of the remote user's devices.
+
+        Note: assumes that we are the only thread that can be updating this user's
+        device list.
+ + Args: + user_id (str): User to update device list for + devices (list[dict]): list of device objects supplied over federation + stream_id (int): the version of the device list + + Returns: + Deferred[None] """ return self.runInteraction( "update_remote_device_list_cache", @@ -338,7 +369,11 @@ class DeviceStore(SQLBaseStore): }, values={ "stream_id": stream_id, - } + }, + + # we don't need to lock, because we can assume we are the only thread + # updating this user's extremity. + lock=False, ) def get_devices_by_remote(self, destination, from_stream_id): -- cgit 1.4.1 From 350f654e7b80ff19d1fdf3861093cef09ccf3ab1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Nov 2018 19:10:33 +0000 Subject: Add unique indexes to a couple of tables The indexes on device_lists_remote_extremeties can be unique, and they therefore should, to ensure that the db remains consistent. --- synapse/storage/devices.py | 49 +++++++++++++++++++++- .../schema/delta/40/device_list_streams.sql | 9 ++-- .../delta/52/device_list_streams_unique_idx.sql | 36 ++++++++++++++++ 3 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql (limited to 'synapse/storage') diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index bc3f575b1e..ecdab34e7d 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -22,14 +22,19 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList -from ._base import Cache, SQLBaseStore, db_to_json +from ._base import Cache, db_to_json logger = logging.getLogger(__name__) +DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( + "drop_device_list_streams_non_unique_indexes" +) -class DeviceStore(SQLBaseStore): + +class DeviceStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): super(DeviceStore, self).__init__(db_conn, hs) @@ -52,6 +57,30 @@ class DeviceStore(SQLBaseStore): columns=["user_id", "device_id"], ) + # create a unique index on device_lists_remote_cache + self.register_background_index_update( + "device_lists_remote_cache_unique_idx", + index_name="device_lists_remote_cache_unique_id", + table="device_lists_remote_cache", + columns=["user_id", "device_id"], + unique=True, + ) + + # And one on device_lists_remote_extremeties + self.register_background_index_update( + "device_lists_remote_extremeties_unique_idx", + index_name="device_lists_remote_extremeties_unique_idx", + table="device_lists_remote_extremeties", + columns=["user_id"], + unique=True, + ) + + # once they complete, we can remove the old non-unique indexes. 
+ self.register_background_update_handler( + DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES, + self._drop_device_list_streams_non_unique_indexes, + ) + @defer.inlineCallbacks def store_device(self, user_id, device_id, initial_device_display_name): @@ -757,3 +786,19 @@ class DeviceStore(SQLBaseStore): "_prune_old_outbound_device_pokes", _prune_txn, ) + + @defer.inlineCallbacks + def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute( + "DROP INDEX IF EXISTS device_lists_remote_cache_id" + ) + txn.execute( + "DROP INDEX IF EXISTS device_lists_remote_extremeties_id" + ) + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES) + defer.returnValue(1) diff --git a/synapse/storage/schema/delta/40/device_list_streams.sql b/synapse/storage/schema/delta/40/device_list_streams.sql index 54841b3843..dd6dcb65f1 100644 --- a/synapse/storage/schema/delta/40/device_list_streams.sql +++ b/synapse/storage/schema/delta/40/device_list_streams.sql @@ -20,9 +20,6 @@ CREATE TABLE device_lists_remote_cache ( content TEXT NOT NULL ); -CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id); - - -- The last update we got for a user. Empty if we're not receiving updates for -- that user. CREATE TABLE device_lists_remote_extremeties ( @@ -30,7 +27,11 @@ CREATE TABLE device_lists_remote_extremeties ( stream_id TEXT NOT NULL ); -CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id); +-- we used to create non-unique indexes on these tables, but as of update 52 we create +-- unique indexes concurrently: +-- +-- CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id); +-- CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id); -- Stream of device lists updates. Includes both local and remotes diff --git a/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql new file mode 100644 index 0000000000..bfa49e6f92 --- /dev/null +++ b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql @@ -0,0 +1,36 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- register a background update which will create a unique index on +-- device_lists_remote_cache +INSERT into background_updates (update_name, progress_json) + VALUES ('device_lists_remote_cache_unique_idx', '{}'); + +-- and one on device_lists_remote_extremeties +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ( + 'device_lists_remote_extremeties_unique_idx', '{}', + + -- doesn't really depend on this, but we need to make sure both happen + -- before we drop the old indexes. + 'device_lists_remote_cache_unique_idx' + ); + +-- once they complete, we can drop the old indexes. 
+INSERT into background_updates (update_name, progress_json, depends_on)
+    VALUES (
+        'drop_device_list_streams_non_unique_indexes', '{}',
+        'device_lists_remote_extremeties_unique_idx'
+    );
-- 
cgit 1.4.1


From bc80b3f454aa9b9ca8bc710ff502b83892ac0a91 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 5 Nov 2018 13:35:15 +0000
Subject: Add helpers for getting prev and auth events (#4139)

* Add helpers for getting prev and auth events

This is in preparation for allowing the event format to change between
room versions.

---
 changelog.d/4139.misc                   |  1 +
 synapse/event_auth.py                   |  4 +--
 synapse/events/__init__.py              | 18 +++++++++++++
 synapse/federation/transaction_queue.py |  4 +--
 synapse/handlers/federation.py          | 48 ++++++++++++++++-----------------
 synapse/state/__init__.py               |  2 +-
 synapse/state/v2.py                     | 16 +++++------
 synapse/storage/event_federation.py     |  4 +--
 synapse/storage/events.py               |  8 +++---
 tests/state/test_v2.py                  |  2 +-
 10 files changed, 62 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/4139.misc

(limited to 'synapse/storage')

diff --git a/changelog.d/4139.misc b/changelog.d/4139.misc
new file mode 100644
index 0000000000..d63d9e7003
--- /dev/null
+++ b/changelog.d/4139.misc
@@ -0,0 +1 @@
+Add helper functions for getting prev and auth events of an event
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index d4d4474847..c81d8e6729 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -200,11 +200,11 @@ def _is_membership_change_allowed(event, auth_events):
     membership = event.content["membership"]
 
     # Check if this is the room creator joining:
-    if len(event.prev_events) == 1 and Membership.JOIN == membership:
+    if len(event.prev_event_ids()) == 1 and Membership.JOIN == membership:
         # Get room creation event:
         key = (EventTypes.Create, "", )
         create = auth_events.get(key)
-        if create and event.prev_events[0][0] == create.event_id:
+        if create and event.prev_event_ids()[0] == create.event_id:
             if create.content["creator"] == event.state_key:
                 return
 
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 12f1eb0a3e..84c75495d5 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -159,6 +159,24 @@ class EventBase(object):
     def keys(self):
         return six.iterkeys(self._event_dict)
 
+    def prev_event_ids(self):
+        """Returns the list of prev event IDs. The order matches the order
+        specified in the event, though there is no meaning to it.
+
+        Returns:
+            list[str]: The list of event IDs of this event's prev_events
+        """
+        return [e for e, _ in self.prev_events]
+
+    def auth_event_ids(self):
+        """Returns the list of auth event IDs. The order matches the order
+        specified in the event, though there is no meaning to it.
+
+        Returns:
+            list[str]: The list of event IDs of this event's auth_events
+        """
+        return [e for e, _ in self.auth_events]
+
 
 class FrozenEvent(EventBase):
     def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None):
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 3fdd63be95..099ace28c1 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -183,9 +183,7 @@ class TransactionQueue(object):
                     # banned then it won't receive the event because it won't
                     # be in the room after the ban.
destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=[ - prev_id for prev_id, _ in event.prev_events - ], + event.room_id, latest_event_ids=event.prev_event_ids(), ) except Exception: logger.exception( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cd5b9bbb19..9ca5fd8724 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -239,7 +239,7 @@ class FederationHandler(BaseHandler): room_id, event_id, min_depth, ) - prevs = {e_id for e_id, _ in pdu.prev_events} + prevs = set(pdu.prev_event_ids()) seen = yield self.store.have_seen_events(prevs) if min_depth and pdu.depth < min_depth: @@ -607,7 +607,7 @@ class FederationHandler(BaseHandler): if e.event_id in seen_ids: continue e.internal_metadata.outlier = True - auth_ids = [e_id for e_id, _ in e.auth_events] + auth_ids = e.auth_event_ids() auth = { (e.type, e.state_key): e for e in auth_chain if e.event_id in auth_ids or e.type == EventTypes.Create @@ -726,7 +726,7 @@ class FederationHandler(BaseHandler): edges = [ ev.event_id for ev in events - if set(e_id for e_id, _ in ev.prev_events) - event_ids + if set(ev.prev_event_ids()) - event_ids ] logger.info( @@ -753,7 +753,7 @@ class FederationHandler(BaseHandler): required_auth = set( a_id for event in events + list(state_events.values()) + list(auth_events.values()) - for a_id, _ in event.auth_events + for a_id in event.auth_event_ids() ) auth_events.update({ e_id: event_map[e_id] for e_id in required_auth if e_id in event_map @@ -769,7 +769,7 @@ class FederationHandler(BaseHandler): auth_events.update(ret_events) required_auth.update( - a_id for event in ret_events.values() for a_id, _ in event.auth_events + a_id for event in ret_events.values() for a_id in event.auth_event_ids() ) missing_auth = required_auth - set(auth_events) @@ -796,7 +796,7 @@ class FederationHandler(BaseHandler): required_auth.update( a_id for event in results if event - for a_id, _ in event.auth_events + for a_id in event.auth_event_ids() ) missing_auth = required_auth - set(auth_events) @@ -816,7 +816,7 @@ class FederationHandler(BaseHandler): "auth_events": { (auth_events[a_id].type, auth_events[a_id].state_key): auth_events[a_id] - for a_id, _ in a.auth_events + for a_id in a.auth_event_ids() if a_id in auth_events } }) @@ -828,7 +828,7 @@ class FederationHandler(BaseHandler): "auth_events": { (auth_events[a_id].type, auth_events[a_id].state_key): auth_events[a_id] - for a_id, _ in event_map[e_id].auth_events + for a_id in event_map[e_id].auth_event_ids() if a_id in auth_events } }) @@ -1041,17 +1041,17 @@ class FederationHandler(BaseHandler): Raises: SynapseError if the event does not pass muster """ - if len(ev.prev_events) > 20: + if len(ev.prev_event_ids()) > 20: logger.warn("Rejecting event %s which has %i prev_events", - ev.event_id, len(ev.prev_events)) + ev.event_id, len(ev.prev_event_ids())) raise SynapseError( http_client.BAD_REQUEST, "Too many prev_events", ) - if len(ev.auth_events) > 10: + if len(ev.auth_event_ids()) > 10: logger.warn("Rejecting event %s which has %i auth_events", - ev.event_id, len(ev.auth_events)) + ev.event_id, len(ev.auth_event_ids())) raise SynapseError( http_client.BAD_REQUEST, "Too many auth_events", @@ -1076,7 +1076,7 @@ class FederationHandler(BaseHandler): def on_event_auth(self, event_id): event = yield self.store.get_event(event_id) auth = yield self.store.get_auth_chain( - [auth_id for auth_id, _ in event.auth_events], + [auth_id for auth_id in event.auth_event_ids()], 
include_given=True ) defer.returnValue([e for e in auth]) @@ -1698,7 +1698,7 @@ class FederationHandler(BaseHandler): missing_auth_events = set() for e in itertools.chain(auth_events, state, [event]): - for e_id, _ in e.auth_events: + for e_id in e.auth_event_ids(): if e_id not in event_map: missing_auth_events.add(e_id) @@ -1717,7 +1717,7 @@ class FederationHandler(BaseHandler): for e in itertools.chain(auth_events, state, [event]): auth_for_e = { (event_map[e_id].type, event_map[e_id].state_key): event_map[e_id] - for e_id, _ in e.auth_events + for e_id in e.auth_event_ids() if e_id in event_map } if create_event: @@ -1785,10 +1785,10 @@ class FederationHandler(BaseHandler): # This is a hack to fix some old rooms where the initial join event # didn't reference the create event in its auth events. - if event.type == EventTypes.Member and not event.auth_events: - if len(event.prev_events) == 1 and event.depth < 5: + if event.type == EventTypes.Member and not event.auth_event_ids(): + if len(event.prev_event_ids()) == 1 and event.depth < 5: c = yield self.store.get_event( - event.prev_events[0][0], + event.prev_event_ids()[0], allow_none=True, ) if c and c.type == EventTypes.Create: @@ -1835,7 +1835,7 @@ class FederationHandler(BaseHandler): # Now get the current auth_chain for the event. local_auth_chain = yield self.store.get_auth_chain( - [auth_id for auth_id, _ in event.auth_events], + [auth_id for auth_id in event.auth_event_ids()], include_given=True ) @@ -1891,7 +1891,7 @@ class FederationHandler(BaseHandler): """ # Check if we have all the auth events. current_state = set(e.event_id for e in auth_events.values()) - event_auth_events = set(e_id for e_id, _ in event.auth_events) + event_auth_events = set(event.auth_event_ids()) if event.is_state(): event_key = (event.type, event.state_key) @@ -1935,7 +1935,7 @@ class FederationHandler(BaseHandler): continue try: - auth_ids = [e_id for e_id, _ in e.auth_events] + auth_ids = e.auth_event_ids() auth = { (e.type, e.state_key): e for e in remote_auth_chain if e.event_id in auth_ids or e.type == EventTypes.Create @@ -1956,7 +1956,7 @@ class FederationHandler(BaseHandler): pass have_events = yield self.store.get_seen_events_with_rejections( - [e_id for e_id, _ in event.auth_events] + event.auth_event_ids() ) seen_events = set(have_events.keys()) except Exception: @@ -2058,7 +2058,7 @@ class FederationHandler(BaseHandler): continue try: - auth_ids = [e_id for e_id, _ in ev.auth_events] + auth_ids = ev.auth_event_ids() auth = { (e.type, e.state_key): e for e in result["auth_chain"] @@ -2250,7 +2250,7 @@ class FederationHandler(BaseHandler): missing_remote_ids = [e.event_id for e in missing_remotes] base_remote_rejected = list(missing_remotes) for e in missing_remotes: - for e_id, _ in e.auth_events: + for e_id in e.auth_event_ids(): if e_id in missing_remote_ids: try: base_remote_rejected.remove(e) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 943d5d6bb5..70048b0c09 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -261,7 +261,7 @@ class StateHandler(object): logger.debug("calling resolve_state_groups from compute_event_context") entry = yield self.resolve_state_groups_for_events( - event.room_id, [e for e, _ in event.prev_events], + event.room_id, event.prev_event_ids(), ) prev_state_ids = entry.state diff --git a/synapse/state/v2.py b/synapse/state/v2.py index dbc9688c56..3573bb0028 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -159,7 +159,7 @@ def 
_get_power_level_for_sender(event_id, event_map, state_res_store): event = yield _get_event(event_id, event_map, state_res_store) pl = None - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""): pl = aev @@ -167,7 +167,7 @@ def _get_power_level_for_sender(event_id, event_map, state_res_store): if pl is None: # Couldn't find power level. Check if they're the creator of the room - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.Create, ""): if aev.content.get("creator") == event.sender: @@ -299,7 +299,7 @@ def _add_event_and_auth_chain_to_graph(graph, event_id, event_map, graph.setdefault(eid, set()) event = yield _get_event(eid, event_map, state_res_store) - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): if aid in auth_diff: if aid not in graph: state.append(aid) @@ -369,7 +369,7 @@ def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store): event = event_map[event_id] auth_events = {} - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): ev = yield _get_event(aid, event_map, state_res_store) if ev.rejected_reason is None: @@ -417,9 +417,9 @@ def _mainline_sort(event_ids, resolved_power_event_id, event_map, while pl: mainline.append(pl) pl_ev = yield _get_event(pl, event_map, state_res_store) - auth_events = pl_ev.auth_events + auth_events = pl_ev.auth_event_ids() pl = None - for aid, _ in auth_events: + for aid in auth_events: ev = yield _get_event(aid, event_map, state_res_store) if (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""): pl = aid @@ -464,10 +464,10 @@ def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_stor if depth is not None: defer.returnValue(depth) - auth_events = event.auth_events + auth_events = event.auth_event_ids() event = None - for aid, _ in auth_events: + for aid in auth_events: aev = yield _get_event(aid, event_map, state_res_store) if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""): event = aev diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 3faca2a042..d3b9dea1d6 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -477,7 +477,7 @@ class EventFederationStore(EventFederationWorkerStore): "is_state": False, } for ev in events - for e_id, _ in ev.prev_events + for e_id in ev.prev_event_ids() ], ) @@ -510,7 +510,7 @@ class EventFederationStore(EventFederationWorkerStore): txn.executemany(query, [ (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) - for ev in events for e_id, _ in ev.prev_events + for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() ]) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 919e855f3b..2047110b1d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -416,7 +416,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore ) if len_1: all_single_prev_not_state = all( - len(event.prev_events) == 1 + len(event.prev_event_ids()) == 1 and not event.is_state() for event, ctx in ev_ctx_rm ) @@ -440,7 +440,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore # guess this by looking at the prev_events and checking # if they match the current forward extremities. 
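                    # (i.e. the state delta can be reused when the events being
                    # persisted extend exactly the current extremities, as checked
                    # below with latest_event_ids == prev_event_ids)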
for ev, _ in ev_ctx_rm: - prev_event_ids = set(e for e, _ in ev.prev_events) + prev_event_ids = set(ev.prev_event_ids()) if latest_event_ids == prev_event_ids: state_delta_reuse_delta_counter.inc() break @@ -551,7 +551,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore result.difference_update( e_id for event in new_events - for e_id, _ in event.prev_events + for e_id in event.prev_event_ids() ) # Finally, remove any events which are prev_events of any existing events. @@ -869,7 +869,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore "auth_id": auth_id, } for event, _ in events_and_contexts - for auth_id, _ in event.auth_events + for auth_id in event.auth_event_ids() if event.is_state() ], ) diff --git a/tests/state/test_v2.py b/tests/state/test_v2.py index d67f59b2c7..2e073a3afc 100644 --- a/tests/state/test_v2.py +++ b/tests/state/test_v2.py @@ -753,7 +753,7 @@ class TestStateResolutionStore(object): result.add(event_id) event = self.event_map[event_id] - for aid, _ in event.auth_events: + for aid in event.auth_event_ids(): stack.append(aid) return list(result) -- cgit 1.4.1 From b1a22b24ab7532da993e673f353dd87eeef49151 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 8 Nov 2018 12:14:20 +0000 Subject: Fix noop checks when updating device keys Clients often reupload their device keys (for some reason) so its important for the server to check for no-ops before sending out device list update notifications. The check is broken in python 3 due to the fact comparing bytes and unicode always fails, and that we write bytes to the DB but get unicode when we read. --- synapse/storage/end_to_end_keys.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 1f1721e820..29281630c0 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -40,6 +40,11 @@ class EndToEndKeyStore(SQLBaseStore): allow_none=True, ) + if old_key_json and not isinstance(old_key_json, bytes): + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes + old_key_json = old_key_json.encode("utf-8") + new_key_json = encode_canonical_json(device_keys) if old_key_json == new_key_json: return False -- cgit 1.4.1 From 5ebed186926eee77844730f5270a926417a0be09 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 8 Nov 2018 12:33:13 +0000 Subject: Lets convert bytes to unicode instead --- synapse/storage/end_to_end_keys.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 29281630c0..2a0f6cfca9 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -40,12 +40,10 @@ class EndToEndKeyStore(SQLBaseStore): allow_none=True, ) - if old_key_json and not isinstance(old_key_json, bytes): - # In py3 we need old_key_json to match new_key_json type. The DB - # returns unicode while encode_canonical_json returns bytes - old_key_json = old_key_json.encode("utf-8") + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes. 
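+        # e.g. on py3, b'{"algorithms":[]}' == u'{"algorithms":[]}' is simply
+        # False (bytes vs str), so the old-vs-new comparison below could never
+        # report a no-op; decoding the canonical JSON puts both sides in str.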
+ new_key_json = encode_canonical_json(device_keys).decode("utf-8") - new_key_json = encode_canonical_json(device_keys) if old_key_json == new_key_json: return False -- cgit 1.4.1 From 4f93abd62d6800c00c9e943f75e19e09c98c8f30 Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 9 Nov 2018 13:25:38 +0000 Subject: add docs --- synapse/storage/e2e_room_keys.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index a1d203fd61..5d83707fa0 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -217,7 +217,10 @@ class EndToEndRoomKeyStore(SQLBaseStore): Raises: StoreError: with code 404 if there are no e2e_room_keys_versions present Returns: - A deferred dict giving the info metadata for this backup version + A deferred dict giving the info metadata for this backup version, with fields including: + version(str) + algorithm(str) + auth_data(object): opaque dict supplied by the client """ def _get_e2e_room_keys_version_info_txn(txn): -- cgit 1.4.1 From d44dea0223c16f86a57647e670b8a7651b93aa2a Mon Sep 17 00:00:00 2001 From: David Baker Date: Fri, 9 Nov 2018 14:38:31 +0000 Subject: pep8 --- synapse/storage/e2e_room_keys.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 5d83707fa0..16b7f005aa 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -217,7 +217,8 @@ class EndToEndRoomKeyStore(SQLBaseStore): Raises: StoreError: with code 404 if there are no e2e_room_keys_versions present Returns: - A deferred dict giving the info metadata for this backup version, with fields including: + A deferred dict giving the info metadata for this backup version, with + fields including: version(str) algorithm(str) auth_data(object): opaque dict supplied by the client -- cgit 1.4.1 From 835779f7fb8e8b84c3f8e371528d6ea08d0c373f Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 15 Nov 2018 11:08:27 -0700 Subject: Add option to track MAU stats (but not limit people) (#3830) --- changelog.d/3830.feature | 1 + synapse/app/homeserver.py | 2 +- synapse/config/server.py | 6 +++ synapse/storage/monthly_active_users.py | 74 ++++++++++++++++-------------- tests/storage/test_monthly_active_users.py | 25 ++++++++++ tests/test_mau.py | 18 ++++++++ tests/utils.py | 1 + 7 files changed, 92 insertions(+), 35 deletions(-) create mode 100644 changelog.d/3830.feature (limited to 'synapse/storage') diff --git a/changelog.d/3830.feature b/changelog.d/3830.feature new file mode 100644 index 0000000000..af472cf763 --- /dev/null +++ b/changelog.d/3830.feature @@ -0,0 +1 @@ +Add option to track MAU stats (but not limit people) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 415374a2ce..3e4dea2f19 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -535,7 +535,7 @@ def run(hs): current_mau_count = 0 reserved_count = 0 store = hs.get_datastore() - if hs.config.limit_usage_by_mau: + if hs.config.limit_usage_by_mau or hs.config.mau_stats_only: current_mau_count = yield store.get_monthly_active_count() reserved_count = yield store.get_registered_reserved_users_count() current_mau_gauge.set(float(current_mau_count)) diff --git a/synapse/config/server.py b/synapse/config/server.py index c1c7c0105e..5ff9ac288d 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -77,6 
+77,7 @@ class ServerConfig(Config): self.max_mau_value = config.get( "max_mau_value", 0, ) + self.mau_stats_only = config.get("mau_stats_only", False) self.mau_limits_reserved_threepids = config.get( "mau_limit_reserved_threepids", [] @@ -372,6 +373,11 @@ class ServerConfig(Config): # max_mau_value: 50 # mau_trial_days: 2 # + # If enabled, the metrics for the number of monthly active users will + # be populated, however no one will be limited. If limit_usage_by_mau + # is true, this is implied to be true. + # mau_stats_only: False + # # Sometimes the server admin will want to ensure certain accounts are # never blocked by mau checking. These accounts are specified here. # diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index cf4104dc2e..c353b11c9a 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -96,37 +96,38 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn.execute(sql, query_args) - # If MAU user count still exceeds the MAU threshold, then delete on - # a least recently active basis. - # Note it is not possible to write this query using OFFSET due to - # incompatibilities in how sqlite and postgres support the feature. - # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present - # While Postgres does not require 'LIMIT', but also does not support - # negative LIMIT values. So there is no way to write it that both can - # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if self.hs.config.limit_usage_by_mau: + # If MAU user count still exceeds the MAU threshold, then delete on + # a least recently active basis. + # Note it is not possible to write this query using OFFSET due to + # incompatibilities in how sqlite and postgres support the feature. + # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present + # While Postgres does not require 'LIMIT', but also does not support + # negative LIMIT values. So there is no way to write it that both can + # support + safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) + # Must be greater than zero for postgres + safe_guard = safe_guard if safe_guard > 0 else 0 + query_args = [safe_guard] + + base_sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? + ) + """ + # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres + # when len(reserved_users) == 0. Works fine on sqlite. + if len(self.reserved_users) > 0: + query_args.extend(self.reserved_users) + sql = base_sql + """ AND user_id NOT IN ({})""".format( + ','.join(questionmarks) ) - """ - # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres - # when len(reserved_users) == 0. Works fine on sqlite. 
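+                # e.g. with two reserved threepid users the statement executed
+                # below becomes:
+                #   DELETE FROM monthly_active_users WHERE user_id NOT IN (
+                #       SELECT user_id FROM monthly_active_users
+                #       ORDER BY timestamp DESC LIMIT ?
+                #   ) AND user_id NOT IN (?,?)
+                # with query_args = [safe_guard, reserved_user_1, reserved_user_2].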
- if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( - ','.join(questionmarks) - ) - else: - sql = base_sql - txn.execute(sql, query_args) + else: + sql = base_sql + txn.execute(sql, query_args) yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports @@ -252,8 +253,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id(str): the user_id to query """ - - if self.hs.config.limit_usage_by_mau: + if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only: # Trial users and guests should not be included as part of MAU group is_guest = yield self.is_guest(user_id) if is_guest: @@ -271,8 +271,14 @@ class MonthlyActiveUsersStore(SQLBaseStore): # but only update if we have not previously seen the user for # LAST_SEEN_GRANULARITY ms if last_seen_timestamp is None: - count = yield self.get_monthly_active_count() - if count < self.hs.config.max_mau_value: + # In the case where mau_stats_only is True and limit_usage_by_mau is + # False, there is no point in checking get_monthly_active_count - it + # adds no value and will break the logic if max_mau_value is exceeded. + if not self.hs.config.limit_usage_by_mau: yield self.upsert_monthly_active_user(user_id) + else: + count = yield self.get_monthly_active_count() + if count < self.hs.config.max_mau_value: + yield self.upsert_monthly_active_user(user_id) elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY: yield self.upsert_monthly_active_user(user_id) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 832e379a83..8664bc3d54 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -220,3 +220,28 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): self.store.user_add_threepid(user2, "email", user2_email, now, now) count = self.store.get_registered_reserved_users_count() self.assertEquals(self.get_success(count), len(threepids)) + + def test_track_monthly_users_without_cap(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = True + self.hs.config.max_mau_value = 1 # should not matter + + count = self.store.get_monthly_active_count() + self.assertEqual(0, self.get_success(count)) + + self.store.upsert_monthly_active_user("@user1:server") + self.store.upsert_monthly_active_user("@user2:server") + self.pump() + + count = self.store.get_monthly_active_count() + self.assertEqual(2, self.get_success(count)) + + def test_no_users_when_not_tracking(self): + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = False + self.store.upsert_monthly_active_user = Mock() + + self.store.populate_monthly_active_users("@user:sever") + self.pump() + + self.store.upsert_monthly_active_user.assert_not_called() diff --git a/tests/test_mau.py b/tests/test_mau.py index 0afdeb0818..04f95c942f 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -171,6 +171,24 @@ class TestMauLimit(unittest.HomeserverTestCase): self.assertEqual(e.code, 403) self.assertEqual(e.errcode, Codes.RESOURCE_LIMIT_EXCEEDED) + def test_tracked_but_not_limited(self): + self.hs.config.max_mau_value = 1 # should not matter + self.hs.config.limit_usage_by_mau = False + self.hs.config.mau_stats_only = True + + # Simply being able to create 2 users indicates that the + # limit was not reached. 
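+        # (with limit_usage_by_mau off, populate_monthly_active_users skips
+        # the get_monthly_active_count() cap check entirely and always
+        # upserts, per the storage change above)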
+ token1 = self.create_user("kermit1") + self.do_sync_for_user(token1) + token2 = self.create_user("kermit2") + self.do_sync_for_user(token2) + + # We do want to verify that the number of tracked users + # matches what we want though + count = self.store.get_monthly_active_count() + self.reactor.advance(100) + self.assertEqual(2, self.successResultOf(count)) + def create_user(self, localpart): request_data = json.dumps( { diff --git a/tests/utils.py b/tests/utils.py index 67ab916f30..52ab762010 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -134,6 +134,7 @@ def default_config(name): config.hs_disabled_limit_type = "" config.max_mau_value = 50 config.mau_trial_days = 0 + config.mau_stats_only = False config.mau_limits_reserved_threepids = [] config.admin_contact = None config.rc_messages_per_second = 10000 -- cgit 1.4.1 From 6c18cc4b506a81c2a92665a15c55a0313ca47db3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 20 Nov 2018 22:46:51 +0000 Subject: Ignore __pycache__ directories in schema delta dir Now that we use py3, compiled python ends up in __pycache__ rather than *.pyc. --- changelog.d/4214.misc | 1 + synapse/storage/prepare_database.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4214.misc (limited to 'synapse/storage') diff --git a/changelog.d/4214.misc b/changelog.d/4214.misc new file mode 100644 index 0000000000..b2f62060e3 --- /dev/null +++ b/changelog.d/4214.misc @@ -0,0 +1 @@ +Ignore __pycache__ directories in the database schema folder diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index bd740e1e45..d5d2f89a77 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -257,7 +257,7 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files, module.run_create(cur, database_engine) if not is_empty: module.run_upgrade(cur, database_engine, config=config) - elif ext == ".pyc": + elif ext == ".pyc" or file_name == "__pycache__": # Sometimes .pyc files turn up anyway even though we've # disabled their generation; e.g. from distribution package # installers. 
Silently skip it -- cgit 1.4.1 From f9b136a8869ea9351fa109ac62b36303f4a1dc47 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Nov 2018 09:33:41 +0000 Subject: Neilj/fix mau initial reserved users (#4211) * fix transaction wrapping bug that caused get_user_id_by_threepid_txn to fail * towncrier * white space --- changelog.d/4211.bugfix | 2 ++ synapse/storage/monthly_active_users.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/4211.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4211.bugfix b/changelog.d/4211.bugfix new file mode 100644 index 0000000000..376f80c55a --- /dev/null +++ b/changelog.d/4211.bugfix @@ -0,0 +1,2 @@ +fix start up failure when mau_limit_reserved_threepids set and db is postgres + diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index c353b11c9a..479e01ddc1 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -34,8 +34,9 @@ class MonthlyActiveUsersStore(SQLBaseStore): self.hs = hs self.reserved_users = () # Do not add more reserved users than the total allowable number - self._initialise_reserved_users( - dbconn.cursor(), + self._new_transaction( + dbconn, "initialise_mau_threepids", [], [], + self._initialise_reserved_users, hs.config.mau_limits_reserved_threepids[:self.hs.config.max_mau_value], ) -- cgit 1.4.1 From 704c5298f092516d0a9a31c1b384e32140e6896f Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Sat, 1 Dec 2018 23:07:10 -0600 Subject: Drop sent_transactions Signed-off-by: Aaron Raimist --- synapse/storage/__init__.py | 1 - synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/11/v11.sql | 16 ----------- synapse/storage/schema/delta/34/sent_txn_purge.py | 32 ---------------------- .../schema/delta/53/drop_sent_transactions.sql | 16 +++++++++++ .../schema/full_schemas/11/transactions.sql | 19 ------------- .../schema/full_schemas/16/transactions.sql | 19 ------------- 7 files changed, 17 insertions(+), 88 deletions(-) delete mode 100644 synapse/storage/schema/delta/11/v11.sql delete mode 100644 synapse/storage/schema/delta/34/sent_txn_purge.py create mode 100644 synapse/storage/schema/delta/53/drop_sent_transactions.sql (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 53c685c173..b23fb7e56c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -119,7 +119,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "device_lists_stream", "stream_id", ) - self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d5d2f89a77..fa36daac52 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. 
-SCHEMA_VERSION = 52 +SCHEMA_VERSION = 53 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/11/v11.sql b/synapse/storage/schema/delta/11/v11.sql deleted file mode 100644 index e7b4f90127..0000000000 --- a/synapse/storage/schema/delta/11/v11.sql +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright 2015, 2016 OpenMarket Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CREATE INDEX IF NOT EXISTS sent_transaction_txn_id ON sent_transactions(transaction_id); \ No newline at end of file diff --git a/synapse/storage/schema/delta/34/sent_txn_purge.py b/synapse/storage/schema/delta/34/sent_txn_purge.py deleted file mode 100644 index 0ffab10b6f..0000000000 --- a/synapse/storage/schema/delta/34/sent_txn_purge.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging - -from synapse.storage.engines import PostgresEngine - -logger = logging.getLogger(__name__) - - -def run_create(cur, database_engine, *args, **kwargs): - if isinstance(database_engine, PostgresEngine): - cur.execute("TRUNCATE sent_transactions") - else: - cur.execute("DELETE FROM sent_transactions") - - cur.execute("CREATE INDEX sent_transactions_ts ON sent_transactions(ts)") - - -def run_upgrade(cur, database_engine, *args, **kwargs): - pass diff --git a/synapse/storage/schema/delta/53/drop_sent_transactions.sql b/synapse/storage/schema/delta/53/drop_sent_transactions.sql new file mode 100644 index 0000000000..e372f5a44a --- /dev/null +++ b/synapse/storage/schema/delta/53/drop_sent_transactions.sql @@ -0,0 +1,16 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +DROP TABLE IF EXISTS sent_transactions; diff --git a/synapse/storage/schema/full_schemas/11/transactions.sql b/synapse/storage/schema/full_schemas/11/transactions.sql index a3f4a0a790..f6a058832e 100644 --- a/synapse/storage/schema/full_schemas/11/transactions.sql +++ b/synapse/storage/schema/full_schemas/11/transactions.sql @@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions( CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0; - --- Stores what transactions we've sent, what their response was (if we got one) and whether we have --- since referenced the transaction in another outgoing transaction -CREATE TABLE IF NOT EXISTS sent_transactions( - id INTEGER PRIMARY KEY AUTOINCREMENT, -- This is used to apply insertion ordering - transaction_id TEXT, - destination TEXT, - response_code INTEGER DEFAULT 0, - response_json TEXT, - ts BIGINT -); - -CREATE INDEX sent_transaction_dest ON sent_transactions(destination); -CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id); --- So that we can do an efficient look up of all transactions that have yet to be successfully --- sent. -CREATE INDEX sent_transaction_sent ON sent_transactions(response_code); - - -- For sent transactions only. CREATE TABLE IF NOT EXISTS transaction_id_to_pdu( transaction_id INTEGER, diff --git a/synapse/storage/schema/full_schemas/16/transactions.sql b/synapse/storage/schema/full_schemas/16/transactions.sql index 14b67cce25..17e67bedac 100644 --- a/synapse/storage/schema/full_schemas/16/transactions.sql +++ b/synapse/storage/schema/full_schemas/16/transactions.sql @@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions( CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0; - --- Stores what transactions we've sent, what their response was (if we got one) and whether we have --- since referenced the transaction in another outgoing transaction -CREATE TABLE IF NOT EXISTS sent_transactions( - id BIGINT PRIMARY KEY, -- This is used to apply insertion ordering - transaction_id TEXT, - destination TEXT, - response_code INTEGER DEFAULT 0, - response_json TEXT, - ts BIGINT -); - -CREATE INDEX sent_transaction_dest ON sent_transactions(destination); -CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id); --- So that we can do an efficient look up of all transactions that have yet to be successfully --- sent. -CREATE INDEX sent_transaction_sent ON sent_transactions(response_code); - - -- For sent transactions only. CREATE TABLE IF NOT EXISTS transaction_id_to_pdu( transaction_id INTEGER, -- cgit 1.4.1 From ecc23188f4b88bef1f08a8fac2779720308863d6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 4 Dec 2018 11:55:52 +0100 Subject: Fix UnicodeDecodeError when postgres is not configured in english (#4253) This is a bit of a half-assed effort at fixing https://github.com/matrix-org/synapse/issues/4252. Fundamentally the right answer is to drop support for Python 2. 
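A minimal sketch of the failure mode being worked around (error text
hypothetical; assumes psycopg2 raising with non-ASCII bytes, as it does
under Python 2)::

    e = Exception(u"FATAL: échec de la connexion".encode("utf-8"))
    try:
        msg = u"[TXN OPERROR] %s" % str(e)  # py2: implicit ascii decode fails
    except UnicodeDecodeError:
        # what exception_to_unicode now does for a single bytes arg
        msg = e.args[0].decode("utf-8", errors="replace")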
--- changelog.d/4253.bugfix | 1 + synapse/storage/_base.py | 15 ++++++++------- synapse/util/stringutils.py | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 changelog.d/4253.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4253.bugfix b/changelog.d/4253.bugfix new file mode 100644 index 0000000000..1796e95b86 --- /dev/null +++ b/changelog.d/4253.bugfix @@ -0,0 +1 @@ +Fix UnicodeDecodeError when postgres is configured to give non-English errors diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d9d0255d0b..38e7d26365 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -29,6 +29,7 @@ from synapse.api.errors import StoreError from synapse.storage.engines import PostgresEngine from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.stringutils import exception_to_unicode logger = logging.getLogger(__name__) @@ -249,32 +250,32 @@ class SQLBaseStore(object): except self.database_engine.module.OperationalError as e: # This can happen if the database disappears mid # transaction. - logger.warn( + logger.warning( "[TXN OPERROR] {%s} %s %d/%d", - name, e, i, N + name, exception_to_unicode(e), i, N ) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise except self.database_engine.module.DatabaseError as e: if self.database_engine.is_deadlock(e): - logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N) + logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N) if i < N: i += 1 try: conn.rollback() except self.database_engine.module.Error as e1: - logger.warn( + logger.warning( "[TXN EROLL] {%s} %s", - name, e1, + name, exception_to_unicode(e1), ) continue raise diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 6f318c6a29..fdcb375f95 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -16,7 +16,8 @@ import random import string -from six import PY3 +import six +from six import PY2, PY3 from six.moves import range _string_with_symbols = ( @@ -71,3 +72,39 @@ def to_ascii(s): return s.encode("ascii") except UnicodeEncodeError: return s + + +def exception_to_unicode(e): + """Helper function to extract the text of an exception as a unicode string + + Args: + e (Exception): exception to be stringified + + Returns: + unicode + """ + # urgh, this is a mess. The basic problem here is that psycopg2 constructs its + # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will + # then produce the raw byte sequence. Under Python 2, this will then cause another + # error if it gets mixed with a `unicode` object, as per + # https://github.com/matrix-org/synapse/issues/4252 + + # First of all, if we're under python3, everything is fine because it will sort this + # nonsense out for us. + if not PY2: + return str(e) + + # otherwise let's have a stab at decoding the exception message. We'll circumvent + # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii') + # and instead look at what is in the args member. 
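+    # (covering three cases below: no args -> empty string, several args
+    # -> repr of the tuple, one arg -> decoded with errors="replace" if it
+    # is bytes, else returned as-is)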
+ + if len(e.args) == 0: + return u"" + elif len(e.args) > 1: + return six.text_type(repr(e.args)) + + msg = e.args[0] + if isinstance(msg, bytes): + return msg.decode('utf-8', errors='replace') + else: + return msg -- cgit 1.4.1 From b5ac0ffa0a842d99ea4ef903047cc6ca668057ed Mon Sep 17 00:00:00 2001 From: Ben Parsons Date: Tue, 4 Dec 2018 10:57:39 +0000 Subject: add more detail to logging regarding "More than one row matched" error (#4234) --- changelog.d/4234.misc | 1 + synapse/storage/_base.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/4234.misc (limited to 'synapse/storage') diff --git a/changelog.d/4234.misc b/changelog.d/4234.misc new file mode 100644 index 0000000000..b5a01d38af --- /dev/null +++ b/changelog.d/4234.misc @@ -0,0 +1 @@ +add more detail to logging regarding "More than one row matched" error \ No newline at end of file diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 38e7d26365..1d3069b143 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -850,9 +850,9 @@ class SQLBaseStore(object): rowcount = cls._simple_update_txn(txn, table, keyvalues, updatevalues) if rowcount == 0: - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if rowcount > 1: - raise StoreError(500, "More than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) @staticmethod def _simple_select_one_txn(txn, table, keyvalues, retcols, @@ -869,9 +869,9 @@ class SQLBaseStore(object): if not row: if allow_none: return None - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if txn.rowcount > 1: - raise StoreError(500, "More than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) return dict(zip(retcols, row)) @@ -903,9 +903,9 @@ class SQLBaseStore(object): txn.execute(sql, list(keyvalues.values())) if txn.rowcount == 0: - raise StoreError(404, "No row found") + raise StoreError(404, "No row found (%s)" % (table,)) if txn.rowcount > 1: - raise StoreError(500, "more than one row matched") + raise StoreError(500, "More than one row matched (%s)" % (table,)) def _simple_delete(self, table, keyvalues, desc): return self.runInteraction( -- cgit 1.4.1 From 158ffb92f1ba0e247fb0a71f0d400655643ae68e Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 4 Dec 2018 04:01:02 -0700 Subject: Add an option to disable search for homeservers which may not be interested in it (#4230) This is useful for homeservers not intended for users, such as bot-only homeservers or ones that only process IoT data. --- changelog.d/4230.feature | 1 + synapse/config/server.py | 12 +++++++++++- synapse/handlers/search.py | 3 +++ synapse/storage/search.py | 6 ++++++ 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4230.feature (limited to 'synapse/storage') diff --git a/changelog.d/4230.feature b/changelog.d/4230.feature new file mode 100644 index 0000000000..0ecb1d5ec6 --- /dev/null +++ b/changelog.d/4230.feature @@ -0,0 +1 @@ +Add an option to disable search for homeservers that may not be interested in it. diff --git a/synapse/config/server.py b/synapse/config/server.py index 5ff9ac288d..4a5b902f8e 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -62,6 +62,11 @@ class ServerConfig(Config): # master, potentially causing inconsistency. self.enable_media_repo = config.get("enable_media_repo", True) + # whether to enable search. 
If disabled, new entries will not be inserted + # into the search tables and they will not be indexed. Users will receive + # errors when attempting to search for messages. + self.enable_search = config.get("enable_search", True) + self.filter_timeline_limit = config.get("filter_timeline_limit", -1) # Whether we should block invites sent to users on this server @@ -384,7 +389,12 @@ class ServerConfig(Config): # mau_limit_reserved_threepids: # - medium: 'email' # address: 'reserved_user@example.com' - + # + # Room searching + # + # If disabled, new messages will not be indexed for searching and users + # will receive errors when searching for messages. Defaults to enabled. + # enable_search: true """ % locals() def read_arguments(self, args): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 80e7b15de8..ec936bbb4e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -50,6 +50,9 @@ class SearchHandler(BaseHandler): dict to be returned to the client with results of search """ + if not self.hs.config.enable_search: + raise SynapseError(400, "Search is disabled on this homeserver") + batch_group = None batch_group_key = None batch_token = None diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d5b5df93e6..c6420b2374 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -45,6 +45,10 @@ class SearchStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) + + if not hs.config.enable_search: + return + self.register_background_update_handler( self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search ) @@ -316,6 +320,8 @@ class SearchStore(BackgroundUpdateStore): entries (iterable[SearchEntry]): entries to be added to the table """ + if not self.hs.config.enable_search: + return if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" -- cgit 1.4.1 From 9a3e24a13d02d57660ae52a0b85bed681c56af00 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 5 Dec 2018 18:52:42 +0100 Subject: drop undocumented dependency on dateutil (#4266) It turns out we were relying on dateutil being pulled in transitively by pysaml2. There's no need for that bloat. --- changelog.d/4266.misc | 1 + synapse/storage/__init__.py | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 changelog.d/4266.misc (limited to 'synapse/storage') diff --git a/changelog.d/4266.misc b/changelog.d/4266.misc new file mode 100644 index 0000000000..67fbde7484 --- /dev/null +++ b/changelog.d/4266.misc @@ -0,0 +1 @@ +drop undocumented dependency on dateutil diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b23fb7e56c..24329879e5 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,12 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime +import calendar import logging import time -from dateutil import tz - from synapse.api.constants import PresenceState from synapse.storage.devices import DeviceStore from synapse.storage.user_erasure_store import UserErasureStore @@ -357,10 +355,11 @@ class DataStore(RoomMemberStore, RoomStore, """ Returns millisecond unixtime for start of UTC day. 
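        E.g. called at 2018-12-05 14:03:22 UTC this returns 1543968000000,
        i.e. 2018-12-05 00:00:00 UTC in milliseconds.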
""" - now = datetime.datetime.utcnow() - today_start = datetime.datetime(now.year, now.month, - now.day, tzinfo=tz.tzutc()) - return int(time.mktime(today_start.timetuple())) * 1000 + now = time.gmtime() + today_start = calendar.timegm(( + now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0, + )) + return today_start * 1000 def generate_user_daily_visits(self): """ -- cgit 1.4.1 From 30da50a5b80e63c05e4b7ca637e3be9dd88dea59 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 7 Dec 2018 14:44:46 +0100 Subject: Initialise user displayname from SAML2 data (#4272) When we register a new user from SAML2 data, initialise their displayname correctly. --- changelog.d/4272.feature | 1 + synapse/handlers/register.py | 23 ++++++++++++++++------- synapse/rest/client/v1/login.py | 5 +++++ synapse/rest/saml2/response_resource.py | 3 +++ synapse/storage/registration.py | 20 +++++++++++++------- tests/storage/test_monthly_active_users.py | 2 +- 6 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 changelog.d/4272.feature (limited to 'synapse/storage') diff --git a/changelog.d/4272.feature b/changelog.d/4272.feature new file mode 100644 index 0000000000..7a8f286957 --- /dev/null +++ b/changelog.d/4272.feature @@ -0,0 +1 @@ +SAML2 authentication: Initialise user display name from SAML2 data diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 0f87c4610e..ba39e67f6f 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -126,6 +126,7 @@ class RegistrationHandler(BaseHandler): make_guest=False, admin=False, threepid=None, + default_display_name=None, ): """Registers a new client on the server. @@ -140,6 +141,8 @@ class RegistrationHandler(BaseHandler): since it offers no means of associating a device_id with the access_token. Instead you should call auth_handler.issue_access_token after registration. + default_display_name (unicode|None): if set, the new user's displayname + will be set to this. Defaults to 'localpart'. Returns: A tuple of (user_id, access_token). 
Raises: @@ -169,6 +172,13 @@ class RegistrationHandler(BaseHandler): user = UserID(localpart, self.hs.hostname) user_id = user.to_string() + if was_guest: + # If the user was a guest then they already have a profile + default_display_name = None + + elif default_display_name is None: + default_display_name = localpart + token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) @@ -178,10 +188,7 @@ class RegistrationHandler(BaseHandler): password_hash=password_hash, was_guest=was_guest, make_guest=make_guest, - create_profile_with_localpart=( - # If the user was a guest then they already have a profile - None if was_guest else user.localpart - ), + create_profile_with_displayname=default_display_name, admin=admin, ) @@ -203,13 +210,15 @@ class RegistrationHandler(BaseHandler): yield self.check_user_id_not_appservice_exclusive(user_id) if generate_token: token = self.macaroon_gen.generate_access_token(user_id) + if default_display_name is None: + default_display_name = localpart try: yield self.store.register( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=default_display_name, ) except SynapseError: # if user id is taken, just generate another @@ -300,7 +309,7 @@ class RegistrationHandler(BaseHandler): user_id=user_id, password_hash="", appservice_id=service_id, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=user.localpart, ) defer.returnValue(user_id) @@ -478,7 +487,7 @@ class RegistrationHandler(BaseHandler): user_id=user_id, token=token, password_hash=password_hash, - create_profile_with_localpart=user.localpart, + create_profile_with_displayname=user.localpart, ) else: yield self._auth_handler.delete_access_tokens_for_user(user_id) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index b7c5b58b01..e9d3032498 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -451,6 +451,7 @@ class SSOAuthHandler(object): @defer.inlineCallbacks def on_successful_auth( self, username, request, client_redirect_url, + user_display_name=None, ): """Called once the user has successfully authenticated with the SSO. @@ -467,6 +468,9 @@ class SSOAuthHandler(object): client_redirect_url (unicode): the redirect_url the client gave us when it first started the process. + user_display_name (unicode|None): if set, and we have to register a new user, + we will set their displayname to this. + Returns: Deferred[none]: Completes once we have handled the request. 
""" @@ -478,6 +482,7 @@ class SSOAuthHandler(object): yield self._registration_handler.register( localpart=localpart, generate_token=False, + default_display_name=user_display_name, ) ) diff --git a/synapse/rest/saml2/response_resource.py b/synapse/rest/saml2/response_resource.py index ad2ed157b5..69fb77b322 100644 --- a/synapse/rest/saml2/response_resource.py +++ b/synapse/rest/saml2/response_resource.py @@ -66,6 +66,9 @@ class SAML2ResponseResource(Resource): raise CodeMessageException(400, "uid not in SAML2 response") username = saml2_auth.ava["uid"][0] + + displayName = saml2_auth.ava.get("displayName", [None])[0] return self._sso_auth_handler.on_successful_auth( username, request, relay_state, + user_display_name=displayName, ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 80d76bf9d7..3d55441e33 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -22,6 +22,7 @@ from twisted.internet import defer from synapse.api.errors import Codes, StoreError from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore +from synapse.types import UserID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -167,7 +168,7 @@ class RegistrationStore(RegistrationWorkerStore, def register(self, user_id, token=None, password_hash=None, was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_localpart=None, admin=False): + create_profile_with_displayname=None, admin=False): """Attempts to register an account. Args: @@ -181,8 +182,8 @@ class RegistrationStore(RegistrationWorkerStore, make_guest (boolean): True if the the new user should be guest, false to add a regular user account. appservice_id (str): The ID of the appservice registering the user. - create_profile_with_localpart (str): Optionally create a profile for - the given localpart. + create_profile_with_displayname (unicode): Optionally create a profile for + the user, setting their displayname to the given value Raises: StoreError if the user_id could not be registered. """ @@ -195,7 +196,7 @@ class RegistrationStore(RegistrationWorkerStore, was_guest, make_guest, appservice_id, - create_profile_with_localpart, + create_profile_with_displayname, admin ) @@ -208,9 +209,11 @@ class RegistrationStore(RegistrationWorkerStore, was_guest, make_guest, appservice_id, - create_profile_with_localpart, + create_profile_with_displayname, admin, ): + user_id_obj = UserID.from_string(user_id) + now = int(self.clock.time()) next_id = self._access_tokens_id_gen.get_next() @@ -273,12 +276,15 @@ class RegistrationStore(RegistrationWorkerStore, (next_id, user_id, token,) ) - if create_profile_with_localpart: + if create_profile_with_displayname: # set a default displayname serverside to avoid ugly race # between auto-joins and clients trying to set displaynames + # + # *obviously* the 'profiles' table uses localpart for user_id + # while everything else uses the full mxid. 
txn.execute( "INSERT INTO profiles(user_id, displayname) VALUES (?,?)", - (create_profile_with_localpart, create_profile_with_localpart) + (user_id_obj.localpart, create_profile_with_displayname) ) self._invalidate_cache_and_stream( diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 8664bc3d54..9618d57463 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -149,7 +149,7 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): def test_populate_monthly_users_is_guest(self): # Test that guest users are not added to mau list - user_id = "user_id" + user_id = "@user_id:host" self.store.register( user_id=user_id, token="123", password_hash=None, make_guest=True ) -- cgit 1.4.1 From e93a0ebf50e46ee4d0c8d5e3e73034465fb8884e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Dec 2018 05:10:31 +1100 Subject: Settings Fix deleting e2e room keys on xenial (#4295) --- changelog.d/4295.bugfix | 1 + synapse/storage/e2e_room_keys.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4295.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4295.bugfix b/changelog.d/4295.bugfix new file mode 100644 index 0000000000..e1603cbcda --- /dev/null +++ b/changelog.d/4295.bugfix @@ -0,0 +1 @@ +Fix deleting E2E room keys when using old SQLite versions. diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 16b7f005aa..45cebe61d1 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -182,7 +182,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): keyvalues = { "user_id": user_id, - "version": version, + "version": int(version), } if room_id: keyvalues['room_id'] = room_id -- cgit 1.4.1 From d2f7c4e6b1efbdd3275d02a19220a10cf00a8f66 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 14 Dec 2018 18:20:59 +0000 Subject: create support user (#4141) Allow for the creation of a support user. A support user can access the server, join rooms, interact with other users, but does not appear in the user directory nor does it contribute to monthly active user limits. 
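For example, once this lands such an account can be created with the
bundled CLI (server URL and credentials hypothetical; -t/--user_type is
the flag added below)::

    register_new_matrix_user -u support1 -p s3cret -t support \
        -k shared_secret http://localhost:8008

or via the shared-secret admin API by adding "user_type": "support" to
the registration body and folding it into the MAC, per the
register_api.rst change below.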
--- changelog.d/4141.feature | 1 + docs/admin_api/register_api.rst | 11 ++- synapse/_scripts/register_new_matrix_user.py | 19 ++++- synapse/api/auth.py | 5 +- synapse/api/constants.py | 8 ++ synapse/handlers/register.py | 15 +++- synapse/handlers/room.py | 2 +- synapse/handlers/user_directory.py | 45 ++++++----- synapse/rest/client/v1/admin.py | 11 ++- synapse/storage/monthly_active_users.py | 30 ++++++- synapse/storage/registration.py | 38 ++++++++- .../schema/delta/53/add_user_type_to_users.sql | 19 +++++ tests/api/test_auth.py | 2 + tests/handlers/test_register.py | 30 ++++++- tests/handlers/test_user_directory.py | 91 ++++++++++++++++++++++ tests/rest/client/v1/test_admin.py | 33 ++++++-- tests/storage/test_monthly_active_users.py | 34 +++++++- tests/storage/test_registration.py | 22 ++++++ tests/unittest.py | 1 + tests/utils.py | 1 - 20 files changed, 371 insertions(+), 47 deletions(-) create mode 100644 changelog.d/4141.feature create mode 100644 synapse/storage/schema/delta/53/add_user_type_to_users.sql create mode 100644 tests/handlers/test_user_directory.py (limited to 'synapse/storage') diff --git a/changelog.d/4141.feature b/changelog.d/4141.feature new file mode 100644 index 0000000000..632d3547cb --- /dev/null +++ b/changelog.d/4141.feature @@ -0,0 +1 @@ +Special-case a support user for use in verifying behaviour of a given server. The support user does not appear in user directory or monthly active user counts. diff --git a/docs/admin_api/register_api.rst b/docs/admin_api/register_api.rst index 16d65c86b3..084e74ebf5 100644 --- a/docs/admin_api/register_api.rst +++ b/docs/admin_api/register_api.rst @@ -39,13 +39,13 @@ As an example:: } The MAC is the hex digest output of the HMAC-SHA1 algorithm, with the key being -the shared secret and the content being the nonce, user, password, and either -the string "admin" or "notadmin", each separated by NULs. For an example of -generation in Python:: +the shared secret and the content being the nonce, user, password, either the +string "admin" or "notadmin", and optionally the user_type +each separated by NULs. 
For an example of generation in Python:: import hmac, hashlib - def generate_mac(nonce, user, password, admin=False): + def generate_mac(nonce, user, password, admin=False, user_type=None): mac = hmac.new( key=shared_secret, @@ -59,5 +59,8 @@ generation in Python:: mac.update(password.encode('utf8')) mac.update(b"\x00") mac.update(b"admin" if admin else b"notadmin") + if user_type: + mac.update(b"\x00") + mac.update(user_type.encode('utf8')) return mac.hexdigest() diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 70cecde486..4c3abf06fe 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -35,6 +35,7 @@ def request_registration( server_location, shared_secret, admin=False, + user_type=None, requests=_requests, _print=print, exit=sys.exit, @@ -65,6 +66,9 @@ def request_registration( mac.update(password.encode('utf8')) mac.update(b"\x00") mac.update(b"admin" if admin else b"notadmin") + if user_type: + mac.update(b"\x00") + mac.update(user_type.encode('utf8')) mac = mac.hexdigest() @@ -74,6 +78,7 @@ def request_registration( "password": password, "mac": mac, "admin": admin, + "user_type": user_type, } _print("Sending registration request...") @@ -91,7 +96,7 @@ def request_registration( _print("Success!") -def register_new_user(user, password, server_location, shared_secret, admin): +def register_new_user(user, password, server_location, shared_secret, admin, user_type): if not user: try: default_user = getpass.getuser() @@ -129,7 +134,8 @@ def register_new_user(user, password, server_location, shared_secret, admin): else: admin = False - request_registration(user, password, server_location, shared_secret, bool(admin)) + request_registration(user, password, server_location, shared_secret, + bool(admin), user_type) def main(): @@ -154,6 +160,12 @@ def main(): default=None, help="New password for user. Will prompt if omitted.", ) + parser.add_argument( + "-t", + "--user_type", + default=None, + help="User type as specified in synapse.api.constants.UserTypes", + ) admin_group = parser.add_mutually_exclusive_group() admin_group.add_argument( "-a", @@ -208,7 +220,8 @@ def main(): if args.admin or args.no_admin: admin = args.admin - register_new_user(args.user, args.password, args.server_url, secret, admin) + register_new_user(args.user, args.password, args.server_url, secret, + admin, args.user_type) if __name__ == "__main__": diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 5309899703..b8a9af7158 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -802,9 +802,10 @@ class Auth(object): threepid should never be set at the same time. """ - # Never fail an auth check for the server notices users + # Never fail an auth check for the server notices users or support user # This can be a problem where event creation is prohibited due to blocking - if user_id == self.hs.config.server_notices_mxid: + is_support = yield self.store.is_support_user(user_id) + if user_id == self.hs.config.server_notices_mxid or is_support: return if self.hs.config.hs_disabled: diff --git a/synapse/api/constants.py b/synapse/api/constants.py index f20e0fcf0b..b7f25a42a2 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -119,3 +119,11 @@ KNOWN_ROOM_VERSIONS = { ServerNoticeMsgType = "m.server_notice" ServerNoticeLimitReached = "m.server_notice.usage_limit_reached" + + +class UserTypes(object): + """Allows for user type specific behaviour. 
With the benefit of hindsight + 'admin' and 'guest' users should also be UserTypes. Normal users are type None + """ + SUPPORT = "support" + ALL_USER_TYPES = (SUPPORT) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index ba39e67f6f..21c17c59a0 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -126,6 +126,7 @@ class RegistrationHandler(BaseHandler): make_guest=False, admin=False, threepid=None, + user_type=None, default_display_name=None, ): """Registers a new client on the server. @@ -141,6 +142,8 @@ class RegistrationHandler(BaseHandler): since it offers no means of associating a device_id with the access_token. Instead you should call auth_handler.issue_access_token after registration. + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. default_display_name (unicode|None): if set, the new user's displayname will be set to this. Defaults to 'localpart'. Returns: @@ -190,6 +193,7 @@ class RegistrationHandler(BaseHandler): make_guest=make_guest, create_profile_with_displayname=default_display_name, admin=admin, + user_type=user_type, ) if self.hs.config.user_directory_search_all_users: @@ -242,9 +246,16 @@ class RegistrationHandler(BaseHandler): # auto-join the user to any rooms we're supposed to dump them into fake_requester = create_requester(user_id) - # try to create the room if we're the first user on the server + # try to create the room if we're the first real user on the server. Note + # that an auto-generated support user is not a real user and will never be + # the user to create the room should_auto_create_rooms = False - if self.hs.config.autocreate_auto_join_rooms: + is_support = yield self.store.is_support_user(user_id) + # There is an edge case where the first user is the support user, then + # the room is never created, though this seems unlikely and + # recoverable from given the support user being involved in the first + # place. + if self.hs.config.autocreate_auto_join_rooms and not is_support: count = yield self.store.count_all_users() should_auto_create_rooms = count == 1 for r in self.hs.config.auto_join_rooms: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 3928faa6e7..581e96c743 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -433,7 +433,7 @@ class RoomCreationHandler(BaseHandler): """ user_id = requester.user.to_string() - self.auth.check_auth_blocking(user_id) + yield self.auth.check_auth_blocking(user_id) if not self.spam_checker.user_may_create_room(user_id): raise SynapseError(403, "You are not permitted to create rooms") diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f11b430126..3c40999338 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -125,9 +125,12 @@ class UserDirectoryHandler(object): """ # FIXME(#3714): We should probably do this in the same worker as all # the other changes. - yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None, - ) + is_support = yield self.store.is_support_user(user_id) + # Support users are for diagnostics and should not appear in the user directory. 
+ if not is_support: + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url, None, + ) @defer.inlineCallbacks def handle_user_deactivated(self, user_id): @@ -329,14 +332,7 @@ class UserDirectoryHandler(object): public_value=Membership.JOIN, ) - if change is None: - # Handle any profile changes - yield self._handle_profile_change( - state_key, room_id, prev_event_id, event_id, - ) - continue - - if not change: + if change is False: # Need to check if the server left the room entirely, if so # we might need to remove all the users in that room is_in_room = yield self.store.is_host_joined( @@ -354,16 +350,25 @@ class UserDirectoryHandler(object): else: logger.debug("Server is still in room: %r", room_id) - if change: # The user joined - event = yield self.store.get_event(event_id, allow_none=True) - profile = ProfileInfo( - avatar_url=event.content.get("avatar_url"), - display_name=event.content.get("displayname"), - ) + is_support = yield self.store.is_support_user(state_key) + if not is_support: + if change is None: + # Handle any profile changes + yield self._handle_profile_change( + state_key, room_id, prev_event_id, event_id, + ) + continue + + if change: # The user joined + event = yield self.store.get_event(event_id, allow_none=True) + profile = ProfileInfo( + avatar_url=event.content.get("avatar_url"), + display_name=event.content.get("displayname"), + ) - yield self._handle_new_user(room_id, state_key, profile) - else: # The user left - yield self._handle_remove_user(room_id, state_key) + yield self._handle_new_user(room_id, state_key, profile) + else: # The user left + yield self._handle_remove_user(room_id, state_key) else: logger.debug("Ignoring irrelevant type: %r", typ) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 41534b8c2a..82433a2aa9 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -23,7 +23,7 @@ from six.moves import http_client from twisted.internet import defer -from synapse.api.constants import Membership +from synapse.api.constants import Membership, UserTypes from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( assert_params_in_dict, @@ -158,6 +158,11 @@ class UserRegisterServlet(ClientV1RestServlet): raise SynapseError(400, "Invalid password") admin = body.get("admin", None) + user_type = body.get("user_type", None) + + if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: + raise SynapseError(400, "Invalid user type") + got_mac = body["mac"] want_mac = hmac.new( @@ -171,6 +176,9 @@ class UserRegisterServlet(ClientV1RestServlet): want_mac.update(password) want_mac.update(b"\x00") want_mac.update(b"admin" if admin else b"notadmin") + if user_type: + want_mac.update(b"\x00") + want_mac.update(user_type.encode('utf8')) want_mac = want_mac.hexdigest() if not hmac.compare_digest( @@ -189,6 +197,7 @@ class UserRegisterServlet(ClientV1RestServlet): password=body["password"], admin=bool(admin), generate_token=False, + user_type=user_type, ) result = yield register._create_registration_details(user_id, body) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 479e01ddc1..d6fc8edd4c 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -55,9 +55,12 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn, tp["medium"], tp["address"] ) + if user_id: - self.upsert_monthly_active_user_txn(txn, 
user_id) - reserved_user_list.append(user_id) + is_support = self.is_support_user_txn(txn, user_id) + if not is_support: + self.upsert_monthly_active_user_txn(txn, user_id) + reserved_user_list.append(user_id) else: logger.warning( "mau limit reserved threepid %s not found in db" % tp @@ -182,6 +185,18 @@ class MonthlyActiveUsersStore(SQLBaseStore): Args: user_id (str): user to add/update """ + # Support user never to be included in MAU stats. Note I can't easily call this + # from upsert_monthly_active_user_txn because then I need a _txn form of + # is_support_user which is complicated because I want to cache the result. + # Therefore I call it here and ignore the case where + # upsert_monthly_active_user_txn is called directly from + # _initialise_reserved_users reasoning that it would be very strange to + # include a support user in this context. + + is_support = yield self.is_support_user(user_id) + if is_support: + return + is_insert = yield self.runInteraction( "upsert_monthly_active_user", self.upsert_monthly_active_user_txn, user_id @@ -200,6 +215,16 @@ class MonthlyActiveUsersStore(SQLBaseStore): in a database thread rather than the main thread, and we can't call txn.call_after because txn may not be a LoggingTransaction. + We consciously do not call is_support_txn from this method because it + is not possible to cache the response. is_support_txn will be false in + almost all cases, so it seems reasonable to call it only for + upsert_monthly_active_user and to call is_support_txn manually + for cases where upsert_monthly_active_user_txn is called directly, + like _initialise_reserved_users + + In short, don't call this method with support users. (Support users + should not appear in the MAU stats). + Args: txn (cursor): user_id (str): user to add/update @@ -208,6 +233,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): bool: True if a new entry was created, False if an existing one was updated. """ + # Am consciously deciding to lock the table on the basis that is ought # never be a big table and alternative approaches (batching multiple # upserts into a single txn) introduced a lot of extra complexity. diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 3d55441e33..10c3b9757f 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -19,6 +19,7 @@ from six.moves import range from twisted.internet import defer +from synapse.api.constants import UserTypes from synapse.api.errors import Codes, StoreError from synapse.storage import background_updates from synapse.storage._base import SQLBaseStore @@ -168,7 +169,7 @@ class RegistrationStore(RegistrationWorkerStore, def register(self, user_id, token=None, password_hash=None, was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_displayname=None, admin=False): + create_profile_with_displayname=None, admin=False, user_type=None): """Attempts to register an account. Args: @@ -184,6 +185,10 @@ class RegistrationStore(RegistrationWorkerStore, appservice_id (str): The ID of the appservice registering the user. create_profile_with_displayname (unicode): Optionally create a profile for the user, setting their displayname to the given value + admin (boolean): is an admin user? + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + Raises: StoreError if the user_id could not be registered. 
""" @@ -197,7 +202,8 @@ class RegistrationStore(RegistrationWorkerStore, make_guest, appservice_id, create_profile_with_displayname, - admin + admin, + user_type ) def _register( @@ -211,6 +217,7 @@ class RegistrationStore(RegistrationWorkerStore, appservice_id, create_profile_with_displayname, admin, + user_type, ): user_id_obj = UserID.from_string(user_id) @@ -247,6 +254,7 @@ class RegistrationStore(RegistrationWorkerStore, "is_guest": 1 if make_guest else 0, "appservice_id": appservice_id, "admin": 1 if admin else 0, + "user_type": user_type, } ) else: @@ -260,6 +268,7 @@ class RegistrationStore(RegistrationWorkerStore, "is_guest": 1 if make_guest else 0, "appservice_id": appservice_id, "admin": 1 if admin else 0, + "user_type": user_type, } ) except self.database_engine.module.IntegrityError: @@ -456,6 +465,31 @@ class RegistrationStore(RegistrationWorkerStore, defer.returnValue(res if res else False) + @cachedInlineCallbacks() + def is_support_user(self, user_id): + """Determines if the user is of type UserTypes.SUPPORT + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user is of type UserTypes.SUPPORT + """ + res = yield self.runInteraction( + "is_support_user", self.is_support_user_txn, user_id + ) + defer.returnValue(res) + + def is_support_user_txn(self, txn, user_id): + res = self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return True if res == UserTypes.SUPPORT else False + @defer.inlineCallbacks def user_add_threepid(self, user_id, medium, address, validated_at, added_at): yield self._simple_upsert("user_threepids", { diff --git a/synapse/storage/schema/delta/53/add_user_type_to_users.sql b/synapse/storage/schema/delta/53/add_user_type_to_users.sql new file mode 100644 index 0000000000..88ec2f83e5 --- /dev/null +++ b/synapse/storage/schema/delta/53/add_user_type_to_users.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* The type of the user: NULL for a regular user, or one of the constants in + * synapse.api.constants.UserTypes + */ +ALTER TABLE users ADD COLUMN user_type TEXT DEFAULT NULL; diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 379e9c4ab1..69dc40428b 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -50,6 +50,8 @@ class AuthTestCase(unittest.TestCase): # this is overridden for the appservice tests self.store.get_app_service_by_token = Mock(return_value=None) + self.store.is_support_user = Mock(return_value=defer.succeed(False)) + @defer.inlineCallbacks def test_get_user_by_req_user_valid_token(self): user_info = {"name": self.test_user, "token_id": "ditto", "device_id": "device"} diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 23bce6ee7d..eb70e1daa6 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -17,7 +17,8 @@ from mock import Mock from twisted.internet import defer -from synapse.api.errors import ResourceLimitError +from synapse.api.constants import UserTypes +from synapse.api.errors import ResourceLimitError, SynapseError from synapse.handlers.register import RegistrationHandler from synapse.types import RoomAlias, UserID, create_requester @@ -64,6 +65,7 @@ class RegistrationTestCase(unittest.TestCase): requester, frank.localpart, "Frankie" ) self.assertEquals(result_user_id, user_id) + self.assertTrue(result_token is not None) self.assertEquals(result_token, 'secret') @defer.inlineCallbacks @@ -82,7 +84,7 @@ class RegistrationTestCase(unittest.TestCase): requester, local_part, None ) self.assertEquals(result_user_id, user_id) - self.assertEquals(result_token, 'secret') + self.assertTrue(result_token is not None) @defer.inlineCallbacks def test_mau_limits_when_disabled(self): @@ -169,6 +171,20 @@ class RegistrationTestCase(unittest.TestCase): rooms = yield self.store.get_rooms_for_user(res[0]) self.assertEqual(len(rooms), 0) + @defer.inlineCallbacks + def test_auto_create_auto_join_rooms_when_support_user_exists(self): + room_alias_str = "#room:test" + self.hs.config.auto_join_rooms = [room_alias_str] + + self.store.is_support_user = Mock(return_value=True) + res = yield self.handler.register(localpart='support') + rooms = yield self.store.get_rooms_for_user(res[0]) + self.assertEqual(len(rooms), 0) + directory_handler = self.hs.get_handlers().directory_handler + room_alias = RoomAlias.from_string(room_alias_str) + with self.assertRaises(SynapseError): + yield directory_handler.get_association(room_alias) + @defer.inlineCallbacks def test_auto_create_auto_join_where_no_consent(self): self.hs.config.user_consent_at_registration = True @@ -179,3 +195,13 @@ class RegistrationTestCase(unittest.TestCase): yield self.handler.post_consent_actions(res[0]) rooms = yield self.store.get_rooms_for_user(res[0]) self.assertEqual(len(rooms), 0) + + @defer.inlineCallbacks + def test_register_support_user(self): + res = yield self.handler.register(localpart='user', user_type=UserTypes.SUPPORT) + self.assertTrue(self.store.is_support_user(res[0])) + + @defer.inlineCallbacks + def test_register_not_support_user(self): + res = yield self.handler.register(localpart='user') + self.assertFalse(self.store.is_support_user(res[0])) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py new file mode 100644 index 0000000000..11f2bae698 --- /dev/null +++ b/tests/handlers/test_user_directory.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from mock import Mock + +from twisted.internet import defer + +from synapse.api.constants import UserTypes +from synapse.handlers.user_directory import UserDirectoryHandler +from synapse.storage.roommember import ProfileInfo + +from tests import unittest +from tests.utils import setup_test_homeserver + + +class UserDirectoryHandlers(object): + def __init__(self, hs): + self.user_directory_handler = UserDirectoryHandler(hs) + + +class UserDirectoryTestCase(unittest.TestCase): + """ Tests the UserDirectoryHandler. """ + + @defer.inlineCallbacks + def setUp(self): + hs = yield setup_test_homeserver(self.addCleanup) + self.store = hs.get_datastore() + hs.handlers = UserDirectoryHandlers(hs) + + self.handler = hs.get_handlers().user_directory_handler + + @defer.inlineCallbacks + def test_handle_local_profile_change_with_support_user(self): + support_user_id = "@support:test" + yield self.store.register( + user_id=support_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT + ) + + yield self.handler.handle_local_profile_change(support_user_id, None) + profile = yield self.store.get_user_in_directory(support_user_id) + self.assertTrue(profile is None) + display_name = 'display_name' + + profile_info = ProfileInfo( + avatar_url='avatar_url', + display_name=display_name, + ) + regular_user_id = '@regular:test' + yield self.handler.handle_local_profile_change(regular_user_id, profile_info) + profile = yield self.store.get_user_in_directory(regular_user_id) + self.assertTrue(profile['display_name'] == display_name) + + @defer.inlineCallbacks + def test_handle_user_deactivated_support_user(self): + s_user_id = "@support:test" + self.store.register( + user_id=s_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT + ) + + self.store.remove_from_user_dir = Mock() + self.store.remove_from_user_in_public_room = Mock() + yield self.handler.handle_user_deactivated(s_user_id) + self.store.remove_from_user_dir.not_called() + self.store.remove_from_user_in_public_room.not_called() + + @defer.inlineCallbacks + def test_handle_user_deactivated_regular_user(self): + r_user_id = "@regular:test" + self.store.register(user_id=r_user_id, token="123", password_hash=None) + self.store.remove_from_user_dir = Mock() + self.store.remove_from_user_in_public_room = Mock() + yield self.handler.handle_user_deactivated(r_user_id) + self.store.remove_from_user_dir.called_once_with(r_user_id) + self.store.remove_from_user_in_public_room.assert_called_once_with(r_user_id) diff --git a/tests/rest/client/v1/test_admin.py b/tests/rest/client/v1/test_admin.py index e38eb628a9..407bf0ac4c 100644 --- a/tests/rest/client/v1/test_admin.py +++ b/tests/rest/client/v1/test_admin.py @@ -19,6 +19,7 @@ import json from mock import Mock +from synapse.api.constants import UserTypes from synapse.rest.client.v1.admin import register_servlets from tests import unittest @@ -147,7 +148,9 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): nonce = 
channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) - want_mac.update(nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin") + want_mac.update( + nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin\x00support" + ) want_mac = want_mac.hexdigest() body = json.dumps( @@ -156,6 +159,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): "username": "bob", "password": "abc123", "admin": True, + "user_type": UserTypes.SUPPORT, "mac": want_mac, } ) @@ -174,7 +178,9 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): nonce = channel.json_body["nonce"] want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) - want_mac.update(nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin") + want_mac.update( + nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin" + ) want_mac = want_mac.hexdigest() body = json.dumps( @@ -202,8 +208,8 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): def test_missing_parts(self): """ Synapse will complain if you don't give nonce, username, password, and - mac. Admin is optional. Additional checks are done for length and - type. + mac. Admin and user_types are optional. Additional checks are done for length + and type. """ def nonce(): @@ -260,7 +266,7 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.assertEqual('Invalid username', channel.json_body["error"]) # - # Username checks + # Password checks # # Must be present @@ -296,3 +302,20 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual('Invalid password', channel.json_body["error"]) + + # + # user_type check + # + + # Invalid user_type + body = json.dumps({ + "nonce": nonce(), + "username": "a", + "password": "1234", + "user_type": "invalid"} + ) + request, channel = self.make_request("POST", self.url, body.encode('utf8')) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual('Invalid user type', channel.json_body["error"]) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 9618d57463..9605301b59 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -16,6 +16,8 @@ from mock import Mock from twisted.internet import defer +from synapse.api.constants import UserTypes + from tests.unittest import HomeserverTestCase FORTY_DAYS = 40 * 24 * 60 * 60 @@ -28,6 +30,7 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): self.store = hs.get_datastore() hs.config.limit_usage_by_mau = True hs.config.max_mau_value = 50 + # Advance the clock a bit reactor.advance(FORTY_DAYS) @@ -39,14 +42,23 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): user1_email = "user1@matrix.org" user2 = "@user2:server" user2_email = "user2@matrix.org" + user3 = "@user3:server" + user3_email = "user3@matrix.org" + threepids = [ {'medium': 'email', 'address': user1_email}, {'medium': 'email', 'address': user2_email}, + {'medium': 'email', 'address': user3_email}, ] - user_num = len(threepids) + # -1 because user3 is a support user and does not count + user_num = len(threepids) - 1 self.store.register(user_id=user1, token="123", password_hash=None) self.store.register(user_id=user2, token="456", password_hash=None) + self.store.register( + user_id=user3, token="789", + password_hash=None, user_type=UserTypes.SUPPORT + ) self.pump() now = int(self.hs.get_clock().time_msec()) @@ -60,7 +72,7 @@ class 
MonthlyActiveUsersTestCase(HomeserverTestCase): active_count = self.store.get_monthly_active_count() - # Test total counts + # Test total counts, ensure user3 (support user) is not counted self.assertEquals(self.get_success(active_count), user_num) # Test user is marked as active @@ -221,6 +233,24 @@ class MonthlyActiveUsersTestCase(HomeserverTestCase): count = self.store.get_registered_reserved_users_count() self.assertEquals(self.get_success(count), len(threepids)) + def test_support_user_not_add_to_mau_limits(self): + support_user_id = "@support:test" + count = self.store.get_monthly_active_count() + self.pump() + self.assertEqual(self.get_success(count), 0) + + self.store.register( + user_id=support_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT + ) + + self.store.upsert_monthly_active_user(support_user_id) + count = self.store.get_monthly_active_count() + self.pump() + self.assertEqual(self.get_success(count), 0) + def test_track_monthly_users_without_cap(self): self.hs.config.limit_usage_by_mau = False self.hs.config.mau_stats_only = True diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 3dfb7b903a..cb3cc4d2e5 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -16,6 +16,8 @@ from twisted.internet import defer +from synapse.api.constants import UserTypes + from tests import unittest from tests.utils import setup_test_homeserver @@ -99,6 +101,26 @@ class RegistrationStoreTestCase(unittest.TestCase): user = yield self.store.get_user_by_access_token(self.tokens[0]) self.assertIsNone(user, "access token was not deleted without device_id") + @defer.inlineCallbacks + def test_is_support_user(self): + TEST_USER = "@test:test" + SUPPORT_USER = "@support:test" + + res = yield self.store.is_support_user(None) + self.assertFalse(res) + yield self.store.register(user_id=TEST_USER, token="123", password_hash=None) + res = yield self.store.is_support_user(TEST_USER) + self.assertFalse(res) + + yield self.store.register( + user_id=SUPPORT_USER, + token="456", + password_hash=None, + user_type=UserTypes.SUPPORT + ) + res = yield self.store.is_support_user(SUPPORT_USER) + self.assertTrue(res) + class TokenGenerator: def __init__(self): diff --git a/tests/unittest.py b/tests/unittest.py index 092c930396..78d2f740f9 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -373,6 +373,7 @@ class HomeserverTestCase(TestCase): nonce_str += b"\x00admin" else: nonce_str += b"\x00notadmin" + want_mac.update(nonce.encode('ascii') + b"\x00" + nonce_str) want_mac = want_mac.hexdigest() diff --git a/tests/utils.py b/tests/utils.py index 04796a9b30..38e689983d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -140,7 +140,6 @@ def default_config(name): config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 config.saml2_enabled = False - config.use_frozen_dicts = False # we need a sane default_room_version, otherwise attempts to create rooms will -- cgit 1.4.1 From 0708f437ccfb57fe44e2d993f57163a844fe726c Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Mon, 24 Dec 2018 09:49:42 +0000 Subject: Log roomid along with Unknown room (#4297) --- changelog.d/4297.misc | 1 + synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4297.misc (limited to 'synapse/storage') diff --git a/changelog.d/4297.misc b/changelog.d/4297.misc new file mode 100644 index 0000000000..63106e26f6 --- /dev/null +++ b/changelog.d/4297.misc @@ -0,0 +1 @@ +Log 
room_id in Unknown room errors diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d737bd6778..a134e9b3e8 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -432,7 +432,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): create_id = state_ids.get((EventTypes.Create, "")) if not create_id: - raise NotFoundError("Unknown room") + raise NotFoundError("Unknown room %s" % (room_id)) create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) -- cgit 1.4.1 From 84b6fae1f512e37d36baf490f91b3062ce9495f7 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 2 Jan 2019 10:19:59 +0000 Subject: Ensure synchrotrons can access is_support_user in the storage layer --- changelog.d/4344.bugfix | 1 + synapse/storage/registration.py | 50 ++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 25 deletions(-) create mode 100644 changelog.d/4344.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4344.bugfix b/changelog.d/4344.bugfix new file mode 100644 index 0000000000..cf9a873db5 --- /dev/null +++ b/changelog.d/4344.bugfix @@ -0,0 +1 @@ +Fix synchrotron exploding due to being unable to access is_support_user in storage layer diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 10c3b9757f..c9e11c3135 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -114,6 +114,31 @@ class RegistrationWorkerStore(SQLBaseStore): return None + @cachedInlineCallbacks() + def is_support_user(self, user_id): + """Determines if the user is of type UserTypes.SUPPORT + + Args: + user_id (str): user id to test + + Returns: + Deferred[bool]: True if user is of type UserTypes.SUPPORT + """ + res = yield self.runInteraction( + "is_support_user", self.is_support_user_txn, user_id + ) + defer.returnValue(res) + + def is_support_user_txn(self, txn, user_id): + res = self._simple_select_one_onecol_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + retcol="user_type", + allow_none=True, + ) + return True if res == UserTypes.SUPPORT else False + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -465,31 +490,6 @@ class RegistrationStore(RegistrationWorkerStore, defer.returnValue(res if res else False) - @cachedInlineCallbacks() - def is_support_user(self, user_id): - """Determines if the user is of type UserTypes.SUPPORT - - Args: - user_id (str): user id to test - - Returns: - Deferred[bool]: True if user is of type UserTypes.SUPPORT - """ - res = yield self.runInteraction( - "is_support_user", self.is_support_user_txn, user_id - ) - defer.returnValue(res) - - def is_support_user_txn(self, txn, user_id): - res = self._simple_select_one_onecol_txn( - txn=txn, - table="users", - keyvalues={"name": user_id}, - retcol="user_type", - allow_none=True, - ) - return True if res == UserTypes.SUPPORT else False - @defer.inlineCallbacks def user_add_threepid(self, user_id, medium, address, validated_at, added_at): yield self._simple_upsert("user_threepids", { -- cgit 1.4.1 From 7960c26fda8dd8d2d1333b45ef4f5c644930a619 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 9 Jan 2019 22:26:25 +1100 Subject: Fix adding new rows instead of updating them if one of the key values is a NULL in upserts. 
(#4369) --- changelog.d/4369.bugfix | 1 + synapse/storage/_base.py | 10 +++++- tests/storage/test_client_ips.py | 71 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4369.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4369.bugfix b/changelog.d/4369.bugfix new file mode 100644 index 0000000000..a78d557932 --- /dev/null +++ b/changelog.d/4369.bugfix @@ -0,0 +1 @@ +Prevent users with access tokens predating the introduction of device IDs from creating spurious entries in the user_ips table. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1d3069b143..865b5e915a 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -547,11 +547,19 @@ class SQLBaseStore(object): if lock: self.database_engine.lock_table(txn, table) + def _getwhere(key): + # If the value we're passing in is None (aka NULL), we need to use + # IS, not =, as NULL = NULL equals NULL (False). + if keyvalues[key] is None: + return "%s IS ?" % (key,) + else: + return "%s = ?" % (key,) + # First try to update. sql = "UPDATE %s SET %s WHERE %s" % ( table, ", ".join("%s = ?" % (k,) for k in values), - " AND ".join("%s = ?" % (k,) for k in keyvalues) + " AND ".join(_getwhere(k) for k in keyvalues) ) sqlargs = list(values.values()) + list(keyvalues.values()) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 4577e9422b..858efe4992 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -62,6 +62,77 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): r, ) + def test_insert_new_client_ip_none_device_id(self): + """ + An insert with a device ID of NULL will not create a new entry, but + update an existing entry in the user_ips table. + """ + self.reactor.advance(12345678) + + user_id = "@user:id" + + # Add & trigger the storage loop + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", None + ) + ) + self.reactor.advance(200) + self.pump(0) + + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + + self.assertEqual( + result, + [ + { + 'access_token': 'access_token', + 'ip': 'ip', + 'user_agent': 'user_agent', + 'device_id': None, + 'last_seen': 12345678000, + } + ], + ) + + # Add another & trigger the storage loop + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", None + ) + ) + self.reactor.advance(10) + self.pump(0) + + result = self.get_success( + self.store._simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], + desc="get_user_ip_and_agents", + ) + ) + # Only one result, has been upserted. 
+ self.assertEqual( + result, + [ + { + 'access_token': 'access_token', + 'ip': 'ip', + 'user_agent': 'user_agent', + 'device_id': None, + 'last_seen': 12345878000, + } + ], + ) + def test_disabled_monthly_active_user(self): self.hs.config.limit_usage_by_mau = False self.hs.config.max_mau_value = 50 -- cgit 1.4.1 From a35c66a00bbee0bb6185c4d37914a41c52c83ddf Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 12 Jan 2019 06:21:50 +1100 Subject: Remove duplicates in the user_ips table and add an index (#4370) --- .travis.yml | 3 + changelog.d/4370.misc | 1 + synapse/storage/client_ips.py | 138 ++++++++++++++++++++- synapse/storage/schema/delta/53/user_ips_index.sql | 26 ++++ 4 files changed, 164 insertions(+), 4 deletions(-) create mode 100644 changelog.d/4370.misc create mode 100644 synapse/storage/schema/delta/53/user_ips_index.sql (limited to 'synapse/storage') diff --git a/.travis.yml b/.travis.yml index 84d5efff9b..728f0e248a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,9 @@ cache: # - $HOME/.cache/pip/wheels +addons: + postgresql: "9.4" + # don't clone the whole repo history, one commit will do git: depth: 1 diff --git a/changelog.d/4370.misc b/changelog.d/4370.misc new file mode 100644 index 0000000000..047061ed3c --- /dev/null +++ b/changelog.d/4370.misc @@ -0,0 +1 @@ +Apply a unique index to the user_ips table, preventing duplicates. diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 9ad17b7c25..5d548f250a 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -65,7 +65,27 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["last_seen"], ) - # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) + self.register_background_update_handler( + "user_ips_remove_dupes", + self._remove_user_ip_dupes, + ) + + # Register a unique index + self.register_background_index_update( + "user_ips_device_unique_index", + index_name="user_ips_user_token_ip_unique_index", + table="user_ips", + columns=["user_id", "access_token", "ip"], + unique=True, + ) + + # Drop the old non-unique index + self.register_background_update_handler( + "user_ips_drop_nonunique_index", + self._remove_user_ip_nonunique, + ) + + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) self._batch_row_update = {} self._client_ip_looper = self._clock.looping_call( @@ -75,6 +95,116 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + @defer.inlineCallbacks + def _remove_user_ip_nonunique(self, progress, batch_size): + def f(conn): + txn = conn.cursor() + txn.execute( + "DROP INDEX IF EXISTS user_ips_user_ip" + ) + txn.close() + + yield self.runWithConnection(f) + yield self._end_background_update("user_ips_drop_nonunique_index") + defer.returnValue(1) + + @defer.inlineCallbacks + def _remove_user_ip_dupes(self, progress, batch_size): + + last_seen_progress = progress.get("last_seen", 0) + + def get_last_seen(txn): + txn.execute( + """ + SELECT last_seen FROM user_ips + WHERE last_seen > ? + ORDER BY last_seen + LIMIT 1 + OFFSET ? + """, + (last_seen_progress, batch_size) + ) + results = txn.fetchone() + return results + + # Get a last seen that's sufficiently far away enough from the last one + last_seen = yield self.runInteraction( + "user_ips_dups_get_last_seen", get_last_seen + ) + + if not last_seen: + # If we get a None then we're reaching the end and just need to + # delete the last batch. 
+ last = True + + # We fake not having an upper bound by using a future date, by + # just multiplying the current time by two.... + last_seen = int(self.clock.time_msec()) * 2 + else: + last = False + last_seen = last_seen[0] + + def remove(txn, last_seen_progress, last_seen): + # This works by looking at all entries in the given time span, and + # then for each (user_id, access_token, ip) tuple in that range + # checking for any duplicates in the rest of the table (via a join). + # It then only returns entries which have duplicates, and the max + # last_seen across all duplicates, which can the be used to delete + # all other duplicates. + # It is efficient due to the existence of (user_id, access_token, + # ip) and (last_seen) indices. + txn.execute( + """ + SELECT user_id, access_token, ip, + MAX(device_id), MAX(user_agent), MAX(last_seen) + FROM ( + SELECT user_id, access_token, ip + FROM user_ips + WHERE ? <= last_seen AND last_seen < ? + ORDER BY last_seen + ) c + INNER JOIN user_ips USING (user_id, access_token, ip) + GROUP BY user_id, access_token, ip + HAVING count(*) > 1""", + (last_seen_progress, last_seen) + ) + res = txn.fetchall() + + # We've got some duplicates + for i in res: + user_id, access_token, ip, device_id, user_agent, last_seen = i + + # Drop all the duplicates + txn.execute( + """ + DELETE FROM user_ips + WHERE user_id = ? AND access_token = ? AND ip = ? + """, + (user_id, access_token, ip) + ) + + # Add in one to be the last_seen + txn.execute( + """ + INSERT INTO user_ips + (user_id, access_token, ip, device_id, user_agent, last_seen) + VALUES (?, ?, ?, ?, ?, ?) + """, + (user_id, access_token, ip, device_id, user_agent, last_seen) + ) + + self._background_update_progress_txn( + txn, "user_ips_remove_dupes", {"last_seen": last_seen} + ) + + yield self.runInteraction( + "user_ips_dups_remove", remove, last_seen_progress, last_seen + ) + if last: + yield self._end_background_update("user_ips_remove_dupes") + + defer.returnValue(batch_size) + @defer.inlineCallbacks def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id, now=None): @@ -127,10 +257,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_id": user_id, "access_token": access_token, "ip": ip, - "user_agent": user_agent, - "device_id": device_id, }, values={ + "user_agent": user_agent, + "device_id": device_id, "last_seen": last_seen, }, lock=False, @@ -227,7 +357,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): results = {} for key in self._batch_row_update: - uid, access_token, ip = key + uid, access_token, ip, = key if uid == user_id: user_agent, _, last_seen = self._batch_row_update[key] results[(access_token, ip)] = (user_agent, last_seen) diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql new file mode 100644 index 0000000000..4ca346c111 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_ips_index.sql @@ -0,0 +1,26 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- delete duplicates +INSERT INTO background_updates (update_name, progress_json) VALUES + ('user_ips_remove_dupes', '{}'); + +-- add a new unique index to user_ips table +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_device_unique_index', '{}', 'user_ips_remove_dupes'); + +-- drop the old original index +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); \ No newline at end of file -- cgit 1.4.1 From df3a661e4adb7676682a5e3c298a2dfda18b08a1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 10:04:47 +0000 Subject: Search for messages across predecessor rooms Signed-off-by: Andrew Morgan --- synapse/api/filtering.py | 3 ++ synapse/handlers/search.py | 69 ++++++++++++++++++++++++++++++++++++++++ synapse/storage/state.py | 1 + 3 files changed, 73 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 16ad654864..84000e6422 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,6 +444,9 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def add_room_ids(self, room_ids): + self.rooms += room_ids + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ec936bbb4e..77e7e4e0fb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -37,6 +37,54 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms is then returned. + + Args: + room_id (str): The ID of the room to search through. + + Returns: + dict of past room IDs as strings + """ + + historical_room_ids = [] + + while True: + state_ids = yield self.store.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + break + + # Retrieve the room's create event + create_event = yield self.store.get_event(create_id) + + if not create_event: + break + + # Check if a predecessor room is present + predecessor = create_event.content.get("predecessor", None) + if not predecessor: + break + + # Add predecessor's room ID + historical_room_id = predecessor["room_id"] + historical_room_ids.append(historical_room_id) + + # Scan through the old room for further predecessors + room_id = historical_room_id + + defer.returnValue(historical_room_ids) + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user.
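The get_old_rooms_from_upgraded_room walk added above is essentially a pointer chase through m.room.create events. A hedged standalone sketch of the same idea, where get_create_content is a hypothetical stand-in for the two storage lookups, and the cycle guard is an extra safety net that this patch does not itself include::

    def old_rooms_from_upgraded_room(room_id, get_create_content):
        # Follow the chain of "predecessor" pointers in m.room.create events.
        historical_room_ids = []
        seen = {room_id}
        while True:
            content = get_create_content(room_id)
            if not content:
                break  # no create event found: assume a dead end
            predecessor = content.get("predecessor")
            if not predecessor or predecessor["room_id"] in seen:
                break  # end of the chain, or a loop
            room_id = predecessor["room_id"]
            seen.add(room_id)
            historical_room_ids.append(room_id)
        return historical_room_ids

    # Example: !A was upgraded to !B, which was upgraded to !C.
    creates = {
        "!C:hs": {"predecessor": {"room_id": "!B:hs"}},
        "!B:hs": {"predecessor": {"room_id": "!A:hs"}},
        "!A:hs": {},
    }
    print(old_rooms_from_upgraded_room("!C:hs", creates.get))
    # ['!B:hs', '!A:hs']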
@@ -139,6 +187,27 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) + # If doing a subset of all rooms search, check if any of the rooms + # are from an upgraded room, and search their contents as well + # XXX: There is the possibility that we don't have a create event for + # the room in question, in which case we can't return all the results + # we want to. + # Ideally we would just return the results we can get now, and + # try to get more results from other servers in the background. + if search_filter.rooms: + historical_room_ids = [] + for room_id in search_filter.rooms: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Add any found rooms to the list to search + for historical_room_id in historical_room_ids: + room_ids.add(historical_room_id) + + # Prevent any historical events from being filtered + search_filter.add_room_ids(historical_room_ids) + if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a134e9b3e8..49b3ff4a71 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,6 +448,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ + def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events -- cgit 1.4.1 From 25dd56ace36e9d3dbb717a34c9b9051b243f84ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 18 Jan 2019 12:17:04 +0000 Subject: Fix race when persisting create event (#4404) * Fix race when persisting create event When persisting a chunk of DAG it is sometimes required to do a state resolution, which requires knowledge of the room version. If this happens while we're persisting the create event then we need to use that event rather than attempting to look it up in the database. --- changelog.d/4404.bugfix | 1 + synapse/storage/events.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4404.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4404.bugfix b/changelog.d/4404.bugfix new file mode 100644 index 0000000000..5d40a3a60b --- /dev/null +++ b/changelog.d/4404.bugfix @@ -0,0 +1 @@ +Fix potential bug where creating or joining a room could fail diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2047110b1d..79e0276de6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -739,7 +739,18 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore } events_map = {ev.event_id: ev for ev, _ in events_context} - room_version = yield self.get_room_version(room_id) + + # We need to get the room version, which is in the create event. + # Normally that'd be in the database, but it's also possible that we're + # currently trying to persist it.
+ room_version = None + for ev, _ in events_context: + if ev.type == EventTypes.Create and ev.state_key == "": + room_version = ev.content.get("room_version", "1") + break + + if not room_version: + room_version = yield self.get_room_version(room_id) logger.debug("calling resolve_state_groups from preserve_events") res = yield self._state_resolution_handler.resolve_state_groups( -- cgit 1.4.1 From 25d64a846ab4974da0d8a51b3a1ff014a10d319d Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 15:27:11 +0000 Subject: Fix typos --- changelog.d/4412.bugfix | 2 +- synapse/handlers/room_member.py | 16 ++++++++-------- synapse/rest/client/v1/room.py | 8 ++++---- synapse/storage/background_updates.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/changelog.d/4412.bugfix b/changelog.d/4412.bugfix index 2388ed5b59..007be1b7db 100644 --- a/changelog.d/4412.bugfix +++ b/changelog.d/4412.bugfix @@ -1 +1 @@ -Copy over whether a room is a direct message or not on room upgrade. \ No newline at end of file +Copy over whether a room is a direct message and any associated room tags on room upgrade. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 07fd3e82fc..1d337ad95c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -63,7 +63,7 @@ class RoomMemberHandler(object): self.directory_handler = hs.get_handlers().directory_handler self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.member_linearizer = Linearizer(name="member") @@ -168,7 +168,7 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - event, context = yield self.event_creation_hander.create_event( + event, context = yield self.event_creation_handler.create_event( requester, { "type": EventTypes.Member, @@ -186,14 +186,14 @@ class RoomMemberHandler(object): ) # Check if this event matches the previous membership event for the user. - duplicate = yield self.event_creation_hander.deduplicate_state_event( + duplicate = yield self.event_creation_handler.deduplicate_state_event( event, context, ) if duplicate is not None: # Discard the new event since this membership change is a no-op. defer.returnValue(duplicate) - yield self.event_creation_hander.handle_new_client_event( + yield self.event_creation_handler.handle_new_client_event( requester, event, context, @@ -493,7 +493,7 @@ class RoomMemberHandler(object): else: requester = synapse.types.create_requester(target_user) - prev_event = yield self.event_creation_hander.deduplicate_state_event( + prev_event = yield self.event_creation_handler.deduplicate_state_event( event, context, ) if prev_event is not None: @@ -513,7 +513,7 @@ class RoomMemberHandler(object): if is_blocked: raise SynapseError(403, "This room has been blocked on this server") - yield self.event_creation_hander.handle_new_client_event( + yield self.event_creation_handler.handle_new_client_event( requester, event, context, @@ -527,7 +527,7 @@ class RoomMemberHandler(object): ) if event.membership == Membership.JOIN: - # Only fire user_joined_room if the user has acutally joined the + # Only fire user_joined_room if the user has actually joined the # room. Don't bother if the user is just changing their profile # info. 
newly_joined = True @@ -755,7 +755,7 @@ class RoomMemberHandler(object): ) ) - yield self.event_creation_hander.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index fcfe7857f6..48da4d557f 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -89,7 +89,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self.message_handler = hs.get_message_handler() @@ -172,7 +172,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): content=content, ) else: - event = yield self.event_creation_hander.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, @@ -189,7 +189,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) - self.event_creation_hander = hs.get_event_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -211,7 +211,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): if b'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) - event = yield self.event_creation_hander.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 5fe1ca2de7..60cdc884e6 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -240,7 +240,7 @@ class BackgroundUpdateStore(SQLBaseStore): * An integer count of the number of items to update in this batch. The handler should return a deferred integer count of items updated. - The hander is responsible for updating the progress of the update. + The handler is responsible for updating the progress of the update. Args: update_name(str): The name of the update that this code handles. -- cgit 1.4.1 From c9bfb058d85f6205fada062c78a4d1eca119417c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 11:12:48 +0000 Subject: Fix a bug with single-room search searching all rooms * Create a new method for getting predecessor rooms * Remove formatting change --- synapse/api/filtering.py | 15 +++++++++++++-- synapse/handlers/search.py | 42 ++++++++++-------------------------------- synapse/storage/state.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 35 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 84000e6422..0d8957175d 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,8 +444,19 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) - def add_room_ids(self, room_ids): - self.rooms += room_ids + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. 
+ + Args: + room_ids (list): A list of room_ids. + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. + """ + newFilter = self + newFilter.rooms += room_ids + return newFilter def _matches_wildcard(actual_value, filter_value): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 77e7e4e0fb..75c26fe065 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -49,39 +49,26 @@ class SearchHandler(BaseHandler): The full list of all found rooms in then returned. Args: - room_id (str): The ID of the room to search through. + room_id (str): id of the room to search through. Returns: - dict of past room IDs as strings + Deferred[iterable[str]]: predecessor room ids """ historical_room_ids = [] while True: - state_ids = yield self.store.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) + predecessor = yield self.store.get_room_predecessor(room_id) - # If we can't find the create event, assume we've hit a dead end - if not create_id: - break - - # Retrieve the room's create event - create_event = yield self.store.get_event(create_id) - - if not create_event: - break - - # Check if a predecessor room is present - predecessor = create_event.content.get("predecessor", None) + # If no predecessor, assume we've hit a dead end if not predecessor: break # Add predecessor's room ID - historical_room_id = predecessor["room_id"] - historical_room_ids.append(historical_room_id) + historical_room_ids.append(predecessor["room_id"]) # Scan through the old room for further predecessors - room_id = historical_room_id + room_id = predecessor["room_id"] defer.returnValue(historical_room_ids) @@ -185,28 +172,19 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = search_filter.filter_rooms(room_ids) - # If doing a subset of all rooms seearch, check if any of the rooms # are from an upgraded room, and search their contents as well - # XXX: There is the possibility that we don't have a create event for - # the room in question, in which case we can't return all the results - # we want to. - # Ideally we would just return the results we can get now, and - # try to get more results from other servers in the background. if search_filter.rooms: historical_room_ids = [] - for room_id in room_ids: + for room_id in search_filter.rooms: # Add any previous rooms to the search if they exist ids = yield self.get_old_rooms_from_upgraded_room(room_id) historical_room_ids += ids - # Add any found rooms to the list to search - for historical_room_id in historical_room_ids: - room_ids.add(historical_room_id) - # Prevent any historical events from being filtered - search_filter.add_room_ids(historical_room_ids) + search_filter = search_filter.with_room_ids(historical_room_ids) + + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 49b3ff4a71..b064671851 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -437,6 +437,34 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. 
+ + Args: + room_id (str) + + Returns: + Deferred[str]: predecessor room id + """ + state_ids = yield self.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + return None + + # Retrieve the room's create event + create_event = yield self.get_event(create_id) + + if not create_event: + return None + + # Return predecessor if present + return create_event.content.get("predecessor", None) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): """Get the current state event ids for a room based on the @@ -448,7 +476,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ - def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events -- cgit 1.4.1 From c433f6109145f0cf6c80dd07ee118b68a3a0cd4e Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:06:36 +0000 Subject: Ensure new filter is actually created --- synapse/api/filtering.py | 2 +- synapse/storage/state.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 0d8957175d..f3a056110f 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -454,7 +454,7 @@ class Filter(object): filter: A new filter including the given rooms and the old filter's rooms. """ - newFilter = self + newFilter = Filter(self.filter_json) newFilter.rooms += room_ids return newFilter diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 981d1e3600..fceb9744aa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,7 +448,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[str]: predecessor room id """ - state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) -- cgit 1.4.1 From 277e50462d1422ac8cfe2df7cd2213288f3d14c5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:40:26 +0000 Subject: Do not return in a deferred function --- synapse/storage/state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 981d1e3600..fceb9744aa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -453,16 +453,16 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # If we can't find the create event, assume we've hit a dead end if not create_id: - return None + defer.returnValue(None) # Retrieve the room's create event create_event = yield self.get_event(create_id) if not create_event: - return None + defer.returnValue(None) # Return predecessor if present - return create_event.content.get("predecessor", None) + defer.returnValue(create_event.content.get("predecessor", None)) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): -- cgit 1.4.1 From 2557531f0f60ee1891a74babed54800cb1bcfd06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Jan 2019 11:55:53 +0000 Subject: Fix bug when removing duplicate rows from user_ips This was caused by accidentally overwriting a `last_seen` variable in a for loop, causing the wrong value to be written to the progress table.
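The shape of that bug is easy to reproduce in miniature; the fix below simply gives the batch boundaries their own names (`begin_last_seen`, `end_last_seen`) so a loop can no longer shadow them. A hypothetical sketch, not the actual Synapse code::

    def remove_batch(progress, rows):
        last_seen = progress["last_seen"] + 100    # end of this batch
        for user_id, last_seen in rows:            # reuses the name and
            pass                                   # clobbers the boundary
        progress["last_seen"] = last_seen          # persists a row value

    progress = {"last_seen": 0}
    remove_batch(progress, [("@a:hs", 10), ("@b:hs", 30)])
    print(progress["last_seen"])  # 30, though the batch really ended at 100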
---
 synapse/storage/client_ips.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 5d548f250a..9548cfd3f8 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -110,8 +110,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):

     @defer.inlineCallbacks
     def _remove_user_ip_dupes(self, progress, batch_size):
+        # This function works by scanning the user_ips table in batches
+        # based on `last_seen`. For each row in a batch it searches the rest of
+        # the table to see if there are any duplicates; if there are, then they
+        # are removed and replaced with a suitable row.

-        last_seen_progress = progress.get("last_seen", 0)
+        # Fetch the start of the batch
+        begin_last_seen = progress.get("last_seen", 0)

         def get_last_seen(txn):
             txn.execute(
@@ -122,17 +127,20 @@
                 LIMIT 1
                 OFFSET ?
                 """,
-                (last_seen_progress, batch_size)
+                (begin_last_seen, batch_size)
             )
-            results = txn.fetchone()
-            return results
-
-        # Get a last seen that's sufficiently far away enough from the last one
-        last_seen = yield self.runInteraction(
+            row = txn.fetchone()
+            if row:
+                return row[0]
+            else:
+                return None
+
+        # Get a last seen that is roughly `batch_size` rows after `begin_last_seen`
+        end_last_seen = yield self.runInteraction(
             "user_ips_dups_get_last_seen", get_last_seen
         )

-        if not last_seen:
+        if end_last_seen is None:
             # If we get a None then we're reaching the end and just need to
             # delete the last batch.
             last = True
@@ -142,9 +150,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             last_seen = int(self.clock.time_msec()) * 2
         else:
             last = False
-            last_seen = last_seen[0]

-        def remove(txn, last_seen_progress, last_seen):
+        def remove(txn, begin_last_seen, end_last_seen):
             # This works by looking at all entries in the given time span, and
             # then for each (user_id, access_token, ip) tuple in that range
             # checking for any duplicates in the rest of the table (via a join).
@@ -166,7 +173,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): INNER JOIN user_ips USING (user_id, access_token, ip) GROUP BY user_id, access_token, ip HAVING count(*) > 1""", - (last_seen_progress, last_seen) + (begin_last_seen, end_last_seen) ) res = txn.fetchall() @@ -194,11 +201,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) self._background_update_progress_txn( - txn, "user_ips_remove_dupes", {"last_seen": last_seen} + txn, "user_ips_remove_dupes", {"last_seen": end_last_seen} ) yield self.runInteraction( - "user_ips_dups_remove", remove, last_seen_progress, last_seen + "user_ips_dups_remove", remove, begin_last_seen, end_last_seen ) if last: yield self._end_background_update("user_ips_remove_dupes") -- cgit 1.4.1 From 1c9704f8ab72047a83c6a3b364f3693b332434f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Jan 2019 16:20:33 +0000 Subject: Don't shadow params --- synapse/storage/client_ips.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 9548cfd3f8..c485211175 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -151,7 +151,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): else: last = False - def remove(txn, begin_last_seen, end_last_seen): + def remove(txn): # This works by looking at all entries in the given time span, and # then for each (user_id, access_token, ip) tuple in that range # checking for any duplicates in the rest of the table (via a join). @@ -204,9 +204,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): txn, "user_ips_remove_dupes", {"last_seen": end_last_seen} ) - yield self.runInteraction( - "user_ips_dups_remove", remove, begin_last_seen, end_last_seen - ) + yield self.runInteraction("user_ips_dups_remove", remove) + if last: yield self._end_background_update("user_ips_remove_dupes") -- cgit 1.4.1 From 7f503f83b92150edeb4cc5ae98f6d9bb2e9cdb49 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 22 Jan 2019 16:31:05 +0000 Subject: Refactor to rewrite the SQL instead --- synapse/storage/client_ips.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index c485211175..78721a941a 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -140,16 +140,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_ips_dups_get_last_seen", get_last_seen ) - if end_last_seen is None: - # If we get a None then we're reaching the end and just need to - # delete the last batch. - last = True - - # We fake not having an upper bound by using a future date, by - # just multiplying the current time by two.... - last_seen = int(self.clock.time_msec()) * 2 - else: - last = False + # If it returns None, then we're processing the last batch + last = end_last_seen is None def remove(txn): # This works by looking at all entries in the given time span, and @@ -160,6 +152,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): # all other duplicates. # It is efficient due to the existence of (user_id, access_token, # ip) and (last_seen) indices. + + # Define the search space, which requires handling the last batch in + # a different way + if last: + clause = "? <= last_seen" + args = (begin_last_seen,) + else: + clause = "? <= last_seen AND last_seen < ?" 
+ args = (begin_last_seen, end_last_seen) + txn.execute( """ SELECT user_id, access_token, ip, @@ -167,13 +169,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): FROM ( SELECT user_id, access_token, ip FROM user_ips - WHERE ? <= last_seen AND last_seen < ? + WHERE {} ORDER BY last_seen ) c INNER JOIN user_ips USING (user_id, access_token, ip) GROUP BY user_id, access_token, ip - HAVING count(*) > 1""", - (begin_last_seen, end_last_seen) + HAVING count(*) > 1 + """.format(clause), + args ) res = txn.fetchall() -- cgit 1.4.1 From 90743c9d8910679bb688070b3929f3005e106d00 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Jan 2019 08:45:18 +0000 Subject: Fixup removal of duplicate `user_ips` rows (#4432) * Remove unnecessary ORDER BY clause * Add logging * Newsfile --- changelog.d/4432.misc | 1 + synapse/storage/client_ips.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4432.misc (limited to 'synapse/storage') diff --git a/changelog.d/4432.misc b/changelog.d/4432.misc new file mode 100644 index 0000000000..047061ed3c --- /dev/null +++ b/changelog.d/4432.misc @@ -0,0 +1 @@ +Apply a unique index to the user_ips table, preventing duplicates. diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 78721a941a..b228a20ac2 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -143,6 +143,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): # If it returns None, then we're processing the last batch last = end_last_seen is None + logger.info( + "Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s", + begin_last_seen, end_last_seen, + ) + def remove(txn): # This works by looking at all entries in the given time span, and # then for each (user_id, access_token, ip) tuple in that range @@ -170,7 +175,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): SELECT user_id, access_token, ip FROM user_ips WHERE {} - ORDER BY last_seen ) c INNER JOIN user_ips USING (user_id, access_token, ip) GROUP BY user_id, access_token, ip -- cgit 1.4.1 From c5a296b10c53aa523facf16c3a4c918f93b8188c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Jan 2019 11:11:52 +0000 Subject: Add support for persisting event format versions Currently we only have the one event format version defined, but this adds the necessary infrastructure to persist and fetch the format versions alongside the events. We specify the format version rather than the room version as: 1. We don't necessarily know the room version, existing events may be either v1 or v2. 2. We'd need to be careful to prevent/handle correctly if different events in the same room reported to be of different versions, which sounds annoying. 
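The read-path defaulting that the diffs below introduce amounts to the following sketch (illustrative names only; at this point V1 is the sole defined format):

    V1 = 1  # mirrors EventFormatVersions.V1

    def interpret_format_version(stored_value):
        # Rows persisted before this schema change carry NULL (None) here;
        # such events predate any newer formats, so they must be V1.
        return V1 if stored_value is None else stored_value

    assert interpret_format_version(None) == V1
    assert interpret_format_version(V1) == V1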
--- synapse/api/constants.py | 13 +++++++++++++ synapse/events/__init__.py | 5 +++++ synapse/storage/events.py | 1 + synapse/storage/events_worker.py | 19 +++++++++++++++---- .../storage/schema/delta/53/event_format_version.sql | 16 ++++++++++++++++ 5 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/delta/53/event_format_version.sql (limited to 'synapse/storage') diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 46c4b4b9dc..51ee078bc3 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -120,6 +120,19 @@ KNOWN_ROOM_VERSIONS = { RoomVersions.STATE_V2_TEST, } + +class EventFormatVersions(object): + """This is an internal enum for tracking the version of the event format, + independently from the room version. + """ + V1 = 1 + + +KNOWN_EVENT_FORMAT_VERSIONS = { + EventFormatVersions.V1, +} + + ServerNoticeMsgType = "m.server_notice" ServerNoticeLimitReached = "m.server_notice.usage_limit_reached" diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 84c75495d5..310075c2b1 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -18,6 +18,9 @@ from distutils.util import strtobool import six +from synapse.api.constants import ( + EventFormatVersions, +) from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -179,6 +182,8 @@ class EventBase(object): class FrozenEvent(EventBase): + format_version = EventFormatVersions.V1 # All events of this type are V1 + def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None): event_dict = dict(event_dict) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 79e0276de6..3e1915fb87 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1268,6 +1268,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore event.internal_metadata.get_dict() ), "json": encode_json(event_dict(event)), + "format_version": event.format_version, } for event, _ in events_and_contexts ], diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index a8326f5296..8dff91e5f8 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -21,11 +21,11 @@ from canonicaljson import json from twisted.internet import defer +from synapse.api.constants import EventFormatVersions from synapse.api.errors import NotFoundError # these are only included to make the type annotations work -from synapse.events import EventBase # noqa: F401 -from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.logcontext import ( @@ -353,6 +353,7 @@ class EventsWorkerStore(SQLBaseStore): self._get_event_from_row, row["internal_metadata"], row["json"], row["redacts"], rejected_reason=row["rejects"], + format_version=row["format_version"], ) for row in rows ], @@ -377,6 +378,7 @@ class EventsWorkerStore(SQLBaseStore): " e.event_id as event_id, " " e.internal_metadata," " e.json," + " e.format_version, " " r.redacts as redacts," " rej.event_id as rejects " " FROM event_json as e" @@ -392,7 +394,7 @@ class EventsWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): + format_version, rejected_reason=None): with 
Measure(self._clock, "_get_event_from_row"):
            d = json.loads(js)
            internal_metadata = json.loads(internal_metadata)
@@ -405,8 +407,17 @@ class EventsWorkerStore(SQLBaseStore):
                 desc="_get_event_from_row_rejected_reason",
             )

+        if format_version is None:
+            # This means that we stored the event before we had the concept
+            # of an event format version, so it must be a V1 event.
+            format_version = EventFormatVersions.V1
+
+        # TODO: When we implement new event formats we'll need to use a
+        # different event python type
+        assert format_version == EventFormatVersions.V1
+
         original_ev = FrozenEvent(
-            d,
+            event_dict=d,
             internal_metadata_dict=internal_metadata,
             rejected_reason=rejected_reason,
         )
diff --git a/synapse/storage/schema/delta/53/event_format_version.sql b/synapse/storage/schema/delta/53/event_format_version.sql
new file mode 100644
index 0000000000..1d977c2834
--- /dev/null
+++ b/synapse/storage/schema/delta/53/event_format_version.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE event_json ADD COLUMN format_version INTEGER;
-- cgit 1.4.1


From be1065af598e46224aee2eb7b8bcd7e723a72426 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 23 Jan 2019 11:48:16 +0000
Subject: isort

---
 synapse/events/__init__.py       | 4 +---
 synapse/storage/events_worker.py | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'synapse/storage')

diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 310075c2b1..9dd6940385 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -18,9 +18,7 @@ from distutils.util import strtobool

 import six

-from synapse.api.constants import (
-    EventFormatVersions,
-)
+from synapse.api.constants import EventFormatVersions
 from synapse.util.caches import intern_dict
 from synapse.util.frozenutils import freeze

diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index 8dff91e5f8..599f892858 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -23,9 +23,9 @@ from twisted.internet import defer

 from synapse.api.constants import EventFormatVersions
 from synapse.api.errors import NotFoundError
+from synapse.events import FrozenEvent
 # these are only included to make the type annotations work
 from synapse.events.snapshot import EventContext  # noqa: F401
-from synapse.events import FrozenEvent
 from synapse.events.utils import prune_event
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util.logcontext import (
-- cgit 1.4.1


From 886e5acc762b879b606773b511ff92345aef14c6 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 16 Jan 2019 15:13:07 +0000
Subject: Store rejected remote invite events as outliers

Currently they're stored as non-outliers even though the server isn't
in the room, which can be problematic in places where the code assumes
it has the state for all non-outlier events.
In particular, there is an edge case where persisting the leave event triggers a state resolution, which requires looking up the room version from state. Since the server doesn't have the state, this causes an exception to be thrown. --- synapse/federation/federation_client.py | 10 ++++++-- synapse/handlers/federation.py | 44 +++++++++------------------------ synapse/storage/roommember.py | 5 +--- 3 files changed, 21 insertions(+), 38 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index d05ed91d64..8fa726759e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -32,7 +32,6 @@ from synapse.api.errors import ( HttpResponseException, SynapseError, ) -from synapse.events import builder from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -66,6 +65,8 @@ class FederationClient(FederationBase): self.state = hs.get_state_handler() self.transport_layer = hs.get_federation_transport_client() + self.event_builder_factory = hs.get_event_builder_factory() + self._get_pdu_cache = ExpiringCache( cache_name="get_pdu_cache", clock=self._clock, @@ -571,7 +572,12 @@ class FederationClient(FederationBase): if "prev_state" not in pdu_dict: pdu_dict["prev_state"] = [] - ev = builder.EventBuilder(pdu_dict) + # Strip off the fields that we want to clobber. + pdu_dict.pop("origin", None) + pdu_dict.pop("origin_server_ts", None) + pdu_dict.pop("unsigned", None) + + ev = self.event_builder_factory.new(pdu_dict) defer.returnValue( (destination, ev) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a3bb864bb2..70be87cd3d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -43,10 +43,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) -from synapse.crypto.event_signing import ( - add_hashes_and_signatures, - compute_event_signature, -) +from synapse.crypto.event_signing import compute_event_signature from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -58,7 +55,6 @@ from synapse.types import UserID, get_domain_from_id from synapse.util import logcontext, unwrapFirstError from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_joined_room -from synapse.util.frozenutils import unfreeze from synapse.util.logutils import log_function from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server @@ -1083,7 +1079,9 @@ class FederationHandler(BaseHandler): handled_events = set() try: - event = self._sign_event(event) + self._sign_event(event) + event.internal_metadata.outlier = False + # Try the host we successfully got a response to /make_join/ # request first. 
try: @@ -1289,13 +1287,7 @@ class FederationHandler(BaseHandler): event.internal_metadata.outlier = True event.internal_metadata.invite_from_remote = True - event.signatures.update( - compute_event_signature( - event, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) + self._sign_event(event) context = yield self.state_handler.compute_event_context(event) yield self.persist_events_and_notify([(event, context)]) @@ -1313,7 +1305,7 @@ class FederationHandler(BaseHandler): # Mark as outlier as we don't have any state for this event; we're not # even in the room. event.internal_metadata.outlier = True - event = self._sign_event(event) + self._sign_event(event) # Try the host that we succesfully called /make_leave/ on first for # the /send_leave/ request. @@ -1358,26 +1350,14 @@ class FederationHandler(BaseHandler): defer.returnValue((origin, event)) def _sign_event(self, event): - event.internal_metadata.outlier = False - - builder = self.event_builder_factory.new( - unfreeze(event.get_pdu_json()) - ) - - builder.event_id = self.event_builder_factory.create_event_id() - builder.origin = self.hs.hostname - - if not hasattr(event, "signatures"): - builder.signatures = {} - - add_hashes_and_signatures( - builder, - self.hs.hostname, - self.hs.config.signing_key[0], + event.signatures.update( + compute_event_signature( + event, + self.hs.hostname, + self.hs.config.signing_key[0] + ) ) - return builder.build() - @defer.inlineCallbacks @log_function def on_make_leave_request(self, room_id, user_id): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 0707f9a86a..c7488f4259 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -591,10 +591,7 @@ class RoomMemberStore(RoomMemberWorkerStore): # i.e., its something that has just happened. # The only current event that can also be an outlier is if its an # invite that has come in across federation. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_invite_from_remote() - ) + is_new_state = not backfilled is_mine = self.hs.is_mine_id(event.state_key) if is_new_state and is_mine: if event.membership == Membership.INVITE: -- cgit 1.4.1 From 7c288c22500e2045d36a29c38d2671fad6484e30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Jan 2019 20:05:44 +0000 Subject: Clarify the invite flows --- synapse/events/__init__.py | 8 ++++++-- synapse/handlers/federation.py | 12 +++++++++++- synapse/storage/roommember.py | 11 +++++++---- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 84c75495d5..5030636c7e 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -41,8 +41,12 @@ class _EventInternalMetadata(object): def is_outlier(self): return getattr(self, "outlier", False) - def is_invite_from_remote(self): - return getattr(self, "invite_from_remote", False) + def is_new_remote_event(self): + """Whether this is a new remote event, like an invite or an invite + rejection. This is needed as those events are marked as outliers, but + they still need to be processed. + """ + return getattr(self, "new_remote_event", False) def get_send_on_behalf_of(self): """Whether this server should send the event on behalf of another server. 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9a14ba4517..e017cab777 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -43,6 +43,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.crypto.event_signing import compute_event_signature from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -1283,7 +1284,15 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.outlier = True - event.internal_metadata.invite_from_remote = True + event.internal_metadata.new_remote_event = True + + event.signatures.update( + compute_event_signature( + event, + self.hs.hostname, + self.hs.config.signing_key[0] + ) + ) context = yield self.state_handler.compute_event_context(event) yield self.persist_events_and_notify([(event, context)]) @@ -1301,6 +1310,7 @@ class FederationHandler(BaseHandler): # Mark as outlier as we don't have any state for this event; we're not # even in the room. event.internal_metadata.outlier = True + event.internal_metadata.new_remote_event = True # Try the host that we succesfully called /make_leave/ on first for # the /send_leave/ request. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index c7488f4259..40b13de80b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -588,10 +588,13 @@ class RoomMemberStore(RoomMemberWorkerStore): ) # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. - # The only current event that can also be an outlier is if its an - # invite that has come in across federation. - is_new_state = not backfilled + # i.e., its something that has just happened. If the event is an + # outlier it is only current if its a "new remote event", like a + # remote invite or a rejection of a remote invite. 
+ is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_new_remote_event() + ) is_mine = self.hs.is_mine_id(event.state_key) if is_new_state and is_mine: if event.membership == Membership.INVITE: -- cgit 1.4.1 From 58f6c4818337364dd9c6bf01062e7b0dadcb8a25 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 24 Jan 2019 21:31:54 +1100 Subject: Use native UPSERTs where possible (#4306) --- .coveragerc | 6 +- .gitignore | 6 +- changelog.d/4306.misc | 1 + synapse/storage/_base.py | 148 +++++++++++++++++++++++++++++++++--- synapse/storage/client_ips.py | 5 +- synapse/storage/engines/__init__.py | 2 +- synapse/storage/engines/postgres.py | 14 ++++ synapse/storage/engines/sqlite.py | 96 +++++++++++++++++++++++ synapse/storage/engines/sqlite3.py | 87 --------------------- synapse/storage/pusher.py | 9 ++- synapse/storage/user_directory.py | 55 ++++++++++---- tests/storage/test_base.py | 1 + tests/test_server.py | 12 ++- tests/unittest.py | 12 ++- tox.ini | 1 + 15 files changed, 325 insertions(+), 130 deletions(-) create mode 100644 changelog.d/4306.misc create mode 100644 synapse/storage/engines/sqlite.py delete mode 100644 synapse/storage/engines/sqlite3.py (limited to 'synapse/storage') diff --git a/.coveragerc b/.coveragerc index 9873a30738..e9460a340a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,11 +1,7 @@ [run] branch = True parallel = True -source = synapse - -[paths] -source= - coverage +include = synapse/* [report] precision = 2 diff --git a/.gitignore b/.gitignore index d739595c3a..1033124f1d 100644 --- a/.gitignore +++ b/.gitignore @@ -25,9 +25,9 @@ homeserver*.pid *.tls.dh *.tls.key -.coverage -.coverage.* -!.coverage.rc +.coverage* +coverage.* +!.coveragerc htmlcov demo/*/*.db diff --git a/changelog.d/4306.misc b/changelog.d/4306.misc new file mode 100644 index 0000000000..58130b6190 --- /dev/null +++ b/changelog.d/4306.misc @@ -0,0 +1 @@ +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 865b5e915a..254fdc04c6 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -192,6 +192,41 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine + # A set of tables that are not safe to use native upserts in. + self._unsafe_to_upsert_tables = {"user_ips"} + + if self.database_engine.can_native_upsert: + # Check ASAP (and then later, every 1s) to see if we have finished + # background updates of tables that aren't safe to update. + self._clock.call_later(0.0, self._check_safe_to_upsert) + + @defer.inlineCallbacks + def _check_safe_to_upsert(self): + """ + Is it safe to use native UPSERT? + + If there are background updates, we will need to wait, as they may be + the addition of indexes that set the UNIQUE constraint that we require. + + If the background updates have not completed, wait a second and check again. + """ + updates = yield self._simple_select_list( + "background_updates", + keyvalues=None, + retcols=["update_name"], + desc="check_background_updates", + ) + updates = [x["update_name"] for x in updates] + + # The User IPs table in schema #53 was missing a unique index, which we + # run as a background update. + if "user_ips_device_unique_index" not in updates: + self._unsafe_to_upsert_tables.discard("user_id") + + # If there's any tables left to check, reschedule to run. 
+ if self._unsafe_to_upsert_tables: + self._clock.call_later(1.0, self._check_safe_to_upsert) + def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -494,8 +529,15 @@ class SQLBaseStore(object): txn.executemany(sql, vals) @defer.inlineCallbacks - def _simple_upsert(self, table, keyvalues, values, - insertion_values={}, desc="_simple_upsert", lock=True): + def _simple_upsert( + self, + table, + keyvalues, + values, + insertion_values={}, + desc="_simple_upsert", + lock=True + ): """ `lock` should generally be set to True (the default), but can be set @@ -516,16 +558,21 @@ class SQLBaseStore(object): inserting lock (bool): True to lock the table when doing the upsert. Returns: - Deferred(bool): True if a new entry was created, False if an - existing one was updated. + Deferred(None or bool): Native upserts always return None. Emulated + upserts return True if a new entry was created, False if an existing + one was updated. """ attempts = 0 while True: try: result = yield self.runInteraction( desc, - self._simple_upsert_txn, table, keyvalues, values, insertion_values, - lock=lock + self._simple_upsert_txn, + table, + keyvalues, + values, + insertion_values, + lock=lock, ) defer.returnValue(result) except self.database_engine.module.IntegrityError as e: @@ -537,12 +584,59 @@ class SQLBaseStore(object): # presumably we raced with another transaction: let's retry. logger.warn( - "IntegrityError when upserting into %s; retrying: %s", - table, e + "%s when upserting into %s; retrying: %s", e.__name__, table, e ) - def _simple_upsert_txn(self, txn, table, keyvalues, values, insertion_values={}, - lock=True): + def _simple_upsert_txn( + self, + txn, + table, + keyvalues, + values, + insertion_values={}, + lock=True, + ): + """ + Pick the UPSERT method which works best on the platform. Either the + native one (Pg9.5+, recent SQLites), or fall back to an emulated method. + + Args: + txn: The transaction to use. + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + lock (bool): True to lock the table when doing the upsert. + Returns: + Deferred(None or bool): Native upserts always return None. Emulated + upserts return True if a new entry was created, False if an existing + one was updated. + """ + if ( + self.database_engine.can_native_upsert + and table not in self._unsafe_to_upsert_tables + ): + return self._simple_upsert_txn_native_upsert( + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + ) + else: + return self._simple_upsert_txn_emulated( + txn, + table, + keyvalues, + values, + insertion_values=insertion_values, + lock=lock, + ) + + def _simple_upsert_txn_emulated( + self, txn, table, keyvalues, values, insertion_values={}, lock=True + ): # We need to lock the table :(, unless we're *really* careful if lock: self.database_engine.lock_table(txn, table) @@ -577,12 +671,44 @@ class SQLBaseStore(object): sql = "INSERT INTO %s (%s) VALUES (%s)" % ( table, ", ".join(k for k in allvalues), - ", ".join("?" for _ in allvalues) + ", ".join("?" for _ in allvalues), ) txn.execute(sql, list(allvalues.values())) # successfully inserted return True + def _simple_upsert_txn_native_upsert( + self, txn, table, keyvalues, values, insertion_values={} + ): + """ + Use the native UPSERT functionality in recent PostgreSQL versions. 
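+        e.g. (illustrative only):
+            INSERT INTO tbl (k, v) VALUES (?, ?)
+            ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v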
+ + Args: + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + Returns: + None + """ + allvalues = {} + allvalues.update(keyvalues) + allvalues.update(values) + allvalues.update(insertion_values) + + sql = ( + "INSERT INTO %s (%s) VALUES (%s) " + "ON CONFLICT (%s) DO UPDATE SET %s" + ) % ( + table, + ", ".join(k for k in allvalues), + ", ".join("?" for _ in allvalues), + ", ".join(k for k in keyvalues), + ", ".join(k + "=EXCLUDED." + k for k in values), + ) + txn.execute(sql, list(allvalues.values())) + def _simple_select_one(self, table, keyvalues, retcols, allow_none=False, desc="_simple_select_one"): """Executes a SELECT query on the named table, which is expected to diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index b228a20ac2..091d7116c5 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -257,7 +257,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) def _update_client_ips_batch_txn(self, txn, to_update): - self.database_engine.lock_table(txn, "user_ips") + if "user_ips" in self._unsafe_to_upsert_tables or ( + not self.database_engine.can_native_upsert + ): + self.database_engine.lock_table(txn, "user_ips") for entry in iteritems(to_update): (user_id, access_token, ip), (user_agent, device_id, last_seen) = entry diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index e2f9de8451..ff5ef97ca8 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -18,7 +18,7 @@ import platform from ._base import IncorrectDatabaseSetup from .postgres import PostgresEngine -from .sqlite3 import Sqlite3Engine +from .sqlite import Sqlite3Engine SUPPORTED_MODULE = { "sqlite3": Sqlite3Engine, diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 42225f8a2a..4004427c7b 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -38,6 +38,13 @@ class PostgresEngine(object): return sql.replace("?", "%s") def on_new_connection(self, db_conn): + + # Get the version of PostgreSQL that we're using. As per the psycopg2 + # docs: The number is formed by converting the major, minor, and + # revision numbers into two-decimal-digit numbers and appending them + # together. For example, version 8.1.5 will be returned as 80105 + self._version = db_conn.server_version + db_conn.set_isolation_level( self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ ) @@ -54,6 +61,13 @@ class PostgresEngine(object): cursor.close() + @property + def can_native_upsert(self): + """ + Can we use native UPSERTs? This requires PostgreSQL 9.5+. + """ + return self._version >= 90500 + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py new file mode 100644 index 0000000000..c64d73ff21 --- /dev/null +++ b/synapse/storage/engines/sqlite.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# Copyright 2015, 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import struct +import threading +from sqlite3 import sqlite_version_info + +from synapse.storage.prepare_database import prepare_database + + +class Sqlite3Engine(object): + single_threaded = True + + def __init__(self, database_module, database_config): + self.module = database_module + + # The current max state_group, or None if we haven't looked + # in the DB yet. + self._current_state_group_id = None + self._current_state_group_id_lock = threading.Lock() + + @property + def can_native_upsert(self): + """ + Do we support native UPSERTs? This requires SQLite3 3.24+, plus some + more work we haven't done yet to tell what was inserted vs updated. + """ + return sqlite_version_info >= (3, 24, 0) + + def check_database(self, txn): + pass + + def convert_param_style(self, sql): + return sql + + def on_new_connection(self, db_conn): + prepare_database(db_conn, self, config=None) + db_conn.create_function("rank", 1, _rank) + + def is_deadlock(self, error): + return False + + def is_connection_closed(self, conn): + return False + + def lock_table(self, txn, table): + return + + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + # We do application locking here since if we're using sqlite then + # we are a single process synapse. + with self._current_state_group_id_lock: + if self._current_state_group_id is None: + txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") + self._current_state_group_id = txn.fetchone()[0] + + self._current_state_group_id += 1 + return self._current_state_group_id + + +# Following functions taken from: https://github.com/coleifer/peewee + +def _parse_match_info(buf): + bufsize = len(buf) + return [struct.unpack('@I', buf[i:i + 4])[0] for i in range(0, bufsize, 4)] + + +def _rank(raw_match_info): + """Handle match_info called w/default args 'pcx' - based on the example rank + function http://sqlite.org/fts3.html#appendix_a + """ + match_info = _parse_match_info(raw_match_info) + score = 0.0 + p, c = match_info[:2] + for phrase_num in range(p): + phrase_info_idx = 2 + (phrase_num * c * 3) + for col_num in range(c): + col_idx = phrase_info_idx + (col_num * 3) + x1, x2 = match_info[col_idx:col_idx + 2] + if x1 > 0: + score += float(x1) / x2 + return score diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py deleted file mode 100644 index 19949fc474..0000000000 --- a/synapse/storage/engines/sqlite3.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import struct
-import threading
-
-from synapse.storage.prepare_database import prepare_database
-
-
-class Sqlite3Engine(object):
-    single_threaded = True
-
-    def __init__(self, database_module, database_config):
-        self.module = database_module
-
-        # The current max state_group, or None if we haven't looked
-        # in the DB yet.
-        self._current_state_group_id = None
-        self._current_state_group_id_lock = threading.Lock()
-
-    def check_database(self, txn):
-        pass
-
-    def convert_param_style(self, sql):
-        return sql
-
-    def on_new_connection(self, db_conn):
-        prepare_database(db_conn, self, config=None)
-        db_conn.create_function("rank", 1, _rank)
-
-    def is_deadlock(self, error):
-        return False
-
-    def is_connection_closed(self, conn):
-        return False
-
-    def lock_table(self, txn, table):
-        return
-
-    def get_next_state_group_id(self, txn):
-        """Returns an int that can be used as a new state_group ID
-        """
-        # We do application locking here since if we're using sqlite then
-        # we are a single process synapse.
-        with self._current_state_group_id_lock:
-            if self._current_state_group_id is None:
-                txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups")
-                self._current_state_group_id = txn.fetchone()[0]
-
-            self._current_state_group_id += 1
-            return self._current_state_group_id
-
-
-# Following functions taken from: https://github.com/coleifer/peewee
-
-def _parse_match_info(buf):
-    bufsize = len(buf)
-    return [struct.unpack('@I', buf[i:i + 4])[0] for i in range(0, bufsize, 4)]
-
-
-def _rank(raw_match_info):
-    """Handle match_info called w/default args 'pcx' - based on the example rank
-    function http://sqlite.org/fts3.html#appendix_a
-    """
-    match_info = _parse_match_info(raw_match_info)
-    score = 0.0
-    p, c = match_info[:2]
-    for phrase_num in range(p):
-        phrase_info_idx = 2 + (phrase_num * c * 3)
-        for col_num in range(c):
-            col_idx = phrase_info_idx + (col_num * 3)
-            x1, x2 = match_info[col_idx:col_idx + 2]
-            if x1 > 0:
-                score += float(x1) / x2
-    return score
diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py
index 2743b52bad..134297e284 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/pusher.py
@@ -215,7 +215,7 @@ class PusherStore(PusherWorkerStore):
         with self._pushers_id_gen.get_next() as stream_id:
             # no need to lock because `pushers` has a unique key on
             # (app_id, pushkey, user_name) so _simple_upsert will retry
-            newly_inserted = yield self._simple_upsert(
+            yield self._simple_upsert(
                 table="pushers",
                 keyvalues={
                     "app_id": app_id,
@@ -238,7 +238,12 @@ class PusherStore(PusherWorkerStore):
                 lock=False,
             )

-            if newly_inserted:
+            user_has_pusher = self.get_if_user_has_pusher.cache.get(
+                (user_id,), None, update_metrics=False
+            )
+
+            if user_has_pusher is not True:
+                # invalidate, since the user might not have had a pusher before
                 yield self.runInteraction(
                     "add_pusher",
                     self._invalidate_cache_and_stream,
diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
index a8781b0e5d..ce48212265 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/user_directory.py
@@ -168,14 +168,14 @@ class UserDirectoryStore(SQLBaseStore):
         if isinstance(self.database_engine, PostgresEngine):
             # We weight the localpart most highly, then display name and finally
             # server name
-            if new_entry:
+            if self.database_engine.can_native_upsert:
                 sql = """
                     INSERT INTO user_directory_search(user_id, vector)
                     VALUES (?,
                         setweight(to_tsvector('english', ?), 'A')
                         || setweight(to_tsvector('english', ?), 'D')
                         || setweight(to_tsvector('english',
COALESCE(?, '')), 'B') - ) + ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector """ txn.execute( sql, @@ -185,20 +185,45 @@ class UserDirectoryStore(SQLBaseStore): ) ) else: - sql = """ - UPDATE user_directory_search - SET vector = setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - WHERE user_id = ? - """ - txn.execute( - sql, - ( - get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, user_id, + # TODO: Remove this code after we've bumped the minimum version + # of postgres to always support upserts, so we can get rid of + # `new_entry` usage + if new_entry is True: + sql = """ + INSERT INTO user_directory_search(user_id, vector) + VALUES (?, + setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + ) + """ + txn.execute( + sql, + ( + user_id, get_localpart_from_id(user_id), + get_domain_from_id(user_id), display_name, + ) + ) + elif new_entry is False: + sql = """ + UPDATE user_directory_search + SET vector = setweight(to_tsvector('english', ?), 'A') + || setweight(to_tsvector('english', ?), 'D') + || setweight(to_tsvector('english', COALESCE(?, '')), 'B') + WHERE user_id = ? + """ + txn.execute( + sql, + ( + get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, user_id, + ) + ) + else: + raise RuntimeError( + "upsert returned None when 'can_native_upsert' is False" ) - ) elif isinstance(self.database_engine, Sqlite3Engine): value = "%s %s" % (user_id, display_name,) if display_name else user_id self._simple_upsert_txn( diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index 829f47d2e8..452d76ddd5 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -49,6 +49,7 @@ class SQLBaseStoreTestCase(unittest.TestCase): self.db_pool.runWithConnection = runWithConnection config = Mock() + config._enable_native_upserts = False config.event_cache_size = 1 config.database_config = {"name": "sqlite3"} hs = TestHomeServer( diff --git a/tests/test_server.py b/tests/test_server.py index 634a8fbca5..08fb3fe02f 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -19,7 +19,7 @@ from six import StringIO from twisted.internet.defer import Deferred from twisted.python.failure import Failure -from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock +from twisted.test.proto_helpers import AccumulatingProtocol from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET @@ -30,12 +30,18 @@ from synapse.util import Clock from synapse.util.logcontext import make_deferred_yieldable from tests import unittest -from tests.server import FakeTransport, make_request, render, setup_test_homeserver +from tests.server import ( + FakeTransport, + ThreadedMemoryReactorClock, + make_request, + render, + setup_test_homeserver, +) class JsonResourceTests(unittest.TestCase): def setUp(self): - self.reactor = MemoryReactorClock() + self.reactor = ThreadedMemoryReactorClock() self.hs_clock = Clock(self.reactor) self.homeserver = setup_test_homeserver( self.addCleanup, http_client=None, clock=self.hs_clock, reactor=self.reactor diff --git a/tests/unittest.py b/tests/unittest.py index 78d2f740f9..cda549c783 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -96,7 +96,7 @@ class TestCase(unittest.TestCase): method = getattr(self, methodName) - level = getattr(method, "loglevel", 
getattr(self, "loglevel", logging.ERROR)) + level = getattr(method, "loglevel", getattr(self, "loglevel", logging.WARNING)) @around(self) def setUp(orig): @@ -333,7 +333,15 @@ class HomeserverTestCase(TestCase): """ kwargs = dict(kwargs) kwargs.update(self._hs_args) - return setup_test_homeserver(self.addCleanup, *args, **kwargs) + hs = setup_test_homeserver(self.addCleanup, *args, **kwargs) + stor = hs.get_datastore() + + # Run the database background updates. + if hasattr(stor, "do_next_background_update"): + while not self.get_success(stor.has_completed_background_updates()): + self.get_success(stor.do_next_background_update(1)) + + return hs def pump(self, by=0.0): """ diff --git a/tox.ini b/tox.ini index a0f5486829..9b2d78ed6d 100644 --- a/tox.ini +++ b/tox.ini @@ -149,4 +149,5 @@ deps = codecov commands = coverage combine + coverage xml codecov -X gcov -- cgit 1.4.1 From e8c9f1539716b9593de837343da5dc4fa9bc10b0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 24 Jan 2019 11:14:07 +0000 Subject: Replace missed usages of FrozenEvent --- synapse/replication/http/federation.py | 8 ++++++-- synapse/replication/http/send_event.py | 8 ++++++-- synapse/storage/events_worker.py | 8 ++------ 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 64a79da162..2e16c69666 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -70,6 +70,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_payloads.append({ "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -94,9 +95,12 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_and_contexts = [] for event_payload in event_payloads: event_dict = event_payload["event"] + format_ver = content["event_format_version"] internal_metadata = event_payload["internal_metadata"] rejected_reason = event_payload["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) context = yield EventContext.deserialize( self.store, event_payload["context"], diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 5b52c91650..3635015eda 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -74,6 +74,7 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): payload = { "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": 
event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -90,9 +91,12 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): content = parse_json_object_from_request(request) event_dict = content["event"] + format_ver = content["event_format_version"] internal_metadata = content["internal_metadata"] rejected_reason = content["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 599f892858..0a0ca58fc4 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import EventFormatVersions from synapse.api.errors import NotFoundError -from synapse.events import FrozenEvent +from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 # these are only included to make the type annotations work from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event @@ -412,11 +412,7 @@ class EventsWorkerStore(SQLBaseStore): # of a event format version, so it must be a V1 event. format_version = EventFormatVersions.V1 - # TODO: When we implement new event formats we'll need to use a - # different event python type - assert format_version == EventFormatVersions.V1 - - original_ev = FrozenEvent( + original_ev = event_type_from_format_version(format_version)( event_dict=d, internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, -- cgit 1.4.1 From 0e27501ee5454aca019cdfbfe899027aa8ae44a1 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 24 Jan 2019 22:57:41 +1100 Subject: Fix UPSERT check (#4459) --- changelog.d/4459.misc | 1 + synapse/storage/_base.py | 33 ++++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4459.misc (limited to 'synapse/storage') diff --git a/changelog.d/4459.misc b/changelog.d/4459.misc new file mode 100644 index 0000000000..58130b6190 --- /dev/null +++ b/changelog.d/4459.misc @@ -0,0 +1 @@ +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 254fdc04c6..f62f70b9f1 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -26,6 +26,7 @@ from prometheus_client import Histogram from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.engines import PostgresEngine from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext @@ -198,7 +199,12 @@ class SQLBaseStore(object): if self.database_engine.can_native_upsert: # Check ASAP (and then later, every 1s) to see if we have finished # background updates of tables that aren't safe to update. 
- self._clock.call_later(0.0, self._check_safe_to_upsert) + self._clock.call_later( + 0.0, + run_as_background_process, + "upsert_safety_check", + self._check_safe_to_upsert + ) @defer.inlineCallbacks def _check_safe_to_upsert(self): @@ -208,7 +214,7 @@ class SQLBaseStore(object): If there are background updates, we will need to wait, as they may be the addition of indexes that set the UNIQUE constraint that we require. - If the background updates have not completed, wait a second and check again. + If the background updates have not completed, wait 15 sec and check again. """ updates = yield self._simple_select_list( "background_updates", @@ -221,11 +227,16 @@ class SQLBaseStore(object): # The User IPs table in schema #53 was missing a unique index, which we # run as a background update. if "user_ips_device_unique_index" not in updates: - self._unsafe_to_upsert_tables.discard("user_id") + self._unsafe_to_upsert_tables.discard("user_ips") # If there's any tables left to check, reschedule to run. if self._unsafe_to_upsert_tables: - self._clock.call_later(1.0, self._check_safe_to_upsert) + self._clock.call_later( + 15.0, + run_as_background_process, + "upsert_safety_check", + self._check_safe_to_upsert + ) def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -609,7 +620,7 @@ class SQLBaseStore(object): inserting lock (bool): True to lock the table when doing the upsert. Returns: - Deferred(None or bool): Native upserts always return None. Emulated + None or bool: Native upserts always return None. Emulated upserts return True if a new entry was created, False if an existing one was updated. """ @@ -637,6 +648,18 @@ class SQLBaseStore(object): def _simple_upsert_txn_emulated( self, txn, table, keyvalues, values, insertion_values={}, lock=True ): + """ + Args: + table (str): The table to upsert into + keyvalues (dict): The unique key tables and their new values + values (dict): The nonunique columns and their new values + insertion_values (dict): additional key/values to use only when + inserting + lock (bool): True to lock the table when doing the upsert. + Returns: + bool: Return True if a new entry was created, False if an existing + one was updated. + """ # We need to lock the table :(, unless we're *really* careful if lock: self.database_engine.lock_table(txn, table) -- cgit 1.4.1 From 03c85335d1d386c0523af3b6bf992f83bfb905d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:22:09 +0000 Subject: Apply suggestions from code review Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/search.py | 2 +- synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 75c26fe065..49c439313e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -52,7 +52,7 @@ class SearchHandler(BaseHandler): room_id (str): id of the room to search through. 
Returns: - Deferred[iterable[str]]: predecessor room ids + Deferred[iterable[unicode]]: predecessor room ids """ historical_room_ids = [] diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa..0a0691cd00 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -446,7 +446,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): room_id (str) Returns: - Deferred[str]: predecessor room id + Deferred[unicode|None]: predecessor room id """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) -- cgit 1.4.1 From e1781b043b4a73e1b22142b6e09c07ddd782ae57 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 24 Jan 2019 17:23:39 +0000 Subject: Remove redundant create event None check --- synapse/storage/state.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa..c02130d9d6 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -458,9 +458,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # Retrieve the room's create event create_event = yield self.get_event(create_id) - if not create_event: - defer.returnValue(None) - # Return predecessor if present defer.returnValue(create_event.content.get("predecessor", None)) -- cgit 1.4.1 From b8082a54451bb4db30e3b2a4d19dc8cb23330eb7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 24 Jan 2019 17:33:19 +0000 Subject: Use term 'out of band membership' instead --- synapse/events/__init__.py | 9 +++++---- synapse/handlers/federation.py | 4 ++-- synapse/storage/roommember.py | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 5030636c7e..48289cad06 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -41,12 +41,13 @@ class _EventInternalMetadata(object): def is_outlier(self): return getattr(self, "outlier", False) - def is_new_remote_event(self): - """Whether this is a new remote event, like an invite or an invite + def is_out_of_band_membership(self): + """Whether this is an out of band membership, like an invite or an invite rejection. This is needed as those events are marked as outliers, but - they still need to be processed. + they still need to be processed as if they're new events (e.g. updating + invite state in the database, relaying to clients, etc). """ - return getattr(self, "new_remote_event", False) + return getattr(self, "out_of_band_membership", False) def get_send_on_behalf_of(self): """Whether this server should send the event on behalf of another server. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e017cab777..242719b7ce 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1284,7 +1284,7 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.outlier = True - event.internal_metadata.new_remote_event = True + event.internal_metadata.out_of_band_membership = True event.signatures.update( compute_event_signature( @@ -1310,7 +1310,7 @@ class FederationHandler(BaseHandler): # Mark as outlier as we don't have any state for this event; we're not # even in the room. 
event.internal_metadata.outlier = True - event.internal_metadata.new_remote_event = True + event.internal_metadata.out_of_band_membership = True # Try the host that we succesfully called /make_leave/ on first for # the /send_leave/ request. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 40b13de80b..592c1bcd33 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -589,11 +589,11 @@ class RoomMemberStore(RoomMemberWorkerStore): # We update the local_invites table only if the event is "current", # i.e., its something that has just happened. If the event is an - # outlier it is only current if its a "new remote event", like a - # remote invite or a rejection of a remote invite. + # outlier it is only current if its an "out of band membership", + # like a remote invite or a rejection of a remote invite. is_new_state = not backfilled and ( not event.internal_metadata.is_outlier() - or event.internal_metadata.is_new_remote_event() + or event.internal_metadata.is_out_of_band_membership() ) is_mine = self.hs.is_mine_id(event.state_key) if is_new_state and is_mine: -- cgit 1.4.1 From be6a7e47fa75d72466a356c254376b8eb0707bb2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 25 Jan 2019 10:23:51 +0000 Subject: Revert "Require event format version to parse or create events" --- changelog.d/4447.misc | 1 - changelog.d/4451.misc | 1 - synapse/events/__init__.py | 39 +---------------- synapse/events/builder.py | 51 +---------------------- synapse/federation/federation_base.py | 9 ++-- synapse/federation/federation_client.py | 74 ++++++++++++--------------------- synapse/federation/federation_server.py | 41 ++++++------------ synapse/federation/transport/server.py | 4 +- synapse/handlers/federation.py | 72 +++++++++++++------------------- synapse/handlers/message.py | 10 +---- synapse/replication/http/federation.py | 8 +--- synapse/replication/http/send_event.py | 8 +--- synapse/storage/events_worker.py | 8 +++- tests/storage/test_redaction.py | 5 +-- tests/storage/test_roommember.py | 3 +- tests/storage/test_state.py | 3 +- tests/test_visibility.py | 4 -- tests/utils.py | 3 +- 18 files changed, 91 insertions(+), 253 deletions(-) delete mode 100644 changelog.d/4447.misc delete mode 100644 changelog.d/4451.misc (limited to 'synapse/storage') diff --git a/changelog.d/4447.misc b/changelog.d/4447.misc deleted file mode 100644 index 43f8963614..0000000000 --- a/changelog.d/4447.misc +++ /dev/null @@ -1 +0,0 @@ -Add infrastructure to support different event formats diff --git a/changelog.d/4451.misc b/changelog.d/4451.misc deleted file mode 100644 index 43f8963614..0000000000 --- a/changelog.d/4451.misc +++ /dev/null @@ -1 +0,0 @@ -Add infrastructure to support different event formats diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index c3e6caf597..888296933b 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -18,11 +18,7 @@ from distutils.util import strtobool import six -from synapse.api.constants import ( - KNOWN_EVENT_FORMAT_VERSIONS, - KNOWN_ROOM_VERSIONS, - EventFormatVersions, -) +from synapse.api.constants import EventFormatVersions from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -244,36 +240,3 @@ class FrozenEvent(EventBase): self.get("type", None), self.get("state_key", None), ) - - -def room_version_to_event_format(room_version): - """Converts a room version string to the event format - - Args: - room_version (str) - - Returns: - int - """ - if 
room_version not in KNOWN_ROOM_VERSIONS: - raise - - return EventFormatVersions.V1 - - -def event_type_from_format_version(format_version): - """Returns the python type to use to construct an Event object for the - given event format version. - - Args: - format_version (int): The event format version - - Returns: - type: A type that can be initialized as per the initializer of - `FrozenEvent` - """ - if format_version not in KNOWN_EVENT_FORMAT_VERSIONS: - raise Exception( - "No event format %r" % (format_version,) - ) - return FrozenEvent diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 7e63371095..e662eaef10 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -15,39 +15,12 @@ import copy -from synapse.api.constants import RoomVersions from synapse.types import EventID from synapse.util.stringutils import random_string from . import EventBase, FrozenEvent, _event_dict_property -def get_event_builder(room_version, key_values={}, internal_metadata_dict={}): - """Generate an event builder appropriate for the given room version - - Args: - room_version (str): Version of the room that we're creating an - event builder for - key_values (dict): Fields used as the basis of the new event - internal_metadata_dict (dict): Used to create the `_EventInternalMetadata` - object. - - Returns: - EventBuilder - """ - if room_version in { - RoomVersions.V1, - RoomVersions.V2, - RoomVersions.VDH_TEST, - RoomVersions.STATE_V2_TEST, - }: - return EventBuilder(key_values, internal_metadata_dict) - else: - raise Exception( - "No event format defined for version %r" % (room_version,) - ) - - class EventBuilder(EventBase): def __init__(self, key_values={}, internal_metadata_dict={}): signatures = copy.deepcopy(key_values.pop("signatures", {})) @@ -85,29 +58,7 @@ class EventBuilderFactory(object): return e_id.to_string() - def new(self, room_version, key_values={}): - """Generate an event builder appropriate for the given room version - - Args: - room_version (str): Version of the room that we're creating an - event builder for - key_values (dict): Fields used as the basis of the new event - - Returns: - EventBuilder - """ - - # There's currently only the one event version defined - if room_version not in { - RoomVersions.V1, - RoomVersions.V2, - RoomVersions.VDH_TEST, - RoomVersions.STATE_V2_TEST, - }: - raise Exception( - "No event format defined for version %r" % (room_version,) - ) - + def new(self, key_values={}): key_values["event_id"] = self.create_event_id() time_now = int(self.clock.time_msec()) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 5c31e5f85f..d749bfdd3a 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -23,7 +23,7 @@ from twisted.internet.defer import DeferredList from synapse.api.constants import MAX_DEPTH, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.crypto.event_signing import check_event_content_hash -from synapse.events import event_type_from_format_version +from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.http.servlet import assert_params_in_dict from synapse.types import get_domain_from_id @@ -302,12 +302,11 @@ def _is_invite_via_3pid(event): ) -def event_from_pdu_json(pdu_json, event_format_version, outlier=False): +def event_from_pdu_json(pdu_json, outlier=False): """Construct a FrozenEvent from an event json received over federation Args: pdu_json (object): 
pdu as received over federation - event_format_version (int): The event format version outlier (bool): True to mark this event as an outlier Returns: @@ -331,8 +330,8 @@ def event_from_pdu_json(pdu_json, event_format_version, outlier=False): elif depth > MAX_DEPTH: raise SynapseError(400, "Depth too large", Codes.BAD_JSON) - event = event_type_from_format_version(event_format_version)( - pdu_json, + event = FrozenEvent( + pdu_json ) event.internal_metadata.outlier = outlier diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 4b25f891ca..777deabdf7 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -38,7 +38,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import add_hashes_and_signatures -from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -170,13 +169,13 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function - def backfill(self, dest, room_id, limit, extremities): + def backfill(self, dest, context, limit, extremities): """Requests some more historic PDUs for the given context from the given destination server. Args: dest (str): The remote home server to ask. - room_id (str): The room_id to backfill. + context (str): The context to backfill. limit (int): The maximum number of PDUs to return. extremities (list): List of PDU id and origins of the first pdus we have seen from the context @@ -191,15 +190,12 @@ class FederationClient(FederationBase): return transaction_data = yield self.transport_layer.backfill( - dest, room_id, extremities, limit) + dest, context, extremities, limit) logger.debug("backfill transaction_data=%s", repr(transaction_data)) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - pdus = [ - event_from_pdu_json(p, format_ver, outlier=False) + event_from_pdu_json(p, outlier=False) for p in transaction_data["pdus"] ] @@ -243,8 +239,6 @@ class FederationClient(FederationBase): pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) - format_ver = room_version_to_event_format(room_version) - signed_pdu = None for destination in destinations: now = self._clock.time_msec() @@ -260,7 +254,7 @@ class FederationClient(FederationBase): logger.debug("transaction_data %r", transaction_data) pdu_list = [ - event_from_pdu_json(p, format_ver, outlier=outlier) + event_from_pdu_json(p, outlier=outlier) for p in transaction_data["pdus"] ] @@ -354,16 +348,12 @@ class FederationClient(FederationBase): destination, room_id, event_id=event_id, ) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - pdus = [ - event_from_pdu_json(p, format_ver, outlier=True) - for p in result["pdus"] + event_from_pdu_json(p, outlier=True) for p in result["pdus"] ] auth_chain = [ - event_from_pdu_json(p, format_ver, outlier=True) + event_from_pdu_json(p, outlier=True) for p in result.get("auth_chain", []) ] @@ -371,6 +361,8 @@ class FederationClient(FederationBase): ev.event_id for ev in itertools.chain(pdus, auth_chain) ]) + room_version = yield self.store.get_room_version(room_id) + signed_pdus = yield self._check_sigs_and_hash_and_fetch( destination, [p for p in pdus if p.event_id not in seen_events], @@ 
-469,14 +461,13 @@ class FederationClient(FederationBase): destination, room_id, event_id, ) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - auth_chain = [ - event_from_pdu_json(p, format_ver, outlier=True) + event_from_pdu_json(p, outlier=True) for p in res["auth_chain"] ] + room_version = yield self.store.get_room_version(room_id) + signed_auth = yield self._check_sigs_and_hash_and_fetch( destination, auth_chain, outlier=True, room_version=room_version, @@ -566,9 +557,9 @@ class FederationClient(FederationBase): params (dict[str, str|Iterable[str]]): Query parameters to include in the request. Return: - Deferred[tuple[str, FrozenEvent, int]]: resolves to a tuple of - `(origin, event, event_format)` where origin is the remote - homeserver which generated the event. + Deferred[tuple[str, FrozenEvent]]: resolves to a tuple of `origin` + and event where origin is the remote homeserver which generated + the event. Fails with a ``SynapseError`` if the chosen remote server returns a 300/400 code. @@ -588,11 +579,6 @@ class FederationClient(FederationBase): destination, room_id, user_id, membership, params, ) - # Note: If not supplied, the room version may be either v1 or v2, - # however either way the event format version will be v1. - room_version = ret.get("room_version", RoomVersions.V1) - event_format = room_version_to_event_format(room_version) - pdu_dict = ret.get("event", None) if not isinstance(pdu_dict, dict): raise InvalidResponseError("Bad 'event' field in response") @@ -612,7 +598,7 @@ class FederationClient(FederationBase): pdu_dict.pop("origin_server_ts", None) pdu_dict.pop("unsigned", None) - builder = self.event_builder_factory.new(room_version, pdu_dict) + builder = self.event_builder_factory.new(pdu_dict) add_hashes_and_signatures( builder, self.hs.hostname, @@ -621,14 +607,14 @@ class FederationClient(FederationBase): ev = builder.build() defer.returnValue( - (destination, ev, event_format) + (destination, ev) ) return self._try_destination_list( "make_" + membership, destinations, send_request, ) - def send_join(self, destinations, pdu, event_format_version): + def send_join(self, destinations, pdu): """Sends a join event to one of a list of homeservers. Doing so will cause the remote server to add the event to the graph, @@ -638,7 +624,6 @@ class FederationClient(FederationBase): destinations (str): Candidate homeservers which are probably participating in the room. pdu (BaseEvent): event to be sent - event_format_version (int): The event format version Return: Deferred: resolves to a dict with members ``origin`` (a string @@ -684,12 +669,12 @@ class FederationClient(FederationBase): logger.debug("Got content: %s", content) state = [ - event_from_pdu_json(p, event_format_version, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("state", []) ] auth_chain = [ - event_from_pdu_json(p, event_format_version, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("auth_chain", []) ] @@ -767,10 +752,7 @@ class FederationClient(FederationBase): logger.debug("Got response to send_invite: %s", pdu_dict) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - - pdu = event_from_pdu_json(pdu_dict, format_ver) + pdu = event_from_pdu_json(pdu_dict) # Check signatures are correct. 
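# For reference, the parse-then-verify pattern that every inbound path in
# this file follows, shown as a minimal sketch (`handle_inbound_pdu` is a
# hypothetical caller; the two callees are the real ones used here):
@defer.inlineCallbacks
def handle_inbound_pdu(self, pdu_dict):
    pdu = event_from_pdu_json(pdu_dict)         # parse + depth sanity checks
    pdu = yield self._check_sigs_and_hash(pdu)  # verify hashes and signatures
    defer.returnValue(pdu)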
pdu = yield self._check_sigs_and_hash(pdu) @@ -848,14 +830,13 @@ class FederationClient(FederationBase): content=send_content, ) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - auth_chain = [ - event_from_pdu_json(e, format_ver) + event_from_pdu_json(e) for e in content["auth_chain"] ] + room_version = yield self.store.get_room_version(room_id) + signed_auth = yield self._check_sigs_and_hash_and_fetch( destination, auth_chain, outlier=True, room_version=room_version, ) @@ -899,14 +880,13 @@ class FederationClient(FederationBase): timeout=timeout, ) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - events = [ - event_from_pdu_json(e, format_ver) + event_from_pdu_json(e) for e in content.get("events", []) ] + room_version = yield self.store.get_room_version(room_id) + signed_events = yield self._check_sigs_and_hash_and_fetch( destination, events, outlier=False, room_version=room_version, ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 4aa04b9588..cb729c69ea 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -34,7 +34,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature -from synapse.events import room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction @@ -179,13 +178,14 @@ class FederationServer(FederationBase): continue try: - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) + # In future we will actually use the room version to parse the + # PDU into an event. 
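# Note: the result of the following lookup is deliberately discarded. On
# this (reverted) code path it serves purely as an existence check, so a
# PDU for an unknown room raises NotFoundError and is skipped.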
+ yield self.store.get_room_version(room_id) except NotFoundError: logger.info("Ignoring PDU for unknown room_id: %s", room_id) continue - event = event_from_pdu_json(p, format_ver) + event = event_from_pdu_json(p) pdus_by_room.setdefault(room_id, []).append(event) pdu_results = {} @@ -370,9 +370,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_invite_request(self, origin, content, room_version): - format_ver = room_version_to_event_format(room_version) - - pdu = event_from_pdu_json(content, format_ver) + pdu = event_from_pdu_json(content) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) ret_pdu = yield self.handler.on_invite_request(origin, pdu) @@ -380,12 +378,9 @@ class FederationServer(FederationBase): defer.returnValue({"event": ret_pdu.get_pdu_json(time_now)}) @defer.inlineCallbacks - def on_send_join_request(self, origin, content, room_id): + def on_send_join_request(self, origin, content): logger.debug("on_send_join_request: content: %s", content) - - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - pdu = event_from_pdu_json(content, format_ver) + pdu = event_from_pdu_json(content) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) @@ -405,22 +400,13 @@ class FederationServer(FederationBase): origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, room_id) pdu = yield self.handler.on_make_leave_request(room_id, user_id) - - room_version = yield self.store.get_room_version(room_id) - time_now = self._clock.time_msec() - defer.returnValue({ - "event": pdu.get_pdu_json(time_now), - "room_version": room_version, - }) + defer.returnValue({"event": pdu.get_pdu_json(time_now)}) @defer.inlineCallbacks - def on_send_leave_request(self, origin, content, room_id): + def on_send_leave_request(self, origin, content): logger.debug("on_send_leave_request: content: %s", content) - - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - pdu = event_from_pdu_json(content, format_ver) + pdu = event_from_pdu_json(content) origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, pdu.room_id) @@ -466,14 +452,13 @@ class FederationServer(FederationBase): origin_host, _ = parse_server_name(origin) yield self.check_server_matches_acl(origin_host, room_id) - room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) - auth_chain = [ - event_from_pdu_json(e, format_ver) + event_from_pdu_json(e) for e in content["auth_chain"] ] + room_version = yield self.store.get_room_version(room_id) + signed_auth = yield self._check_sigs_and_hash_and_fetch( origin, auth_chain, outlier=True, room_version=room_version, ) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 67ae0212c3..4557a9e66e 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -469,7 +469,7 @@ class FederationSendLeaveServlet(BaseFederationServlet): @defer.inlineCallbacks def on_PUT(self, origin, content, query, room_id, event_id): - content = yield self.handler.on_send_leave_request(origin, content, room_id) + content = yield self.handler.on_send_leave_request(origin, content) defer.returnValue((200, content)) @@ -487,7 +487,7 @@ class 
FederationSendJoinServlet(BaseFederationServlet): def on_PUT(self, origin, content, query, context, event_id): # TODO(paul): assert that context/event_id parsed from path actually # match those given in content - content = yield self.handler.on_send_join_request(origin, content, context) + content = yield self.handler.on_send_join_request(origin, content) defer.returnValue((200, content)) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a4b771049c..453d393ce1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1061,7 +1061,7 @@ class FederationHandler(BaseHandler): """ logger.debug("Joining %s to %s", joinee, room_id) - origin, event, event_format_version = yield self._make_and_verify_event( + origin, event = yield self._make_and_verify_event( target_hosts, room_id, joinee, @@ -1091,9 +1091,7 @@ class FederationHandler(BaseHandler): target_hosts.insert(0, origin) except ValueError: pass - ret = yield self.federation_client.send_join( - target_hosts, event, event_format_version, - ) + ret = yield self.federation_client.send_join(target_hosts, event) origin = ret["origin"] state = ret["state"] @@ -1166,18 +1164,13 @@ class FederationHandler(BaseHandler): """ event_content = {"membership": Membership.JOIN} - room_version = yield self.store.get_room_version(room_id) - - builder = self.event_builder_factory.new( - room_version, - { - "type": EventTypes.Member, - "content": event_content, - "room_id": room_id, - "sender": user_id, - "state_key": user_id, - } - ) + builder = self.event_builder_factory.new({ + "type": EventTypes.Member, + "content": event_content, + "room_id": room_id, + "sender": user_id, + "state_key": user_id, + }) try: event, context = yield self.event_creation_handler.create_new_client_event( @@ -1311,7 +1304,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def do_remotely_reject_invite(self, target_hosts, room_id, user_id): - origin, event, event_format_version = yield self._make_and_verify_event( + origin, event = yield self._make_and_verify_event( target_hosts, room_id, user_id, @@ -1343,7 +1336,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _make_and_verify_event(self, target_hosts, room_id, user_id, membership, content={}, params=None): - origin, event, format_ver = yield self.federation_client.make_membership_event( + origin, pdu = yield self.federation_client.make_membership_event( target_hosts, room_id, user_id, @@ -1352,7 +1345,9 @@ class FederationHandler(BaseHandler): params=params, ) - logger.debug("Got response to make_%s: %s", membership, event) + logger.debug("Got response to make_%s: %s", membership, pdu) + + event = pdu # We should assert some things. # FIXME: Do this in a nicer way @@ -1360,7 +1355,7 @@ class FederationHandler(BaseHandler): assert(event.user_id == user_id) assert(event.state_key == user_id) assert(event.room_id == room_id) - defer.returnValue((origin, event, format_ver)) + defer.returnValue((origin, event)) @defer.inlineCallbacks @log_function @@ -1369,17 +1364,13 @@ class FederationHandler(BaseHandler): leave event for the room and return that. We do *not* persist or process it until the other server has signed it and sent it back. 
""" - room_version = yield self.store.get_room_version(room_id) - builder = self.event_builder_factory.new( - room_version, - { - "type": EventTypes.Member, - "content": {"membership": Membership.LEAVE}, - "room_id": room_id, - "sender": user_id, - "state_key": user_id, - } - ) + builder = self.event_builder_factory.new({ + "type": EventTypes.Member, + "content": {"membership": Membership.LEAVE}, + "room_id": room_id, + "sender": user_id, + "state_key": user_id, + }) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, @@ -2275,16 +2266,14 @@ class FederationHandler(BaseHandler): } if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): - room_version = yield self.store.get_room_version(room_id) - builder = self.event_builder_factory.new(room_version, event_dict) - + builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder ) event, context = yield self.add_display_name_to_third_party_invite( - room_version, event_dict, event, context + event_dict, event, context ) try: @@ -2315,18 +2304,14 @@ class FederationHandler(BaseHandler): Returns: Deferred: resolves (to None) """ - room_version = yield self.store.get_room_version(room_id) - - # NB: event_dict has a particular specced format we might need to fudge - # if we change event formats too much. - builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.new(event_dict) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) event, context = yield self.add_display_name_to_third_party_invite( - room_version, event_dict, event, context + event_dict, event, context ) try: @@ -2346,8 +2331,7 @@ class FederationHandler(BaseHandler): yield member_handler.send_membership_event(None, event, context) @defer.inlineCallbacks - def add_display_name_to_third_party_invite(self, room_version, event_dict, - event, context): + def add_display_name_to_third_party_invite(self, event_dict, event, context): key = ( EventTypes.ThirdPartyInvite, event.content["third_party_invite"]["signed"]["token"] @@ -2371,7 +2355,7 @@ class FederationHandler(BaseHandler): # auth checks. If we need the invite and don't have it then the # auth check code will explode appropriately. 
- builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7aaa4fba33..a7cd779b02 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -278,15 +278,7 @@ class EventCreationHandler(object): """ yield self.auth.check_auth_blocking(requester.user.to_string()) - if event_dict["type"] == EventTypes.Create and event_dict["state_key"] == "": - room_version = event_dict["content"]["room_version"] - else: - try: - room_version = yield self.store.get_room_version(event_dict["room_id"]) - except NotFoundError: - raise AuthError(403, "Unknown room") - - builder = self.event_builder_factory.new(room_version, event_dict) + builder = self.event_builder_factory.new(event_dict) self.validator.validate_new(builder) diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 2e16c69666..64a79da162 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import event_type_from_format_version +from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -70,7 +70,6 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_payloads.append({ "event": event.get_pdu_json(), - "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -95,12 +94,9 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_and_contexts = [] for event_payload in event_payloads: event_dict = event_payload["event"] - format_ver = content["event_format_version"] internal_metadata = event_payload["internal_metadata"] rejected_reason = event_payload["rejected_reason"] - - EventType = event_type_from_format_version(format_ver) - event = EventType(event_dict, internal_metadata, rejected_reason) + event = FrozenEvent(event_dict, internal_metadata, rejected_reason) context = yield EventContext.deserialize( self.store, event_payload["context"], diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 3635015eda..5b52c91650 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import event_type_from_format_version +from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -74,7 +74,6 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): payload = { "event": event.get_pdu_json(), - "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -91,12 +90,9 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): content = parse_json_object_from_request(request) event_dict = content["event"] - format_ver = 
content["event_format_version"] internal_metadata = content["internal_metadata"] rejected_reason = content["rejected_reason"] - - EventType = event_type_from_format_version(format_ver) - event = EventType(event_dict, internal_metadata, rejected_reason) + event = FrozenEvent(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 0a0ca58fc4..599f892858 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import EventFormatVersions from synapse.api.errors import NotFoundError -from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 +from synapse.events import FrozenEvent # these are only included to make the type annotations work from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event @@ -412,7 +412,11 @@ class EventsWorkerStore(SQLBaseStore): # of a event format version, so it must be a V1 event. format_version = EventFormatVersions.V1 - original_ev = event_type_from_format_version(format_version)( + # TODO: When we implement new event formats we'll need to use a + # different event python type + assert format_version == EventFormatVersions.V1 + + original_ev = FrozenEvent( event_dict=d, internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 3957561b1e..02bf975fbf 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -18,7 +18,7 @@ from mock import Mock from twisted.internet import defer -from synapse.api.constants import EventTypes, Membership, RoomVersions +from synapse.api.constants import EventTypes, Membership from synapse.types import RoomID, UserID from tests import unittest @@ -52,7 +52,6 @@ class RedactionTestCase(unittest.TestCase): content = {"membership": membership} content.update(extra_content) builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": EventTypes.Member, "sender": user.to_string(), @@ -75,7 +74,6 @@ class RedactionTestCase(unittest.TestCase): self.depth += 1 builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": EventTypes.Message, "sender": user.to_string(), @@ -96,7 +94,6 @@ class RedactionTestCase(unittest.TestCase): @defer.inlineCallbacks def inject_redaction(self, room, event_id, user, reason): builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": EventTypes.Redaction, "sender": user.to_string(), diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 7fa2f4fd70..978c66133d 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -18,7 +18,7 @@ from mock import Mock from twisted.internet import defer -from synapse.api.constants import EventTypes, Membership, RoomVersions +from synapse.api.constants import EventTypes, Membership from synapse.types import RoomID, UserID from tests import unittest @@ -50,7 +50,6 @@ class RoomMemberStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def inject_room_member(self, room, user, membership, replaces_state=None): builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": EventTypes.Member, "sender": user.to_string(), diff --git a/tests/storage/test_state.py 
b/tests/storage/test_state.py index a1f99134dc..086a39d834 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.api.constants import EventTypes, Membership, RoomVersions +from synapse.api.constants import EventTypes, Membership from synapse.storage.state import StateFilter from synapse.types import RoomID, UserID @@ -52,7 +52,6 @@ class StateStoreTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def inject_state_event(self, room, sender, typ, state_key, content): builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": typ, "sender": sender.to_string(), diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 82d63ce00e..2eea3b098b 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -17,7 +17,6 @@ import logging from twisted.internet import defer from twisted.internet.defer import succeed -from synapse.api.constants import RoomVersions from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server @@ -125,7 +124,6 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): def inject_visibility(self, user_id, visibility): content = {"history_visibility": visibility} builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": "m.room.history_visibility", "sender": user_id, @@ -146,7 +144,6 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): content = {"membership": membership} content.update(extra_content) builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": "m.room.member", "sender": user_id, @@ -168,7 +165,6 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): if content is None: content = {"body": "testytest"} builder = self.event_builder_factory.new( - RoomVersions.V1, { "type": "m.room.message", "sender": user_id, diff --git a/tests/utils.py b/tests/utils.py index 2dfcb70a93..df73c539c3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -26,7 +26,7 @@ from six.moves.urllib import parse as urlparse from twisted.internet import defer, reactor -from synapse.api.constants import EventTypes, RoomVersions +from synapse.api.constants import EventTypes from synapse.api.errors import CodeMessageException, cs_error from synapse.config.server import ServerConfig from synapse.federation.transport import server @@ -624,7 +624,6 @@ def create_room(hs, room_id, creator_id): event_creation_handler = hs.get_event_creation_handler() builder = event_builder_factory.new( - RoomVersions.V1, { "type": EventTypes.Create, "state_key": "", -- cgit 1.4.1 From 678a92cb56d547dcadffa723e29b4855a27d0901 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 24 Jan 2019 11:14:07 +0000 Subject: Replace missed usages of FrozenEvent --- synapse/replication/http/federation.py | 8 ++++++-- synapse/replication/http/send_event.py | 8 ++++++-- synapse/storage/events_worker.py | 8 ++------ 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 64a79da162..2e16c69666 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from 
synapse.replication.http._base import ReplicationEndpoint @@ -70,6 +70,7 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_payloads.append({ "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -94,9 +95,12 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): event_and_contexts = [] for event_payload in event_payloads: event_dict = event_payload["event"] + format_ver = content["event_format_version"] internal_metadata = event_payload["internal_metadata"] rejected_reason = event_payload["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) context = yield EventContext.deserialize( self.store, event_payload["context"], diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 5b52c91650..3635015eda 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer -from synapse.events import FrozenEvent +from synapse.events import event_type_from_format_version from synapse.events.snapshot import EventContext from synapse.http.servlet import parse_json_object_from_request from synapse.replication.http._base import ReplicationEndpoint @@ -74,6 +74,7 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): payload = { "event": event.get_pdu_json(), + "event_format_version": event.format_version, "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, "context": serialized_context, @@ -90,9 +91,12 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): content = parse_json_object_from_request(request) event_dict = content["event"] + format_ver = content["event_format_version"] internal_metadata = content["internal_metadata"] rejected_reason = content["rejected_reason"] - event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + EventType = event_type_from_format_version(format_ver) + event = EventType(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 599f892858..0a0ca58fc4 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import EventFormatVersions from synapse.api.errors import NotFoundError -from synapse.events import FrozenEvent +from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 # these are only included to make the type annotations work from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event @@ -412,11 +412,7 @@ class EventsWorkerStore(SQLBaseStore): # of a event format version, so it must be a V1 event. 
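# For reference, a minimal sketch of the dispatcher reinstated by this
# commit; it matches the V1-only implementation that the revert above
# removed from synapse/events/__init__.py:
def event_type_from_format_version(format_version):
    if format_version not in KNOWN_EVENT_FORMAT_VERSIONS:
        raise Exception("No event format %r" % (format_version,))
    return FrozenEvent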
format_version = EventFormatVersions.V1 - # TODO: When we implement new event formats we'll need to use a - # different event python type - assert format_version == EventFormatVersions.V1 - - original_ev = FrozenEvent( + original_ev = event_type_from_format_version(format_version)( event_dict=d, internal_metadata_dict=internal_metadata, rejected_reason=rejected_reason, -- cgit 1.4.1 From 19530671362021b2ef53f4926c278e346406997b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 25 Jan 2019 10:46:49 +0000 Subject: Disable native upserts for sqlite, as they don't work --- synapse/storage/engines/sqlite.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index c64d73ff21..206919fcd5 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -34,10 +34,14 @@ class Sqlite3Engine(object): @property def can_native_upsert(self): """ - Do we support native UPSERTs? This requires SQLite3 3.24+, plus some - more work we haven't done yet to tell what was inserted vs updated. + Do we support native UPSERTs? """ - return sqlite_version_info >= (3, 24, 0) + # SQLite3 3.24+ supports them, but empirically the unit tests don't work + # when it's enabled. + # FIXME: Figure out what is wrong so we can re-enable native upserts + + # return sqlite_version_info >= (3, 24, 0) + return False def check_database(self, txn): pass -- cgit 1.4.1 From 0b3fd1401fdebb944729cf46d6de9c3bff482933 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 25 Jan 2019 11:25:02 +0000 Subject: Don't require sqlite3 when using postgres (#4466) --- changelog.d/4466.misc | 1 + synapse/storage/engines/sqlite.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/4466.misc (limited to 'synapse/storage') diff --git a/changelog.d/4466.misc b/changelog.d/4466.misc new file mode 100644 index 0000000000..58130b6190 --- /dev/null +++ b/changelog.d/4466.misc @@ -0,0 +1 @@ +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index c64d73ff21..059ab81055 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -15,7 +15,6 @@ import struct import threading -from sqlite3 import sqlite_version_info from synapse.storage.prepare_database import prepare_database @@ -37,7 +36,7 @@ class Sqlite3Engine(object): Do we support native UPSERTs? This requires SQLite3 3.24+, plus some more work we haven't done yet to tell what was inserted vs updated.
""" - return sqlite_version_info >= (3, 24, 0) + return self.module.sqlite_version_info >= (3, 24, 0) def check_database(self, txn): pass -- cgit 1.4.1 From 7072fe30846c47849c3cb142acef03ef3825a892 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 28 Jan 2019 15:43:32 +0000 Subject: Fix UPSERTs on SQLite 3.24+ (#4477) --- changelog.d/4306.misc | 2 +- changelog.d/4471.misc | 2 +- changelog.d/4477.misc | 1 + synapse/storage/_base.py | 10 ++++++++-- synapse/storage/engines/sqlite.py | 10 +++------- synapse/storage/monthly_active_users.py | 12 +++++++++--- tests/storage/test_base.py | 7 +++++-- tests/storage/test_monthly_active_users.py | 4 ++-- 8 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 changelog.d/4477.misc (limited to 'synapse/storage') diff --git a/changelog.d/4306.misc b/changelog.d/4306.misc index 7f48b02fbf..58130b6190 100644 --- a/changelog.d/4306.misc +++ b/changelog.d/4306.misc @@ -1 +1 @@ -Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+. +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/changelog.d/4471.misc b/changelog.d/4471.misc index 7f48b02fbf..994801fd1e 100644 --- a/changelog.d/4471.misc +++ b/changelog.d/4471.misc @@ -1 +1 @@ -Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+. + Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/changelog.d/4477.misc b/changelog.d/4477.misc new file mode 100644 index 0000000000..58130b6190 --- /dev/null +++ b/changelog.d/4477.misc @@ -0,0 +1 @@ +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f62f70b9f1..5109bc3e2e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -27,7 +27,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.stringutils import exception_to_unicode @@ -196,6 +196,12 @@ class SQLBaseStore(object): # A set of tables that are not safe to use native upserts in. self._unsafe_to_upsert_tables = {"user_ips"} + # We add the user_directory_search table to the blacklist on SQLite + # because the existing search table does not have an index, making it + # unsafe to use native upserts. + if isinstance(self.database_engine, Sqlite3Engine): + self._unsafe_to_upsert_tables.add("user_directory_search") + if self.database_engine.can_native_upsert: # Check ASAP (and then later, every 1s) to see if we have finished # background updates of tables that aren't safe to update. @@ -230,7 +236,7 @@ class SQLBaseStore(object): self._unsafe_to_upsert_tables.discard("user_ips") # If there's any tables left to check, reschedule to run. 
- if self._unsafe_to_upsert_tables: + if self.updates: self._clock.call_later( 15.0, run_as_background_process, diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 31b8449ca1..059ab81055 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -33,14 +33,10 @@ class Sqlite3Engine(object): @property def can_native_upsert(self): """ - Do we support native UPSERTs? + Do we support native UPSERTs? This requires SQLite3 3.24+, plus some + more work we haven't done yet to tell what was inserted vs updated. """ - # SQLite3 3.24+ supports them, but empirically the unit tests don't work - # when its enabled. - # FIXME: Figure out what is wrong so we can re-enable native upserts - - # return self.module.sqlite_version_info >= (3, 24, 0) - return False + return self.module.sqlite_version_info >= (3, 24, 0) def check_database(self, txn): pass diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index d6fc8edd4c..9e7e09b8c1 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -197,15 +197,21 @@ class MonthlyActiveUsersStore(SQLBaseStore): if is_support: return - is_insert = yield self.runInteraction( + yield self.runInteraction( "upsert_monthly_active_user", self.upsert_monthly_active_user_txn, user_id ) - if is_insert: - self.user_last_seen_monthly_active.invalidate((user_id,)) + user_in_mau = self.user_last_seen_monthly_active.cache.get( + (user_id,), + None, + update_metrics=False + ) + if user_in_mau is None: self.get_monthly_active_count.invalidate(()) + self.user_last_seen_monthly_active.invalidate((user_id,)) + def upsert_monthly_active_user_txn(self, txn, user_id): """Updates or inserts monthly active user member diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index 452d76ddd5..f18db8c384 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -49,14 +49,17 @@ class SQLBaseStoreTestCase(unittest.TestCase): self.db_pool.runWithConnection = runWithConnection config = Mock() - config._enable_native_upserts = False + config._disable_native_upserts = True config.event_cache_size = 1 config.database_config = {"name": "sqlite3"} + engine = create_engine(config.database_config) + fake_engine = Mock(wraps=engine) + fake_engine.can_native_upsert = False hs = TestHomeServer( "test", db_pool=self.db_pool, config=config, - database_engine=create_engine(config.database_config), + database_engine=fake_engine, ) self.datastore = SQLBaseStore(None, hs) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 9605301b59..d6569a82bb 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -18,12 +18,12 @@ from twisted.internet import defer from synapse.api.constants import UserTypes -from tests.unittest import HomeserverTestCase +from tests import unittest FORTY_DAYS = 40 * 24 * 60 * 60 -class MonthlyActiveUsersTestCase(HomeserverTestCase): +class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): hs = self.setup_test_homeserver() -- cgit 1.4.1 From 94fb63e44f33b6679cdb7f0bb989e9864d6babd0 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 29 Jan 2019 10:04:23 +0000 Subject: Fix typo in upserts code (#4505) * fix obvious problem :| * changelog --- changelog.d/4505.misc | 1 + synapse/storage/_base.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 
100644 changelog.d/4505.misc (limited to 'synapse/storage') diff --git a/changelog.d/4505.misc b/changelog.d/4505.misc new file mode 100644 index 0000000000..994801fd1e --- /dev/null +++ b/changelog.d/4505.misc @@ -0,0 +1 @@ + Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 5109bc3e2e..4872ff55b6 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -236,7 +236,7 @@ class SQLBaseStore(object): self._unsafe_to_upsert_tables.discard("user_ips") # If there's any tables left to check, reschedule to run. - if self.updates: + if updates: self._clock.call_later( 15.0, run_as_background_process, -- cgit 1.4.1 From be47cfa9c97b4acfd884440f1953ed000225eb37 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 25 Jan 2019 17:19:31 +0000 Subject: Refactor event building into EventBuilder This is so that everything is done in one place, making it easier to change the event format based on room version --- synapse/events/builder.py | 284 ++++++++++++++++++++++++-------- synapse/federation/federation_client.py | 20 +-- synapse/handlers/message.py | 34 +--- synapse/server.py | 5 +- synapse/storage/event_federation.py | 23 +++ 5 files changed, 254 insertions(+), 112 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 7e63371095..225b5fd670 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -13,79 +13,156 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy +import attr -from synapse.api.constants import RoomVersions +from twisted.internet import defer + +from synapse.api.constants import ( + KNOWN_EVENT_FORMAT_VERSIONS, + KNOWN_ROOM_VERSIONS, + MAX_DEPTH, +) +from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.types import EventID from synapse.util.stringutils import random_string -from . import EventBase, FrozenEvent, _event_dict_property +from . import ( + _EventInternalMetadata, + event_type_from_format_version, + room_version_to_event_format, +) -def get_event_builder(room_version, key_values={}, internal_metadata_dict={}): - """Generate an event builder appropriate for the given room version +@attr.s(slots=True, cmp=False, frozen=True) +class EventBuilder(object): + """A format independent event builder used to build up the event content + before signing the event. - Args: - room_version (str): Version of the room that we're creating an - event builder for - key_values (dict): Fields used as the basis of the new event - internal_metadata_dict (dict): Used to create the `_EventInternalMetadata` - object. 
+ (Note that while objects of this class are frozen, the + content/unsigned/internal_metadata fields are still mutable) - Returns: - EventBuilder + Attributes: + format_version (int): Event format version + room_id (str) + type (str) + sender (str) + content (dict) + unsigned (dict) + internal_metadata (_EventInternalMetadata) + + _state (StateHandler) + _auth (synapse.api.Auth) + _store (DataStore) + _clock (Clock) + _hostname (str): The hostname of the server creating the event + _signing_key: The signing key to use to sign the event as the server """ - if room_version in { - RoomVersions.V1, - RoomVersions.V2, - RoomVersions.VDH_TEST, - RoomVersions.STATE_V2_TEST, - }: - return EventBuilder(key_values, internal_metadata_dict) - else: - raise Exception( - "No event format defined for version %r" % (room_version,) - ) + _state = attr.ib() + _auth = attr.ib() + _store = attr.ib() + _clock = attr.ib() + _hostname = attr.ib() + _signing_key = attr.ib() + + format_version = attr.ib() + + room_id = attr.ib() + type = attr.ib() + sender = attr.ib() + + content = attr.ib(default=attr.Factory(dict)) + unsigned = attr.ib(default=attr.Factory(dict)) + + # These only exist on a subset of events, so they raise AttributeError if + # someone tries to get them when they don't exist. + _state_key = attr.ib(default=None) + _redacts = attr.ib(default=None) + + internal_metadata = attr.ib(default=attr.Factory(lambda: _EventInternalMetadata({}))) + + @property + def state_key(self): + if self._state_key is not None: + return self._state_key + + raise AttributeError("state_key") + + def is_state(self): + return self._state_key is not None -class EventBuilder(EventBase): - def __init__(self, key_values={}, internal_metadata_dict={}): - signatures = copy.deepcopy(key_values.pop("signatures", {})) - unsigned = copy.deepcopy(key_values.pop("unsigned", {})) + @defer.inlineCallbacks + def build(self, prev_event_ids): + """Transform into a fully signed and hashed event - super(EventBuilder, self).__init__( - key_values, - signatures=signatures, - unsigned=unsigned, - internal_metadata_dict=internal_metadata_dict, + Args: + prev_event_ids (list[str]): The event IDs to use as the prev events + + Returns: + Deferred[FrozenEvent] + """ + + state_ids = yield self._state.get_current_state_ids( + self.room_id, prev_event_ids, + ) + auth_ids = yield self._auth.compute_auth_events( + self, state_ids, ) - event_id = _event_dict_property("event_id") - state_key = _event_dict_property("state_key") - type = _event_dict_property("type") + auth_events = yield self._store.add_event_hashes(auth_ids) + prev_events = yield self._store.add_event_hashes(prev_event_ids) - def build(self): - return FrozenEvent.from_event(self) + old_depth = yield self._store.get_max_depth_of( + prev_event_ids, + ) + depth = old_depth + 1 + # we cap depth of generated events, to ensure that they are not + # rejected by other servers (and so that they can be persisted in + # the db) + depth = min(depth, MAX_DEPTH) -class EventBuilderFactory(object): - def __init__(self, clock, hostname): - self.clock = clock - self.hostname = hostname + event_dict = { + "auth_events": auth_events, + "prev_events": prev_events, + "type": self.type, + "room_id": self.room_id, + "sender": self.sender, + "content": self.content, + "unsigned": self.unsigned, + "depth": depth, + "prev_state": [], + } + + if self.is_state(): + event_dict["state_key"] = self._state_key - self.event_id_count = 0 + if self._redacts is not None: + event_dict["redacts"] = self._redacts - def 
create_event_id(self): - i = str(self.event_id_count) - self.event_id_count += 1 + defer.returnValue( + create_local_event_from_event_dict( + clock=self._clock, + hostname=self._hostname, + signing_key=self._signing_key, + format_version=self.format_version, + event_dict=event_dict, + internal_metadata_dict=self.internal_metadata.get_dict(), + ) + ) - local_part = str(int(self.clock.time())) + i + random_string(5) - e_id = EventID(local_part, self.hostname) +class EventBuilderFactory(object): + def __init__(self, hs): + self.clock = hs.get_clock() + self.hostname = hs.hostname + self.signing_key = hs.config.signing_key[0] - return e_id.to_string() + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.auth = hs.get_auth() - def new(self, room_version, key_values={}): + def new(self, room_version, key_values): """Generate an event builder appropriate for the given room version Args: @@ -98,27 +175,104 @@ class EventBuilderFactory(object): """ # There's currently only the one event version defined - if room_version not in { - RoomVersions.V1, - RoomVersions.V2, - RoomVersions.VDH_TEST, - RoomVersions.STATE_V2_TEST, - }: + if room_version not in KNOWN_ROOM_VERSIONS: raise Exception( "No event format defined for version %r" % (room_version,) ) - key_values["event_id"] = self.create_event_id() + key_values["event_id"] = _create_event_id(self.clock, self.hostname) + + return EventBuilder( + store=self.store, + state=self.state, + auth=self.auth, + clock=self.clock, + hostname=self.hostname, + signing_key=self.signing_key, + format_version=room_version_to_event_format(room_version), + type=key_values["type"], + state_key=key_values.get("state_key"), + room_id=key_values["room_id"], + sender=key_values["sender"], + content=key_values.get("content", {}), + unsigned=key_values.get("unsigned", {}), + redacts=key_values.get("redacts", None), + ) + + +def create_local_event_from_event_dict(clock, hostname, signing_key, + format_version, event_dict, + internal_metadata_dict=None): + """Takes a fully formed event dict, ensuring that fields like `origin` + and `origin_server_ts` have correct values for a locally produced event, + then signs and hashes it. 
+ + Args: + clock (Clock) + hostname (str) + signing_key + format_version (int) + event_dict (dict) + internal_metadata_dict (dict|None) + + Returns: + FrozenEvent + """ + + # There's currently only the one event version defined + if format_version not in KNOWN_EVENT_FORMAT_VERSIONS: + raise Exception( + "No event format defined for version %r" % (format_version,) + ) + + if internal_metadata_dict is None: + internal_metadata_dict = {} + + time_now = int(clock.time_msec()) + + event_dict["event_id"] = _create_event_id(clock, hostname) + + event_dict["origin"] = hostname + event_dict["origin_server_ts"] = time_now + + event_dict.setdefault("unsigned", {}) + age = event_dict["unsigned"].pop("age", 0) + event_dict["unsigned"].setdefault("age_ts", time_now - age) + + event_dict.setdefault("signatures", {}) + + add_hashes_and_signatures( + event_dict, + hostname, + signing_key, + ) + return event_type_from_format_version(format_version)( + event_dict, internal_metadata_dict=internal_metadata_dict, + ) + + +# A counter used when generating new event IDs +_event_id_counter = 0 + + +def _create_event_id(clock, hostname): + """Create a new event ID + + Args: + clock (Clock) + hostname (str): The server name for the event ID + + Returns: + str + """ - time_now = int(self.clock.time_msec()) + global _event_id_counter - key_values.setdefault("origin", self.hostname) - key_values.setdefault("origin_server_ts", time_now) + i = str(_event_id_counter) + _event_id_counter += 1 - key_values.setdefault("unsigned", {}) - age = key_values["unsigned"].pop("age", 0) - key_values["unsigned"].setdefault("age_ts", time_now - age) + local_part = str(int(clock.time())) + i + random_string(5) - key_values["signatures"] = {} + e_id = EventID(local_part, hostname) - return EventBuilder(key_values=key_values,) + return e_id.to_string() diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 71809893c5..be3bb59431 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -37,8 +37,7 @@ from synapse.api.errors import ( HttpResponseException, SynapseError, ) -from synapse.crypto.event_signing import add_hashes_and_signatures -from synapse.events import room_version_to_event_format +from synapse.events import builder, room_version_to_event_format from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -72,7 +71,8 @@ class FederationClient(FederationBase): self.state = hs.get_state_handler() self.transport_layer = hs.get_federation_transport_client() - self.event_builder_factory = hs.get_event_builder_factory() + self.hostname = hs.hostname + self.signing_key = hs.config.signing_key[0] self._get_pdu_cache = ExpiringCache( cache_name="get_pdu_cache", @@ -608,18 +608,10 @@ class FederationClient(FederationBase): if "prev_state" not in pdu_dict: pdu_dict["prev_state"] = [] - # Strip off the fields that we want to clobber. 
- pdu_dict.pop("origin", None) - pdu_dict.pop("origin_server_ts", None) - pdu_dict.pop("unsigned", None) - - builder = self.event_builder_factory.new(room_version, pdu_dict) - add_hashes_and_signatures( - builder, - self.hs.hostname, - self.hs.config.signing_key[0] + ev = builder.create_local_event_from_event_dict( + self._clock, self.hostname, self.signing_key, + format_version=event_format, event_dict=pdu_dict, ) - ev = builder.build() defer.returnValue( (destination, ev, event_format) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 05d1370c18..ac6f4fd985 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -22,7 +22,7 @@ from canonicaljson import encode_canonical_json, json from twisted.internet import defer from twisted.internet.defer import succeed -from synapse.api.constants import MAX_DEPTH, EventTypes, Membership, RoomVersions +from synapse.api.constants import EventTypes, Membership, RoomVersions from synapse.api.errors import ( AuthError, Codes, @@ -31,7 +31,6 @@ from synapse.api.errors import ( SynapseError, ) from synapse.api.urls import ConsentURIBuilder -from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.replication.http.send_event import ReplicationSendEventRestServlet @@ -545,40 +544,17 @@ class EventCreationHandler(object): prev_events_and_hashes = \ yield self.store.get_prev_events_for_room(builder.room_id) - if prev_events_and_hashes: - depth = max([d for _, _, d in prev_events_and_hashes]) + 1 - # we cap depth of generated events, to ensure that they are not - # rejected by other servers (and so that they can be persisted in - # the db) - depth = min(depth, MAX_DEPTH) - else: - depth = 1 - prev_events = [ (event_id, prev_hashes) for event_id, prev_hashes, _ in prev_events_and_hashes ] - builder.prev_events = prev_events - builder.depth = depth - - context = yield self.state.compute_event_context(builder) - if requester: - context.app_service = requester.app_service - - if builder.is_state(): - builder.prev_state = yield self.store.add_event_hashes( - context.prev_state_events - ) - - yield self.auth.add_auth_events(builder, context) - - signing_key = self.hs.config.signing_key[0] - add_hashes_and_signatures( - builder, self.server_name, signing_key + event = yield builder.build( + prev_event_ids=[p for p, _ in prev_events], ) + context = yield self.state.compute_event_context(event) - event = builder.build() + self.validator.validate_new(event) logger.debug( "Created event %s", diff --git a/synapse/server.py b/synapse/server.py index c8914302cf..6c52101616 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -355,10 +355,7 @@ class HomeServer(object): return Keyring(self) def build_event_builder_factory(self): - return EventBuilderFactory( - clock=self.get_clock(), - hostname=self.hostname, - ) + return EventBuilderFactory(self) def build_filtering(self): return Filtering(self) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index d3b9dea1d6..38809ed0fc 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -125,6 +125,29 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, return dict(txn) + @defer.inlineCallbacks + def get_max_depth_of(self, event_ids): + """Returns the max depth of a set of event IDs + + Args: + event_ids (list[str]) + + Returns + Deferred[int] + """ + rows = yield 
self._simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=("depth",), + desc="get_max_depth_of", + ) + + if not rows: + defer.returnValue(0) + else: + defer.returnValue(max(row["depth"] for row in rows)) + def _get_oldest_events_in_room_txn(self, txn, room_id): return self._simple_select_onecol_txn( txn, -- cgit 1.4.1 From 7709d2bd167e27493b134e938410c307f8c10396 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 28 Jan 2019 21:09:45 +0000 Subject: Implement rechecking of redactions --- synapse/api/auth.py | 4 ++-- synapse/event_auth.py | 24 ++++++++++++++++++------ synapse/events/__init__.py | 3 +++ synapse/handlers/message.py | 6 +++++- synapse/storage/events_worker.py | 26 +++++++++++++++++++++++++- 5 files changed, 53 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 7b213e54c8..963e0e7d60 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -627,7 +627,7 @@ class Auth(object): defer.returnValue(auth_ids) - def check_redaction(self, event, auth_events): + def check_redaction(self, room_version, event, auth_events): """Check whether the event sender is allowed to redact the target event. Returns: @@ -640,7 +640,7 @@ class Auth(object): AuthError if the event sender is definitely not allowed to redact the target event. """ - return event_auth.check_redaction(event, auth_events) + return event_auth.check_redaction(room_version, event, auth_events) @defer.inlineCallbacks def check_can_change_room_list(self, room_id, user): diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 9adedbbb02..a95d142f0c 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -20,7 +20,13 @@ from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json from unpaddedbase64 import decode_base64 -from synapse.api.constants import KNOWN_ROOM_VERSIONS, EventTypes, JoinRules, Membership +from synapse.api.constants import ( + KNOWN_ROOM_VERSIONS, + EventTypes, + JoinRules, + Membership, + RoomVersions, +) from synapse.api.errors import AuthError, EventSizeError, SynapseError from synapse.types import UserID, get_domain_from_id @@ -168,7 +174,7 @@ def check(room_version, event, auth_events, do_sig_check=True, do_size_check=Tru _check_power_levels(event, auth_events) if event.type == EventTypes.Redaction: - check_redaction(event, auth_events) + check_redaction(room_version, event, auth_events) logger.debug("Allowing! %s", event) @@ -422,7 +428,7 @@ def _can_send_event(event, auth_events): return True -def check_redaction(event, auth_events): +def check_redaction(room_version, event, auth_events): """Check whether the event sender is allowed to redact the target event. 
Returns: @@ -442,10 +448,16 @@ def check_redaction(event, auth_events): if user_level >= redact_level: return False - redacter_domain = get_domain_from_id(event.event_id) - redactee_domain = get_domain_from_id(event.redacts) - if redacter_domain == redactee_domain: + if room_version in (RoomVersions.V1, RoomVersions.V2, RoomVersions.VDH_TEST): + redacter_domain = get_domain_from_id(event.event_id) + redactee_domain = get_domain_from_id(event.redacts) + if redacter_domain == redactee_domain: + return True + elif room_version == RoomVersions.V3: + event.internal_metadata.recheck_redaction = True return True + else: + raise RuntimeError("Unrecognized room version %r" % (room_version,)) raise AuthError( 403, diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 3fe52aaa45..70d3c0fbd9 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -63,6 +63,9 @@ class _EventInternalMetadata(object): """ return getattr(self, "send_on_behalf_of", None) + def need_to_check_redaction(self): + return getattr(self, "recheck_redaction", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 05d1370c18..0cfced43d5 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -767,7 +767,8 @@ class EventCreationHandler(object): auth_events = { (e.type, e.state_key): e for e in auth_events.values() } - if self.auth.check_redaction(event, auth_events=auth_events): + room_version = yield self.store.get_room_version(event.room_id) + if self.auth.check_redaction(room_version, event, auth_events=auth_events): original_event = yield self.store.get_event( event.redacts, check_redacted=False, @@ -781,6 +782,9 @@ class EventCreationHandler(object): "You don't have permission to redact events" ) + # We've already checked. + event.internal_metadata.recheck_redaction = False + if event.type == EventTypes.Create: prev_state_ids = yield context.get_prev_state_ids(self.store) if prev_state_ids: diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 0a0ca58fc4..9ce19430e8 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -21,13 +21,14 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.constants import EventFormatVersions +from synapse.api.constants import EventFormatVersions, EventTypes from synapse.api.errors import NotFoundError from synapse.events import FrozenEvent, event_type_from_format_version # noqa: F401 # these are only included to make the type annotations work from synapse.events.snapshot import EventContext # noqa: F401 from synapse.events.utils import prune_event from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.types import get_domain_from_id from synapse.util.logcontext import ( LoggingContext, PreserveLoggingContext, @@ -174,6 +175,29 @@ class EventsWorkerStore(SQLBaseStore): if not entry: continue + # Some redactions in room version v3 need to be rechecked if we + # didn't have the redacted event at the time, so we recheck on read + # instead. 
+ if not allow_rejected and entry.event.type == EventTypes.Redaction: + if entry.event.internal_metadata.need_to_check_redaction(): + orig = yield self.get_event( + entry.event.redacts, + allow_none=True, + allow_rejected=True, + get_prev_content=False, + ) + expected_domain = get_domain_from_id(entry.event.sender) + if orig and get_domain_from_id(orig.sender) == expected_domain: + # This redaction event is allowed. Mark as not needing a + # recheck. + entry.event.recheck_redaction = False + else: + # We don't have the event that is being redacted, so we + # assume that the event isn't authorized for now. (If we + # later receive the event, then we will always redact + # it anyway, since we have this redaction) + continue + if allow_rejected or not entry.event.rejected_reason: if check_redacted and entry.redacted_event: event = entry.redacted_event -- cgit 1.4.1 From 82165eeb052ce69f078cdf3371127cc89c702424 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Jan 2019 21:14:39 +0000 Subject: Update synapse/storage/events_worker.py Co-Authored-By: erikjohnston --- synapse/storage/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 9ce19430e8..9dcb27b65d 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -175,7 +175,7 @@ class EventsWorkerStore(SQLBaseStore): if not entry: continue - # Some redactions in room version v3 need to be rechecked if we + # Starting in room version v3, some redactions need to be rechecked if we # didn't have the redacted event at the time, so we recheck on read # instead. if not allow_rejected and entry.event.type == EventTypes.Redaction: -- cgit 1.4.1 From b5d510ad64ad3ed4e40a61c389af875058a73258 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Jan 2019 21:45:28 +0000 Subject: Remove unused arg --- synapse/storage/events_worker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 9dcb27b65d..df8169b91e 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -163,7 +163,6 @@ class EventsWorkerStore(SQLBaseStore): missing_events = yield self._enqueue_events( missing_events_ids, - check_redacted=check_redacted, allow_rejected=allow_rejected, ) @@ -334,7 +333,7 @@ class EventsWorkerStore(SQLBaseStore): self.hs.get_reactor().callFromThread(fire, event_list, e) @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + def _enqueue_events(self, events, allow_rejected=False): """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. 
-- cgit 1.4.1 From 6d23ec21116d5312e26315c0c75f3761398daf00 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Jan 2019 21:45:53 +0000 Subject: Fix typo --- synapse/storage/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index df8169b91e..0ff80fa728 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -189,7 +189,7 @@ class EventsWorkerStore(SQLBaseStore): if orig and get_domain_from_id(orig.sender) == expected_domain: # This redaction event is allowed. Mark as not needing a # recheck. - entry.event.recheck_redaction = False + entry.event.internal_metadata.recheck_redaction = False else: # We don't have the event that is being redacted, so we # assume that the event isn't authorized for now. (If we -- cgit 1.4.1 From 4db252c0738c590ed5fa0b53c61f9dd9da1cb33e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Jan 2019 21:46:17 +0000 Subject: Check redaction state when event is pulled out of the database --- synapse/storage/events_worker.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 0ff80fa728..1d75e309b3 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -466,6 +466,19 @@ class EventsWorkerStore(SQLBaseStore): # will serialise this field correctly redacted_event.unsigned["redacted_because"] = because + # Starting in room version v3, some redactions need to be + # rechecked if we didn't have the redacted event at the + # time, so we recheck on read instead. + if because.internal_metadata.need_to_check_redaction(): + expected_domain = get_domain_from_id(original_ev.sender) + if get_domain_from_id(because.sender) == expected_domain: + # This redaction event is allowed. Mark as not needing a + # recheck. + because.internal_metadata.recheck_redaction = False + else: + # Senders don't match, so the event is actually redacted + redacted_event = None + cache_entry = _EventCacheEntry( event=original_ev, redacted_event=redacted_event, -- cgit 1.4.1 From c21b7cbc0978080beb9837c3027b15a094670f91 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Jan 2019 21:53:48 +0000 Subject: Update synapse/storage/events_worker.py --- synapse/storage/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 1d75e309b3..ebe1429acb 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -476,7 +476,7 @@ class EventsWorkerStore(SQLBaseStore): # recheck. 
because.internal_metadata.recheck_redaction = False else: - # Senders don't match, so the event is actually redacted + # Senders don't match, so the event isn't actually redacted redacted_event = None cache_entry = _EventCacheEntry( -- cgit 1.4.1 From 3f189c902ea1146a497512049aa38fe9a0a91169 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 30 Jan 2019 10:53:17 +0000 Subject: Fix flake8 (#4519) --- changelog.d/4519.misc | 1 + synapse/_scripts/register_new_matrix_user.py | 4 +- synapse/handlers/directory.py | 4 +- synapse/handlers/federation.py | 2 +- synapse/push/clientformat.py | 2 +- synapse/storage/__init__.py | 2 +- synapse/storage/events.py | 168 +++++++++++++-------------- synapse/storage/events_worker.py | 2 +- tests/storage/test_background_update.py | 2 +- tests/storage/test_end_to_end_keys.py | 3 - tests/storage/test_keys.py | 3 - tests/storage/test_state.py | 3 - 12 files changed, 94 insertions(+), 102 deletions(-) create mode 100644 changelog.d/4519.misc (limited to 'synapse/storage') diff --git a/changelog.d/4519.misc b/changelog.d/4519.misc new file mode 100644 index 0000000000..897e783d28 --- /dev/null +++ b/changelog.d/4519.misc @@ -0,0 +1 @@ +Fix code to comply with linting in PyFlakes 3.7.1. diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index 4c3abf06fe..6e93f5a0c6 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -46,7 +46,7 @@ def request_registration( # Get the nonce r = requests.get(url, verify=False) - if r.status_code is not 200: + if r.status_code != 200: _print("ERROR! Received %d %s" % (r.status_code, r.reason)) if 400 <= r.status_code < 500: try: @@ -84,7 +84,7 @@ def request_registration( _print("Sending registration request...") r = requests.post(url, json=data, verify=False) - if r.status_code is not 200: + if r.status_code != 200: _print("ERROR! 
Received %d %s" % (r.status_code, r.reason)) if 400 <= r.status_code < 500: try: diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 0699731c13..6bb254f899 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -57,8 +57,8 @@ class DirectoryHandler(BaseHandler): # general association creation for both human users and app services for wchar in string.whitespace: - if wchar in room_alias.localpart: - raise SynapseError(400, "Invalid characters in room alias") + if wchar in room_alias.localpart: + raise SynapseError(400, "Invalid characters in room alias") if not self.hs.is_mine(room_alias): raise SynapseError(400, "Room alias must be local") diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index f89dabb9eb..083f2e0ac3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -102,7 +102,7 @@ class FederationHandler(BaseHandler): self.hs = hs - self.store = hs.get_datastore() # type: synapse.storage.DataStore + self.store = hs.get_datastore() self.federation_client = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index ecbf364a5e..8bd96b1178 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -84,7 +84,7 @@ def _rule_to_template(rule): templaterule["pattern"] = thecond["pattern"] if unscoped_rule_id: - templaterule['rule_id'] = unscoped_rule_id + templaterule['rule_id'] = unscoped_rule_id if 'default' in rule: templaterule['default'] = rule['default'] return templaterule diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 24329879e5..42cd3c83ad 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -317,7 +317,7 @@ class DataStore(RoomMemberStore, RoomStore, thirty_days_ago_in_secs)) for row in txn: - if row[0] is 'unknown': + if row[0] == 'unknown': pass results[row[0]] = row[1] diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3e1915fb87..81b250480d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -904,106 +904,106 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in iteritems(state_delta_by_room): - to_delete, to_insert = current_state_tuple - - # First we add entries to the current_state_delta_stream. We - # do this before updating the current_state_events table so - # that we can use it to calculate the `prev_event_id`. (This - # allows us to not have to pull out the existing state - # unnecessarily). - sql = """ - INSERT INTO current_state_delta_stream - (stream_id, room_id, type, state_key, event_id, prev_event_id) - SELECT ?, ?, ?, ?, ?, ( - SELECT event_id FROM current_state_events - WHERE room_id = ? AND type = ? AND state_key = ? 
- ) - """ - txn.executemany(sql, ( - ( - max_stream_order, room_id, etype, state_key, None, - room_id, etype, state_key, - ) - for etype, state_key in to_delete - # We sanity check that we're deleting rather than updating - if (etype, state_key) not in to_insert - )) - txn.executemany(sql, ( - ( - max_stream_order, room_id, etype, state_key, ev_id, - room_id, etype, state_key, - ) - for (etype, state_key), ev_id in iteritems(to_insert) - )) + to_delete, to_insert = current_state_tuple - # Now we actually update the current_state_events table - - txn.executemany( - "DELETE FROM current_state_events" - " WHERE room_id = ? AND type = ? AND state_key = ?", - ( - (room_id, etype, state_key) - for etype, state_key in itertools.chain(to_delete, to_insert) - ), + # First we add entries to the current_state_delta_stream. We + # do this before updating the current_state_events table so + # that we can use it to calculate the `prev_event_id`. (This + # allows us to not have to pull out the existing state + # unnecessarily). + sql = """ + INSERT INTO current_state_delta_stream + (stream_id, room_id, type, state_key, event_id, prev_event_id) + SELECT ?, ?, ?, ?, ?, ( + SELECT event_id FROM current_state_events + WHERE room_id = ? AND type = ? AND state_key = ? ) - - self._simple_insert_many_txn( - txn, - table="current_state_events", - values=[ - { - "event_id": ev_id, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - } - for key, ev_id in iteritems(to_insert) - ], + """ + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, None, + room_id, etype, state_key, ) - - txn.call_after( - self._curr_state_delta_stream_cache.entity_has_changed, - room_id, max_stream_order, + for etype, state_key in to_delete + # We sanity check that we're deleting rather than updating + if (etype, state_key) not in to_insert + )) + txn.executemany(sql, ( + ( + max_stream_order, room_id, etype, state_key, ev_id, + room_id, etype, state_key, ) + for (etype, state_key), ev_id in iteritems(to_insert) + )) - # Invalidate the various caches - - # Figure out the changes of membership to invalidate the - # `get_rooms_for_user` cache. - # We find out which membership events we may have deleted - # and which we have added, then we invlidate the caches for all - # those users. - members_changed = set( - state_key - for ev_type, state_key in itertools.chain(to_delete, to_insert) - if ev_type == EventTypes.Member - ) + # Now we actually update the current_state_events table - for member in members_changed: - self._invalidate_cache_and_stream( - txn, self.get_rooms_for_user_with_stream_ordering, (member,) - ) + txn.executemany( + "DELETE FROM current_state_events" + " WHERE room_id = ? AND type = ? 
AND state_key = ?", + ( + (room_id, etype, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), + ) - for host in set(get_domain_from_id(u) for u in members_changed): - self._invalidate_cache_and_stream( - txn, self.is_host_joined, (room_id, host) - ) - self._invalidate_cache_and_stream( - txn, self.was_host_joined, (room_id, host) - ) + self._simple_insert_many_txn( + txn, + table="current_state_events", + values=[ + { + "event_id": ev_id, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + } + for key, ev_id in iteritems(to_insert) + ], + ) + + txn.call_after( + self._curr_state_delta_stream_cache.entity_has_changed, + room_id, max_stream_order, + ) + + # Invalidate the various caches + + # Figure out the changes of membership to invalidate the + # `get_rooms_for_user` cache. + # We find out which membership events we may have deleted + # and which we have added, then we invlidate the caches for all + # those users. + members_changed = set( + state_key + for ev_type, state_key in itertools.chain(to_delete, to_insert) + if ev_type == EventTypes.Member + ) + for member in members_changed: self._invalidate_cache_and_stream( - txn, self.get_users_in_room, (room_id,) + txn, self.get_rooms_for_user_with_stream_ordering, (member,) ) + for host in set(get_domain_from_id(u) for u in members_changed): self._invalidate_cache_and_stream( - txn, self.get_room_summary, (room_id,) + txn, self.is_host_joined, (room_id, host) ) - self._invalidate_cache_and_stream( - txn, self.get_current_state_ids, (room_id,) + txn, self.was_host_joined, (room_id, host) ) + self._invalidate_cache_and_stream( + txn, self.get_users_in_room, (room_id,) + ) + + self._invalidate_cache_and_stream( + txn, self.get_room_summary, (room_id,) + ) + + self._invalidate_cache_and_stream( + txn, self.get_current_state_ids, (room_id,) + ) + def _update_forward_extremities_txn(self, txn, new_forward_extremities, max_stream_order): for room_id, new_extrem in iteritems(new_forward_extremities): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index ebe1429acb..57dae324c7 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -220,7 +220,7 @@ class EventsWorkerStore(SQLBaseStore): defer.returnValue(events) def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) + self._get_event_cache.invalidate((event_id,)) def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): """Fetch events from the caches diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index 81403727c5..5568a607c7 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -11,7 +11,7 @@ class BackgroundUpdateTestCase(unittest.TestCase): def setUp(self): hs = yield setup_test_homeserver( self.addCleanup - ) # type: synapse.server.HomeServer + ) self.store = hs.get_datastore() self.clock = hs.get_clock() diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py index b83f7336d3..11fb8c0c19 100644 --- a/tests/storage/test_end_to_end_keys.py +++ b/tests/storage/test_end_to_end_keys.py @@ -20,9 +20,6 @@ import tests.utils class EndToEndKeyStoreTestCase(tests.unittest.TestCase): - def __init__(self, *args, **kwargs): - super(EndToEndKeyStoreTestCase, self).__init__(*args, **kwargs) - self.store = None # type: synapse.storage.DataStore @defer.inlineCallbacks def setUp(self): diff --git 
a/tests/storage/test_keys.py b/tests/storage/test_keys.py index 47f4a8ceac..0d2dc9f325 100644 --- a/tests/storage/test_keys.py +++ b/tests/storage/test_keys.py @@ -22,9 +22,6 @@ import tests.utils class KeyStoreTestCase(tests.unittest.TestCase): - def __init__(self, *args, **kwargs): - super(KeyStoreTestCase, self).__init__(*args, **kwargs) - self.store = None # type: synapse.storage.keys.KeyStore @defer.inlineCallbacks def setUp(self): diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index a1f99134dc..99cd3e09eb 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -28,9 +28,6 @@ logger = logging.getLogger(__name__) class StateStoreTestCase(tests.unittest.TestCase): - def __init__(self, *args, **kwargs): - super(StateStoreTestCase, self).__init__(*args, **kwargs) - self.store = None # type: synapse.storage.DataStore @defer.inlineCallbacks def setUp(self): -- cgit 1.4.1 From e707e7b38d67240451d0818c1508bee900952bb0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 31 Jan 2019 15:34:17 +0000 Subject: Fix infinite loop when an event is redacted in a v3 room (#4535) --- changelog.d/4535.bugfix | 1 + synapse/storage/events_worker.py | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4535.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4535.bugfix b/changelog.d/4535.bugfix new file mode 100644 index 0000000000..facd344cdd --- /dev/null +++ b/changelog.d/4535.bugfix @@ -0,0 +1 @@ +Fix infinite loop when an event is redacted in a v3 room diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 57dae324c7..1716be529a 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -161,6 +161,12 @@ class EventsWorkerStore(SQLBaseStore): log_ctx = LoggingContext.current_context() log_ctx.record_event_fetch(len(missing_events_ids)) + # Note that _enqueue_events is also responsible for turning db rows + # into FrozenEvents (via _get_event_from_row), which involves seeing if + # the events have been redacted, and if so pulling the redaction event out + # of the database to check it. + # + # _enqueue_events is a bit of a rubbish name but naming is hard. missing_events = yield self._enqueue_events( missing_events_ids, allow_rejected=allow_rejected, @@ -179,14 +185,35 @@ class EventsWorkerStore(SQLBaseStore): # instead. if not allow_rejected and entry.event.type == EventTypes.Redaction: if entry.event.internal_metadata.need_to_check_redaction(): - orig = yield self.get_event( - entry.event.redacts, + # XXX: we need to avoid calling get_event here. + # + # The problem is that we end up at this point when an event + # which has been redacted is pulled out of the database by + # _enqueue_events, because _enqueue_events needs to check the + # redaction before it can cache the redacted event. So obviously, + # calling get_event to get the redacted event out of the database + # gives us an infinite loop. + # + # For now (quick hack to fix during 0.99 release cycle), we just + # go and fetch the relevant row from the db, but it would be nice + # to think about how we can cache this rather than hit the db + # every time we access a redaction event. + # + # One thought on how to do this: + # 1. split _get_events up so that it is divided into (a) get the + # rawish event from the db/cache, (b) do the redaction/rejection + # filtering + # 2. 
have _get_event_from_row just call the first half of that + + orig_sender = yield self._simple_select_one_onecol( + table="events", + keyvalues={"event_id": entry.event.redacts}, + retcol="sender", allow_none=True, - allow_rejected=True, - get_prev_content=False, ) + expected_domain = get_domain_from_id(entry.event.sender) - if orig and get_domain_from_id(orig.sender) == expected_domain: + if orig_sender and get_domain_from_id(orig_sender) == expected_domain: # This redaction event is allowed. Mark as not needing a # recheck. entry.event.internal_metadata.recheck_redaction = False -- cgit 1.4.1 From 3ed3cb43394b41e76f4739f22760c1d8ebfed3c7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 31 Jan 2019 18:21:39 +0000 Subject: New function for getting room's create event --- synapse/handlers/room.py | 8 +------- synapse/storage/state.py | 31 ++++++++++++++++++++----------- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a69441b96f..5e40e9ea46 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -266,13 +266,7 @@ class RoomCreationHandler(BaseHandler): # Check if old room was non-federatable # Get old room's create event - old_room_create_event_ids = yield self.store.get_filtered_current_state_ids( - old_room_id, StateFilter.from_types(((EventTypes.Create, ""),)), - ) - old_room_create_event_dict = yield self.store.get_events( - old_room_create_event_ids.values(), - ) - old_room_create_event = list(old_room_create_event_dict.values())[0] + old_room_create_event = yield self.store.get_create_event_for_room(old_room_id) # Check if the create event specified a non-federatable room if not old_room_create_event.content.get("m.federate", True): diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c3ab7db7ae..522aaee918 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -24,7 +24,6 @@ import attr from twisted.internet import defer from synapse.api.constants import EventTypes -from synapse.api.errors import NotFoundError from synapse.storage._base import SQLBaseStore from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine @@ -428,13 +427,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): """ # for now we do this by looking at the create event. We may want to cache this # more intelligently in future. - state_ids = yield self.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) - - if not create_id: - raise NotFoundError("Unknown room %s" % (room_id)) - create_event = yield self.get_event(create_id) + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) defer.returnValue(create_event.content.get("room_version", "1")) @defer.inlineCallbacks @@ -448,6 +443,22 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[unicode|None]: predecessor room id """ + # Retrieve the room's create event + create_event = yield self.get_create_event_for_room(room_id) + + # Return predecessor if present + defer.returnValue(create_event.content.get("predecessor", None)) + + @defer.inlineCallbacks + def get_create_event_for_room(self, room_id): + """Get the create state event for a room. + + Args: + room_id (str) + + Returns: + Deferred[EventBase|None]: The room creation event. 
None if can not be found + """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) @@ -455,11 +466,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): if not create_id: defer.returnValue(None) - # Retrieve the room's create event + # Retrieve the room's create event and return create_event = yield self.get_event(create_id) - - # Return predecessor if present - defer.returnValue(create_event.content.get("predecessor", None)) + defer.returnValue(create_event) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): -- cgit 1.4.1 From d239f67c25a31257a11852be08e6615e99423fe7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 31 Jan 2019 18:34:15 +0000 Subject: Raise an exception instead of returning None --- synapse/storage/state.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 522aaee918..d14a7b2538 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -24,6 +24,7 @@ import attr from twisted.internet import defer from synapse.api.constants import EventTypes +from synapse.api.errors import NotFoundError from synapse.storage._base import SQLBaseStore from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine @@ -442,6 +443,9 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[unicode|None]: predecessor room id + + Raises: + NotFoundError if the room is unknown """ # Retrieve the room's create event create_event = yield self.get_create_event_for_room(room_id) @@ -457,14 +461,17 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): room_id (str) Returns: - Deferred[EventBase|None]: The room creation event. None if can not be found + Deferred[EventBase]: The room creation event. + + Raises: + NotFoundError if the room is unknown """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) # If we can't find the create event, assume we've hit a dead end if not create_id: - defer.returnValue(None) + raise NotFoundError("Unknown room %s" % (room_id)) # Retrieve the room's create event and return create_event = yield self.get_event(create_id) -- cgit 1.4.1 From 627ecd358eda66f81afd676523f520762571728c Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 5 Feb 2019 12:16:28 +0000 Subject: Filter user directory state query to a subset of state events (#4462) * Filter user directory state query to a subset of state events * Add changelog --- changelog.d/4462.misc | 1 + synapse/storage/user_directory.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 changelog.d/4462.misc (limited to 'synapse/storage') diff --git a/changelog.d/4462.misc b/changelog.d/4462.misc new file mode 100644 index 0000000000..03a4d7ae1c --- /dev/null +++ b/changelog.d/4462.misc @@ -0,0 +1 @@ +Change the user directory state query to use a filtered call to the db instead of a generic one. 
\ No newline at end of file diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index ce48212265..e8b574ee5e 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,6 +22,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -31,12 +32,19 @@ logger = logging.getLogger(__name__) class UserDirectoryStore(SQLBaseStore): - @cachedInlineCallbacks(cache_context=True) - def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context): + @defer.inlineCallbacks + def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable """ - current_state_ids = yield self.get_current_state_ids( - room_id, on_invalidate=cache_context.invalidate + + # Create a state filter that only queries join and history state event + types_to_filter = ( + (EventTypes.JoinRules, ""), + (EventTypes.RoomHistoryVisibility, ""), + ) + + current_state_ids = yield self.get_filtered_current_state_ids( + room_id, StateFilter.from_types(types_to_filter), ) join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) -- cgit 1.4.1 From 82486371738a130322f3a1829bc4b49f79c1a3e4 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 6 Feb 2019 17:57:10 -0500 Subject: add new endpoint to update backup versions --- synapse/handlers/e2e_room_keys.py | 34 ++++++++++++++++++++++++++++++- synapse/rest/client/v2_alpha/room_keys.py | 33 ++++++++++++++++++++++++++++++ synapse/storage/e2e_room_keys.py | 21 +++++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index c5d7bf0c29..e0e5ece747 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -19,7 +19,8 @@ from six import iteritems from twisted.internet import defer -from synapse.api.errors import NotFoundError, RoomKeysVersionError, StoreError +from synapse.api.errors import Codes, NotFoundError, RoomKeysVersionError, \ + StoreError, SynapseError from synapse.util.async_helpers import Linearizer logger = logging.getLogger(__name__) @@ -307,3 +308,34 @@ class E2eRoomKeysHandler(object): raise NotFoundError("Unknown backup version") else: raise + + @defer.inlineCallbacks + def update_version(self, user_id, version, version_info): + """Update the info about a given version of the user's backup + + Args: + user_id(str): the user whose current backup version we're updating + version(str): the backup version we're updating + version_info(dict): the new information about the backup + Raises: + NotFoundError: if the requested backup version doesn't exist + Returns: + A deferred of an empty dict. 
+ """ + try: + old_info = yield self.store.get_e2e_room_keys_version_info(user_id, version) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown backup version") + else: + raise + if old_info["algorithm"] != version_info["algorithm"]: + raise SynapseError( + 400, + "Algorithm does not match", + Codes.INVALID_PARAM + ) + + yield self.store.update_e2e_room_keys_version(user_id, version, version_info) + + defer.returnValue({}) diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py index ab3f1bd21a..1c39d2af1c 100644 --- a/synapse/rest/client/v2_alpha/room_keys.py +++ b/synapse/rest/client/v2_alpha/room_keys.py @@ -380,6 +380,39 @@ class RoomKeysVersionServlet(RestServlet): ) defer.returnValue((200, {})) + @defer.inlineCallbacks + def on_PUT(self, request, version): + """ + Update the information about a given version of the user's room_keys backup. + + POST /room_keys/version/12345 HTTP/1.1 + Content-Type: application/json + { + "algorithm": "m.megolm_backup.v1", + "auth_data": { + "public_key": "abcdefg", + "signatures": { + "ed25519:something": "hijklmnop" + } + } + } + + HTTP/1.1 200 OK + Content-Type: application/json + {} + """ + requester = yield self.auth.get_user_by_req(request, allow_guest=False) + user_id = requester.user.to_string() + info = parse_json_object_from_request(request) + + if version is None: + raise SynapseError(400, "No version specified to update", Codes.MISSING_PARAM) + + yield self.e2e_room_keys_handler.update_version( + user_id, version, info + ) + defer.returnValue((200, {})) + def register_servlets(hs, http_server): RoomKeysServlet(hs).register(http_server) diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py index 45cebe61d1..9a3aec759e 100644 --- a/synapse/storage/e2e_room_keys.py +++ b/synapse/storage/e2e_room_keys.py @@ -298,6 +298,27 @@ class EndToEndRoomKeyStore(SQLBaseStore): "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn ) + def update_e2e_room_keys_version(self, user_id, version, info): + """Update a given backup version + + Args: + user_id(str): the user whose backup version we're updating + version(str): the version ID of the backup version we're updating + info(dict): the new backup version info to store + """ + + return self._simple_update( + table="e2e_room_keys_versions", + keyvalues={ + "user_id": user_id, + "version": version, + }, + updatevalues={ + "auth_data": json.dumps(info["auth_data"]), + }, + desc="update_e2e_room_keys_version" + ) + def delete_e2e_room_keys_version(self, user_id, version=None): """Delete a given backup version of the user's room keys. Doesn't delete their actual key data. -- cgit 1.4.1 From 56710c7df55edc782f6562d520c4ebc839dfe500 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 8 Feb 2019 18:30:46 +0000 Subject: Fix 'no unique or exclusion constraint' error (#4591) Add more tables to the list of tables which need a background update to complete before we can upsert into them, which fixes a race against the background updates. 
--- changelog.d/4591.bugfix | 1 + synapse/storage/_base.py | 27 +++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 changelog.d/4591.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/4591.bugfix b/changelog.d/4591.bugfix new file mode 100644 index 0000000000..628bbb6d81 --- /dev/null +++ b/changelog.d/4591.bugfix @@ -0,0 +1 @@ +Fix 'no unique or exclusion constraint' error diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 4872ff55b6..e124161845 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -50,6 +50,21 @@ sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) +# Unique indexes which have been added in background updates. Maps from table name +# to the name of the background update which added the unique index to that table. +# +# This is used by the upsert logic to figure out which tables are safe to do a proper +# UPSERT on: until the relevant background update has completed, we +# have to emulate an upsert by locking the table. +# +UNIQUE_INDEX_BACKGROUND_UPDATES = { + "user_ips": "user_ips_device_unique_index", + "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx", + "device_lists_remote_cache": "device_lists_remote_cache_unique_idx", + "event_search": "event_search_event_id_idx", +} + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. Adds logging and metrics to the .execute() @@ -194,7 +209,7 @@ class SQLBaseStore(object): self.database_engine = hs.database_engine # A set of tables that are not safe to use native upserts in. - self._unsafe_to_upsert_tables = {"user_ips"} + self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) # We add the user_directory_search table to the blacklist on SQLite # because the existing search table does not have an index, making it @@ -230,12 +245,12 @@ class SQLBaseStore(object): ) updates = [x["update_name"] for x in updates] - # The User IPs table in schema #53 was missing a unique index, which we - # run as a background update. - if "user_ips_device_unique_index" not in updates: - self._unsafe_to_upsert_tables.discard("user_ips") + for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items(): + if update_name not in updates: + logger.debug("Now safe to upsert in %s", table) + self._unsafe_to_upsert_tables.discard(table) - # If there's any tables left to check, reschedule to run. + # If there's any updates still running, reschedule to run. if updates: self._clock.call_later( 15.0, -- cgit 1.4.1 From 362d80b7706c7bcd57ea5ee3e8e38b31c7d1ad8b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 11:28:08 +0000 Subject: Reduce user_ips bloat during dedupe background update The background update to remove duplicate rows naively deleted and reinserted the duplicates. For large tables with a large number of duplicates this causes a lot of bloat (with postgres), as the inserted rows are appended to the table, since deleted rows will not be overwritten until a VACUUM has happened. This should hopefully also help ensure that the query in the last batch uses the correct index, as inserting a large number of new rows without analyzing will upset the query planner. 
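
A per-key sketch of that two-step strategy (the function name is
illustrative; the real implementation in the diff below works in batches and
also sanity-checks that no more rows were deleted than expected):

    def dedupe_one_key_sketch(txn, user_id, access_token, ip,
                              device_id, user_agent, last_seen, count):
        # Step 1: delete every duplicate strictly older than the newest
        # row. In the common case exactly one row survives, so nothing is
        # re-inserted and the table does not grow.
        txn.execute(
            "DELETE FROM user_ips"
            " WHERE user_id = ? AND access_token = ? AND ip = ?"
            " AND last_seen < ?",
            (user_id, access_token, ip, last_seen),
        )
        if txn.rowcount == count - 1:
            return

        # Step 2 (only when several rows tie on the max last_seen): fall
        # back to the naive approach of deleting everything for this key
        # and re-inserting a single merged row.
        txn.execute(
            "DELETE FROM user_ips"
            " WHERE user_id = ? AND access_token = ? AND ip = ?",
            (user_id, access_token, ip),
        )
        txn.execute(
            "INSERT INTO user_ips"
            " (user_id, access_token, ip, device_id, user_agent, last_seen)"
            " VALUES (?, ?, ?, ?, ?, ?)",
            (user_id, access_token, ip, device_id, user_agent, last_seen),
        )
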
--- synapse/storage/client_ips.py | 63 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 091d7116c5..a20cc8231f 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -167,12 +167,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): clause = "? <= last_seen AND last_seen < ?" args = (begin_last_seen, end_last_seen) + # (Note: The DISTINCT in the inner query is important to ensure that + # the COUNT(*) is accurate, otherwise double counting may happen due + # to the join effectively being a cross product) txn.execute( """ SELECT user_id, access_token, ip, - MAX(device_id), MAX(user_agent), MAX(last_seen) + MAX(device_id), MAX(user_agent), MAX(last_seen), + COUNT(*) FROM ( - SELECT user_id, access_token, ip + SELECT DISTINCT user_id, access_token, ip FROM user_ips WHERE {} ) c @@ -186,7 +190,60 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): # We've got some duplicates for i in res: - user_id, access_token, ip, device_id, user_agent, last_seen = i + user_id, access_token, ip, device_id, user_agent, last_seen, count = i + + # We want to delete the duplicates so we end up with only a + # single row. + # + # The naive way of doing this would be just to delete all rows + # and reinsert a constructed row. However, if there are a lot of + # duplicate rows this can cause the table to grow a lot, which + # can be problematic in two ways: + # 1. If user_ips is already large then this can cause the + # table to rapidly grow, potentially filling the disk. + # 2. Reinserting a lot of rows can confuse the table + # statistics for postgres, causing it to not use the + # correct indices for the query above, resulting in a full + # table scan. This is incredibly slow for large tables and + # can kill database performance. (This seems to mainly + # happen for the last query where the clause is simply `? < + # last_seen`) + # + # So instead we want to delete all but *one* of the duplicate + # rows. That is hard to do reliably, so we cheat and do a two + # step process: + # 1. Delete all rows with a last_seen strictly less than the + # max last_seen. This hopefully results in deleting all but + # one row the majority of the time, but there may be + # duplicate last_seen + # 2. If multiple rows remain, we fall back to the naive method + # and simply delete all rows and reinsert. + # + # Note that this relies on no new duplicate rows being inserted, + # but if that is happening then this entire process is futile + # anyway. + + # Do step 1: + + txn.execute( + """ + DELETE FROM user_ips + WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ? + """, + (user_id, access_token, ip, last_seen) + ) + if txn.rowcount == count - 1: + # We deleted all but one of the duplicate rows, i.e. there + # is exactly one remaining and so there is nothing left to + # do. + continue + elif txn.rowcount >= count: + raise Exception( + "We deleted more duplicate rows from 'user_ips' than expected", + ) + + # The previous step didn't delete enough rows, so we fallback to + # step 2: # Drop all the duplicates txn.execute( -- cgit 1.4.1 From 483ba85c7a1a8ee9b7eebcc5c07d522c71229c9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 11:55:27 +0000 Subject: Analyze user_ips before running deduplication Due to the table locks taken out by the naive upsert, the table statistics may be out of date. 
During deduplication it is important that the correct index is used as otherwise a full table scan may be incorrectly used, which can end up thrashing the database badly. --- synapse/storage/client_ips.py | 24 ++++++++++++++++++++++ synapse/storage/schema/delta/53/user_ips_index.sql | 10 ++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 091d7116c5..6f81406269 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -65,6 +65,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["last_seen"], ) + self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + self.register_background_update_handler( "user_ips_remove_dupes", self._remove_user_ip_dupes, @@ -108,6 +113,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): yield self._end_background_update("user_ips_drop_nonunique_index") defer.returnValue(1) + @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. + # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + end_last_seen = yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + @defer.inlineCallbacks def _remove_user_ip_dupes(self, progress, batch_size): # This works function works by scanning the user_ips table in batches diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql index 4ca346c111..b812c5794f 100644 --- a/synapse/storage/schema/delta/53/user_ips_index.sql +++ b/synapse/storage/schema/delta/53/user_ips_index.sql @@ -13,9 +13,13 @@ * limitations under the License. 
*/ --- delete duplicates + -- analyze user_ips, to help ensure the correct indices are used INSERT INTO background_updates (update_name, progress_json) VALUES - ('user_ips_remove_dupes', '{}'); + ('user_ips_analyze', '{}'); + +-- delete duplicates +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('user_ips_remove_dupes', '{}', 'user_ips_analyze'); -- add a new unique index to user_ips table INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES @@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES -- drop the old original index INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); \ No newline at end of file + ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); -- cgit 1.4.1 From 495ea92350c4a3f6bd92cded5da939326b858d9b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 12:40:42 +0000 Subject: Fix pep8 --- synapse/storage/client_ips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6f81406269..cc23d7cdbe 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -124,7 +124,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): def user_ips_analyze(txn): txn.execute("ANALYZE user_ips") - end_last_seen = yield self.runInteraction( + yield self.runInteraction( "user_ips_analyze", user_ips_analyze ) -- cgit 1.4.1 From bb4fd8f927807bdd6efb08ee65ec01269e000417 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Feb 2019 23:05:32 +1100 Subject: Run `black` on user directory code (#4635) --- changelog.d/4635.misc | 1 + synapse/handlers/user_directory.py | 94 ++++++++++++----------- synapse/storage/user_directory.py | 153 ++++++++++++++++--------------------- 3 files changed, 117 insertions(+), 131 deletions(-) create mode 100644 changelog.d/4635.misc (limited to 'synapse/storage') diff --git a/changelog.d/4635.misc b/changelog.d/4635.misc new file mode 100644 index 0000000000..0f45957b84 --- /dev/null +++ b/changelog.d/4635.misc @@ -0,0 +1 @@ +Run `black` to reformat user directory code. diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 120815b09b..283c6c1b81 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -130,7 +130,7 @@ class UserDirectoryHandler(object): # Support users are for diagnostics and should not appear in the user directory. if not is_support: yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None, + user_id, profile.display_name, profile.avatar_url, None ) @defer.inlineCallbacks @@ -166,8 +166,9 @@ class UserDirectoryHandler(object): self.pos = deltas[-1]["stream_id"] # Expose current event processing position to prometheus - synapse.metrics.event_processing_positions.labels( - "user_dir").set(self.pos) + synapse.metrics.event_processing_positions.labels("user_dir").set( + self.pos + ) yield self.store.update_user_directory_stream_pos(self.pos) @@ -191,21 +192,25 @@ class UserDirectoryHandler(object): logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) 
+ yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) logger.info("Processed all rooms.") if self.search_all_users: num_processed_users = 0 user_ids = yield self.store.get_all_local_users() - logger.info("Doing initial update of user directory. %d users", len(user_ids)) + logger.info( + "Doing initial update of user directory. %d users", len(user_ids) + ) for user_id in user_ids: # We add profiles for all users even if they don't match the # include pattern, just in case we want to change it in future - logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids)) + logger.info( + "Handling user %d/%d", num_processed_users + 1, len(user_ids) + ) yield self._handle_local_user(user_id) num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) logger.info("Processed all users") @@ -224,24 +229,24 @@ class UserDirectoryHandler(object): if not is_in_room: return - is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id) + is_public = yield self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) users_with_profile = yield self.state.get_current_user_in_room(room_id) user_ids = set(users_with_profile) unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - room_id, { - user_id: users_with_profile[user_id] for user_id in unhandled_users - } + room_id, + {user_id: users_with_profile[user_id] for user_id in unhandled_users}, ) self.initially_handled_users |= unhandled_users if is_public: yield self.store.add_users_to_public_room( - room_id, - user_ids=user_ids - self.initially_handled_users_in_public + room_id, user_ids=user_ids - self.initially_handled_users_in_public ) self.initially_handled_users_in_public |= user_ids @@ -253,7 +258,7 @@ class UserDirectoryHandler(object): count = 0 for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) if not self.is_mine_id(user_id): count += 1 @@ -268,7 +273,7 @@ class UserDirectoryHandler(object): continue if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) 
+ yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) count += 1 user_set = (user_id, other_user_id) @@ -290,25 +295,23 @@ class UserDirectoryHandler(object): if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, + room_id, not is_public, to_insert ) to_insert.clear() if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) to_update.clear() if to_insert: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, - ) + yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) to_insert.clear() if to_update: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) to_update.clear() @@ -329,11 +332,12 @@ class UserDirectoryHandler(object): # may have become public or not and add/remove the users in said room if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules): yield self._handle_room_publicity_change( - room_id, prev_event_id, event_id, typ, + room_id, prev_event_id, event_id, typ ) elif typ == EventTypes.Member: change = yield self._get_key_change( - prev_event_id, event_id, + prev_event_id, + event_id, key_name="membership", public_value=Membership.JOIN, ) @@ -342,14 +346,16 @@ class UserDirectoryHandler(object): # Need to check if the server left the room entirely, if so # we might need to remove all the users in that room is_in_room = yield self.store.is_host_joined( - room_id, self.server_name, + room_id, self.server_name ) if not is_in_room: logger.info("Server left room: %r", room_id) # Fetch all the users that we marked as being in user # directory due to being in the room and then check if # need to remove those users or not - user_ids = yield self.store.get_users_in_dir_due_to_room(room_id) + user_ids = yield self.store.get_users_in_dir_due_to_room( + room_id + ) for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return @@ -361,7 +367,7 @@ class UserDirectoryHandler(object): if change is None: # Handle any profile changes yield self._handle_profile_change( - state_key, room_id, prev_event_id, event_id, + state_key, room_id, prev_event_id, event_id ) continue @@ -393,13 +399,15 @@ class UserDirectoryHandler(object): if typ == EventTypes.RoomHistoryVisibility: change = yield self._get_key_change( - prev_event_id, event_id, + prev_event_id, + event_id, key_name="history_visibility", public_value="world_readable", ) elif typ == EventTypes.JoinRules: change = yield self._get_key_change( - prev_event_id, event_id, + prev_event_id, + event_id, key_name="join_rule", public_value=JoinRules.PUBLIC, ) @@ -524,7 +532,7 @@ class UserDirectoryHandler(object): ) if self.is_mine_id(other_user_id) and not is_appservice: shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id, + other_user_id, user_id ) if shared_is_private is True: # We've already marked in the database they share a private room @@ -539,13 +547,11 @@ class UserDirectoryHandler(object): to_insert.add((other_user_id, user_id)) if to_insert: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert, - ) + yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) if to_update: yield self.store.update_users_who_share_room( - room_id, not is_public, to_update, + room_id, not is_public, to_update ) @defer.inlineCallbacks @@ 
-564,15 +570,15 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_public_room(user_id) update_user_in_public = row and row["room_id"] == room_id - if (update_user_in_public or update_user_dir): + if update_user_in_public or update_user_dir: # XXX: Make this faster? rooms = yield self.store.get_rooms_for_user(user_id) for j_room_id in rooms: - if (not update_user_in_public and not update_user_dir): + if not update_user_in_public and not update_user_dir: break is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name, + j_room_id, self.server_name ) if not is_in_room: @@ -600,19 +606,19 @@ class UserDirectoryHandler(object): # Get a list of user tuples that were in the DB due to this room and # users (this includes tuples where the other user matches `user_id`) user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id, + user_id, room_id ) for user_id, other_user_id in user_tuples: # For each user tuple get a list of rooms that they still share, # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users(user_id, other_user_id) + rooms = yield self.store.get_rooms_in_common_for_users( + user_id, other_user_id + ) # If they dont share a room anymore, remove the mapping if not rooms: - yield self.store.remove_user_who_share_room( - user_id, other_user_id, - ) + yield self.store.remove_user_who_share_room(user_id, other_user_id) continue found_public_share = None @@ -626,13 +632,13 @@ class UserDirectoryHandler(object): else: found_public_share = None yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)], + room_id, not is_public, [(user_id, other_user_id)] ) break if found_public_share: yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)], + room_id, not is_public, [(user_id, other_user_id)] ) @defer.inlineCallbacks @@ -660,7 +666,7 @@ class UserDirectoryHandler(object): if prev_name != new_name or prev_avatar != new_avatar: yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id, + user_id, new_name, new_avatar, room_id ) @defer.inlineCallbacks diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index e8b574ee5e..fea866c043 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -44,7 +44,7 @@ class UserDirectoryStore(SQLBaseStore): ) current_state_ids = yield self.get_filtered_current_state_ids( - room_id, StateFilter.from_types(types_to_filter), + room_id, StateFilter.from_types(types_to_filter) ) join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) @@ -74,14 +74,8 @@ class UserDirectoryStore(SQLBaseStore): """ yield self._simple_insert_many( table="users_in_public_rooms", - values=[ - { - "user_id": user_id, - "room_id": room_id, - } - for user_id in user_ids - ], - desc="add_users_to_public_room" + values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], + desc="add_users_to_public_room", ) for user_id in user_ids: self.get_user_in_public_room.invalidate((user_id,)) @@ -107,7 +101,9 @@ class UserDirectoryStore(SQLBaseStore): """ args = ( ( - user_id, get_localpart_from_id(user_id), get_domain_from_id(user_id), + user_id, + get_localpart_from_id(user_id), + get_domain_from_id(user_id), profile.display_name, ) for user_id, profile in iteritems(users_with_profile) @@ -120,7 +116,7 @@ class UserDirectoryStore(SQLBaseStore): args = ( ( user_id, - "%s 
%s" % (user_id, p.display_name,) if p.display_name else user_id + "%s %s" % (user_id, p.display_name) if p.display_name else user_id, ) for user_id, p in iteritems(users_with_profile) ) @@ -141,12 +137,10 @@ class UserDirectoryStore(SQLBaseStore): "avatar_url": profile.avatar_url, } for user_id, profile in iteritems(users_with_profile) - ] + ], ) for user_id in users_with_profile: - txn.call_after( - self.get_user_in_directory.invalidate, (user_id,) - ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) return self.runInteraction( "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn @@ -188,9 +182,11 @@ class UserDirectoryStore(SQLBaseStore): txn.execute( sql, ( - user_id, get_localpart_from_id(user_id), - get_domain_from_id(user_id), display_name, - ) + user_id, + get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, + ), ) else: # TODO: Remove this code after we've bumped the minimum version @@ -208,9 +204,11 @@ class UserDirectoryStore(SQLBaseStore): txn.execute( sql, ( - user_id, get_localpart_from_id(user_id), - get_domain_from_id(user_id), display_name, - ) + user_id, + get_localpart_from_id(user_id), + get_domain_from_id(user_id), + display_name, + ), ) elif new_entry is False: sql = """ @@ -225,15 +223,16 @@ class UserDirectoryStore(SQLBaseStore): ( get_localpart_from_id(user_id), get_domain_from_id(user_id), - display_name, user_id, - ) + display_name, + user_id, + ), ) else: raise RuntimeError( "upsert returned None when 'can_native_upsert' is False" ) elif isinstance(self.database_engine, Sqlite3Engine): - value = "%s %s" % (user_id, display_name,) if display_name else user_id + value = "%s %s" % (user_id, display_name) if display_name else user_id self._simple_upsert_txn( txn, table="user_directory_search", @@ -264,29 +263,18 @@ class UserDirectoryStore(SQLBaseStore): def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( - txn, - table="user_directory", - keyvalues={"user_id": user_id}, + txn, table="user_directory", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="user_directory_search", - keyvalues={"user_id": user_id}, + txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - ) - txn.call_after( - self.get_user_in_directory.invalidate, (user_id,) - ) - txn.call_after( - self.get_user_in_public_room.invalidate, (user_id,) + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) - return self.runInteraction( - "remove_from_user_dir", _remove_from_user_dir_txn, - ) + txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) + txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) + + return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) @defer.inlineCallbacks def remove_from_user_in_public_room(self, user_id): @@ -371,6 +359,7 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. 
""" + def _add_users_who_share_room_txn(txn): self._simple_insert_many_txn( txn, @@ -387,13 +376,12 @@ class UserDirectoryStore(SQLBaseStore): ) for user_id, other_user_id in user_id_tuples: txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) + return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) @@ -407,6 +395,7 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ + def _update_users_who_share_room_txn(txn): sql = """ UPDATE users_who_share_rooms @@ -414,21 +403,16 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND other_user_id = ? """ txn.executemany( - sql, - ( - (room_id, share_private, uid, oid) - for uid, oid in user_id_sets - ) + sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) ) for user_id, other_user_id in user_id_sets: txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) + return self.runInteraction( "update_users_who_share_room", _update_users_who_share_room_txn ) @@ -442,22 +426,18 @@ class UserDirectoryStore(SQLBaseStore): share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ + def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - "other_user_id": other_user_id, - }, + keyvalues={"user_id": user_id, "other_user_id": other_user_id}, ) txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, - (user_id,), + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) txn.call_after( - self.get_if_users_share_a_room.invalidate, - (user_id, other_user_id), + self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) ) return self.runInteraction( @@ -478,10 +458,7 @@ class UserDirectoryStore(SQLBaseStore): """ return self._simple_select_one_onecol( table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - "other_user_id": other_user_id, - }, + keyvalues={"user_id": user_id, "other_user_id": other_user_id}, retcol="share_private", allow_none=True, desc="get_if_users_share_a_room", @@ -499,17 +476,12 @@ class UserDirectoryStore(SQLBaseStore): """ rows = yield self._simple_select_list( table="users_who_share_rooms", - keyvalues={ - "user_id": user_id, - }, - retcols=("other_user_id", "share_private",), + keyvalues={"user_id": user_id}, + retcols=("other_user_id", "share_private"), desc="get_users_who_share_room_with_user", ) - defer.returnValue({ - row["other_user_id"]: row["share_private"] - for row in rows - }) + defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) def get_users_in_share_dir_with_room_id(self, user_id, room_id): """Get all user tuples that are in the users_who_share_rooms due to the @@ -556,6 +528,7 @@ class UserDirectoryStore(SQLBaseStore): def delete_all_from_user_dir(self): """Delete the entire user directory """ + def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM 
user_directory") txn.execute("DELETE FROM user_directory_search") @@ -565,6 +538,7 @@ class UserDirectoryStore(SQLBaseStore): txn.call_after(self.get_user_in_public_room.invalidate_all) txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) txn.call_after(self.get_if_users_share_a_room.invalidate_all) + return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn ) @@ -574,7 +548,7 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url",), + retcols=("room_id", "display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) @@ -607,7 +581,9 @@ class UserDirectoryStore(SQLBaseStore): def get_current_state_deltas(self, prev_stream_id): prev_stream_id = int(prev_stream_id) - if not self._curr_state_delta_stream_cache.has_any_entity_changed(prev_stream_id): + if not self._curr_state_delta_stream_cache.has_any_entity_changed( + prev_stream_id + ): return [] def get_current_state_deltas_txn(txn): @@ -641,7 +617,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC """ - txn.execute(sql, (prev_stream_id, max_stream_id,)) + txn.execute(sql, (prev_stream_id, max_stream_id)) return self.cursor_to_dict(txn) return self.runInteraction( @@ -731,8 +707,11 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % (join_clause, where_clause) - args = join_args + (full_query, exact_query, prefix_query, limit + 1,) + """ % ( + join_clause, + where_clause, + ) + args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -749,7 +728,10 @@ class UserDirectoryStore(SQLBaseStore): display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % (join_clause, where_clause) + """ % ( + join_clause, + where_clause, + ) args = join_args + (search_query, limit + 1) else: # This should be unreachable. @@ -761,10 +743,7 @@ class UserDirectoryStore(SQLBaseStore): limited = len(results) > limit - defer.returnValue({ - "limited": limited, - "results": results, - }) + defer.returnValue({"limited": limited, "results": results}) def _parse_query_sqlite(search_term): @@ -779,7 +758,7 @@ def _parse_query_sqlite(search_term): # Pull out the individual words, discarding any non-word characters. results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - return " & ".join("(%s* OR %s)" % (result, result,) for result in results) + return " & ".join("(%s* OR %s)" % (result, result) for result in results) def _parse_query_postgres(search_term): @@ -792,7 +771,7 @@ def _parse_query_postgres(search_term): # Pull out the individual words, discarding any non-word characters. 
results = re.findall(r"([\w\-]+)", search_term, re.UNICODE) - both = " & ".join("(%s:* | %s)" % (result, result,) for result in results) + both = " & ".join("(%s:* | %s)" % (result, result) for result in results) exact = " & ".join("%s" % (result,) for result in results) prefix = " & ".join("%s:*" % (result,) for result in results) -- cgit 1.4.1 From eaf4d11af9da7d6d9ce71cb83f70424bb38e0703 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 Feb 2019 16:02:23 +0000 Subject: Add configurable room list publishing rules This allows specifying who and what is allowed to be published onto the public room list --- synapse/config/room_directory.py | 94 ++++++++++++++++++++++++++++++------- synapse/handlers/directory.py | 29 ++++++++++-- synapse/storage/state.py | 25 ++++++++++ tests/config/test_room_directory.py | 73 ++++++++++++++++++++++++++++ tests/handlers/test_directory.py | 1 + 5 files changed, 200 insertions(+), 22 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 9da13ab11b..a0869ed6ab 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -23,70 +23,121 @@ class RoomDirectoryConfig(Config): alias_creation_rules = config["alias_creation_rules"] self._alias_creation_rules = [ - _AliasRule(rule) + _RoomDirectoryRule("alias_creation_rules", rule) for rule in alias_creation_rules ] + room_list_publication_rules = config["room_list_publication_rules"] + + self._room_list_publication_rules = [ + _RoomDirectoryRule("room_list_publication_rules", rule) + for rule in room_list_publication_rules + ] + def default_config(self, config_dir_path, server_name, **kwargs): return """ # The `alias_creation` option controls who's allowed to create aliases # on this server. # # The format of this option is a list of rules that contain globs that - # match against user_id and the new alias (fully qualified with server - # name). The action in the first rule that matches is taken, which can - # currently either be "allow" or "deny". + # match against user_id, room_id and the new alias (fully qualified with + # server name). The action in the first rule that matches is taken, + # which can currently either be "allow" or "deny". + # + # Missing user_id/room_id/alias fields default to "*". # # If no rules match the request is denied. alias_creation_rules: - user_id: "*" - alias: "*" + alias: "*" # This matches alias being created + room_id: "*" + action: allow + + # The `room_list_publication_rules` option control who and what can be + # published in the public room list. 
+ # + # The format of this option is the same as that for + # `alias_creation_rules` + room_list_publication_rules: + - user_id: "*" + alias: "*" # This matches any local or canonical alias + # associated with the room + room_id: "*" action: allow """ - def is_alias_creation_allowed(self, user_id, alias): + def is_alias_creation_allowed(self, user_id, room_id, alias): """Checks if the given user is allowed to create the given alias Args: user_id (str) + room_id (str) alias (str) Returns: boolean: True if user is allowed to crate the alias """ for rule in self._alias_creation_rules: - if rule.matches(user_id, alias): + if rule.matches(user_id, room_id, [alias]): + return rule.action == "allow" + + return False + + def is_publishing_room_allowed(self, user_id, room_id, aliases): + """Checks if the given user is allowed to publish the room + + Args: + user_id (str) + room_id (str) + aliases (list[str]): any local aliases associated with the room + + Returns: + boolean: True if user can publish room + """ + for rule in self._room_list_publication_rules: + if rule.matches(user_id, room_id, aliases): return rule.action == "allow" return False -class _AliasRule(object): - def __init__(self, rule): +class _RoomDirectoryRule(object): + """Helper class to test whether a room directory action is allowed, like + creating an alias or publishing a room. + """ + + def __init__(self, option_name, rule): action = rule["action"] - user_id = rule["user_id"] - alias = rule["alias"] + user_id = rule.get("user_id", "*") + room_id = rule.get("room_id", "*") + alias = rule.get("alias", "*") if action in ("allow", "deny"): self.action = action else: raise ConfigError( - "alias_creation_rules rules can only have action of 'allow'" - " or 'deny'" + "%s rules can only have action of 'allow'" + " or 'deny'" % (option_name,) ) + self._alias_matches_all = alias == "*" + try: self._user_id_regex = glob_to_regex(user_id) self._alias_regex = glob_to_regex(alias) + self._room_id_regex = glob_to_regex(room_id) except Exception as e: raise ConfigError("Failed to parse glob into regex: %s", e) - def matches(self, user_id, alias): - """Tests if this rule matches the given user_id and alias. + def matches(self, user_id, room_id, aliases): + """Tests if this rule matches the given user_id, room_id and aliases. Args: user_id (str) - alias (str) + room_id (str) + aliases (list[str]): The associated aliases to the room. Will be a + single element for testing alias creation, and can be empty for + testing room publishing. 
Returns: boolean @@ -96,7 +147,16 @@ class _AliasRule(object): if not self._user_id_regex.match(user_id): return False - if not self._alias_regex.match(alias): + # If we are not given any aliases then this rule only matches if the + # alias glob matches all aliases + if not aliases and not self._alias_matches_all: + return False + + for alias in aliases: + if not self._alias_regex.match(alias): + return False + + if not self._room_id_regex.match(room_id): return False return True diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 6bb254f899..e5319b42a6 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -112,7 +112,9 @@ class DirectoryHandler(BaseHandler): 403, "This user is not permitted to create this alias", ) - if not self.config.is_alias_creation_allowed(user_id, room_alias.to_string()): + if not self.config.is_alias_creation_allowed( + user_id, room_id, room_alias.to_string(), + ): # Lets just return a generic message, as there may be all sorts of # reasons why we said no. TODO: Allow configurable error messages # per alias creation rule? @@ -395,9 +397,9 @@ class DirectoryHandler(BaseHandler): room_id (str) visibility (str): "public" or "private" """ - if not self.spam_checker.user_may_publish_room( - requester.user.to_string(), room_id - ): + user_id = requester.user.to_string() + + if not self.spam_checker.user_may_publish_room(user_id, room_id): raise AuthError( 403, "This user is not permitted to publish rooms to the room list" @@ -415,7 +417,24 @@ class DirectoryHandler(BaseHandler): yield self.auth.check_can_change_room_list(room_id, requester.user) - yield self.store.set_room_is_public(room_id, visibility == "public") + room_aliases = yield self.store.get_aliases_for_room(room_id) + canonical_alias = yield self.store.get_canonical_alias_for_room(room_id) + if canonical_alias: + room_aliases.append(canonical_alias) + + making_public = visibility == "public" + + if making_public and not self.config.is_publishing_room_allowed( + user_id, room_id, room_aliases, + ): + # Lets just return a generic message, as there may be all sorts of + # reasons why we said no. TODO: Allow configurable error messages + # per alias creation rule? 
+ raise SynapseError( + 403, "Not allowed to publish room", + ) + + yield self.store.set_room_is_public(room_id, making_public) @defer.inlineCallbacks def edit_published_appservice_room_list(self, appservice_id, network_id, diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d14a7b2538..6ddc4055d2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -548,6 +548,31 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): _get_filtered_current_state_ids_txn, ) + @defer.inlineCallbacks + def get_canonical_alias_for_room(self, room_id): + """Get canonical alias for room, if any + + Args: + room_id (str) + + Returns: + Deferred[str|None]: The canonical alias, if any + """ + + state = yield self.get_filtered_current_state_ids(room_id, StateFilter.from_types( + [(EventTypes.CanonicalAlias, "")] + )) + + event_id = state.get((EventTypes.CanonicalAlias, "")) + if not event_id: + return + + event = yield self.get_event(event_id, allow_none=True) + if not event: + return + + defer.returnValue(event.content.get("canonical_alias")) + @cached(max_entries=10000, iterable=True) def get_state_group_delta(self, state_group): """Given a state group try to return a previous group and a delta between diff --git a/tests/config/test_room_directory.py b/tests/config/test_room_directory.py index f37a17d618..1d4ca0055c 100644 --- a/tests/config/test_room_directory.py +++ b/tests/config/test_room_directory.py @@ -36,6 +36,8 @@ class RoomDirectoryConfigTestCase(unittest.TestCase): - user_id: "@gah:example.com" alias: "#goo:example.com" action: "allow" + + room_list_publication_rules: [] """) rd_config = RoomDirectoryConfig() @@ -43,25 +45,96 @@ class RoomDirectoryConfigTestCase(unittest.TestCase): self.assertFalse(rd_config.is_alias_creation_allowed( user_id="@bob:example.com", + room_id="!test", alias="#test:example.com", )) self.assertTrue(rd_config.is_alias_creation_allowed( user_id="@test:example.com", + room_id="!test", alias="#unofficial_st:example.com", )) self.assertTrue(rd_config.is_alias_creation_allowed( user_id="@foobar:example.com", + room_id="!test", alias="#test:example.com", )) self.assertTrue(rd_config.is_alias_creation_allowed( user_id="@gah:example.com", + room_id="!test", alias="#goo:example.com", )) self.assertFalse(rd_config.is_alias_creation_allowed( user_id="@test:example.com", + room_id="!test", alias="#test:example.com", )) + + def test_room_publish_acl(self): + config = yaml.load(""" + alias_creation_rules: [] + + room_list_publication_rules: + - user_id: "*bob*" + alias: "*" + action: "deny" + - user_id: "*" + alias: "#unofficial_*" + action: "allow" + - user_id: "@foo*:example.com" + alias: "*" + action: "allow" + - user_id: "@gah:example.com" + alias: "#goo:example.com" + action: "allow" + - room_id: "!test-deny" + action: "deny" + """) + + rd_config = RoomDirectoryConfig() + rd_config.read_config(config) + + self.assertFalse(rd_config.is_publishing_room_allowed( + user_id="@bob:example.com", + room_id="!test", + aliases=["#test:example.com"], + )) + + self.assertTrue(rd_config.is_publishing_room_allowed( + user_id="@test:example.com", + room_id="!test", + aliases=["#unofficial_st:example.com"], + )) + + self.assertTrue(rd_config.is_publishing_room_allowed( + user_id="@foobar:example.com", + room_id="!test", + aliases=[], + )) + + self.assertTrue(rd_config.is_publishing_room_allowed( + user_id="@gah:example.com", + room_id="!test", + aliases=["#goo:example.com"], + )) + + self.assertFalse(rd_config.is_publishing_room_allowed( + 
user_id="@test:example.com", + room_id="!test", + aliases=["#test:example.com"], + )) + + self.assertTrue(rd_config.is_publishing_room_allowed( + user_id="@foobar:example.com", + room_id="!test-deny", + aliases=[], + )) + + self.assertFalse(rd_config.is_publishing_room_allowed( + user_id="@gah:example.com", + room_id="!test-deny", + aliases=[], + )) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index 8ae6556c0a..9bf395e923 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -121,6 +121,7 @@ class TestCreateAliasACL(unittest.HomeserverTestCase): "action": "allow", } ] + config["room_list_publication_rules"] = [] rd_config = RoomDirectoryConfig() rd_config.read_config(config) -- cgit 1.4.1 From eb2b8523ae1ddd38bf1dd19ee37e44e7f4a3ee68 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Feb 2019 12:12:57 +0000 Subject: Split out registration to worker This allows registration to be handled by a worker, though the actual write to the database still happens on master. Note: due to the in-memory session map all registration requests must be handled by the same worker. --- synapse/app/client_reader.py | 2 + synapse/handlers/register.py | 63 ++++++++- synapse/replication/http/__init__.py | 4 +- synapse/replication/http/login.py | 85 ++++++++++++ synapse/replication/http/register.py | 91 ++++++++++++ synapse/rest/client/v2_alpha/register.py | 73 ++++++---- synapse/storage/registration.py | 230 +++++++++++++++---------------- 7 files changed, 401 insertions(+), 147 deletions(-) create mode 100644 synapse/replication/http/login.py create mode 100644 synapse/replication/http/register.py (limited to 'synapse/storage') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index a9d2147022..9250b6c239 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -47,6 +47,7 @@ from synapse.rest.client.v1.room import ( RoomMemberListRestServlet, RoomStateRestServlet, ) +from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -92,6 +93,7 @@ class ClientReaderServer(HomeServer): JoinedRoomMemberListRestServlet(self).register(resource) RoomStateRestServlet(self).register(resource) RoomEventContextServlet(self).register(resource) + RegisterRestServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 21c17c59a0..8ea557a003 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -27,6 +27,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.http.client import CaptchaServerHttpClient +from synapse.replication.http.register import ReplicationRegisterServlet from synapse.types import RoomAlias, RoomID, UserID, create_requester from synapse.util.async_helpers import Linearizer from synapse.util.threepids import check_3pid_allowed @@ -61,6 +62,9 @@ class RegistrationHandler(BaseHandler): ) self._server_notices_mxid = hs.config.server_notices_mxid + if hs.config.worker_app: + self._register_client = ReplicationRegisterServlet.make_client(hs) + @defer.inlineCallbacks def check_username(self, localpart, guest_access_token=None, assigned_user_id=None): @@ -185,7 +189,7 @@ class RegistrationHandler(BaseHandler): token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - 
yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -217,7 +221,7 @@ class RegistrationHandler(BaseHandler): if default_display_name is None: default_display_name = localpart try: - yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -316,7 +320,7 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self.store.register( + yield self._register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, @@ -494,7 +498,7 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self.store.register( + yield self._register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -573,3 +577,54 @@ class RegistrationHandler(BaseHandler): action="join", ratelimit=False, ) + + def _register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None): + """Register user in the datastore. + + Args: + user_id (str): The desired user ID to register. + token (str): The desired access token to use for this user. If this + is not None, the given access token is associated with the user + id. + password_hash (str|None): Optional. The password hash for this user. + was_guest (bool): Optional. Whether this is a guest account being + upgraded to a non-guest account. + make_guest (boolean): True if the the new user should be guest, + false to add a regular user account. + appservice_id (str|None): The ID of the appservice registering the user. + create_profile_with_displayname (unicode|None): Optionally create a + profile for the user, setting their displayname to the given value + admin (boolean): is an admin user? + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. + + Returns: + Deferred + """ + if self.hs.config.worker_app: + return self._register_client( + user_id=user_id, + token=token, + password_hash=password_hash, + was_guest=was_guest, + make_guest=make_guest, + appservice_id=appservice_id, + create_profile_with_displayname=create_profile_with_displayname, + admin=admin, + user_type=user_type, + ) + else: + return self.store.register( + user_id=user_id, + token=token, + password_hash=password_hash, + was_guest=was_guest, + make_guest=make_guest, + appservice_id=appservice_id, + create_profile_with_displayname=create_profile_with_displayname, + admin=admin, + user_type=user_type, + ) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index 19f214281e..81b85352b1 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. 
from synapse.http.server import JsonResource -from synapse.replication.http import federation, membership, send_event +from synapse.replication.http import federation, login, membership, register, send_event REPLICATION_PREFIX = "/_synapse/replication" @@ -28,3 +28,5 @@ class ReplicationRestResource(JsonResource): send_event.register_servlets(hs, self) membership.register_servlets(hs, self) federation.register_servlets(hs, self) + login.register_servlets(hs, self) + register.register_servlets(hs, self) diff --git a/synapse/replication/http/login.py b/synapse/replication/http/login.py new file mode 100644 index 0000000000..797f6aabd1 --- /dev/null +++ b/synapse/replication/http/login.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.http.servlet import parse_json_object_from_request +from synapse.replication.http._base import ReplicationEndpoint + +logger = logging.getLogger(__name__) + + +class RegisterDeviceReplicationServlet(ReplicationEndpoint): + """Ensure a device is registered, generating a new access token for the + device. + + Used during registration and login. + """ + + NAME = "device_check_registered" + PATH_ARGS = ("user_id",) + + def __init__(self, hs): + super(RegisterDeviceReplicationServlet, self).__init__(hs) + self.auth_handler = hs.get_auth_handler() + self.device_handler = hs.get_device_handler() + self.macaroon_gen = hs.get_macaroon_generator() + + @staticmethod + def _serialize_payload(user_id, device_id, initial_display_name, is_guest): + """ + Args: + device_id (str|None): Device ID to use, if None a new one is + generated. 
+ initial_display_name (str|None) + is_guest (bool) + """ + return { + "device_id": device_id, + "initial_display_name": initial_display_name, + "is_guest": is_guest, + } + + @defer.inlineCallbacks + def _handle_request(self, request, user_id): + content = parse_json_object_from_request(request) + + device_id = content["device_id"] + initial_display_name = content["initial_display_name"] + is_guest = content["is_guest"] + + device_id = yield self.device_handler.check_device_registered( + user_id, device_id, initial_display_name, + ) + + if is_guest: + access_token = self.macaroon_gen.generate_access_token( + user_id, ["guest = true"] + ) + else: + access_token = yield self.auth_handler.get_access_token_for_user_id( + user_id, device_id=device_id, + ) + + defer.returnValue((200, { + "device_id": device_id, + "access_token": access_token, + })) + + +def register_servlets(hs, http_server): + RegisterDeviceReplicationServlet(hs).register(http_server) diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py new file mode 100644 index 0000000000..bdaf37396c --- /dev/null +++ b/synapse/replication/http/register.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from twisted.internet import defer + +from synapse.http.servlet import parse_json_object_from_request +from synapse.replication.http._base import ReplicationEndpoint + +logger = logging.getLogger(__name__) + + +class ReplicationRegisterServlet(ReplicationEndpoint): + """Register a new user + """ + + NAME = "register_user" + PATH_ARGS = ("user_id",) + + def __init__(self, hs): + super(ReplicationRegisterServlet, self).__init__(hs) + self.store = hs.get_datastore() + + @staticmethod + def _serialize_payload( + user_id, token, password_hash, was_guest, make_guest, appservice_id, + create_profile_with_displayname, admin, user_type, + ): + """ + Args: + user_id (str): The desired user ID to register. + token (str): The desired access token to use for this user. If this + is not None, the given access token is associated with the user + id. + password_hash (str|None): Optional. The password hash for this user. + was_guest (bool): Optional. Whether this is a guest account being + upgraded to a non-guest account. + make_guest (boolean): True if the the new user should be guest, + false to add a regular user account. + appservice_id (str|None): The ID of the appservice registering the user. + create_profile_with_displayname (unicode|None): Optionally create a + profile for the user, setting their displayname to the given value + admin (boolean): is an admin user? + user_type (str|None): type of user. One of the values from + api.constants.UserTypes, or None for a normal user. 
+ """ + return { + "token": token, + "password_hash": password_hash, + "was_guest": was_guest, + "make_guest": make_guest, + "appservice_id": appservice_id, + "create_profile_with_displayname": create_profile_with_displayname, + "admin": admin, + "user_type": user_type, + } + + @defer.inlineCallbacks + def _handle_request(self, request, user_id): + content = parse_json_object_from_request(request) + + yield self.store.register( + user_id=user_id, + token=content["token"], + password_hash=content["password_hash"], + was_guest=content["was_guest"], + make_guest=content["make_guest"], + appservice_id=content["appservice_id"], + create_profile_with_displayname=content["create_profile_with_displayname"], + admin=content["admin"], + user_type=content["user_type"], + ) + + defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + ReplicationRegisterServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 7f812b8209..d78da50787 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -33,6 +33,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.replication.http.login import RegisterDeviceReplicationServlet from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.threepids import check_3pid_allowed @@ -190,9 +191,15 @@ class RegisterRestServlet(RestServlet): self.registration_handler = hs.get_handlers().registration_handler self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() - self.device_handler = hs.get_device_handler() self.macaroon_gen = hs.get_macaroon_generator() + if self.hs.config.worker_app: + self._register_device_client = ( + RegisterDeviceReplicationServlet.make_client(hs) + ) + else: + self.device_handler = hs.get_device_handler() + @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): @@ -633,12 +640,10 @@ class RegisterRestServlet(RestServlet): "home_server": self.hs.hostname, } if not params.get("inhibit_login", False): - device_id = yield self._register_device(user_id, params) - - access_token = ( - yield self.auth_handler.get_access_token_for_user_id( - user_id, device_id=device_id, - ) + device_id = params.get("device_id") + initial_display_name = params.get("initial_device_display_name") + device_id, access_token = yield self._register_device( + user_id, device_id, initial_display_name, is_guest=False, ) result.update({ @@ -647,25 +652,42 @@ class RegisterRestServlet(RestServlet): }) defer.returnValue(result) - def _register_device(self, user_id, params): - """Register a device for a user. - - This is called after the user's credentials have been validated, but - before the access token has been issued. + @defer.inlineCallbacks + def _register_device(self, user_id, device_id, initial_display_name, + is_guest): + """Register a device for a user and generate an access token. Args: - (str) user_id: full canonical @user:id - (object) params: registration parameters, from which we pull - device_id and initial_device_name + user_id (str): full canonical @user:id + device_id (str|None): The device ID to check, or None to generate + a new one. + initial_display_name (str|None): An optional display name for the + device. 
+ is_guest (bool): Whether this is a guest account Returns: - defer.Deferred: (str) device_id + defer.Deferred[(str, str)]: Tuple of device ID and access token """ - # register the user's device - device_id = params.get("device_id") - initial_display_name = params.get("initial_device_display_name") - return self.device_handler.check_device_registered( - user_id, device_id, initial_display_name - ) + if self.hs.config.worker_app: + r = yield self._register_device_client( + user_id=user_id, + device_id=device_id, + initial_display_name=initial_display_name, + is_guest=is_guest, + ) + defer.returnValue((r["device_id"], r["access_token"])) + else: + device_id = yield self.device_handler.check_device_registered( + user_id, device_id, initial_display_name + ) + if is_guest: + access_token = self.macaroon_gen.generate_access_token( + user_id, ["guest = true"] + ) + else: + access_token = yield self.auth_handler.get_access_token_for_user_id( + user_id, device_id=device_id, + ) + defer.returnValue((device_id, access_token)) @defer.inlineCallbacks def _do_guest_registration(self, params): @@ -680,13 +702,10 @@ class RegisterRestServlet(RestServlet): # we have nowhere to store it. device_id = synapse.api.auth.GUEST_DEVICE_ID initial_display_name = params.get("initial_device_display_name") - yield self.device_handler.check_device_registered( - user_id, device_id, initial_display_name + device_id, access_token = yield self._register_device( + user_id, device_id, initial_display_name, is_guest=True, ) - access_token = self.macaroon_gen.generate_access_token( - user_id, ["guest = true"] - ) defer.returnValue((200, { "user_id": user_id, "device_id": device_id, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index c9e11c3135..3bc5def48e 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -139,6 +139,121 @@ class RegistrationWorkerStore(SQLBaseStore): ) return True if res == UserTypes.SUPPORT else False + def get_users_by_id_case_insensitive(self, user_id): + """Gets users that match user_id case insensitively. + Returns a mapping of user_id -> password_hash. + """ + def f(txn): + sql = ( + "SELECT name, password_hash FROM users" + " WHERE lower(name) = lower(?)" + ) + txn.execute(sql, (user_id,)) + return dict(txn) + + return self.runInteraction("get_users_by_id_case_insensitive", f) + + @defer.inlineCallbacks + def count_all_users(self): + """Counts all users registered on the homeserver.""" + def _count_users(txn): + txn.execute("SELECT COUNT(*) AS users FROM users") + rows = self.cursor_to_dict(txn) + if rows: + return rows[0]["users"] + return 0 + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + + def count_daily_user_type(self): + """ + Counts 1) native non guest users + 2) native guests users + 3) bridged users + who registered on the homeserver in the past 24 hours + """ + def _count_daily_user_type(txn): + yesterday = int(self._clock.time()) - (60 * 60 * 24) + + sql = """ + SELECT user_type, COALESCE(count(*), 0) AS count FROM ( + SELECT + CASE + WHEN is_guest=0 AND appservice_id IS NULL THEN 'native' + WHEN is_guest=1 AND appservice_id IS NULL THEN 'guest' + WHEN is_guest=0 AND appservice_id IS NOT NULL THEN 'bridged' + END AS user_type + FROM users + WHERE creation_ts > ? 
+ ) AS t GROUP BY user_type + """ + results = {'native': 0, 'guest': 0, 'bridged': 0} + txn.execute(sql, (yesterday,)) + for row in txn: + results[row[0]] = row[1] + return results + return self.runInteraction("count_daily_user_type", _count_daily_user_type) + + @defer.inlineCallbacks + def count_nonbridged_users(self): + def _count_users(txn): + txn.execute(""" + SELECT COALESCE(COUNT(*), 0) FROM users + WHERE appservice_id IS NULL + """) + count, = txn.fetchone() + return count + + ret = yield self.runInteraction("count_users", _count_users) + defer.returnValue(ret) + + @defer.inlineCallbacks + def find_next_generated_user_id_localpart(self): + """ + Gets the localpart of the next generated user ID. + + Generated user IDs are integers, and we aim for them to be as small as + we can. Unfortunately, it's possible some of them are already taken by + existing users, and there may be gaps in the already taken range. This + function returns the start of the first allocatable gap. This is to + avoid the case of ID 10000000 being pre-allocated, so us wasting the + first (and shortest) many generated user IDs. + """ + def _find_next_generated_user_id(txn): + txn.execute("SELECT name FROM users") + + regex = re.compile(r"^@(\d+):") + + found = set() + + for user_id, in txn: + match = regex.search(user_id) + if match: + found.add(int(match.group(1))) + for i in range(len(found) + 1): + if i not in found: + return i + + defer.returnValue((yield self.runInteraction( + "find_next_generated_user_id", + _find_next_generated_user_id + ))) + + @defer.inlineCallbacks + def get_3pid_guest_access_token(self, medium, address): + ret = yield self._simple_select_one( + "threepid_guest_access_tokens", + { + "medium": medium, + "address": address + }, + ["guest_access_token"], True, 'get_3pid_guest_access_token' + ) + if ret: + defer.returnValue(ret["guest_access_token"]) + defer.returnValue(None) + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -326,20 +441,6 @@ class RegistrationStore(RegistrationWorkerStore, ) txn.call_after(self.is_guest.invalidate, (user_id,)) - def get_users_by_id_case_insensitive(self, user_id): - """Gets users that match user_id case insensitively. - Returns a mapping of user_id -> password_hash. - """ - def f(txn): - sql = ( - "SELECT name, password_hash FROM users" - " WHERE lower(name) = lower(?)" - ) - txn.execute(sql, (user_id,)) - return dict(txn) - - return self.runInteraction("get_users_by_id_case_insensitive", f) - def user_set_password_hash(self, user_id, password_hash): """ NB. 
This does *not* evict any cache because the one use for this @@ -564,107 +665,6 @@ class RegistrationStore(RegistrationWorkerStore, desc="user_delete_threepids", ) - @defer.inlineCallbacks - def count_all_users(self): - """Counts all users registered on the homeserver.""" - def _count_users(txn): - txn.execute("SELECT COUNT(*) AS users FROM users") - rows = self.cursor_to_dict(txn) - if rows: - return rows[0]["users"] - return 0 - - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) - - def count_daily_user_type(self): - """ - Counts 1) native non guest users - 2) native guests users - 3) bridged users - who registered on the homeserver in the past 24 hours - """ - def _count_daily_user_type(txn): - yesterday = int(self._clock.time()) - (60 * 60 * 24) - - sql = """ - SELECT user_type, COALESCE(count(*), 0) AS count FROM ( - SELECT - CASE - WHEN is_guest=0 AND appservice_id IS NULL THEN 'native' - WHEN is_guest=1 AND appservice_id IS NULL THEN 'guest' - WHEN is_guest=0 AND appservice_id IS NOT NULL THEN 'bridged' - END AS user_type - FROM users - WHERE creation_ts > ? - ) AS t GROUP BY user_type - """ - results = {'native': 0, 'guest': 0, 'bridged': 0} - txn.execute(sql, (yesterday,)) - for row in txn: - results[row[0]] = row[1] - return results - return self.runInteraction("count_daily_user_type", _count_daily_user_type) - - @defer.inlineCallbacks - def count_nonbridged_users(self): - def _count_users(txn): - txn.execute(""" - SELECT COALESCE(COUNT(*), 0) FROM users - WHERE appservice_id IS NULL - """) - count, = txn.fetchone() - return count - - ret = yield self.runInteraction("count_users", _count_users) - defer.returnValue(ret) - - @defer.inlineCallbacks - def find_next_generated_user_id_localpart(self): - """ - Gets the localpart of the next generated user ID. - - Generated user IDs are integers, and we aim for them to be as small as - we can. Unfortunately, it's possible some of them are already taken by - existing users, and there may be gaps in the already taken range. This - function returns the start of the first allocatable gap. This is to - avoid the case of ID 10000000 being pre-allocated, so us wasting the - first (and shortest) many generated user IDs. 
- """ - def _find_next_generated_user_id(txn): - txn.execute("SELECT name FROM users") - - regex = re.compile(r"^@(\d+):") - - found = set() - - for user_id, in txn: - match = regex.search(user_id) - if match: - found.add(int(match.group(1))) - for i in range(len(found) + 1): - if i not in found: - return i - - defer.returnValue((yield self.runInteraction( - "find_next_generated_user_id", - _find_next_generated_user_id - ))) - - @defer.inlineCallbacks - def get_3pid_guest_access_token(self, medium, address): - ret = yield self._simple_select_one( - "threepid_guest_access_tokens", - { - "medium": medium, - "address": address - }, - ["guest_access_token"], True, 'get_3pid_guest_access_token' - ) - if ret: - defer.returnValue(ret["guest_access_token"]) - defer.returnValue(None) - @defer.inlineCallbacks def save_or_get_3pid_guest_access_token( self, medium, address, access_token, inviter_user_id -- cgit 1.4.1 From 4cc4400b4deabed113548b296656d0e6e404dbde Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Feb 2019 17:19:01 +0000 Subject: Split /login into client_reader --- synapse/app/client_reader.py | 2 + synapse/storage/registration.py | 82 ++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 41 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 9250b6c239..043b48f8f3 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -40,6 +40,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v1.login import LoginRestServlet from synapse.rest.client.v1.room import ( JoinedRoomMemberListRestServlet, PublicRoomListRestServlet, @@ -94,6 +95,7 @@ class ClientReaderServer(HomeServer): RoomStateRestServlet(self).register(resource) RoomEventContextServlet(self).register(resource) RegisterRestServlet(self).register(resource) + LoginRestServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 3bc5def48e..9b9572890b 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -254,6 +254,47 @@ class RegistrationWorkerStore(SQLBaseStore): defer.returnValue(ret["guest_access_token"]) defer.returnValue(None) + @defer.inlineCallbacks + def get_user_id_by_threepid(self, medium, address): + """Returns user id from threepid + + Args: + medium (str): threepid medium e.g. email + address (str): threepid address e.g. me@example.com + + Returns: + Deferred[str|None]: user id or None if no user id/threepid mapping exists + """ + user_id = yield self.runInteraction( + "get_user_id_by_threepid", self.get_user_id_by_threepid_txn, + medium, address + ) + defer.returnValue(user_id) + + def get_user_id_by_threepid_txn(self, txn, medium, address): + """Returns user id from threepid + + Args: + txn (cursor): + medium (str): threepid medium e.g. email + address (str): threepid address e.g. 
me@example.com + + Returns: + str|None: user id or None if no user id/threepid mapping exists + """ + ret = self._simple_select_one_txn( + txn, + "user_threepids", + { + "medium": medium, + "address": address + }, + ['user_id'], True + ) + if ret: + return ret['user_id'] + return None + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -613,47 +654,6 @@ class RegistrationStore(RegistrationWorkerStore, ) defer.returnValue(ret) - @defer.inlineCallbacks - def get_user_id_by_threepid(self, medium, address): - """Returns user id from threepid - - Args: - medium (str): threepid medium e.g. email - address (str): threepid address e.g. me@example.com - - Returns: - Deferred[str|None]: user id or None if no user id/threepid mapping exists - """ - user_id = yield self.runInteraction( - "get_user_id_by_threepid", self.get_user_id_by_threepid_txn, - medium, address - ) - defer.returnValue(user_id) - - def get_user_id_by_threepid_txn(self, txn, medium, address): - """Returns user id from threepid - - Args: - txn (cursor): - medium (str): threepid medium e.g. email - address (str): threepid address e.g. me@example.com - - Returns: - str|None: user id or None if no user id/threepid mapping exists - """ - ret = self._simple_select_one_txn( - txn, - "user_threepids", - { - "medium": medium, - "address": address - }, - ['user_id'], True - ) - if ret: - return ret['user_id'] - return None - def user_delete_threepid(self, user_id, medium, address): return self._simple_delete( "user_threepids", -- cgit 1.4.1 From a9b5ea6fc1e26ff791118b67af01fdad8e9c68c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Feb 2019 17:53:31 +0000 Subject: Batch cache invalidation over replication Currently, whenever the current state in a room changes, we invalidate a lot of caches, which causes *a lot* of traffic over replication. Instead, let's batch up all those invalidations and send a single poke down the replication streams. Hopefully this will reduce load on the master process by substantially reducing traffic. --- synapse/replication/slave/storage/_base.py | 19 ++++++---- synapse/storage/_base.py | 57 +++++++++++++++++++++++++++++- synapse/storage/events.py | 25 +------------ 3 files changed, 69 insertions(+), 32 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index 2d81d49e9a..1353a32d00 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -17,7 +17,7 @@ import logging import six -from synapse.storage._base import SQLBaseStore +from synapse.storage._base import _CURRENT_STATE_CACHE_NAME, SQLBaseStore from synapse.storage.engines import PostgresEngine from ._slaved_id_tracker import SlavedIdTracker @@ -54,12 +54,17 @@ class BaseSlavedStore(SQLBaseStore): if stream_name == "caches": self._cache_id_gen.advance(token) for row in rows: - try: - getattr(self, row.cache_func).invalidate(tuple(row.keys)) - except AttributeError: - # We probably haven't pulled in the cache in this worker, - # which is fine. + if row.cache_func == _CURRENT_STATE_CACHE_NAME: + room_id = row.keys[0] + members_changed = set(row.keys[1:]) + self._invalidate_state_caches(room_id, members_changed) + else: + try: + getattr(self, row.cache_func).invalidate(tuple(row.keys)) + except AttributeError: + # We probably haven't pulled in the cache in this worker, + # which is fine.
+ pass def _invalidate_cache_and_stream(self, txn, cache_func, keys): txn.call_after(cache_func.invalidate, keys) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index e124161845..f7c6d714ab 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -28,6 +28,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.types import get_domain_from_id from synapse.util.caches.descriptors import Cache from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.stringutils import exception_to_unicode @@ -64,6 +65,10 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { "event_search": "event_search_event_id_idx", } +# This is a special cache name we use to batch multiple invalidations of caches +# based on the current state when notifying workers over replication. +_CURRENT_STATE_CACHE_NAME = "cs_cache_fake" + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object @@ -1184,6 +1189,56 @@ class SQLBaseStore(object): be invalidated. """ txn.call_after(cache_func.invalidate, keys) + self._send_invalidation_to_replication(txn, cache_func.__name__, keys) + + def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed): + """Special case invalidation of caches based on current state. + + We special case this so that we can batch the cache invalidations into a + single replication poke. + + Args: + txn + room_id (str): Room were state changed + members_changed (set[str]): The user_ids of members that have changed + """ + txn.call_after(self._invalidate_state_caches, room_id, members_changed) + + keys = [room_id] + keys.extend(members_changed) + self._send_invalidation_to_replication( + txn, _CURRENT_STATE_CACHE_NAME, keys, + ) + + def _invalidate_state_caches(self, room_id, members_changed): + """Invalidates caches that are based on the current state, but does + not stream invalidations down replication. + + Args: + room_id (str): Room were state changed + members_changed (set[str]): The user_ids of members that have changed + """ + for member in members_changed: + self.get_rooms_for_user_with_stream_ordering.invalidate((member,)) + + for host in set(get_domain_from_id(u) for u in members_changed): + self.is_host_joined.invalidate((room_id, host)) + self.was_host_joined.invalidate((room_id, host)) + + self.get_users_in_room.invalidate((room_id,)) + self.get_room_summary.invalidate((room_id,)) + self.get_current_state_ids.invalidate((room_id,)) + + def _send_invalidation_to_replication(self, txn, cache_name, keys): + """Notifies replication that given cache has been invalidated. + + Note that this does *not* invalidate the cache locally. 
+
+        Args:
+            txn
+            cache_name (str)
+            keys (list[str])
+        """
 
         if isinstance(self.database_engine, PostgresEngine):
             # get_next() returns a context manager which is designed to wrap
@@ -1201,7 +1256,7 @@ class SQLBaseStore(object):
                 table="cache_invalidation_stream",
                 values={
                     "stream_id": stream_id,
-                    "cache_func": cache_func.__name__,
+                    "cache_func": cache_name,
                     "keys": list(keys),
                     "invalidation_ts": self.clock.time_msec(),
                 }
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 81b250480d..06db9e56e6 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -979,30 +979,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
             if ev_type == EventTypes.Member
         )
 
-        for member in members_changed:
-            self._invalidate_cache_and_stream(
-                txn, self.get_rooms_for_user_with_stream_ordering, (member,)
-            )
-
-        for host in set(get_domain_from_id(u) for u in members_changed):
-            self._invalidate_cache_and_stream(
-                txn, self.is_host_joined, (room_id, host)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.was_host_joined, (room_id, host)
-            )
-
-        self._invalidate_cache_and_stream(
-            txn, self.get_users_in_room, (room_id,)
-        )
-
-        self._invalidate_cache_and_stream(
-            txn, self.get_room_summary, (room_id,)
-        )
-
-        self._invalidate_cache_and_stream(
-            txn, self.get_current_state_ids, (room_id,)
-        )
+        self._invalidate_state_caches_and_stream(txn, room_id, members_changed)
 
     def _update_forward_extremities_txn(self, txn, new_forward_extremities,
                                         max_stream_order):
-- cgit 1.4.1


From bc8fa1509d3885d111a2ef98e12e9ce66a19a3a8 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 19 Feb 2019 11:24:59 +0000
Subject: Documentation

---
 docs/tcp_replication.rst | 21 ++++++++++++++++++++-
 synapse/storage/_base.py |  8 ++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'synapse/storage')

diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst
index 62225ba6f4..852f1113a3 100644
--- a/docs/tcp_replication.rst
+++ b/docs/tcp_replication.rst
@@ -137,7 +137,6 @@ for each stream so that on reconneciton it can start streaming from the correct
 place. Note: not all RDATA have valid tokens due to batching. See
 ``RdataCommand`` for more details.
 
-
 Example
 ~~~~~~~
 
@@ -221,3 +220,23 @@ SYNC (S, C)
 
 See ``synapse/replication/tcp/commands.py`` for a detailed description and the
 format of each command.
+
+
+Cache Invalidation Stream
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The cache invalidation stream is used to inform workers when they need to
+invalidate any of their caches in the data store. This is done by streaming all
+cache invalidations done on master down to the workers, assuming that any caches
+on the workers also exist on the master.
+
+Each individual cache invalidation results in a row being sent down replication,
+which includes the cache name (the name of the function) and the key to
+invalidate. For example::
+
+    > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251]
+
+However, there are times when a number of caches need to be invalidated at the
+same time with the same key. To reduce traffic we batch those invalidations into
+a single poke by defining a special cache name that workers understand to mean
+to expand to invalidate the correct caches.
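To make the batched poke concrete, a worker-side dispatcher for this stream
could look roughly like the following Python sketch. It reuses the
``cs_cache_fake`` name and the ``_invalidate_state_caches`` helper introduced
in the diffs above; ``CacheInvalidationRow`` and ``dispatch_cache_invalidation``
are hypothetical names invented for this illustration, not part of Synapse::

    import collections

    # Stand-in for the rows that arrive as `RDATA caches ...`; the attribute
    # names mirror row.cache_func / row.keys used in BaseSlavedStore above.
    CacheInvalidationRow = collections.namedtuple(
        "CacheInvalidationRow", ["cache_func", "keys"]
    )

    _CURRENT_STATE_CACHE_NAME = "cs_cache_fake"

    def dispatch_cache_invalidation(store, row):
        if row.cache_func == _CURRENT_STATE_CACHE_NAME:
            # Batched poke: keys is [room_id, member_1, member_2, ...], so one
            # row fans out into every cache based on the room's current state.
            room_id, members_changed = row.keys[0], set(row.keys[1:])
            store._invalidate_state_caches(room_id, members_changed)
        else:
            # Ordinary poke: invalidate the single named cache.
            getattr(store, row.cache_func).invalidate(tuple(row.keys))

    # e.g. the RDATA example from the documentation above:
    # dispatch_cache_invalidation(
    #     store, CacheInvalidationRow("get_user_by_id", ["@bob:example.com"])
    # )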
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f7c6d714ab..1c8d3f0026 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1199,8 +1199,8 @@ class SQLBaseStore(object): Args: txn - room_id (str): Room were state changed - members_changed (set[str]): The user_ids of members that have changed + room_id (str): Room where state changed + members_changed (Iterable[str]): The user_ids of members that have changed """ txn.call_after(self._invalidate_state_caches, room_id, members_changed) @@ -1215,7 +1215,7 @@ class SQLBaseStore(object): not stream invalidations down replication. Args: - room_id (str): Room were state changed + room_id (str): Room where state changed members_changed (set[str]): The user_ids of members that have changed """ for member in members_changed: @@ -1237,7 +1237,7 @@ class SQLBaseStore(object): Args: txn cache_name (str) - keys (list[str]) + keys (iterable[str]) """ if isinstance(self.database_engine, PostgresEngine): -- cgit 1.4.1 From 1bb35e3a83146a55bf7d8a18d38aa0d59f1289d5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Feb 2019 11:34:40 +0000 Subject: Use itertools --- synapse/storage/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1c8d3f0026..9db594bc42 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging import sys import threading @@ -1204,8 +1205,7 @@ class SQLBaseStore(object): """ txn.call_after(self._invalidate_state_caches, room_id, members_changed) - keys = [room_id] - keys.extend(members_changed) + keys = itertools.chain([room_id], members_changed) self._send_invalidation_to_replication( txn, _CURRENT_STATE_CACHE_NAME, keys, ) -- cgit 1.4.1 From 62175a20e51b4ce71b9e7a18755a42e259bd2ff8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Feb 2019 11:38:40 +0000 Subject: Docs --- docs/tcp_replication.rst | 5 +++++ synapse/storage/_base.py | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst index 852f1113a3..73436cea62 100644 --- a/docs/tcp_replication.rst +++ b/docs/tcp_replication.rst @@ -240,3 +240,8 @@ However, there are times when a number of caches need to be invalidated at the same time with the same key. To reduce traffic we batch those invalidations into a single poke by defining a special cache name that workers understand to mean to expand to invalidate the correct caches. + +Currently the special cache names are declared in ``synapse/storage/_base.py`` +and are: + +1. 
``cs_cache_fake`` ─ invalidates caches that depend on the current state diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 9db594bc42..f1a5366b95 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1201,7 +1201,7 @@ class SQLBaseStore(object): Args: txn room_id (str): Room where state changed - members_changed (Iterable[str]): The user_ids of members that have changed + members_changed (iterable[str]): The user_ids of members that have changed """ txn.call_after(self._invalidate_state_caches, room_id, members_changed) @@ -1216,7 +1216,8 @@ class SQLBaseStore(object): Args: room_id (str): Room where state changed - members_changed (set[str]): The user_ids of members that have changed + members_changed (iterable[str]): The user_ids of members that have + changed """ for member in members_changed: self.get_rooms_for_user_with_stream_ordering.invalidate((member,)) -- cgit 1.4.1 From a06614bd2aba5d751e002433c88e8ba1ba02c50b Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 20 Feb 2019 23:03:30 +1100 Subject: UPSERT many functionality (#4644) --- .travis.yml | 2 +- changelog.d/4644.misc | 1 + synapse/storage/_base.py | 146 ++++++++++++++++++++++++++++++++++++++++---- tests/storage/test__base.py | 88 ++++++++++++++++++++++++++ 4 files changed, 224 insertions(+), 13 deletions(-) create mode 100644 changelog.d/4644.misc (limited to 'synapse/storage') diff --git a/.travis.yml b/.travis.yml index d88f10324f..5d763123a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -89,7 +89,7 @@ install: - psql -At -U postgres -c 'select version();' || true - pip install tox - + # if we don't have python3.6 in this environment, travis unhelpfully gives us # a `python3.6` on our path which does nothing but spit out a warning. Tox # tries to run it (even if we're not running a py36 env), so the build logs diff --git a/changelog.d/4644.misc b/changelog.d/4644.misc new file mode 100644 index 0000000000..84137c3412 --- /dev/null +++ b/changelog.d/4644.misc @@ -0,0 +1 @@ +Introduce upsert batching functionality in the database layer. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f1a5366b95..3d895da43c 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -106,6 +106,14 @@ class LoggingTransaction(object): def __iter__(self): return self.txn.__iter__() + def execute_batch(self, sql, args): + if isinstance(self.database_engine, PostgresEngine): + from psycopg2.extras import execute_batch + self._do_execute(lambda *x: execute_batch(self.txn, *x), sql, args) + else: + for val in args: + self.execute(sql, val) + def execute(self, sql, *args): self._do_execute(self.txn.execute, sql, *args) @@ -699,20 +707,34 @@ class SQLBaseStore(object): else: return "%s = ?" % (key,) - # First try to update. - sql = "UPDATE %s SET %s WHERE %s" % ( - table, - ", ".join("%s = ?" % (k,) for k in values), - " AND ".join(_getwhere(k) for k in keyvalues) - ) - sqlargs = list(values.values()) + list(keyvalues.values()) + if not values: + # If `values` is empty, then all of the values we care about are in + # the unique key, so there is nothing to UPDATE. We can just do a + # SELECT instead to see if it exists. + sql = "SELECT 1 FROM %s WHERE %s" % ( + table, + " AND ".join(_getwhere(k) for k in keyvalues) + ) + sqlargs = list(keyvalues.values()) + txn.execute(sql, sqlargs) + if txn.fetchall(): + # We have an existing record. + return False + else: + # First try to update. + sql = "UPDATE %s SET %s WHERE %s" % ( + table, + ", ".join("%s = ?" 
% (k,) for k in values), + " AND ".join(_getwhere(k) for k in keyvalues) + ) + sqlargs = list(values.values()) + list(keyvalues.values()) - txn.execute(sql, sqlargs) - if txn.rowcount > 0: - # successfully updated at least one row. - return False + txn.execute(sql, sqlargs) + if txn.rowcount > 0: + # successfully updated at least one row. + return False - # We didn't update any rows so insert a new one + # We didn't find any existing rows, so insert a new one allvalues = {} allvalues.update(keyvalues) allvalues.update(values) @@ -759,6 +781,106 @@ class SQLBaseStore(object): ) txn.execute(sql, list(allvalues.values())) + def _simple_upsert_many_txn( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + if ( + self.database_engine.can_native_upsert + and table not in self._unsafe_to_upsert_tables + ): + return self._simple_upsert_many_txn_native_upsert( + txn, table, key_names, key_values, value_names, value_values + ) + else: + return self._simple_upsert_many_txn_emulated( + txn, table, key_names, key_values, value_names, value_values + ) + + def _simple_upsert_many_txn_emulated( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times, but without native UPSERT support or batching. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + # No value columns, therefore make a blank list so that the following + # zip() works correctly. + if not value_names: + value_values = [() for x in range(len(key_values))] + + for keyv, valv in zip(key_values, value_values): + _keys = {x: y for x, y in zip(key_names, keyv)} + _vals = {x: y for x, y in zip(value_names, valv)} + + self._simple_upsert_txn_emulated(txn, table, _keys, _vals) + + def _simple_upsert_many_txn_native_upsert( + self, txn, table, key_names, key_values, value_names, value_values + ): + """ + Upsert, many times, using batching where possible. + + Args: + table (str): The table to upsert into + key_names (list[str]): The key column names. + key_values (list[list]): A list of each row's key column values. + value_names (list[str]): The value column names. If empty, no + values will be used, even if value_values is provided. + value_values (list[list]): A list of each row's value column values. + Returns: + None + """ + allnames = [] + allnames.extend(key_names) + allnames.extend(value_names) + + if not value_names: + # No value columns, therefore make a blank list so that the + # following zip() works correctly. + latter = "NOTHING" + value_values = [() for x in range(len(key_values))] + else: + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in value_names) + ) + + sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s" % ( + table, + ", ".join(k for k in allnames), + ", ".join("?" 
for _ in allnames), + ", ".join(key_names), + latter, + ) + + args = [] + + for x, y in zip(key_values, value_values): + args.append(tuple(x) + tuple(y)) + + return txn.execute_batch(sql, args) + def _simple_select_one(self, table, keyvalues, retcols, allow_none=False, desc="_simple_select_one"): """Executes a SELECT query on the named table, which is expected to diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 52eb05bfbf..dd49a14524 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -314,3 +315,90 @@ class CacheDecoratorTestCase(unittest.TestCase): self.assertEquals(callcount[0], 2) self.assertEquals(callcount2[0], 3) + + +class UpsertManyTests(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, hs): + self.storage = hs.get_datastore() + + self.table_name = "table_" + hs.get_secrets().token_hex(6) + self.get_success( + self.storage.runInteraction( + "create", + lambda x, *a: x.execute(*a), + "CREATE TABLE %s (id INTEGER, username TEXT, value TEXT)" + % (self.table_name,), + ) + ) + self.get_success( + self.storage.runInteraction( + "index", + lambda x, *a: x.execute(*a), + "CREATE UNIQUE INDEX %sindex ON %s(id, username)" + % (self.table_name, self.table_name), + ) + ) + + def _dump_to_tuple(self, res): + for i in res: + yield (i["id"], i["username"], i["value"]) + + def test_upsert_many(self): + """ + Upsert_many will perform the upsert operation across a batch of data. + """ + # Add some data to an empty table + key_names = ["id", "username"] + value_names = ["value"] + key_values = [[1, "user1"], [2, "user2"]] + value_values = [["hello"], ["there"]] + + self.get_success( + self.storage.runInteraction( + "test", + self.storage._simple_upsert_many_txn, + self.table_name, + key_names, + key_values, + value_names, + value_values, + ) + ) + + # Check results are what we expect + res = self.get_success( + self.storage._simple_select_list( + self.table_name, None, ["id, username, value"] + ) + ) + self.assertEqual( + set(self._dump_to_tuple(res)), + set([(1, "user1", "hello"), (2, "user2", "there")]), + ) + + # Update only user2 + key_values = [[2, "user2"]] + value_values = [["bleb"]] + + self.get_success( + self.storage.runInteraction( + "test", + self.storage._simple_upsert_many_txn, + self.table_name, + key_names, + key_values, + value_names, + value_values, + ) + ) + + # Check results are what we expect + res = self.get_success( + self.storage._simple_select_list( + self.table_name, None, ["id, username, value"] + ) + ) + self.assertEqual( + set(self._dump_to_tuple(res)), + set([(1, "user1", "hello"), (2, "user2", "bleb")]), + ) -- cgit 1.4.1 From 80467bbac3be6e008b807793dfd27c733936c15c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 Feb 2019 14:38:14 +0000 Subject: Fix state cache invalidation on workers --- synapse/replication/slave/storage/_base.py | 7 +----- synapse/storage/_base.py | 40 +++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index 1353a32d00..817d1f67f9 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -59,12 +59,7 @@ class 
BaseSlavedStore(SQLBaseStore): members_changed = set(row.keys[1:]) self._invalidate_state_caches(room_id, members_changed) else: - try: - getattr(self, row.cache_func).invalidate(tuple(row.keys)) - except AttributeError: - # We probably haven't pulled in the cache in this worker, - # which is fine. - pass + self._attempt_to_invalidate_cache(row.cache_func, tuple(row.keys)) def _invalidate_cache_and_stream(self, txn, cache_func, keys): txn.call_after(cache_func.invalidate, keys) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 3d895da43c..5a80eef211 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1342,15 +1342,43 @@ class SQLBaseStore(object): changed """ for member in members_changed: - self.get_rooms_for_user_with_stream_ordering.invalidate((member,)) + self._attempt_to_invalidate_cache( + "get_rooms_for_user_with_stream_ordering", (member,), + ) for host in set(get_domain_from_id(u) for u in members_changed): - self.is_host_joined.invalidate((room_id, host)) - self.was_host_joined.invalidate((room_id, host)) + self._attempt_to_invalidate_cache( + "is_host_joined", (room_id, host,), + ) + self._attempt_to_invalidate_cache( + "was_host_joined", (room_id, host,), + ) + + self._attempt_to_invalidate_cache( + "get_users_in_room", (room_id,), + ) + self._attempt_to_invalidate_cache( + "get_room_summary", (room_id,), + ) + self._attempt_to_invalidate_cache( + "get_current_state_ids", (room_id,), + ) + + def _attempt_to_invalidate_cache(self, cache_name, key): + """Attempts to invalidate the cache of the given name, ignoring if the + cache doesn't exist. Mainly used for invalidating caches on workers, + where they may not have the cache. - self.get_users_in_room.invalidate((room_id,)) - self.get_room_summary.invalidate((room_id,)) - self.get_current_state_ids.invalidate((room_id,)) + Args: + cache_name (str) + key (tuple) + """ + try: + getattr(self, cache_name).invalidate(key) + except AttributeError: + # We probably haven't pulled in the cache in this worker, + # which is fine. + pass def _send_invalidation_to_replication(self, txn, cache_name, keys): """Notifies replication that given cache has been invalidated. -- cgit 1.4.1
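As a closing illustration, the ``INSERT ... ON CONFLICT DO UPDATE`` statement
that ``_simple_upsert_many_txn_native_upsert`` builds can be exercised on its
own. The following rough sketch runs against an in-memory SQLite database
(assuming SQLite >= 3.24, which supports this syntax; the ``widgets`` table and
its columns are invented for the example and do not exist in Synapse)::

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE widgets (id INTEGER, username TEXT, value TEXT)")
    # The ON CONFLICT target below must be covered by a unique index.
    conn.execute("CREATE UNIQUE INDEX widgets_idx ON widgets(id, username)")

    key_names, value_names = ["id", "username"], ["value"]

    # Same shape of statement as the native-upsert path above generates.
    sql = "INSERT INTO widgets (%s) VALUES (%s) ON CONFLICT (%s) DO UPDATE SET %s" % (
        ", ".join(key_names + value_names),
        ", ".join("?" for _ in key_names + value_names),
        ", ".join(key_names),
        ", ".join(k + "=EXCLUDED." + k for k in value_names),
    )

    conn.executemany(sql, [(1, "user1", "hello"), (2, "user2", "there")])
    conn.executemany(sql, [(2, "user2", "bleb")])  # updates in place, no new row

    print(conn.execute("SELECT * FROM widgets ORDER BY id").fetchall())
    # [(1, 'user1', 'hello'), (2, 'user2', 'bleb')]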