From 6cdfb0207e2de72286a7a8d3b3c417c2808e90dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:54:56 +0100 Subject: Add index to temp table --- synapse/storage/schema/delta/54/delete_forward_extremities.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index aa40f13da7..b062ec840c 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -20,3 +20,4 @@ INSERT INTO background_updates (update_name, progress_json) VALUES DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; +CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id); -- cgit 1.5.1 From 06675db684f06b5a369846aac896216bf4cc74ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:05:26 +0100 Subject: Newsfile --- changelog.d/5291.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5291.bugfix diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix new file mode 100644 index 0000000000..9e14d20289 --- /dev/null +++ b/changelog.d/5291.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. -- cgit 1.5.1 From 54d50fbfdf8c39d92c36291a572419cee6b9b916 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:15:13 +0100 Subject: Get events all at once --- synapse/storage/stats.py | 59 +++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index eb0ced5b5e..99b4af5555 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -179,46 +179,39 @@ class StatsStore(StateDeltasStore): current_state_ids = yield self.get_current_state_ids(room_id) - join_rules = yield self.get_event( - current_state_ids.get((EventTypes.JoinRules, "")), allow_none=True + join_rules_id = current_state_ids.get((EventTypes.JoinRules, "")) + history_visibility_id = current_state_ids.get( + (EventTypes.RoomHistoryVisibility, "") ) - history_visibility = yield self.get_event( - current_state_ids.get((EventTypes.RoomHistoryVisibility, "")), - allow_none=True, - ) - encryption = yield self.get_event( - current_state_ids.get((EventTypes.RoomEncryption, "")), allow_none=True - ) - name = yield self.get_event( - current_state_ids.get((EventTypes.Name, "")), allow_none=True - ) - topic = yield self.get_event( - current_state_ids.get((EventTypes.Topic, "")), allow_none=True - ) - avatar = yield self.get_event( - current_state_ids.get((EventTypes.RoomAvatar, "")), allow_none=True - ) - canonical_alias = yield self.get_event( - current_state_ids.get((EventTypes.CanonicalAlias, "")), allow_none=True - ) - - def _or_none(x, arg): - if x: - return x.content.get(arg) + encryption_id = current_state_ids.get((EventTypes.RoomEncryption, "")) + name_id = current_state_ids.get((EventTypes.Name, "")) + topic_id = current_state_ids.get((EventTypes.Topic, "")) + avatar_id = current_state_ids.get((EventTypes.RoomAvatar, "")) + canonical_alias_id = current_state_ids.get((EventTypes.CanonicalAlias, "")) + + state_events = yield self.get_events([ + join_rules_id, history_visibility_id, encryption_id, name_id, + topic_id, avatar_id, canonical_alias_id, + ]) + + def _get_or_none(event_id, arg): + event = state_events.get(event_id) + if event: + return event.content.get(arg) return None yield self.update_room_state( room_id, { - "join_rules": _or_none(join_rules, "join_rule"), - "history_visibility": _or_none( - history_visibility, "history_visibility" + "join_rules": _get_or_none(join_rules_id, "join_rule"), + "history_visibility": _get_or_none( + history_visibility_id, "history_visibility" ), - "encryption": _or_none(encryption, "algorithm"), - "name": _or_none(name, "name"), - "topic": _or_none(topic, "topic"), - "avatar": _or_none(avatar, "url"), - "canonical_alias": _or_none(canonical_alias, "alias"), + "encryption": _get_or_none(encryption_id, "algorithm"), + "name": _get_or_none(name_id, "name"), + "topic": _get_or_none(topic_id, "topic"), + "avatar": _get_or_none(avatar_id, "url"), + "canonical_alias": _get_or_none(canonical_alias_id, "alias"), }, ) -- cgit 1.5.1 From 04710cc2d71127b1f416e87f7a4aea3ce6d93410 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:22:32 +0100 Subject: Fetch membership counts all at once --- synapse/storage/roommember.py | 33 +++++++++++---------------------- synapse/storage/stats.py | 23 +++++++---------------- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 4bd1669458..7617913326 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -142,26 +142,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return self.runInteraction("get_room_summary", _get_room_summary_txn) - def _get_user_count_in_room_txn(self, txn, room_id, membership): + def _get_user_counts_in_room_txn(self, txn, room_id): """ - See get_user_count_in_room. - """ - sql = ( - "SELECT count(*) FROM room_memberships as m" - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id " - " AND m.room_id = c.room_id " - " AND m.user_id = c.state_key" - " WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?" - ) - - txn.execute(sql, (room_id, membership)) - row = txn.fetchone() - return row[0] - - def get_user_count_in_room(self, room_id, membership): - """ - Get the user count in a room with a particular membership. + Get the user count in a room by membership. Args: room_id (str) @@ -170,9 +153,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): Returns: Deferred[int] """ - return self.runInteraction( - "get_users_in_room", self._get_user_count_in_room_txn, room_id, membership - ) + sql = """ + SELECT m.membership, count(*) FROM room_memberships as m + INNER JOIN current_state_events as c USING(event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? + GROUP BY m.membership + """ + + txn.execute(sql, (room_id,)) + return {row[0]: row[1] for row in txn} @cached() def get_invited_rooms_for_user(self, user_id): diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 99b4af5555..727f60b3bd 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -226,18 +226,9 @@ class StatsStore(StateDeltasStore): current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn) current_state_events = len(current_state_ids) - joined_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.JOIN - ) - invited_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.INVITE - ) - left_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.LEAVE - ) - banned_members = self._get_user_count_in_room_txn( - txn, room_id, Membership.BAN - ) + + membership_counts = self._get_user_counts_in_room_txn(txn, room_id) + total_state_events = self._get_total_state_event_counts_txn( txn, room_id ) @@ -250,10 +241,10 @@ class StatsStore(StateDeltasStore): { "bucket_size": self.stats_bucket_size, "current_state_events": current_state_events, - "joined_members": joined_members, - "invited_members": invited_members, - "left_members": left_members, - "banned_members": banned_members, + "joined_members": membership_counts.get(Membership.JOIN, 0), + "invited_members": membership_counts.get(Membership.INVITE, 0), + "left_members": membership_counts.get(Membership.LEAVE, 0), + "banned_members": membership_counts.get(Membership.BAN, 0), "state_events": total_state_events, }, ) -- cgit 1.5.1 From e2c46ed851599dc08cc8a822e07c0d4f9a050ee2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:26:38 +0100 Subject: Move deletion from table inside txn --- synapse/storage/stats.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index 727f60b3bd..a99637d4b4 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -254,10 +254,13 @@ class StatsStore(StateDeltasStore): {"room_id": room_id, "token": current_token}, ) + # We've finished a room. Delete it from the table. + self._simple_delete_one_txn( + txn, TEMP_TABLE + "_rooms", {"room_id": room_id}, + ) + yield self.runInteraction("update_room_stats", _fetch_data) - # We've finished a room. Delete it from the table. - yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) # Update the remaining counter. progress["remaining"] -= 1 yield self.runInteraction( -- cgit 1.5.1 From 5ac75fc9a2d80ddf2974d281c381f82515606403 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:26:55 +0100 Subject: Join against events to use its room_id index --- synapse/storage/events_worker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index b56c83e460..1782428048 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -618,7 +618,12 @@ class EventsWorkerStore(SQLBaseStore): """ See get_total_state_event_counts. """ - sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?" + # We join against the events table as that has an index on room_id + sql = """ + SELECT COUNT(*) FROM state_events + INNER JOIN events USING (room_id, event_id) + WHERE room_id=? + """ txn.execute(sql, (room_id,)) row = txn.fetchone() return row[0] if row else 0 -- cgit 1.5.1 From 8824325b829baa5262242a50d0ea2c9b738feb78 Mon Sep 17 00:00:00 2001 From: Eisha Chen-yen-su Date: Thu, 30 May 2019 16:58:53 +0200 Subject: Fix ignored filter field in `/messages` endpoint This fixes a bug which were causing the "event_format" field to be ignored in the filter of requests to the `/messages` endpoint of the CS API. Signed-off-by: Eisha Chen-yen-su --- synapse/rest/client/v1/room.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 255a85c588..b92c6a9a9c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -475,6 +475,8 @@ class RoomMessageListRestServlet(ClientV1RestServlet): if filter_bytes: filter_json = urlparse.unquote(filter_bytes.decode("UTF-8")) event_filter = Filter(json.loads(filter_json)) + if event_filter.filter_json.get("event_format", "client") == "federation": + as_client_event = False else: event_filter = None msgs = yield self.pagination_handler.get_messages( -- cgit 1.5.1 From 0b6bc36402b747a6c1bad119aaffdcd326990346 Mon Sep 17 00:00:00 2001 From: Eisha Chen-yen-su Date: Thu, 30 May 2019 17:07:21 +0200 Subject: Add changelog --- changelog.d/5293.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5293.bugfix diff --git a/changelog.d/5293.bugfix b/changelog.d/5293.bugfix new file mode 100644 index 0000000000..aa519a8433 --- /dev/null +++ b/changelog.d/5293.bugfix @@ -0,0 +1 @@ +Fix a bug where it is not possible to get events in the federation format with the request `GET /_matrix/client/r0/rooms/{roomId}/messages`. -- cgit 1.5.1 From df9d9005448d837c5e1a2b75edb5730e2062b0f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:56:24 +0100 Subject: Correctly filter out extremities with soft failed prevs (#5274) When we receive a soft failed event we, correctly, *do not* update the forward extremity table with the event. However, if we later receive an event that references the soft failed event we then need to remove the soft failed events prev events from the forward extremities table, otherwise we just build up forward extremities. Fixes #5269 --- changelog.d/5274.bugfix | 1 + synapse/storage/events.py | 82 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 changelog.d/5274.bugfix diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix new file mode 100644 index 0000000000..9e14d20289 --- /dev/null +++ b/changelog.d/5274.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2ffc27ff41..6e9f3d1dc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -554,10 +554,18 @@ class EventsStore( e_id for event in new_events for e_id in event.prev_event_ids() ) - # Finally, remove any events which are prev_events of any existing events. + # Remove any events which are prev_events of any existing events. existing_prevs = yield self._get_events_which_are_prevs(result) result.difference_update(existing_prevs) + # Finally handle the case where the new events have soft-failed prev + # events. If they do we need to remove them and their prev events, + # otherwise we end up with dangling extremities. + existing_prevs = yield self._get_prevs_before_rejected( + e_id for event in new_events for e_id in event.prev_event_ids() + ) + result.difference_update(existing_prevs) + defer.returnValue(result) @defer.inlineCallbacks @@ -573,7 +581,7 @@ class EventsStore( """ results = [] - def _get_events(txn, batch): + def _get_events_which_are_prevs_txn(txn, batch): sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -596,10 +604,78 @@ class EventsStore( ) for chunk in batch_iter(event_ids, 100): - yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk) + yield self.runInteraction( + "_get_events_which_are_prevs", + _get_events_which_are_prevs_txn, + chunk, + ) defer.returnValue(results) + @defer.inlineCallbacks + def _get_prevs_before_rejected(self, event_ids): + """Get soft-failed ancestors to remove from the extremities. + + Given a set of events, find all those that have been soft-failed or + rejected. Returns those soft failed/rejected events and their prev + events (whether soft-failed/rejected or not), and recurses up the + prev-event graph until it finds no more soft-failed/rejected events. + + This is used to find extremities that are ancestors of new events, but + are separated by soft failed events. + + Args: + event_ids (Iterable[str]): Events to find prev events for. Note + that these must have already been persisted. + + Returns: + Deferred[set[str]] + """ + + # The set of event_ids to return. This includes all soft-failed events + # and their prev events. + existing_prevs = set() + + def _get_prevs_before_rejected_txn(txn, batch): + to_recursively_check = batch + + while to_recursively_check: + sql = """ + SELECT + event_id, prev_event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + LEFT JOIN rejections USING (event_id) + LEFT JOIN event_json USING (event_id) + WHERE + event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_recursively_check), + ) + + txn.execute(sql, to_recursively_check) + to_recursively_check = [] + + for event_id, prev_event_id, metadata, rejected in txn: + if prev_event_id in existing_prevs: + continue + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + to_recursively_check.append(prev_event_id) + existing_prevs.add(prev_event_id) + + for chunk in batch_iter(event_ids, 100): + yield self.runInteraction( + "_get_prevs_before_rejected", + _get_prevs_before_rejected_txn, + chunk, + ) + + defer.returnValue(existing_prevs) + @defer.inlineCallbacks def _get_new_state_after_events( self, room_id, events_context, old_latest_event_ids, new_latest_event_ids -- cgit 1.5.1 From 6ebc08c09d4ced251750cb087aa4689f90cdd4b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 18:52:41 +0100 Subject: Add DB bg update to cleanup extremities. Due to #5269 we may have extremities in our DB that we shouldn't have, so lets add a cleanup task such to remove those. --- synapse/storage/events.py | 186 +++++++++++++++++++++ .../schema/delta/54/delete_forward_extremities.sql | 19 +++ 2 files changed, 205 insertions(+) create mode 100644 synapse/storage/schema/delta/54/delete_forward_extremities.sql diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 6e9f3d1dc0..a9be143bd5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -221,6 +221,7 @@ class EventsStore( ): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) @@ -252,6 +253,11 @@ class EventsStore( psql_only=True, ) + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() @@ -2341,6 +2347,186 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their prev events, if any, and their prev events rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph.setdefault(event_id, set()).add(prev_event_id) + logger.info("Already handled") + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + logger.info("Deleting up to %d forward extremities", len(to_delete)) + + self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + if to_delete: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + for row in rows: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (row["room_id"],) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) + AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql new file mode 100644 index 0000000000..7056bd1d00 --- /dev/null +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -0,0 +1,19 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('delete_soft_failed_extremities', '{}'); + +CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; -- cgit 1.5.1 From 1d818fde14595299de9e13008c67239ca677014f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 11:58:32 +0100 Subject: Log actual number of entries deleted --- synapse/storage/_base.py | 12 +++++++++--- synapse/storage/events.py | 6 ++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index fa6839ceca..3fe827cd43 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -1261,7 +1261,8 @@ class SQLBaseStore(object): " AND ".join("%s = ?" % (k,) for k in keyvalues), ) - return txn.execute(sql, list(keyvalues.values())) + txn.execute(sql, list(keyvalues.values())) + return txn.rowcount def _simple_delete_many(self, table, column, iterable, keyvalues, desc): return self.runInteraction( @@ -1280,9 +1281,12 @@ class SQLBaseStore(object): column : column name to test for inclusion against `iterable` iterable : list keyvalues : dict of column names and values to select the rows with + + Returns: + int: Number rows deleted """ if not iterable: - return + return 0 sql = "DELETE FROM %s" % table @@ -1297,7 +1301,9 @@ class SQLBaseStore(object): if clauses: sql = "%s WHERE %s" % (sql, " AND ".join(clauses)) - return txn.execute(sql, values) + txn.execute(sql, values) + + return txn.rowcount def _get_cache_dict( self, db_conn, table, entity_column, stream_column, max_value, limit=100000 diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9be143bd5..a9664928ca 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2476,7 +2476,7 @@ class EventsStore( logger.info("Deleting up to %d forward extremities", len(to_delete)) - self._simple_delete_many_txn( + deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", column="event_id", @@ -2484,7 +2484,9 @@ class EventsStore( keyvalues={}, ) - if to_delete: + logger.info("Deleted %d forward extremities", deleted) + + if deleted: # We now need to invalidate the caches of these rooms rows = self._simple_select_many_txn( txn, -- cgit 1.5.1 From 6574d4ad0a95e1d5a709e5d1d61555561dc180f2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 14:19:11 +0100 Subject: Add test --- tests/storage/test_cleanup_extrems.py | 248 ++++++++++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100644 tests/storage/test_cleanup_extrems.py diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py new file mode 100644 index 0000000000..6dda66ecd3 --- /dev/null +++ b/tests/storage/test_cleanup_extrems.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path + +from synapse.api.constants import EventTypes +from synapse.storage import prepare_database +from synapse.types import Requester, UserID + +from tests.unittest import HomeserverTestCase + + +class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): + """Test the background update to clean forward extremities table. + """ + + def prepare(self, reactor, clock, homeserver): + self.store = homeserver.get_datastore() + self.event_creator = homeserver.get_event_creation_handler() + self.room_creator = homeserver.get_room_creation_handler() + + # Create a test user and room + self.user = UserID("alice", "test") + self.requester = Requester(self.user, None, False, None, None) + info = self.get_success(self.room_creator.create_room(self.requester, {})) + self.room_id = info["room_id"] + + def create_and_send_event(self, soft_failed=False, prev_event_ids=None): + """Create and send an event. + + Args: + soft_failed (bool): Whether to create a soft failed event or not + prev_event_ids (list[str]|None): Explicitly set the prev events, + or if None just use the default + + Returns: + str: The new event's ID. + """ + prev_events_and_hashes = None + if prev_event_ids: + prev_events_and_hashes = [[p, {}, 0] for p in prev_event_ids] + + event, context = self.get_success( + self.event_creator.create_event( + self.requester, + { + "type": EventTypes.Message, + "room_id": self.room_id, + "sender": self.user.to_string(), + "content": {"body": "", "msgtype": "m.text"}, + }, + prev_events_and_hashes=prev_events_and_hashes, + ) + ) + + if soft_failed: + event.internal_metadata.soft_failed = True + + self.get_success( + self.event_creator.send_nonmember_event(self.requester, event, context) + ) + + return event.event_id + + def add_extremity(self, event_id): + """Add the given event as an extremity to the room. + """ + self.get_success( + self.store._simple_insert( + table="event_forward_extremities", + values={"room_id": self.room_id, "event_id": event_id}, + desc="test_add_extremity", + ) + ) + + self.store.get_latest_event_ids_in_room.invalidate((self.room_id,)) + + def run_background_update(self): + """Re run the background update to clean up the extremities. + """ + # Make sure we don't clash with in progress updates. + self.assertTrue(self.store._all_done, "Background updates are still ongoing") + + schema_path = os.path.join( + prepare_database.dir_path, + "schema", + "delta", + "54", + "delete_forward_extremities.sql", + ) + + def run_delta_file(txn): + prepare_database.executescript(txn, schema_path) + + self.get_success( + self.store.runInteraction("test_delete_forward_extremities", run_delta_file) + ) + + # Ugh, have to reset this flag + self.store._all_done = False + + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) + + def test_soft_failed_extremities_handled_correctly(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- SF2 <- B + + Where SF* are soft failed. + """ + + # Create the room graph + event_id_1 = self.create_and_send_event() + event_id_2 = self.create_and_send_event(True, [event_id_1]) + event_id_3 = self.create_and_send_event(True, [event_id_2]) + event_id_4 = self.create_and_send_event(False, [event_id_3]) + + # Check the latest events are as expected + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + + self.assertEqual(latest_event_ids, [event_id_4]) + + def test_basic_cleanup(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- B + + Where SF* are soft failed, and with extremities of A and B + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_b = self.create_and_send_event(False, [event_id_sf1]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b))) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(latest_event_ids, [event_id_b]) + + def test_chain_of_fail_cleanup(self): + """Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like: + + A <- SF1 <- SF2 <- B + + Where SF* are soft failed, and with extremities of A and B + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_sf2 = self.create_and_send_event(True, [event_id_sf1]) + event_id_b = self.create_and_send_event(False, [event_id_sf2]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b))) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(latest_event_ids, [event_id_b]) + + def test_forked_graph_cleanup(self): + r"""Test that extremities are correctly calculated in the presence of + soft failed events. + + Tests a graph like, where time flows down the page: + + A B + / \ / + / \ / + SF1 SF2 + | | + SF3 | + / \ | + | \ | + C SF4 + + Where SF* are soft failed, and with them A, B and C marked as + extremities. This should resolve to B and C being marked as extremity. + """ + # Create the room graph + event_id_a = self.create_and_send_event() + event_id_b = self.create_and_send_event() + event_id_sf1 = self.create_and_send_event(True, [event_id_a]) + event_id_sf2 = self.create_and_send_event(True, [event_id_a, event_id_b]) + event_id_sf3 = self.create_and_send_event(True, [event_id_sf1]) + self.create_and_send_event(True, [event_id_sf2, event_id_sf3]) # SF4 + event_id_c = self.create_and_send_event(False, [event_id_sf3]) + + # Add the new extremity and check the latest events are as expected + self.add_extremity(event_id_a) + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual( + set(latest_event_ids), set((event_id_a, event_id_b, event_id_c)) + ) + + # Run the background update and check it did the right thing + self.run_background_update() + + latest_event_ids = self.get_success( + self.store.get_latest_event_ids_in_room(self.room_id) + ) + self.assertEqual(set(latest_event_ids), set([event_id_b, event_id_c])) -- cgit 1.5.1 From 9f5268388abb266260c30e9855da35a0b6b11bcb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 18:56:02 +0100 Subject: Newsfile --- changelog.d/5278.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5278.bugfix diff --git a/changelog.d/5278.bugfix b/changelog.d/5278.bugfix new file mode 100644 index 0000000000..9e14d20289 --- /dev/null +++ b/changelog.d/5278.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. -- cgit 1.5.1 From 9b8cd66524304f76209a59e12f4eca561b1a43d3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 10:55:55 +0100 Subject: Fixup comments and logging --- synapse/storage/events.py | 21 ++++++++++++--------- .../schema/delta/54/delete_forward_extremities.sql | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index a9664928ca..418d88b8dc 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2387,7 +2387,8 @@ class EventsStore( soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their prev events, if any, and their prev events rejection status. + # their successor events, if any, and their successor events + # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, rejections.event_id IS NOT NULL, events.outlier @@ -2450,11 +2451,10 @@ class EventsStore( if event_id in graph: # Already handled this event previously, but we still # want to record the edge. - graph.setdefault(event_id, set()).add(prev_event_id) - logger.info("Already handled") + graph[event_id].add(prev_event_id) continue - graph.setdefault(event_id, set()).add(prev_event_id) + graph[event_id] = {prev_event_id} soft_failed = json.loads(metadata).get("soft_failed") if soft_failed or rejected: @@ -2474,8 +2474,6 @@ class EventsStore( to_delete.intersection_update(original_set) - logger.info("Deleting up to %d forward extremities", len(to_delete)) - deleted = self._simple_delete_many_txn( txn=txn, table="event_forward_extremities", @@ -2484,7 +2482,11 @@ class EventsStore( keyvalues={}, ) - logger.info("Deleted %d forward extremities", deleted) + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) if deleted: # We now need to invalidate the caches of these rooms @@ -2496,10 +2498,11 @@ class EventsStore( keyvalues={}, retcols=("room_id",) ) - for row in rows: + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: txn.call_after( self.get_latest_event_ids_in_room.invalidate, - (row["room_id"],) + (room_id,) ) self._simple_delete_many_txn( diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index 7056bd1d00..aa40f13da7 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -13,7 +13,10 @@ * limitations under the License. */ +-- Start a background job to cleanup extremities that were incorrectly added +-- by bug #5269. INSERT INTO background_updates (update_name, progress_json) VALUES ('delete_soft_failed_extremities', '{}'); +DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; -- cgit 1.5.1 From 98f438b52a93b1ce9d1f3e93fa57db0f870f9101 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:22:59 +0100 Subject: Move event background updates to a separate file --- synapse/storage/__init__.py | 2 + synapse/storage/events.py | 371 +------------------------------- synapse/storage/events_bg_updates.py | 401 +++++++++++++++++++++++++++++++++++ 3 files changed, 405 insertions(+), 369 deletions(-) create mode 100644 synapse/storage/events_bg_updates.py diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 7522d3fd57..56c434d4e8 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -36,6 +36,7 @@ from .engines import PostgresEngine from .event_federation import EventFederationStore from .event_push_actions import EventPushActionsStore from .events import EventsStore +from .events_bg_updates import EventsBackgroundUpdatesStore from .filtering import FilteringStore from .group_server import GroupServerStore from .keys import KeyStore @@ -65,6 +66,7 @@ logger = logging.getLogger(__name__) class DataStore( + EventsBackgroundUpdatesStore, RoomMemberStore, RoomStore, RegistrationStore, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 418d88b8dc..f9162be9b9 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2018-2019 New Vector Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -219,47 +220,11 @@ class EventsStore( EventsWorkerStore, BackgroundUpdateStore, ): - EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" - EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts - ) - self.register_background_update_handler( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, - self._background_reindex_fields_sender, - ) - - self.register_background_index_update( - "event_contains_url_index", - index_name="event_contains_url_index", - table="events", - columns=["room_id", "topological_ordering", "stream_ordering"], - where_clause="contains_url = true AND outlier = false", - ) - - # an event_id index on event_search is useful for the purge_history - # api. Plus it means we get to enforce some integrity with a UNIQUE - # clause - self.register_background_index_update( - "event_search_event_id_idx", - index_name="event_search_event_id_idx", - table="event_search", - columns=["event_id"], - unique=True, - psql_only=True, - ) - - self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, - self._cleanup_extremities_bg_update, - ) self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() @defer.inlineCallbacks @@ -1585,153 +1550,6 @@ class EventsStore( ret = yield self.runInteraction("count_daily_active_rooms", _count) defer.returnValue(ret) - @defer.inlineCallbacks - def _background_reindex_fields_sender(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_txn(txn): - sql = ( - "SELECT stream_ordering, event_id, json FROM events" - " INNER JOIN event_json USING (event_id)" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - - update_rows = [] - for row in rows: - try: - event_id = row[1] - event_json = json.loads(row[2]) - sender = event_json["sender"] - content = event_json["content"] - - contains_url = "url" in content - if contains_url: - contains_url &= isinstance(content["url"], text_type) - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - update_rows.append((sender, contains_url, event_id)) - - sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" - - for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): - clump = update_rows[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows), - } - - self._background_update_progress_txn( - txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress - ) - - return len(rows) - - result = yield self.runInteraction( - self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) - - defer.returnValue(result) - - @defer.inlineCallbacks - def _background_reindex_origin_server_ts(self, progress, batch_size): - target_min_stream_id = progress["target_min_stream_id_inclusive"] - max_stream_id = progress["max_stream_id_exclusive"] - rows_inserted = progress.get("rows_inserted", 0) - - INSERT_CLUMP_SIZE = 1000 - - def reindex_search_txn(txn): - sql = ( - "SELECT stream_ordering, event_id FROM events" - " WHERE ? <= stream_ordering AND stream_ordering < ?" - " ORDER BY stream_ordering DESC" - " LIMIT ?" - ) - - txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - - rows = txn.fetchall() - if not rows: - return 0 - - min_stream_id = rows[-1][0] - event_ids = [row[1] for row in rows] - - rows_to_update = [] - - chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] - for chunk in chunks: - ev_rows = self._simple_select_many_txn( - txn, - table="event_json", - column="event_id", - iterable=chunk, - retcols=["event_id", "json"], - keyvalues={}, - ) - - for row in ev_rows: - event_id = row["event_id"] - event_json = json.loads(row["json"]) - try: - origin_server_ts = event_json["origin_server_ts"] - except (KeyError, AttributeError): - # If the event is missing a necessary field then - # skip over it. - continue - - rows_to_update.append((origin_server_ts, event_id)) - - sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" - - for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): - clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - progress = { - "target_min_stream_id_inclusive": target_min_stream_id, - "max_stream_id_exclusive": min_stream_id, - "rows_inserted": rows_inserted + len(rows_to_update), - } - - self._background_update_progress_txn( - txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress - ) - - return len(rows_to_update) - - result = yield self.runInteraction( - self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn - ) - - if not result: - yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) - - defer.returnValue(result) - def get_current_backfill_token(self): """The current minimum token that backfilled events have reached""" return -self._backfill_id_gen.get_current_token() @@ -2347,191 +2165,6 @@ class EventsStore( get_all_updated_current_state_deltas_txn, ) - @defer.inlineCallbacks - def _cleanup_extremities_bg_update(self, progress, batch_size): - """Background update to clean out extremities that should have been - deleted previously. - - Mainly used to deal with the aftermath of #5269. - """ - - # This works by first copying all existing forward extremities into the - # `_extremities_to_check` table at start up, and then checking each - # event in that table whether we have any descendants that are not - # soft-failed/rejected. If that is the case then we delete that event - # from the forward extremities table. - # - # For efficiency, we do this in batches by recursively pulling out all - # descendants of a batch until we find the non soft-failed/rejected - # events, i.e. the set of descendants whose chain of prev events back - # to the batch of extremities are all soft-failed or rejected. - # Typically, we won't find any such events as extremities will rarely - # have any descendants, but if they do then we should delete those - # extremities. - - def _cleanup_extremities_bg_update_txn(txn): - # The set of extremity event IDs that we're checking this round - original_set = set() - - # A dict[str, set[str]] of event ID to their prev events. - graph = {} - - # The set of descendants of the original set that are not rejected - # nor soft-failed. Ancestors of these events should be removed - # from the forward extremities table. - non_rejected_leaves = set() - - # Set of event IDs that have been soft failed, and for which we - # should check if they have descendants which haven't been soft - # failed. - soft_failed_events_to_lookup = set() - - # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events - # rejection status. - txn.execute( - """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL, events.outlier - FROM ( - SELECT event_id AS prev_event_id - FROM _extremities_to_check - LIMIT ? - ) AS f - LEFT JOIN event_edges USING (prev_event_id) - LEFT JOIN events USING (event_id) - LEFT JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - """, (batch_size,) - ) - - for prev_event_id, event_id, metadata, rejected, outlier in txn: - original_set.add(prev_event_id) - - if not event_id or outlier: - # Common case where the forward extremity doesn't have any - # descendants. - continue - - graph.setdefault(event_id, set()).add(prev_event_id) - - soft_failed = False - if metadata: - soft_failed = json.loads(metadata).get("soft_failed") - - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # Now we recursively check all the soft-failed descendants we - # found above in the same way, until we have nothing left to - # check. - while soft_failed_events_to_lookup: - # We only want to do 100 at a time, so we split given list - # into two. - batch = list(soft_failed_events_to_lookup) - to_check, to_defer = batch[:100], batch[100:] - soft_failed_events_to_lookup = set(to_defer) - - sql = """SELECT prev_event_id, event_id, internal_metadata, - rejections.event_id IS NOT NULL - FROM event_edges - INNER JOIN events USING (event_id) - INNER JOIN event_json USING (event_id) - LEFT JOIN rejections USING (event_id) - WHERE - prev_event_id IN (%s) - AND NOT events.outlier - """ % ( - ",".join("?" for _ in to_check), - ) - txn.execute(sql, to_check) - - for prev_event_id, event_id, metadata, rejected in txn: - if event_id in graph: - # Already handled this event previously, but we still - # want to record the edge. - graph[event_id].add(prev_event_id) - continue - - graph[event_id] = {prev_event_id} - - soft_failed = json.loads(metadata).get("soft_failed") - if soft_failed or rejected: - soft_failed_events_to_lookup.add(event_id) - else: - non_rejected_leaves.add(event_id) - - # We have a set of non-soft-failed descendants, so we recurse up - # the graph to find all ancestors and add them to the set of event - # IDs that we can delete from forward extremities table. - to_delete = set() - while non_rejected_leaves: - event_id = non_rejected_leaves.pop() - prev_event_ids = graph.get(event_id, set()) - non_rejected_leaves.update(prev_event_ids) - to_delete.update(prev_event_ids) - - to_delete.intersection_update(original_set) - - deleted = self._simple_delete_many_txn( - txn=txn, - table="event_forward_extremities", - column="event_id", - iterable=to_delete, - keyvalues={}, - ) - - logger.info( - "Deleted %d forward extremities of %d checked, to clean up #5269", - deleted, - len(original_set), - ) - - if deleted: - # We now need to invalidate the caches of these rooms - rows = self._simple_select_many_txn( - txn, - table="events", - column="event_id", - iterable=to_delete, - keyvalues={}, - retcols=("room_id",) - ) - room_ids = set(row["room_id"] for row in rows) - for room_id in room_ids: - txn.call_after( - self.get_latest_event_ids_in_room.invalidate, - (room_id,) - ) - - self._simple_delete_many_txn( - txn=txn, - table="_extremities_to_check", - column="event_id", - iterable=original_set, - keyvalues={}, - ) - - return len(original_set) - - num_handled = yield self.runInteraction( - "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, - ) - - if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) - - def _drop_table_txn(txn): - txn.execute("DROP TABLE _extremities_to_check") - - yield self.runInteraction( - "_cleanup_extremities_bg_update_drop_table", - _drop_table_txn, - ) - - defer.returnValue(num_handled) - AllNewEventsResult = namedtuple( "AllNewEventsResult", diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py new file mode 100644 index 0000000000..2eba106abf --- /dev/null +++ b/synapse/storage/events_bg_updates.py @@ -0,0 +1,401 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import text_type + +from canonicaljson import json + +from twisted.internet import defer + +from synapse.storage.background_updates import BackgroundUpdateStore + +logger = logging.getLogger(__name__) + + +class EventsBackgroundUpdatesStore(BackgroundUpdateStore): + + EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" + EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" + EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + + def __init__(self, db_conn, hs): + super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts + ) + self.register_background_update_handler( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, + self._background_reindex_fields_sender, + ) + + self.register_background_index_update( + "event_contains_url_index", + index_name="event_contains_url_index", + table="events", + columns=["room_id", "topological_ordering", "stream_ordering"], + where_clause="contains_url = true AND outlier = false", + ) + + # an event_id index on event_search is useful for the purge_history + # api. Plus it means we get to enforce some integrity with a UNIQUE + # clause + self.register_background_index_update( + "event_search_event_id_idx", + index_name="event_search_event_id_idx", + table="event_search", + columns=["event_id"], + unique=True, + psql_only=True, + ) + + self.register_background_update_handler( + self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self._cleanup_extremities_bg_update, + ) + + @defer.inlineCallbacks + def _background_reindex_fields_sender(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_txn(txn): + sql = ( + "SELECT stream_ordering, event_id, json FROM events" + " INNER JOIN event_json USING (event_id)" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + + update_rows = [] + for row in rows: + try: + event_id = row[1] + event_json = json.loads(row[2]) + sender = event_json["sender"] + content = event_json["content"] + + contains_url = "url" in content + if contains_url: + contains_url &= isinstance(content["url"], text_type) + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + update_rows.append((sender, contains_url, event_id)) + + sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?" + + for index in range(0, len(update_rows), INSERT_CLUMP_SIZE): + clump = update_rows[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows), + } + + self._background_update_progress_txn( + txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress + ) + + return len(rows) + + result = yield self.runInteraction( + self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _background_reindex_origin_server_ts(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " ORDER BY stream_ordering DESC" + " LIMIT ?" + ) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + event_ids = [row[1] for row in rows] + + rows_to_update = [] + + chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] + for chunk in chunks: + ev_rows = self._simple_select_many_txn( + txn, + table="event_json", + column="event_id", + iterable=chunk, + retcols=["event_id", "json"], + keyvalues={}, + ) + + for row in ev_rows: + event_id = row["event_id"] + event_json = json.loads(row["json"]) + try: + origin_server_ts = event_json["origin_server_ts"] + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + rows_to_update.append((origin_server_ts, event_id)) + + sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?" + + for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE): + clump = rows_to_update[index : index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(rows_to_update), + } + + self._background_update_progress_txn( + txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress + ) + + return len(rows_to_update) + + result = yield self.runInteraction( + self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME) + + defer.returnValue(result) + + @defer.inlineCallbacks + def _cleanup_extremities_bg_update(self, progress, batch_size): + """Background update to clean out extremities that should have been + deleted previously. + + Mainly used to deal with the aftermath of #5269. + """ + + # This works by first copying all existing forward extremities into the + # `_extremities_to_check` table at start up, and then checking each + # event in that table whether we have any descendants that are not + # soft-failed/rejected. If that is the case then we delete that event + # from the forward extremities table. + # + # For efficiency, we do this in batches by recursively pulling out all + # descendants of a batch until we find the non soft-failed/rejected + # events, i.e. the set of descendants whose chain of prev events back + # to the batch of extremities are all soft-failed or rejected. + # Typically, we won't find any such events as extremities will rarely + # have any descendants, but if they do then we should delete those + # extremities. + + def _cleanup_extremities_bg_update_txn(txn): + # The set of extremity event IDs that we're checking this round + original_set = set() + + # A dict[str, set[str]] of event ID to their prev events. + graph = {} + + # The set of descendants of the original set that are not rejected + # nor soft-failed. Ancestors of these events should be removed + # from the forward extremities table. + non_rejected_leaves = set() + + # Set of event IDs that have been soft failed, and for which we + # should check if they have descendants which haven't been soft + # failed. + soft_failed_events_to_lookup = set() + + # First, we get `batch_size` events from the table, pulling out + # their successor events, if any, and their successor events + # rejection status. + txn.execute( + """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL, events.outlier + FROM ( + SELECT event_id AS prev_event_id + FROM _extremities_to_check + LIMIT ? + ) AS f + LEFT JOIN event_edges USING (prev_event_id) + LEFT JOIN events USING (event_id) + LEFT JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + """, (batch_size,) + ) + + for prev_event_id, event_id, metadata, rejected, outlier in txn: + original_set.add(prev_event_id) + + if not event_id or outlier: + # Common case where the forward extremity doesn't have any + # descendants. + continue + + graph.setdefault(event_id, set()).add(prev_event_id) + + soft_failed = False + if metadata: + soft_failed = json.loads(metadata).get("soft_failed") + + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # Now we recursively check all the soft-failed descendants we + # found above in the same way, until we have nothing left to + # check. + while soft_failed_events_to_lookup: + # We only want to do 100 at a time, so we split given list + # into two. + batch = list(soft_failed_events_to_lookup) + to_check, to_defer = batch[:100], batch[100:] + soft_failed_events_to_lookup = set(to_defer) + + sql = """SELECT prev_event_id, event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + INNER JOIN event_json USING (event_id) + LEFT JOIN rejections USING (event_id) + WHERE + prev_event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_check), + ) + txn.execute(sql, to_check) + + for prev_event_id, event_id, metadata, rejected in txn: + if event_id in graph: + # Already handled this event previously, but we still + # want to record the edge. + graph[event_id].add(prev_event_id) + continue + + graph[event_id] = {prev_event_id} + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + soft_failed_events_to_lookup.add(event_id) + else: + non_rejected_leaves.add(event_id) + + # We have a set of non-soft-failed descendants, so we recurse up + # the graph to find all ancestors and add them to the set of event + # IDs that we can delete from forward extremities table. + to_delete = set() + while non_rejected_leaves: + event_id = non_rejected_leaves.pop() + prev_event_ids = graph.get(event_id, set()) + non_rejected_leaves.update(prev_event_ids) + to_delete.update(prev_event_ids) + + to_delete.intersection_update(original_set) + + deleted = self._simple_delete_many_txn( + txn=txn, + table="event_forward_extremities", + column="event_id", + iterable=to_delete, + keyvalues={}, + ) + + logger.info( + "Deleted %d forward extremities of %d checked, to clean up #5269", + deleted, + len(original_set), + ) + + if deleted: + # We now need to invalidate the caches of these rooms + rows = self._simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",) + ) + room_ids = set(row["room_id"] for row in rows) + for room_id in room_ids: + txn.call_after( + self.get_latest_event_ids_in_room.invalidate, + (room_id,) + ) + + self._simple_delete_many_txn( + txn=txn, + table="_extremities_to_check", + column="event_id", + iterable=original_set, + keyvalues={}, + ) + + return len(original_set) + + num_handled = yield self.runInteraction( + "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn, + ) + + if not num_handled: + yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + + def _drop_table_txn(txn): + txn.execute("DROP TABLE _extremities_to_check") + + yield self.runInteraction( + "_cleanup_extremities_bg_update_drop_table", + _drop_table_txn, + ) + + defer.returnValue(num_handled) -- cgit 1.5.1 From 7386c35f58f360269df1410b2d1ec6d179081b32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 11:24:42 +0100 Subject: Rename constant --- synapse/storage/events_bg_updates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 2eba106abf..22aac1393d 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -30,7 +30,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" - EVENT_DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" + DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities" def __init__(self, db_conn, hs): super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs) @@ -64,7 +64,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) self.register_background_update_handler( - self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES, + self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update, ) @@ -388,7 +388,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) if not num_handled: - yield self._end_background_update(self.EVENT_DELETE_SOFT_FAILED_EXTREMITIES) + yield self._end_background_update(self.DELETE_SOFT_FAILED_EXTREMITIES) def _drop_table_txn(txn): txn.execute("DROP TABLE _extremities_to_check") -- cgit 1.5.1 From 06eb408da5c4ab52a3072dc6d76fe5ac3b9b1e83 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:06:42 +0100 Subject: Update synapse/storage/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 22aac1393d..75c1935bf3 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -255,7 +255,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): soft_failed_events_to_lookup = set() # First, we get `batch_size` events from the table, pulling out - # their successor events, if any, and their successor events + # their successor events, if any, and the successor events' # rejection status. txn.execute( """SELECT prev_event_id, event_id, internal_metadata, -- cgit 1.5.1 From e2c3660a0ffb13d4198893e91a90ae1abcad8915 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 14:54:56 +0100 Subject: Add index to temp table --- synapse/storage/schema/delta/54/delete_forward_extremities.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql index aa40f13da7..b062ec840c 100644 --- a/synapse/storage/schema/delta/54/delete_forward_extremities.sql +++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql @@ -20,3 +20,4 @@ INSERT INTO background_updates (update_name, progress_json) VALUES DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent. CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities; +CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id); -- cgit 1.5.1 From f5c7f90d7278f199523457ade9587d284b3ce39b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:05:26 +0100 Subject: Newsfile --- changelog.d/5291.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5291.bugfix diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix new file mode 100644 index 0000000000..9e14d20289 --- /dev/null +++ b/changelog.d/5291.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. -- cgit 1.5.1 From 9315802221ede86def56986d5c5303f649cd2fa9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 30 May 2019 16:28:02 +0100 Subject: fix changelog for 0.99.5.1 (#5270) --- CHANGES.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6bdfdd6d70..350151b62c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,7 @@ Synapse 0.99.5.1 (2019-05-22) ============================= -No significant changes. - +0.99.5.1 supersedes 0.99.5 due to malformed debian changelog - no functional changes. Synapse 0.99.5 (2019-05-22) =========================== -- cgit 1.5.1 From c831748f4d243d74e9a3fd2042bc2b35cc30f961 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 16:28:52 +0100 Subject: 0.99.5.2 --- CHANGES.md | 9 +++++++++ changelog.d/5274.bugfix | 1 - changelog.d/5278.bugfix | 1 - changelog.d/5291.bugfix | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 6 files changed, 16 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/5274.bugfix delete mode 100644 changelog.d/5278.bugfix delete mode 100644 changelog.d/5291.bugfix diff --git a/CHANGES.md b/CHANGES.md index 350151b62c..0ffdf1aaef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 0.99.5.2 (2019-05-30) +============================= + +Bugfixes +-------- + +- Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. ([\#5274](https://github.com/matrix-org/synapse/issues/5274), [\#5278](https://github.com/matrix-org/synapse/issues/5278), [\#5291](https://github.com/matrix-org/synapse/issues/5291)) + + Synapse 0.99.5.1 (2019-05-22) ============================= diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix deleted file mode 100644 index 9e14d20289..0000000000 --- a/changelog.d/5274.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/changelog.d/5278.bugfix b/changelog.d/5278.bugfix deleted file mode 100644 index 9e14d20289..0000000000 --- a/changelog.d/5278.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix deleted file mode 100644 index 9e14d20289..0000000000 --- a/changelog.d/5291.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. diff --git a/debian/changelog b/debian/changelog index 90c6b86c5b..6a1a72c0e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (0.99.5.2) stable; urgency=medium + + * New synapse release 0.99.5.2. + + -- Synapse Packaging team Thu, 30 May 2019 16:28:07 +0100 + matrix-synapse-py3 (0.99.5.1) stable; urgency=medium * New synapse release 0.99.5.1. diff --git a/synapse/__init__.py b/synapse/__init__.py index 4f95778eea..d0e8d7c21b 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -27,4 +27,4 @@ try: except ImportError: pass -__version__ = "0.99.5.1" +__version__ = "0.99.5.2" -- cgit 1.5.1 From 3e1af5109cb91b8e22f0e14aee875f86bd9fcd92 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Fri, 31 May 2019 02:45:46 -0600 Subject: Clarify that the admin change password endpoint logs them out (#5303) --- changelog.d/5303.misc | 1 + docs/admin_api/user_admin_api.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/5303.misc diff --git a/changelog.d/5303.misc b/changelog.d/5303.misc new file mode 100644 index 0000000000..f6a7f1f8e3 --- /dev/null +++ b/changelog.d/5303.misc @@ -0,0 +1 @@ +Clarify that the admin change password API logs the user out. diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 8aca4f158d..213359d0c0 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -69,7 +69,7 @@ An empty body may be passed for backwards compatibility. Reset password ============== -Changes the password of another user. +Changes the password of another user. This will automatically log the user out of all their devices. The api is:: -- cgit 1.5.1 From 5037326d6624d1d1780a0536d19ff79e275f8735 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 15:37:57 +0100 Subject: Add indices. Remove room_ids accidentally added We have to do this by re-inserting a background update and recreating tables, as the tables only get created during a background update and will later be deleted. We also make sure that we remove any entries that should have been removed but weren't due to a race that has been fixed in a previous commit. --- synapse/storage/schema/delta/54/stats2.sql | 28 ++++++++++++++++++++ synapse/storage/stats.py | 41 ++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 13 deletions(-) create mode 100644 synapse/storage/schema/delta/54/stats2.sql diff --git a/synapse/storage/schema/delta/54/stats2.sql b/synapse/storage/schema/delta/54/stats2.sql new file mode 100644 index 0000000000..3b2d48447f --- /dev/null +++ b/synapse/storage/schema/delta/54/stats2.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This delta file gets run after `54/stats.sql` delta. + +-- We want to add some indices to the temporary stats table, so we re-insert +-- 'populate_stats_createtables' if we are still processing the rooms update. +INSERT INTO background_updates (update_name, progress_json) + SELECT 'populate_stats_createtables', '{}' + WHERE + 'populate_stats_process_rooms' IN ( + SELECT update_name FROM background_updates + ) + AND 'populate_stats_createtables' NOT IN ( -- don't insert if already exists + SELECT update_name FROM background_updates + ); diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py index a99637d4b4..1c0b183a56 100644 --- a/synapse/storage/stats.py +++ b/synapse/storage/stats.py @@ -18,6 +18,7 @@ import logging from twisted.internet import defer from synapse.api.constants import EventTypes, Membership +from synapse.storage.prepare_database import get_statements from synapse.storage.state_deltas import StateDeltasStore from synapse.util.caches.descriptors import cached @@ -69,12 +70,25 @@ class StatsStore(StateDeltasStore): # Get all the rooms that we want to process. def _make_staging_area(txn): - sql = ( - "CREATE TABLE IF NOT EXISTS " - + TEMP_TABLE - + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" - ) - txn.execute(sql) + # Create the temporary tables + stmts = get_statements(""" + -- We just recreate the table, we'll be reinserting the + -- correct entries again later anyway. + DROP TABLE IF EXISTS {temp}_rooms; + + CREATE TABLE IF NOT EXISTS {temp}_rooms( + room_id TEXT NOT NULL, + events BIGINT NOT NULL + ); + + CREATE INDEX {temp}_rooms_events + ON {temp}_rooms(events); + CREATE INDEX {temp}_rooms_id + ON {temp}_rooms(room_id); + """.format(temp=TEMP_TABLE).splitlines()) + + for statement in stmts: + txn.execute(statement) sql = ( "CREATE TABLE IF NOT EXISTS " @@ -83,15 +97,16 @@ class StatsStore(StateDeltasStore): ) txn.execute(sql) - # Get rooms we want to process from the database + # Get rooms we want to process from the database, only adding + # those that we haven't (i.e. those not in room_stats_earliest_token) sql = """ - SELECT room_id, count(*) FROM current_state_events - GROUP BY room_id - """ + INSERT INTO %s_rooms (room_id, events) + SELECT c.room_id, count(*) FROM current_state_events AS c + LEFT JOIN room_stats_earliest_token AS t USING (room_id) + WHERE t.room_id IS NULL + GROUP BY c.room_id + """ % (TEMP_TABLE,) txn.execute(sql) - rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] - self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) - del rooms new_pos = yield self.get_max_stream_id_in_current_state_deltas() yield self.runInteraction("populate_stats_temp_build", _make_staging_area) -- cgit 1.5.1 From 39bbf6a4a5b954de56865a2aa0877587acbd9552 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 30 May 2019 16:07:23 +0100 Subject: Newsfile --- changelog.d/5294.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5294.bugfix diff --git a/changelog.d/5294.bugfix b/changelog.d/5294.bugfix new file mode 100644 index 0000000000..5924bda319 --- /dev/null +++ b/changelog.d/5294.bugfix @@ -0,0 +1 @@ +Fix performance problems with the rooms stats background update. -- cgit 1.5.1