From 7c8c97e635811609c5a7ae4c0bb94e6573c30753 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 30 Oct 2019 15:12:49 +0000 Subject: Split purge API into events vs state --- synapse/storage/__init__.py | 2 + synapse/storage/data_stores/main/events.py | 328 ++++++++++++++--------------- synapse/storage/data_stores/main/state.py | 23 ++ synapse/storage/purge_events.py | 117 ++++++++++ 4 files changed, 295 insertions(+), 175 deletions(-) create mode 100644 synapse/storage/purge_events.py (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index a6429d17ed..3646ebd007 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -30,6 +30,7 @@ stored in `synapse.storage.schema`. from synapse.storage.data_stores import DataStores from synapse.storage.data_stores.main import DataStore from synapse.storage.persist_events import EventsPersistenceStorage +from synapse.storage.purge_events import PurgeEventsStorage __all__ = ["DataStores", "DataStore"] @@ -45,6 +46,7 @@ class Storage(object): self.main = stores.main self.persistence = EventsPersistenceStorage(hs, stores) + self.purge_events = PurgeEventsStorage(hs, stores) def are_all_users_on_domain(txn, database_engine, domain): diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 7c3607f308..4eacba8058 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1368,6 +1368,10 @@ class EventsStore( if True, we will delete local events as well as remote ones (instead of just marking them as outliers and deleting their state groups). + + Returns: + Deferred[set[int]]: The set of state groups that reference deleted + events. """ return self.runInteraction( @@ -1521,60 +1525,6 @@ class EventsStore( "[purge] found %i referenced state groups", len(referenced_state_groups) ) - logger.info("[purge] finding state groups that can be deleted") - - _ = self._find_unreferenced_groups_during_purge(txn, referenced_state_groups) - state_groups_to_delete, remaining_state_groups = _ - - logger.info( - "[purge] found %i state groups to delete", len(state_groups_to_delete) - ) - - logger.info( - "[purge] de-delta-ing %i remaining state groups", - len(remaining_state_groups), - ) - - # Now we turn the state groups that reference to-be-deleted state - # groups to non delta versions. - for sg in remaining_state_groups: - logger.info("[purge] de-delta-ing remaining state group %s", sg) - curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) - curr_state = curr_state[sg] - - self._simple_delete_txn( - txn, table="state_groups_state", keyvalues={"state_group": sg} - ) - - self._simple_delete_txn( - txn, table="state_group_edges", keyvalues={"state_group": sg} - ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": sg, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in iteritems(curr_state) - ], - ) - - logger.info("[purge] removing redundant state groups") - txn.executemany( - "DELETE FROM state_groups_state WHERE state_group = ?", - ((sg,) for sg in state_groups_to_delete), - ) - txn.executemany( - "DELETE FROM state_groups WHERE id = ?", - ((sg,) for sg in state_groups_to_delete), - ) - logger.info("[purge] removing events from event_to_state_groups") txn.execute( "DELETE FROM event_to_state_groups " @@ -1661,87 +1611,7 @@ class EventsStore( logger.info("[purge] done") - def _find_unreferenced_groups_during_purge(self, txn, state_groups): - """Used when purging history to figure out which state groups can be - deleted and which need to be de-delta'ed (due to one of its prev groups - being scheduled for deletion). - - Args: - txn - state_groups (set[int]): Set of state groups referenced by events - that are going to be deleted. - - Returns: - tuple[set[int], set[int]]: The set of state groups that can be - deleted and the set of state groups that need to be de-delta'ed - """ - # Graph of state group -> previous group - graph = {} - - # Set of events that we have found to be referenced by events - referenced_groups = set() - - # Set of state groups we've already seen - state_groups_seen = set(state_groups) - - # Set of state groups to handle next. - next_to_search = set(state_groups) - while next_to_search: - # We bound size of groups we're looking up at once, to stop the - # SQL query getting too big - if len(next_to_search) < 100: - current_search = next_to_search - next_to_search = set() - else: - current_search = set(itertools.islice(next_to_search, 100)) - next_to_search -= current_search - - # Check if state groups are referenced - sql = """ - SELECT DISTINCT state_group FROM event_to_state_groups - LEFT JOIN events_to_purge AS ep USING (event_id) - WHERE ep.event_id IS NULL AND - """ - clause, args = make_in_list_sql_clause( - txn.database_engine, "state_group", current_search - ) - txn.execute(sql + clause, list(args)) - - referenced = set(sg for sg, in txn) - referenced_groups |= referenced - - # We don't continue iterating up the state group graphs for state - # groups that are referenced. - current_search -= referenced - - rows = self._simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=current_search, - keyvalues={}, - retcols=("prev_state_group", "state_group"), - ) - - prevs = set(row["state_group"] for row in rows) - # We don't bother re-handling groups we've already seen - prevs -= state_groups_seen - next_to_search |= prevs - state_groups_seen |= prevs - - for row in rows: - # Note: Each state group can have at most one prev group - graph[row["state_group"]] = row["prev_state_group"] - - to_delete = state_groups_seen - referenced_groups - - to_dedelta = set() - for sg in referenced_groups: - prev_sg = graph.get(sg) - if prev_sg and prev_sg in to_delete: - to_dedelta.add(sg) - - return to_delete, to_dedelta + return referenced_state_groups def purge_room(self, room_id): """Deletes all record of a room @@ -1753,46 +1623,7 @@ class EventsStore( return self.runInteraction("purge_room", self._purge_room_txn, room_id) def _purge_room_txn(self, txn, room_id): - # first we have to delete the state groups states - logger.info("[purge] removing %s from state_groups_state", room_id) - - txn.execute( - """ - DELETE FROM state_groups_state WHERE state_group IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # ... and the state group edges - logger.info("[purge] removing %s from state_group_edges", room_id) - - txn.execute( - """ - DELETE FROM state_group_edges WHERE state_group IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # ... and the state groups - logger.info("[purge] removing %s from state_groups", room_id) - - txn.execute( - """ - DELETE FROM state_groups WHERE id IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # and then tables which lack an index on room_id but have one on event_id + # First delete tables which lack an index on room_id but have one on event_id for table in ( "event_auth", "event_edges", @@ -1881,6 +1712,153 @@ class EventsStore( logger.info("[purge] done") + def purge_unreferenced_state_groups(self, room_id, state_groups_to_delete): + """Deletes no longer referenced state groups and de-deltas any state + groups that reference them. + """ + + return self.runInteraction( + "purge_unreferenced_state_groups", + self._purge_unreferenced_state_groups, + room_id, + state_groups_to_delete, + ) + + def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete): + logger.info( + "[purge] found %i state groups to delete", len(state_groups_to_delete) + ) + + rows = self._simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=state_groups_to_delete, + keyvalues={}, + retcols=("state_group",), + ) + + remaining_state_groups = set( + row["state_group"] + for row in rows + if row["state_group"] not in state_groups_to_delete + ) + + logger.info( + "[purge] de-delta-ing %i remaining state groups", + len(remaining_state_groups), + ) + + # Now we turn the state groups that reference to-be-deleted state + # groups to non delta versions. + for sg in remaining_state_groups: + logger.info("[purge] de-delta-ing remaining state group %s", sg) + curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) + curr_state = curr_state[sg] + + self._simple_delete_txn( + txn, table="state_groups_state", keyvalues={"state_group": sg} + ) + + self._simple_delete_txn( + txn, table="state_group_edges", keyvalues={"state_group": sg} + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": sg, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in iteritems(curr_state) + ], + ) + + logger.info("[purge] removing redundant state groups") + txn.executemany( + "DELETE FROM state_groups_state WHERE state_group = ?", + ((sg,) for sg in state_groups_to_delete), + ) + txn.executemany( + "DELETE FROM state_groups WHERE id = ?", + ((sg,) for sg in state_groups_to_delete), + ) + + @defer.inlineCallbacks + def get_previous_state_groups(self, state_groups): + """Fetch the previous groups of the given state groups. + + Args: + state_groups (Iterable[int]) + + Returns: + Deferred[dict[int, int]]: mapping from state group to previous + state group. + """ + + rows = yield self._simple_select_many_batch( + table="state_group_edges", + column="prev_state_group", + iterable=state_groups, + keyvalues={}, + retcols=("prev_state_group", "state_group"), + desc="get_previous_state_groups", + ) + + return {row["state_group"]: row["prev_state_group"] for row in rows} + + def purge_room_state(self, room_id): + """Deletes all record of a room from state tables + + Args: + room_id (str): + """ + + return self.runInteraction( + "purge_room_state", self._purge_room_state_txn, room_id + ) + + def _purge_room_state_txn(self, txn, room_id): + # first we have to delete the state groups states + logger.info("[purge] removing %s from state_groups_state", room_id) + + txn.execute( + """ + DELETE FROM state_groups_state + INNER JOIN state_groups USING (event_id) + WHEREE state_groups.room_id = ? + """, + (room_id,), + ) + + # ... and the state group edges + logger.info("[purge] removing %s from state_group_edges", room_id) + + txn.execute( + """ + DELETE FROM state_group_edges + INNER JOIN state_groups USING (event_id) + WHEREE state_groups.room_id = ? + ) + """, + (room_id,), + ) + + # ... and the state groups + logger.info("[purge] removing %s from state_groups", room_id) + + txn.execute( + """ + DELETE FROM state_groups WHEREE room_id = ? + """, + (room_id,), + ) + async def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream """ diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 9b2207075b..36be8f0a9d 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -989,6 +989,29 @@ class StateGroupWorkerStore( return self.runInteraction("store_state_group", _store_state_group_txn) + @defer.inlineCallbacks + def get_referenced_state_groups(self, state_groups): + """Check if the state groups are referenced by events. + + Args: + state_groups (Iterable[int]) + + Returns: + Deferred[set[int]]: The subset of state groups that are + referenced. + """ + + rows = yield self._simple_select_many_batch( + table="event_to_state_groups", + column="state_group", + iterable=state_groups, + keyvalues={}, + retcols=("DISTINCT state_group",), + desc="get_referenced_state_groups", + ) + + return set(row["state_group"] for row in rows) + class StateBackgroundUpdateStore( StateGroupBackgroundUpdateStore, BackgroundUpdateStore diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py new file mode 100644 index 0000000000..dd45df0c88 --- /dev/null +++ b/synapse/storage/purge_events.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import logging + +from twisted.internet import defer + +logger = logging.getLogger(__name__) + + +class PurgeEventsStorage(object): + """High level interface for purging rooms and event history. + """ + + def __init__(self, hs, stores): + self.stores = stores + + @defer.inlineCallbacks + def purge_room(self, room_id: str): + """Deletes all record of a room + """ + + yield self.stores.main.purge_room(room_id) + yield self.stores.main.purge_room_state(room_id) + + @defer.inlineCallbacks + def purge_history(self, room_id, token, delete_local_events): + """Deletes room history before a certain point + + Args: + room_id (str): + + token (str): A topological token to delete events before + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). + """ + state_groups = yield self.stores.main.purge_history( + room_id, token, delete_local_events + ) + + logger.info("[purge] finding state groups that can be deleted") + + sg_to_delete = yield self._find_unreferenced_groups(state_groups) + + yield self.stores.main.purge_unreferenced_state_groups(room_id, sg_to_delete) + + @defer.inlineCallbacks + def _find_unreferenced_groups(self, state_groups): + """Used when purging history to figure out which state groups can be + deleted. + + Args: + state_groups (set[int]): Set of state groups referenced by events + that are going to be deleted. + + Returns: + Deferred[set[int]] The set of state groups that can be deleted. + """ + # Graph of state group -> previous group + graph = {} + + # Set of events that we have found to be referenced by events + referenced_groups = set() + + # Set of state groups we've already seen + state_groups_seen = set(state_groups) + + # Set of state groups to handle next. + next_to_search = set(state_groups) + while next_to_search: + # We bound size of groups we're looking up at once, to stop the + # SQL query getting too big + if len(next_to_search) < 100: + current_search = next_to_search + next_to_search = set() + else: + current_search = set(itertools.islice(next_to_search, 100)) + next_to_search -= current_search + + referenced = yield self.stores.main.get_referenced_state_groups( + current_search + ) + referenced_groups |= referenced + + # We don't continue iterating up the state group graphs for state + # groups that are referenced. + current_search -= referenced + + edges = yield self.stores.main.get_previous_state_groups(current_search) + + prevs = set(edges.values()) + # We don't bother re-handling groups we've already seen + prevs -= state_groups_seen + next_to_search |= prevs + state_groups_seen |= prevs + + graph.update(edges) + + to_delete = state_groups_seen - referenced_groups + + return to_delete -- cgit 1.5.1 From 8f5bbdb987c92ee3e9b4170cc8ce296d87b1555d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:22:08 +0000 Subject: Fix purge room API --- synapse/storage/data_stores/main/events.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 63b09a09e8..a904a71570 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1829,8 +1829,10 @@ class EventsStore( txn.execute( """ DELETE FROM state_groups_state - INNER JOIN state_groups USING (event_id) - WHEREE state_groups.room_id = ? + WHERE state_group IN ( + SELECT state_group FROM state_groups + WHERE room_id = ? + ) """, (room_id,), ) @@ -1841,8 +1843,9 @@ class EventsStore( txn.execute( """ DELETE FROM state_group_edges - INNER JOIN state_groups USING (event_id) - WHEREE state_groups.room_id = ? + WHERE state_group IN ( + SELECT state_group FROM state_groups + WHERE room_id = ? ) """, (room_id,), @@ -1853,7 +1856,7 @@ class EventsStore( txn.execute( """ - DELETE FROM state_groups WHEREE room_id = ? + DELETE FROM state_groups WHERE room_id = ? """, (room_id,), ) -- cgit 1.5.1 From f91f2a1f92ee2eb5758184ef0df66490592587d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:25:21 +0000 Subject: Docstrings --- synapse/storage/data_stores/main/events.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index a904a71570..108b2e8bd5 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,6 +19,7 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps +from typing import Set from six import iteritems, text_type from six.moves import range @@ -1370,8 +1371,8 @@ class EventsStore( state groups). Returns: - Deferred[set[int]]: The set of state groups that reference deleted - events. + Deferred[set[int]]: The set of state groups that are referenced by + deleted events. """ return self.runInteraction( @@ -1711,9 +1712,16 @@ class EventsStore( logger.info("[purge] done") - def purge_unreferenced_state_groups(self, room_id, state_groups_to_delete): + def purge_unreferenced_state_groups( + self, room_id: str, state_groups_to_delete: Set[int] + ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. + + Args: + room_id: The room the state groups belong to (must all be in the + same room). + state_groups_to_delete: Set of all state groups to delete. """ return self.runInteraction( -- cgit 1.5.1 From 61be1a29262a9a3553537f257a4187ef355684f5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:39:26 +0000 Subject: Add state_groups.room_id index --- .../main/schema/delta/56/state_group_room_idx.sql | 17 +++++++++++++++++ synapse/storage/data_stores/main/state.py | 7 +++++++ 2 files changed, 24 insertions(+) create mode 100644 synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql new file mode 100644 index 0000000000..7916ef18b2 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('state_groups_room_id_idx', '{}'); diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 36be8f0a9d..bf6de4ca22 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1021,6 +1021,7 @@ class StateBackgroundUpdateStore( STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" + STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" def __init__(self, db_conn, hs): super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1044,6 +1045,12 @@ class StateBackgroundUpdateStore( table="event_to_state_groups", columns=["state_group"], ) + self.register_background_index_update( + self.STATE_GROUPS_ROOM_INDEX_UPDATE_NAME, + index_name="state_groups_room_id_idx", + table="state_groups", + columns=["room_id"], + ) @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): -- cgit 1.5.1 From fb1a6914cf953ed237235274a9aab62aafec5b4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:45:48 +0000 Subject: Update log line to lie a little less --- synapse/storage/data_stores/main/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 108b2e8bd5..3a9b6bed45 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1509,7 +1509,7 @@ class EventsStore( [(room_id, event_id) for event_id, in new_backwards_extrems], ) - logger.info("[purge] finding redundant state groups") + logger.info("[purge] finding state groups referenced by deleted events") # Get all state groups that are referenced by events that are to be # deleted. We then go and check if they are referenced by other events -- cgit 1.5.1 From 669b6cbda3e545f277def545954948090aec8980 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 1 Nov 2019 11:32:20 +0000 Subject: Fix up comment --- synapse/storage/data_stores/main/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 3a9b6bed45..a71d7346d2 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1512,8 +1512,7 @@ class EventsStore( logger.info("[purge] finding state groups referenced by deleted events") # Get all state groups that are referenced by events that are to be - # deleted. We then go and check if they are referenced by other events - # or state groups, and if not we delete them. + # deleted. txn.execute( """ SELECT DISTINCT state_group FROM events_to_purge -- cgit 1.5.1 From c9a1b80a74863b98b5dce2954b8486a068a0151f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:41:28 +0000 Subject: MSC2326: Add background update to take previous events into account --- .../storage/data_stores/main/events_bg_updates.py | 55 ++++++++++++++++++++++ .../delta/56/event_labels_background_update.sql | 17 +++++++ 2 files changed, 72 insertions(+) create mode 100644 synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 51352b9966..e702fca149 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.api.constants import LabelsField from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore @@ -85,6 +86,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "event_fix_redactions_bytes", self._event_fix_redactions_bytes ) + self.register_background_update_handler( + "event_store_labels", self._event_store_labels + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -503,3 +508,53 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): yield self._end_background_update("event_fix_redactions_bytes") return 1 + + @defer.inlineCallbacks + def _event_store_labels(self, progress, batch_size): + """Stores labels for events.""" + last_event_id = progress.get("last_event_id", "") + + def _event_store_labels_txn(txn): + txn.execute( + """ + SELECT event_id, json FROM event_json + WHERE event_id > ? + LIMIT ? + """, + (last_event_id, batch_size) + ) + + rows = txn.fetchall() + if not rows: + return True + + for row in rows: + event_id = row["event_id"] + event_json = json.loads(row["json"]) + + self._simple_insert_many_txn( + txn=txn, + table="event_labels", + values=[ + { + "event_id": event_id, + "label": label, + } + for label in event_json["content"].get(LabelsField) + ] + ) + + self._background_update_progress_txn( + txn, "event_store_labels", {"last_event_id": event_id} + ) + + return len(rows) == batch_size + + end = yield self.runInteraction( + desc="event_store_labels", func=_event_store_labels_txn + ) + + if end: + yield self._end_background_update("event_store_labels") + + return batch_size diff --git a/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql b/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql new file mode 100644 index 0000000000..5f5e0499ae --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('event_store_labels', '{}'); -- cgit 1.5.1 From a46574281d90d45e4a5f3ecede3261d17726719e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:46:16 +0000 Subject: Use the right format for rows --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index e702fca149..013242b04e 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -524,7 +524,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size) ) - rows = txn.fetchall() + rows = self.cursor_to_dict(txn) if not rows: return True -- cgit 1.5.1 From 07cb38e965a29624b3cc8a07d4c86f61dbe7b72a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:47:09 +0000 Subject: Use a sensible default value for labels --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 013242b04e..448048e11d 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -540,7 +540,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "event_id": event_id, "label": label, } - for label in event_json["content"].get(LabelsField) + for label in event_json["content"].get(LabelsField, []) ] ) -- cgit 1.5.1 From 911b03ca312160bd8f80f010da4eaf63d7f602da Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:49:41 +0000 Subject: Don't try to process events we already have a label for --- synapse/storage/data_stores/main/events_bg_updates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 448048e11d..c8168f6926 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -518,7 +518,8 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn.execute( """ SELECT event_id, json FROM event_json - WHERE event_id > ? + LEFT JOIN event_labels USING (event_id) + WHERE event_id > ? AND label IS NULL LIMIT ? """, (last_event_id, batch_size) -- cgit 1.5.1 From 1c1268245d501e3edefbe3a4eb40c238124bb2e6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 15:01:29 +0000 Subject: Lint --- synapse/storage/data_stores/main/events_bg_updates.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index c8168f6926..10ebc6d865 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -522,7 +522,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): WHERE event_id > ? AND label IS NULL LIMIT ? """, - (last_event_id, batch_size) + (last_event_id, batch_size), ) rows = self.cursor_to_dict(txn) @@ -537,12 +537,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn=txn, table="event_labels", values=[ - { - "event_id": event_id, - "label": label, - } + {"event_id": event_id, "label": label} for label in event_json["content"].get(LabelsField, []) - ] + ], ) self._background_update_progress_txn( -- cgit 1.5.1 From 1586f2c7e716fb066aa5551bdce654be8e35b01d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 23:01:26 +0000 Subject: Fix exit condition --- synapse/storage/data_stores/main/events_bg_updates.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 10ebc6d865..72d600c51b 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -546,7 +546,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn, "event_store_labels", {"last_event_id": event_id} ) - return len(rows) == batch_size + # We want to return true (to end the background update) only when + # the query returned with less rows than we asked for. + return len(rows) != batch_size end = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn -- cgit 1.5.1 From 49008e674f6c5463168e970349ce84c488407834 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 23:12:15 +0000 Subject: TODO --- synapse/storage/data_stores/main/events_bg_updates.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 72d600c51b..b7b390485b 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -548,6 +548,8 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. + # TODO: this currently doesn't work, i.e. it only runs once whereas + # its opposite does the whole thing, investigate. return len(rows) != batch_size end = yield self.runInteraction( -- cgit 1.5.1 From 824bba2f78f86a9364a87c6ce79a40bff0f73cbf Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 1 Nov 2019 12:03:04 +0000 Subject: Correctly order results --- synapse/storage/data_stores/main/events_bg_updates.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index b7b390485b..5ba1cff468 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -21,7 +21,7 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.constants import LabelsField +from synapse.api.constants import EventContentFields from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore @@ -520,7 +520,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): SELECT event_id, json FROM event_json LEFT JOIN event_labels USING (event_id) WHERE event_id > ? AND label IS NULL - LIMIT ? + ORDER BY event_id LIMIT ? """, (last_event_id, batch_size), ) @@ -538,7 +538,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): table="event_labels", values=[ {"event_id": event_id, "label": label} - for label in event_json["content"].get(LabelsField, []) + for label in event_json["content"].get( + EventContentFields.Labels, [] + ) ], ) @@ -548,8 +550,6 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. - # TODO: this currently doesn't work, i.e. it only runs once whereas - # its opposite does the whole thing, investigate. return len(rows) != batch_size end = yield self.runInteraction( -- cgit 1.5.1 From 3b29a73f9fc18912a6b775a083d9449d426fa790 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 1 Nov 2019 12:07:05 +0000 Subject: Print out the actual number of affected rows --- synapse/storage/data_stores/main/events_bg_updates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 5ba1cff468..a703927471 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -527,7 +527,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): rows = self.cursor_to_dict(txn) if not rows: - return True + return True, 0 for row in rows: event_id = row["event_id"] @@ -550,13 +550,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. - return len(rows) != batch_size + return len(rows) != batch_size, len(rows) - end = yield self.runInteraction( + end, num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn ) if end: yield self._end_background_update("event_store_labels") - return batch_size + return num_rows -- cgit 1.5.1 From 7134ca7daa7ead8104e3c51ba4bc730d99d098e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Nov 2019 13:36:57 +0000 Subject: Change to not require a state_groups.room_id index. This does mean that we won't clean up orphaned state groups (i.e. state groups that were persisted but the associated event wasn't). --- synapse/storage/data_stores/main/events.py | 70 +++++++++++++--------- .../main/schema/delta/56/state_group_room_idx.sql | 17 ------ synapse/storage/data_stores/main/state.py | 7 --- synapse/storage/purge_events.py | 4 +- 4 files changed, 45 insertions(+), 53 deletions(-) delete mode 100644 synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 68f27078c4..3049a21dc5 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1624,7 +1624,10 @@ class EventsStore( """Deletes all record of a room Args: - room_id (str): + room_id (str) + + Returns: + Deferred[List[int]]: The list of state groups to delete. """ return self.runInteraction("purge_room", self._purge_room_txn, room_id) @@ -1714,10 +1717,24 @@ class EventsStore( # index on them. In any case we should be clearing out 'stream' tables # periodically anyway (#5888) + # Now we fetch all the state groups that should be deleted. + txn.execute( + """ + SELECT DISTINCT state_group FROM events + INNER JOIN event_to_state_groups USING(event_id) + WHERE events.room_id = ? + """, + (room_id,), + ) + + state_groups = [row[0] for row in txn] + # TODO: we could probably usefully do a bunch of cache invalidation here logger.info("[purge] done") + return state_groups + def purge_unreferenced_state_groups( self, room_id: str, state_groups_to_delete: Set[int] ) -> defer.Deferred: @@ -1825,54 +1842,53 @@ class EventsStore( return {row["state_group"]: row["prev_state_group"] for row in rows} - def purge_room_state(self, room_id): + def purge_room_state(self, room_id, state_groups_to_delete): """Deletes all record of a room from state tables Args: room_id (str): + state_groups_to_delete (list[int]): State groups to delete """ return self.runInteraction( - "purge_room_state", self._purge_room_state_txn, room_id + "purge_room_state", + self._purge_room_state_txn, + room_id, + state_groups_to_delete, ) - def _purge_room_state_txn(self, txn, room_id): + def _purge_room_state_txn(self, txn, room_id, state_groups_to_delete): # first we have to delete the state groups states logger.info("[purge] removing %s from state_groups_state", room_id) - txn.execute( - """ - DELETE FROM state_groups_state - WHERE state_group IN ( - SELECT state_group FROM state_groups - WHERE room_id = ? - ) - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_groups_state", + column="state_group", + iterable=state_groups_to_delete, + keyvalues={}, ) # ... and the state group edges logger.info("[purge] removing %s from state_group_edges", room_id) - txn.execute( - """ - DELETE FROM state_group_edges - WHERE state_group IN ( - SELECT state_group FROM state_groups - WHERE room_id = ? - ) - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_group_edges", + column="state_group", + iterable=state_groups_to_delete, + keyvalues={}, ) # ... and the state groups logger.info("[purge] removing %s from state_groups", room_id) - txn.execute( - """ - DELETE FROM state_groups WHERE room_id = ? - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_groups", + column="id", + iterable=state_groups_to_delete, + keyvalues={}, ) async def is_event_after(self, event_id1, event_id2): diff --git a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql deleted file mode 100644 index 7916ef18b2..0000000000 --- a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql +++ /dev/null @@ -1,17 +0,0 @@ -/* Copyright 2019 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -INSERT INTO background_updates (update_name, progress_json) VALUES - ('state_groups_room_id_idx', '{}'); diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index e1d3041c7c..5c5b15840e 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1023,7 +1023,6 @@ class StateBackgroundUpdateStore( STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" - STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" def __init__(self, db_conn, hs): super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1047,12 +1046,6 @@ class StateBackgroundUpdateStore( table="event_to_state_groups", columns=["state_group"], ) - self.register_background_index_update( - self.STATE_GROUPS_ROOM_INDEX_UPDATE_NAME, - index_name="state_groups_room_id_idx", - table="state_groups", - columns=["room_id"], - ) @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py index dd45df0c88..a368182034 100644 --- a/synapse/storage/purge_events.py +++ b/synapse/storage/purge_events.py @@ -33,8 +33,8 @@ class PurgeEventsStorage(object): """Deletes all record of a room """ - yield self.stores.main.purge_room(room_id) - yield self.stores.main.purge_room_state(room_id) + state_groups_to_delete = yield self.stores.main.purge_room(room_id) + yield self.stores.main.purge_room_state(room_id, state_groups_to_delete) @defer.inlineCallbacks def purge_history(self, room_id, token, delete_local_events): -- cgit 1.5.1 From 0287d033eec86fb7f6bb84f929e756c99caf2113 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Nov 2019 18:08:50 +0000 Subject: Transfer upgraded rooms on groups --- changelog.d/6235.bugfix | 1 + synapse/handlers/room_member.py | 9 +++++++++ synapse/storage/data_stores/main/group_server.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 changelog.d/6235.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6235.bugfix b/changelog.d/6235.bugfix new file mode 100644 index 0000000000..12718ba934 --- /dev/null +++ b/changelog.d/6235.bugfix @@ -0,0 +1 @@ +Remove a room from a server's public rooms list on room upgrade. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 06d09c2947..01c65ee222 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -514,6 +514,15 @@ class RoomMemberHandler(object): if old_room and old_room["is_public"]: yield self.store.set_room_is_public(old_room_id, False) yield self.store.set_room_is_public(room_id, True) + + # Check if any groups we own contain the predecessor room + local_group_ids = yield self.store.get_local_groups_for_room(old_room_id) + for group_id in local_group_ids: + # Add new the new room to those groups + yield self.store.add_room_to_group(group_id, room_id, old_room["is_public"]) + + # Remove the old room from those groups + yield self.store.remove_room_from_group(group_id, old_room_id) @defer.inlineCallbacks def copy_user_state_on_room_upgrade(self, old_room_id, new_room_id, user_ids): diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py index b3a2771f1b..13ad71a49c 100644 --- a/synapse/storage/data_stores/main/group_server.py +++ b/synapse/storage/data_stores/main/group_server.py @@ -552,6 +552,21 @@ class GroupServerStore(SQLBaseStore): keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, desc="remove_user_from_summary", ) + + def get_local_groups_for_room(self, room_id): + """Get all of the local group that contain a given room + Args: + room_id (str): The ID of a room + Returns: + Deferred[list[str]]: A twisted.Deferred containing a list of group ids + containing this room + """ + return self._simple_select_onecol( + table="group_rooms", + keyvalues={"room_id": room_id}, + retcol="group_id", + desc="get_local_groups_for_room", + ) def get_users_for_summary_by_role(self, group_id, include_private=False): """Get the users and roles that should be included in a summary request -- cgit 1.5.1 From c2203bea57fd34de9be994f5e117da2c24338708 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Nov 2019 18:17:11 +0000 Subject: Re-add docstring, with caveats detailed --- synapse/handlers/room_member.py | 2 +- synapse/storage/data_stores/main/group_server.py | 2 +- synapse/storage/data_stores/main/state.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 01c65ee222..6cfee4b361 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -514,7 +514,7 @@ class RoomMemberHandler(object): if old_room and old_room["is_public"]: yield self.store.set_room_is_public(old_room_id, False) yield self.store.set_room_is_public(room_id, True) - + # Check if any groups we own contain the predecessor room local_group_ids = yield self.store.get_local_groups_for_room(old_room_id) for group_id in local_group_ids: diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py index 13ad71a49c..5ded539af8 100644 --- a/synapse/storage/data_stores/main/group_server.py +++ b/synapse/storage/data_stores/main/group_server.py @@ -552,7 +552,7 @@ class GroupServerStore(SQLBaseStore): keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, desc="remove_user_from_summary", ) - + def get_local_groups_for_room(self, room_id): """Get all of the local group that contain a given room Args: diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 3132848034..a41cac7b36 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -285,7 +285,11 @@ class StateGroupWorkerStore( room_id (str) Returns: - Deferred[unicode|None]: predecessor room id + Deferred[dict|None]: A dictionary containing the structure of the predecessor + field from the room's create event. The structure is subject to other servers, + but it is expected to be: + * room_id (str): The room ID of the predecessor room + * event_id (str): The ID of the tombstone event in the predecessor room Raises: NotFoundError if the room is unknown -- cgit 1.5.1 From 807ec3bd99908d2991d2b3d0615b0862610c6dc3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 6 Nov 2019 10:01:39 +0000 Subject: Fix bug which caused rejected events to be stored with the wrong room state (#6320) Fixes a bug where rejected events were persisted with the wrong state group. Also fixes an occasional internal-server-error when receiving events over federation which are rejected and (possibly because they are backwards-extremities) have no prev_group. Fixes #6289. --- changelog.d/6320.bugfix | 1 + synapse/events/snapshot.py | 25 ++++- synapse/handlers/federation.py | 1 + synapse/state/__init__.py | 172 ++++++++++++++---------------- synapse/storage/data_stores/main/state.py | 2 +- tests/handlers/test_federation.py | 126 ++++++++++++++++++++++ tests/test_state.py | 61 +++++++++-- 7 files changed, 285 insertions(+), 103 deletions(-) create mode 100644 changelog.d/6320.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6320.bugfix b/changelog.d/6320.bugfix new file mode 100644 index 0000000000..2c3fad5655 --- /dev/null +++ b/changelog.d/6320.bugfix @@ -0,0 +1 @@ +Fix bug which casued rejected events to be persisted with the wrong room state. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 0f3c5989cb..64e898f40c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -48,10 +48,21 @@ class EventContext: Note that this is a private attribute: it should be accessed via the ``state_group`` property. + state_group_before_event: The ID of the state group representing the state + of the room before this event. + + If this is a non-state event, this will be the same as ``state_group``. If + it's a state event, it will be the same as ``prev_group``. + + If ``state_group`` is None (ie, the event is an outlier), + ``state_group_before_event`` will always also be ``None``. + prev_group: If it is known, ``state_group``'s prev_group. Note that this being None does not necessarily mean that ``state_group`` does not have a prev_group! + If the event is a state event, this is normally the same as ``prev_group``. + If ``state_group`` is None (ie, the event is an outlier), ``prev_group`` will always also be ``None``. @@ -77,7 +88,8 @@ class EventContext: ``get_current_state_ids``. _AsyncEventContext impl calculates this on-demand: it will be None until that happens. - _prev_state_ids: The room state map, excluding this event. For a non-state + _prev_state_ids: The room state map, excluding this event - ie, the state + in ``state_group_before_event``. For a non-state event, this will be the same as _current_state_events. Note that it is a completely different thing to prev_group! @@ -92,6 +104,7 @@ class EventContext: rejected = attr.ib(default=False, type=Union[bool, str]) _state_group = attr.ib(default=None, type=Optional[int]) + state_group_before_event = attr.ib(default=None, type=Optional[int]) prev_group = attr.ib(default=None, type=Optional[int]) delta_ids = attr.ib(default=None, type=Optional[Dict[Tuple[str, str], str]]) app_service = attr.ib(default=None, type=Optional[ApplicationService]) @@ -103,12 +116,18 @@ class EventContext: @staticmethod def with_state( - state_group, current_state_ids, prev_state_ids, prev_group=None, delta_ids=None + state_group, + state_group_before_event, + current_state_ids, + prev_state_ids, + prev_group=None, + delta_ids=None, ): return EventContext( current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, state_group=state_group, + state_group_before_event=state_group_before_event, prev_group=prev_group, delta_ids=delta_ids, ) @@ -140,6 +159,7 @@ class EventContext: "event_type": event.type, "event_state_key": event.state_key if event.is_state() else None, "state_group": self._state_group, + "state_group_before_event": self.state_group_before_event, "rejected": self.rejected, "prev_group": self.prev_group, "delta_ids": _encode_state_dict(self.delta_ids), @@ -165,6 +185,7 @@ class EventContext: event_type=input["event_type"], event_state_key=input["event_state_key"], state_group=input["state_group"], + state_group_before_event=input["state_group_before_event"], prev_group=input["prev_group"], delta_ids=_decode_state_dict(input["delta_ids"]), rejected=input["rejected"], diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b7916de909..05dd8d2671 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2280,6 +2280,7 @@ class FederationHandler(BaseHandler): return EventContext.with_state( state_group=state_group, + state_group_before_event=context.state_group_before_event, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, prev_group=prev_group, diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 2c04ab1854..139beef8ed 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -16,6 +16,7 @@ import logging from collections import namedtuple +from typing import Iterable, Optional from six import iteritems, itervalues @@ -27,6 +28,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions +from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.logging.utils import log_function from synapse.state import v1, v2 @@ -212,15 +214,17 @@ class StateHandler(object): return joined_hosts @defer.inlineCallbacks - def compute_event_context(self, event, old_state=None): + def compute_event_context( + self, event: EventBase, old_state: Optional[Iterable[EventBase]] = None + ): """Build an EventContext structure for the event. This works out what the current state should be for the event, and generates a new state group if necessary. Args: - event (synapse.events.EventBase): - old_state (dict|None): The state at the event if it can't be + event: + old_state: The state at the event if it can't be calculated from existing events. This is normally only specified when receiving an event from federation where we don't have the prev events for, e.g. when backfilling. @@ -251,113 +255,103 @@ class StateHandler(object): # group for it. context = EventContext.with_state( state_group=None, + state_group_before_event=None, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, ) return context + # + # first of all, figure out the state before the event + # + if old_state: - # We already have the state, so we don't need to calculate it. - # Let's just correctly fill out the context and create a - # new state group for it. - - prev_state_ids = {(s.type, s.state_key): s.event_id for s in old_state} - - if event.is_state(): - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] - if replaces != event.event_id: # Paranoia check - event.unsigned["replaces_state"] = replaces - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id - else: - current_state_ids = prev_state_ids + # if we're given the state before the event, then we use that + state_ids_before_event = { + (s.type, s.state_key): s.event_id for s in old_state + } + state_group_before_event = None + state_group_before_event_prev_group = None + deltas_to_state_group_before_event = None - state_group = yield self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=None, - delta_ids=None, - current_state_ids=current_state_ids, - ) + else: + # otherwise, we'll need to resolve the state across the prev_events. + logger.debug("calling resolve_state_groups from compute_event_context") - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + entry = yield self.resolve_state_groups_for_events( + event.room_id, event.prev_event_ids() ) - return context + state_ids_before_event = entry.state + state_group_before_event = entry.state_group + state_group_before_event_prev_group = entry.prev_group + deltas_to_state_group_before_event = entry.delta_ids - logger.debug("calling resolve_state_groups from compute_event_context") + # + # make sure that we have a state group at that point. If it's not a state event, + # that will be the state group for the new event. If it *is* a state event, + # it might get rejected (in which case we'll need to persist it with the + # previous state group) + # - entry = yield self.resolve_state_groups_for_events( - event.room_id, event.prev_event_ids() - ) + if not state_group_before_event: + state_group_before_event = yield self.state_store.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + current_state_ids=state_ids_before_event, + ) - prev_state_ids = entry.state - prev_group = None - delta_ids = None + # XXX: can we update the state cache entry for the new state group? or + # could we set a flag on resolve_state_groups_for_events to tell it to + # always make a state group? + + # + # now if it's not a state event, we're done + # + + if not event.is_state(): + return EventContext.with_state( + state_group_before_event=state_group_before_event, + state_group=state_group_before_event, + current_state_ids=state_ids_before_event, + prev_state_ids=state_ids_before_event, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + ) - if event.is_state(): - # If this is a state event then we need to create a new state - # group for the state after this event. + # + # otherwise, we'll need to create a new state group for after the event + # - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] + key = (event.type, event.state_key) + if key in state_ids_before_event: + replaces = state_ids_before_event[key] + if replaces != event.event_id: event.unsigned["replaces_state"] = replaces - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id - - if entry.state_group: - # If the state at the event has a state group assigned then - # we can use that as the prev group - prev_group = entry.state_group - delta_ids = {key: event.event_id} - elif entry.prev_group: - # If the state at the event only has a prev group, then we can - # use that as a prev group too. - prev_group = entry.prev_group - delta_ids = dict(entry.delta_ids) - delta_ids[key] = event.event_id - - state_group = yield self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=delta_ids, - current_state_ids=current_state_ids, - ) - else: - current_state_ids = prev_state_ids - prev_group = entry.prev_group - delta_ids = entry.delta_ids - - if entry.state_group is None: - entry.state_group = yield self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=entry.prev_group, - delta_ids=entry.delta_ids, - current_state_ids=current_state_ids, - ) - entry.state_id = entry.state_group - - state_group = entry.state_group - - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - prev_group=prev_group, + state_ids_after_event = dict(state_ids_before_event) + state_ids_after_event[key] = event.event_id + delta_ids = {key: event.event_id} + + state_group_after_event = yield self.state_store.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event, delta_ids=delta_ids, + current_state_ids=state_ids_after_event, ) - return context + return EventContext.with_state( + state_group=state_group_after_event, + state_group_before_event=state_group_before_event, + current_state_ids=state_ids_after_event, + prev_state_ids=state_ids_before_event, + prev_group=state_group_before_event, + delta_ids=delta_ids, + ) @measure_func() @defer.inlineCallbacks diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 3132848034..9e1541988e 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1231,7 +1231,7 @@ class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore): # if the event was rejected, just give it the same state as its # predecessor. if context.rejected: - state_groups[event.event_id] = context.prev_group + state_groups[event.event_id] = context.state_group_before_event continue state_groups[event.event_id] = context.state_group diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index d56220f403..b4d92cf732 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -12,13 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging + from synapse.api.constants import EventTypes from synapse.api.errors import AuthError, Codes +from synapse.federation.federation_base import event_from_pdu_json +from synapse.logging.context import LoggingContext, run_in_background from synapse.rest import admin from synapse.rest.client.v1 import login, room from tests import unittest +logger = logging.getLogger(__name__) + class FederationTestCase(unittest.HomeserverTestCase): servlets = [ @@ -79,3 +85,123 @@ class FederationTestCase(unittest.HomeserverTestCase): self.assertEqual(failure.code, 403, failure) self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure) self.assertEqual(failure.msg, "You are not invited to this room.") + + def test_rejected_message_event_state(self): + """ + Check that we store the state group correctly for rejected non-state events. + + Regression test for #6289. + """ + OTHER_SERVER = "otherserver" + OTHER_USER = "@otheruser:" + OTHER_SERVER + + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # pretend that another server has joined + join_event = self._build_and_send_join_event(OTHER_SERVER, OTHER_USER, room_id) + + # check the state group + sg = self.successResultOf( + self.store._get_state_group_for_event(join_event.event_id) + ) + + # build and send an event which will be rejected + ev = event_from_pdu_json( + { + "type": EventTypes.Message, + "content": {}, + "room_id": room_id, + "sender": "@yetanotheruser:" + OTHER_SERVER, + "depth": join_event["depth"] + 1, + "prev_events": [join_event.event_id], + "auth_events": [], + "origin_server_ts": self.clock.time_msec(), + }, + join_event.format_version, + ) + + with LoggingContext(request="send_rejected"): + d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) + self.get_success(d) + + # that should have been rejected + e = self.get_success(self.store.get_event(ev.event_id, allow_rejected=True)) + self.assertIsNotNone(e.rejected_reason) + + # ... and the state group should be the same as before + sg2 = self.successResultOf(self.store._get_state_group_for_event(ev.event_id)) + + self.assertEqual(sg, sg2) + + def test_rejected_state_event_state(self): + """ + Check that we store the state group correctly for rejected state events. + + Regression test for #6289. + """ + OTHER_SERVER = "otherserver" + OTHER_USER = "@otheruser:" + OTHER_SERVER + + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # pretend that another server has joined + join_event = self._build_and_send_join_event(OTHER_SERVER, OTHER_USER, room_id) + + # check the state group + sg = self.successResultOf( + self.store._get_state_group_for_event(join_event.event_id) + ) + + # build and send an event which will be rejected + ev = event_from_pdu_json( + { + "type": "org.matrix.test", + "state_key": "test_key", + "content": {}, + "room_id": room_id, + "sender": "@yetanotheruser:" + OTHER_SERVER, + "depth": join_event["depth"] + 1, + "prev_events": [join_event.event_id], + "auth_events": [], + "origin_server_ts": self.clock.time_msec(), + }, + join_event.format_version, + ) + + with LoggingContext(request="send_rejected"): + d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) + self.get_success(d) + + # that should have been rejected + e = self.get_success(self.store.get_event(ev.event_id, allow_rejected=True)) + self.assertIsNotNone(e.rejected_reason) + + # ... and the state group should be the same as before + sg2 = self.successResultOf(self.store._get_state_group_for_event(ev.event_id)) + + self.assertEqual(sg, sg2) + + def _build_and_send_join_event(self, other_server, other_user, room_id): + join_event = self.get_success( + self.handler.on_make_join_request(other_server, room_id, other_user) + ) + # the auth code requires that a signature exists, but doesn't check that + # signature... go figure. + join_event.signatures[other_server] = {"x": "y"} + with LoggingContext(request="send_join"): + d = run_in_background( + self.handler.on_send_join_request, other_server, join_event + ) + self.get_success(d) + + # sanity-check: the room should show that the new user is a member + r = self.get_success(self.store.get_current_state_ids(room_id)) + self.assertEqual(r[(EventTypes.Member, other_user)], join_event.event_id) + + return join_event diff --git a/tests/test_state.py b/tests/test_state.py index 38246555bd..176535947a 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -21,6 +21,7 @@ from synapse.api.auth import Auth from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import FrozenEvent +from synapse.events.snapshot import EventContext from synapse.state import StateHandler, StateResolutionHandler from tests import unittest @@ -198,16 +199,22 @@ class StateTestCase(unittest.TestCase): self.store.register_events(graph.walk()) - context_store = {} + context_store = {} # type: dict[str, EventContext] for event in graph.walk(): context = yield self.state.compute_event_context(event) self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + ctx_c = context_store["C"] + ctx_d = context_store["D"] + + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertEqual(2, len(prev_state_ids)) + self.assertEqual(ctx_c.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + @defer.inlineCallbacks def test_branch_basic_conflict(self): graph = Graph( @@ -241,12 +248,19 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + # C ends up winning the resolution between B and C + + ctx_c = context_store["C"] + ctx_d = context_store["D"] + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertSetEqual( {"START", "A", "C"}, {e_id for e_id in prev_state_ids.values()} ) + self.assertEqual(ctx_c.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + @defer.inlineCallbacks def test_branch_have_banned_conflict(self): graph = Graph( @@ -292,11 +306,18 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["E"].get_prev_state_ids(self.store) + # C ends up winning the resolution between C and D because bans win over other + # changes + + ctx_c = context_store["C"] + ctx_e = context_store["E"] + prev_state_ids = yield ctx_e.get_prev_state_ids(self.store) self.assertSetEqual( {"START", "A", "B", "C"}, {e for e in prev_state_ids.values()} ) + self.assertEqual(ctx_c.state_group, ctx_e.state_group_before_event) + self.assertEqual(ctx_e.state_group_before_event, ctx_e.state_group) @defer.inlineCallbacks def test_branch_have_perms_conflict(self): @@ -360,12 +381,20 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + # B ends up winning the resolution between B and C because power levels + # win over other changes. + ctx_b = context_store["B"] + ctx_d = context_store["D"] + + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertSetEqual( {"A1", "A2", "A3", "A5", "B"}, {e for e in prev_state_ids.values()} ) + self.assertEqual(ctx_b.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + def _add_depths(self, nodes, edges): def _get_depth(ev): node = nodes[ev] @@ -390,13 +419,16 @@ class StateTestCase(unittest.TestCase): context = yield self.state.compute_event_context(event, old_state=old_state) - current_state_ids = yield context.get_current_state_ids(self.store) + prev_state_ids = yield context.get_prev_state_ids(self.store) + self.assertCountEqual((e.event_id for e in old_state), prev_state_ids.values()) - self.assertEqual( - set(e.event_id for e in old_state), set(current_state_ids.values()) + current_state_ids = yield context.get_current_state_ids(self.store) + self.assertCountEqual( + (e.event_id for e in old_state), current_state_ids.values() ) - self.assertIsNotNone(context.state_group) + self.assertIsNotNone(context.state_group_before_event) + self.assertEqual(context.state_group_before_event, context.state_group) @defer.inlineCallbacks def test_annotate_with_old_state(self): @@ -411,11 +443,18 @@ class StateTestCase(unittest.TestCase): context = yield self.state.compute_event_context(event, old_state=old_state) prev_state_ids = yield context.get_prev_state_ids(self.store) + self.assertCountEqual((e.event_id for e in old_state), prev_state_ids.values()) - self.assertEqual( - set(e.event_id for e in old_state), set(prev_state_ids.values()) + current_state_ids = yield context.get_current_state_ids(self.store) + self.assertCountEqual( + (e.event_id for e in old_state + [event]), current_state_ids.values() ) + self.assertIsNotNone(context.state_group_before_event) + self.assertNotEqual(context.state_group_before_event, context.state_group) + self.assertEqual(context.state_group_before_event, context.prev_group) + self.assertEqual({("state", ""): event.event_id}, context.delta_ids) + @defer.inlineCallbacks def test_trivial_annotate_message(self): prev_event_id = "prev_event_id" -- cgit 1.5.1 From 70d93cafdb4a3055849e7b84881cfd6225f0b6e5 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Nov 2019 10:59:03 +0000 Subject: Update insert --- synapse/storage/data_stores/main/events_bg_updates.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index a703927471..2fcb0c33dc 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -537,7 +537,12 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn=txn, table="event_labels", values=[ - {"event_id": event_id, "label": label} + { + "event_id": event_id, + "label": label, + "room_id": event_json["room_id"], + "topological_ordering": event_json["depth"], + } for label in event_json["content"].get( EventContentFields.Labels, [] ) -- cgit 1.5.1 From 24a214bd1bc396cbcc41c4c913938299ca9ffcf5 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Nov 2019 11:04:19 +0000 Subject: Fix field name --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 2fcb0c33dc..309bfcafe5 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -544,7 +544,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "topological_ordering": event_json["depth"], } for label in event_json["content"].get( - EventContentFields.Labels, [] + EventContentFields.LABELS, [] ) ], ) -- cgit 1.5.1 From b33c4f7a828e722d6115f73525e0456edb79a90f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 11:55:00 +0000 Subject: Numeric ID checker now checks @0, don't ratelimit on checking --- synapse/handlers/register.py | 41 +++++++++++++++--------- synapse/storage/data_stores/main/registration.py | 8 ++--- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index cff6b0d375..3c142a4395 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -168,6 +168,7 @@ class RegistrationHandler(BaseHandler): Raises: RegistrationError if there was a problem registering. """ + yield self._check_registration_ratelimit(address) yield self.auth.check_auth_blocking(threepid=threepid) password_hash = None @@ -414,6 +415,30 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) + def _check_registration_ratelimit(self, address): + """A simple helper method to check whether the registration rate limit has been hit + for a given IP address + + Args: + address (str): the IP address used to perform the registration. + + Raises: + LimitExceededError: If the rate limit has been exceeded. + """ + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, + time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)) + ) + def register_with_store( self, user_id, @@ -446,22 +471,6 @@ class RegistrationHandler(BaseHandler): Returns: Deferred """ - # Don't rate limit for app services - if appservice_id is None and address is not None: - time_now = self.clock.time() - - allowed, time_allowed = self.ratelimiter.can_do_action( - address, - time_now_s=time_now, - rate_hz=self.hs.config.rc_registration.per_second, - burst_count=self.hs.config.rc_registration.burst_count, - ) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now)) - ) - if self.hs.config.worker_app: return self._register_client( user_id=user_id, diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index f70d41ecab..ee1b2b2bbf 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -488,14 +488,14 @@ class RegistrationWorkerStore(SQLBaseStore): we can. Unfortunately, it's possible some of them are already taken by existing users, and there may be gaps in the already taken range. This function returns the start of the first allocatable gap. This is to - avoid the case of ID 10000000 being pre-allocated, so us wasting the - first (and shortest) many generated user IDs. + avoid the case of ID 1000 being pre-allocated and starting at 1001 while + 0-999 are available. """ def _find_next_generated_user_id(txn): - # We bound between '@1' and '@a' to avoid pulling the entire table + # We bound between '@0' and '@a' to avoid pulling the entire table # out. - txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'") + txn.execute("SELECT name FROM users WHERE '@0' <= name AND name < '@a'") regex = re.compile(r"^@(\d+):") -- cgit 1.5.1 From 71f3bd734fe9f8f903c5a496720e7dcf926f2f4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Nov 2019 17:00:18 +0000 Subject: Use correct type annotation --- synapse/storage/data_stores/main/events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 3049a21dc5..d69c59f5a1 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,7 +19,7 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps -from typing import Set +from typing import Collection from six import iteritems, text_type from six.moves import range @@ -1736,7 +1736,7 @@ class EventsStore( return state_groups def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete: Set[int] + self, room_id: str, state_groups_to_delete: Collection[int] ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. -- cgit 1.5.1 From 5c3363233cca7044a333b7e19ba239eaf5587ff8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Nov 2019 17:02:05 +0000 Subject: Fix deleting state groups during room purge. And fix the tests to actually test that things got deleted. --- synapse/storage/data_stores/main/events.py | 27 ++++++++++++++------------- tests/rest/admin/test_admin.py | 4 +++- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index d69c59f5a1..946823876a 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1633,7 +1633,20 @@ class EventsStore( return self.runInteraction("purge_room", self._purge_room_txn, room_id) def _purge_room_txn(self, txn, room_id): - # First delete tables which lack an index on room_id but have one on event_id + # First we fetch all the state groups that should be deleted, before + # we delete that information. + txn.execute( + """ + SELECT DISTINCT state_group FROM events + INNER JOIN event_to_state_groups USING(event_id) + WHERE events.room_id = ? + """, + (room_id,), + ) + + state_groups = [row[0] for row in txn] + + # Now we delete tables which lack an index on room_id but have one on event_id for table in ( "event_auth", "event_edges", @@ -1717,18 +1730,6 @@ class EventsStore( # index on them. In any case we should be clearing out 'stream' tables # periodically anyway (#5888) - # Now we fetch all the state groups that should be deleted. - txn.execute( - """ - SELECT DISTINCT state_group FROM events - INNER JOIN event_to_state_groups USING(event_id) - WHERE events.room_id = ? - """, - (room_id,), - ) - - state_groups = [row[0] for row in txn] - # TODO: we could probably usefully do a bunch of cache invalidation here logger.info("[purge] done") diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 8e1ca8b738..d9f1b95cb0 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -628,10 +628,12 @@ class PurgeRoomTestCase(unittest.HomeserverTestCase): "local_invites", "room_account_data", "room_tags", + "state_groups", + "state_groups_state", ): count = self.get_success( self.store._simple_select_one_onecol( - table="events", + table=table, keyvalues={"room_id": room_id}, retcol="COUNT(*)", desc="test_purge_room", -- cgit 1.5.1 From 3f9b61ff9504dd88cac17fb3cb097e319babd2a3 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 11:49:37 +0000 Subject: Fix the SQL SELECT query in _paginate_room_events_txn Doing a SELECT DISTINCT when paginating is quite expensive, because it requires the engine to do sorting on the entire events table. However, we only need to run it if we're filtering on 2+ labels, so this PR is changing the request so that DISTINCT is only used then. --- synapse/storage/data_stores/main/stream.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 616ef91d4e..ef0b1426d1 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -871,14 +871,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args.append(int(limit)) + # Using DISTINCT in this SELECT query is quite expensive, because it requires the + # engine to sort on the entire (not limited) result set, i.e. the entire events + # table. We only need to use it when we're filtering on more than two labels, + # because that's the only scenario in which we can possibly to get multiple times + # the same event ID in the results. + if event_filter.labels and len(event_filter.labels) > 1: + select_keywords = "SELECT DISTINCT" + + else: + select_keywords = "SELECT" + sql = ( - "SELECT DISTINCT event_id, topological_ordering, stream_ordering" + "%(select_keywords)s event_id, topological_ordering, stream_ordering" " FROM events" " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s LIMIT ?" - ) % {"bounds": bounds, "order": order} + ) % {"select_keywords": select_keywords, "bounds": bounds, "order": order} txn.execute(sql, args) -- cgit 1.5.1 From 15a1a02e70ca18d8af9a4faaf1bb40427ea6a643 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 12:04:37 +0000 Subject: Handle lack of filter --- synapse/storage/data_stores/main/stream.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index ef0b1426d1..bb70a0f38a 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -876,11 +876,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # table. We only need to use it when we're filtering on more than two labels, # because that's the only scenario in which we can possibly to get multiple times # the same event ID in the results. - if event_filter.labels and len(event_filter.labels) > 1: - select_keywords = "SELECT DISTINCT" - - else: - select_keywords = "SELECT" + select_keywords = "SELECT" + if event_filter and event_filter.labels and len(event_filter.labels) > 1: + select_keywords += "DISTINCT" sql = ( "%(select_keywords)s event_id, topological_ordering, stream_ordering" -- cgit 1.5.1 From 70804392ae42949109e55e4e51566e2545e1df63 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 14:55:10 +0000 Subject: Only join on event_labels if we're filtering on labels --- synapse/storage/data_stores/main/stream.py | 33 ++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index bb70a0f38a..064f602a65 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -871,23 +871,38 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args.append(int(limit)) - # Using DISTINCT in this SELECT query is quite expensive, because it requires the - # engine to sort on the entire (not limited) result set, i.e. the entire events - # table. We only need to use it when we're filtering on more than two labels, - # because that's the only scenario in which we can possibly to get multiple times - # the same event ID in the results. select_keywords = "SELECT" - if event_filter and event_filter.labels and len(event_filter.labels) > 1: - select_keywords += "DISTINCT" + join_clause = "" + if event_filter and event_filter.labels: + # If we're not filtering on a label, then joining on event_labels will + # return as many row for a single event as the number of labels it has. To + # avoid this, only join if we're filtering on at least one label. + join_clause = ( + "LEFT JOIN event_labels" + " USING (event_id, room_id, topological_ordering)" + ) + if len(event_filter.labels) > 1: + # Using DISTINCT in this SELECT query is quite expensive, because it + # requires the engine to sort on the entire (not limited) result set, + # i.e. the entire events table. We only need to use it when we're + # filtering on more than two labels, because that's the only scenario + # in which we can possibly to get multiple times the same event ID in + # the results. + select_keywords += "DISTINCT" sql = ( "%(select_keywords)s event_id, topological_ordering, stream_ordering" " FROM events" - " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)" + " %(join_clause)s" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s LIMIT ?" - ) % {"select_keywords": select_keywords, "bounds": bounds, "order": order} + ) % { + "select_keywords": select_keywords, + "join_clause": join_clause, + "bounds": bounds, + "order": order + } txn.execute(sql, args) -- cgit 1.5.1 From b9cba079628db11951fc5ed30b03e8825eb954d7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 14:57:15 +0000 Subject: Lint --- synapse/storage/data_stores/main/stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 064f602a65..90bb2d8e8f 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -876,7 +876,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_filter and event_filter.labels: # If we're not filtering on a label, then joining on event_labels will # return as many row for a single event as the number of labels it has. To - # avoid this, only join if we're filtering on at least one label. + # avoid this, only join if we're filtering on at least one label. join_clause = ( "LEFT JOIN event_labels" " USING (event_id, room_id, topological_ordering)" @@ -901,7 +901,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "select_keywords": select_keywords, "join_clause": join_clause, "bounds": bounds, - "order": order + "order": order, } txn.execute(sql, args) -- cgit 1.5.1 From bb78276bdc77c0462696529c27fd8a6062b37afc Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 15:25:27 +0000 Subject: Incorporate review --- .../storage/data_stores/main/events_bg_updates.py | 23 +++++++++------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 309bfcafe5..9714662c11 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,43 +525,38 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - rows = self.cursor_to_dict(txn) - if not rows: - return True, 0 - - for row in rows: - event_id = row["event_id"] - event_json = json.loads(row["json"]) - + nbrows = 0 + for (event_id, event_json) in txn: self._simple_insert_many_txn( txn=txn, table="event_labels", values=[ { "event_id": event_id, - "label": label, + "label": str(label), "room_id": event_json["room_id"], "topological_ordering": event_json["depth"], } for label in event_json["content"].get( EventContentFields.LABELS, [] ) + if label is not None ], ) + nbrows += 1 + self._background_update_progress_txn( txn, "event_store_labels", {"last_event_id": event_id} ) - # We want to return true (to end the background update) only when - # the query returned with less rows than we asked for. - return len(rows) != batch_size, len(rows) + return nbrows - end, num_rows = yield self.runInteraction( + num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn ) - if end: + if not num_rows: yield self._end_background_update("event_store_labels") return num_rows -- cgit 1.5.1 From ec2cb9f29829cf81f843124ef4289bc0eb88413e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:18:40 +0000 Subject: Initialise value before looping --- synapse/storage/data_stores/main/events_bg_updates.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 9714662c11..bee8c9cfa0 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -526,28 +526,32 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) nbrows = 0 - for (event_id, event_json) in txn: + last_row_event_id = "" + for (event_id, event_json_raw) in txn: + event_json = json.loads(event_json_raw) + self._simple_insert_many_txn( txn=txn, table="event_labels", values=[ { "event_id": event_id, - "label": str(label), + "label": label, "room_id": event_json["room_id"], "topological_ordering": event_json["depth"], } for label in event_json["content"].get( EventContentFields.LABELS, [] ) - if label is not None + if label is not None and isinstance(label, str) ], ) nbrows += 1 + last_row_event_id = event_id self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": event_id} + txn, "event_store_labels", {"last_event_id": last_row_event_id} ) return nbrows -- cgit 1.5.1 From 1186612d6cd728e6b7ed7806579db3cea7410b54 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:46:41 +0000 Subject: Back to using cursor_to_dict --- synapse/storage/data_stores/main/events_bg_updates.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index bee8c9cfa0..b858e3ac1a 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,10 +525,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - nbrows = 0 - last_row_event_id = "" - for (event_id, event_json_raw) in txn: - event_json = json.loads(event_json_raw) + rows = self.cursor_to_dict(txn) + if not len(rows): + return 0 + + for row in rows: + event_id = row["event_id"] + event_json = json.loads(row["event_json"]) self._simple_insert_many_txn( txn=txn, @@ -547,14 +550,11 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ], ) - nbrows += 1 - last_row_event_id = event_id - self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": last_row_event_id} + txn, "event_store_labels", {"last_event_id": event_id} ) - return nbrows + return len(rows) num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn -- cgit 1.5.1 From cd312012676d39da8d0383cba167d1211378fece Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:47:15 +0000 Subject: Revert "Back to using cursor_to_dict" This reverts commit 1186612d6cd728e6b7ed7806579db3cea7410b54. --- synapse/storage/data_stores/main/events_bg_updates.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index b858e3ac1a..bee8c9cfa0 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,13 +525,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - rows = self.cursor_to_dict(txn) - if not len(rows): - return 0 - - for row in rows: - event_id = row["event_id"] - event_json = json.loads(row["event_json"]) + nbrows = 0 + last_row_event_id = "" + for (event_id, event_json_raw) in txn: + event_json = json.loads(event_json_raw) self._simple_insert_many_txn( txn=txn, @@ -550,11 +547,14 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ], ) + nbrows += 1 + last_row_event_id = event_id + self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": event_id} + txn, "event_store_labels", {"last_event_id": last_row_event_id} ) - return len(rows) + return nbrows num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn -- cgit 1.5.1 From c9b27d00445f84ebfbd4115e3177a10513aadcfc Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:47:45 +0000 Subject: Copy results --- synapse/storage/data_stores/main/events_bg_updates.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index bee8c9cfa0..a4cb64f479 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,9 +525,11 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) + results = list(txn) + nbrows = 0 last_row_event_id = "" - for (event_id, event_json_raw) in txn: + for (event_id, event_json_raw) in results: event_json = json.loads(event_json_raw) self._simple_insert_many_txn( -- cgit 1.5.1 From 6d360f099f3ea09c90795e5a6c4fc07dbffd874e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 17:01:43 +0000 Subject: Update synapse/storage/data_stores/main/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index a4cb64f479..18862adb9c 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -545,7 +545,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): for label in event_json["content"].get( EventContentFields.LABELS, [] ) - if label is not None and isinstance(label, str) + if isinstance(label, str) ], ) -- cgit 1.5.1 From dad8d68c9938e7d09eefd6aa1b633494ab89f719 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 17:01:53 +0000 Subject: Update synapse/storage/data_stores/main/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 18862adb9c..0ed59ef48e 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -511,7 +511,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): @defer.inlineCallbacks def _event_store_labels(self, progress, batch_size): - """Stores labels for events.""" + """Background update handler which will store labels for existing events.""" last_event_id = progress.get("last_event_id", "") def _event_store_labels_txn(txn): -- cgit 1.5.1 From e4ec82ce0fdd17f912fad76defd53ec99f782528 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Nov 2019 09:50:48 +0000 Subject: Move type annotation into docstring --- synapse/storage/data_stores/main/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 946823876a..878f7568a6 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,7 +19,6 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps -from typing import Collection from six import iteritems, text_type from six.moves import range @@ -1737,7 +1736,7 @@ class EventsStore( return state_groups def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete: Collection[int] + self, room_id: str, state_groups_to_delete ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. @@ -1745,7 +1744,8 @@ class EventsStore( Args: room_id: The room the state groups belong to (must all be in the same room). - state_groups_to_delete: Set of all state groups to delete. + state_groups_to_delete (Collection[int]): Set of all state groups + to delete. """ return self.runInteraction( -- cgit 1.5.1 From b16fa433860824560c64acf594aa6c891e5f1a0a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 8 Nov 2019 10:34:09 +0000 Subject: Incorporate review --- synapse/storage/data_stores/main/stream.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 90bb2d8e8f..8780fdd989 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -877,10 +877,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # If we're not filtering on a label, then joining on event_labels will # return as many row for a single event as the number of labels it has. To # avoid this, only join if we're filtering on at least one label. - join_clause = ( - "LEFT JOIN event_labels" - " USING (event_id, room_id, topological_ordering)" - ) + join_clause = """ + LEFT JOIN event_labels + USING (event_id, room_id, topological_ordering) + """ if len(event_filter.labels) > 1: # Using DISTINCT in this SELECT query is quite expensive, because it # requires the engine to sort on the entire (not limited) result set, @@ -890,14 +890,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # the results. select_keywords += "DISTINCT" - sql = ( - "%(select_keywords)s event_id, topological_ordering, stream_ordering" - " FROM events" - " %(join_clause)s" - " WHERE outlier = ? AND room_id = ? AND %(bounds)s" - " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s LIMIT ?" - ) % { + sql = """ + %(select_keywords)s event_id, topological_ordering, stream_ordering + FROM events + %(join_clause)s + WHERE outlier = ? AND room_id = ? AND %(bounds)s + ORDER BY topological_ordering %(order)s, + stream_ordering %(order)s LIMIT ? + """ % { "select_keywords": select_keywords, "join_clause": join_clause, "bounds": bounds, -- cgit 1.5.1 From c4bdf2d7855dcddb7e294e9dba458884db2be7e0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 8 Nov 2019 11:42:55 +0000 Subject: Remove content from being sent for account data rdata stream --- synapse/replication/tcp/streams/_base.py | 6 +++--- synapse/storage/data_stores/main/account_data.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 9e45429d49..dd87733842 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -89,7 +89,7 @@ TagAccountDataStreamRow = namedtuple( ) AccountDataStreamRow = namedtuple( "AccountDataStream", - ("user_id", "room_id", "data_type", "data"), # str # str # str # dict + ("user_id", "room_id", "data_type"), # str # str # str ) GroupsStreamRow = namedtuple( "GroupsStreamRow", @@ -421,8 +421,8 @@ class AccountDataStream(Stream): results = list(room_results) results.extend( - (stream_id, user_id, None, account_data_type, content) - for stream_id, user_id, account_data_type, content in global_results + (stream_id, user_id, None, account_data_type) + for stream_id, user_id, account_data_type in global_results ) return results diff --git a/synapse/storage/data_stores/main/account_data.py b/synapse/storage/data_stores/main/account_data.py index 6afbfc0d74..22093484ed 100644 --- a/synapse/storage/data_stores/main/account_data.py +++ b/synapse/storage/data_stores/main/account_data.py @@ -184,14 +184,14 @@ class AccountDataWorkerStore(SQLBaseStore): current_id(int): The position to fetch up to. Returns: A deferred pair of lists of tuples of stream_id int, user_id string, - room_id string, type string, and content string. + room_id string, and type string. """ if last_room_id == current_id and last_global_id == current_id: return defer.succeed(([], [])) def get_updated_account_data_txn(txn): sql = ( - "SELECT stream_id, user_id, account_data_type, content" + "SELECT stream_id, user_id, account_data_type" " FROM account_data WHERE ? < stream_id AND stream_id <= ?" " ORDER BY stream_id ASC LIMIT ?" ) @@ -199,7 +199,7 @@ class AccountDataWorkerStore(SQLBaseStore): global_results = txn.fetchall() sql = ( - "SELECT stream_id, user_id, room_id, account_data_type, content" + "SELECT stream_id, user_id, room_id, account_data_type" " FROM room_account_data WHERE ? < stream_id AND stream_id <= ?" " ORDER BY stream_id ASC LIMIT ?" ) -- cgit 1.5.1 From 745a48625d9760374a7d683441185fa8bd2a2aac Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 14 Nov 2019 12:02:05 +0000 Subject: Fix guest -> real account upgrade with account validity enabled (#6359) --- changelog.d/6359.bugfix | 1 + synapse/storage/_base.py | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 changelog.d/6359.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6359.bugfix b/changelog.d/6359.bugfix new file mode 100644 index 0000000000..22bf5f642a --- /dev/null +++ b/changelog.d/6359.bugfix @@ -0,0 +1 @@ +Fix bug where upgrading a guest account to a full user would fail when account validity is enabled. \ No newline at end of file diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1a2b7ebe25..ab596fa68d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -361,14 +361,11 @@ class SQLBaseStore(object): expiration_ts, ) - self._simple_insert_txn( + self._simple_upsert_txn( txn, "account_validity", - values={ - "user_id": user_id, - "expiration_ts_ms": expiration_ts, - "email_sent": False, - }, + keyvalues={"user_id": user_id}, + values={"expiration_ts_ms": expiration_ts, "email_sent": False}, ) def start_profiling(self): -- cgit 1.5.1 From 657d614f6a53f3dbfd2858bd85d0f81563db0041 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 15 Nov 2019 14:02:34 +0000 Subject: Replace UPDATE with UPSERT on device_max_stream_id table (#6363) --- changelog.d/6363.bugfix | 1 + synapse/storage/data_stores/main/deviceinbox.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6363.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6363.bugfix b/changelog.d/6363.bugfix new file mode 100644 index 0000000000..d023b49181 --- /dev/null +++ b/changelog.d/6363.bugfix @@ -0,0 +1 @@ +Fix `to_device` stream ID getting reset every time Synapse restarts, which had the potential to cause unable to decrypt errors. \ No newline at end of file diff --git a/synapse/storage/data_stores/main/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py index f04aad0743..96cd0fb77a 100644 --- a/synapse/storage/data_stores/main/deviceinbox.py +++ b/synapse/storage/data_stores/main/deviceinbox.py @@ -358,8 +358,21 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) def _add_messages_to_local_device_inbox_txn( self, txn, stream_id, messages_by_user_then_device ): - sql = "UPDATE device_max_stream_id" " SET stream_id = ?" " WHERE stream_id < ?" - txn.execute(sql, (stream_id, stream_id)) + # Compatible method of performing an upsert + sql = "SELECT stream_id FROM device_max_stream_id" + + txn.execute(sql) + rows = txn.fetchone() + if rows: + db_stream_id = rows[0] + if db_stream_id < stream_id: + # Insert the new stream_id + sql = "UPDATE device_max_stream_id SET stream_id = ?" + else: + # No rows, perform an insert + sql = "INSERT INTO device_max_stream_id (stream_id) VALUES (?)" + + txn.execute(sql, (stream_id,)) local_by_user_then_device = {} for user_id, messages_by_device in messages_by_user_then_device.items(): -- cgit 1.5.1 From 3916e1b97a1ffc481dfdf66f7da58201a52140a9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 21 Nov 2019 12:00:14 +0000 Subject: Clean up newline quote marks around the codebase (#6362) --- changelog.d/6362.misc | 1 + synapse/app/federation_sender.py | 2 +- synapse/appservice/api.py | 2 +- synapse/config/appservice.py | 2 +- synapse/config/room_directory.py | 2 +- synapse/config/server.py | 6 +++--- synapse/federation/persistence.py | 4 ++-- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/transaction_manager.py | 4 ++-- synapse/handlers/directory.py | 2 +- synapse/http/servlet.py | 2 +- synapse/push/httppusher.py | 5 ++--- synapse/push/mailer.py | 4 ++-- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/server_notices/consent_server_notices.py | 2 +- synapse/storage/_base.py | 2 +- synapse/storage/data_stores/main/deviceinbox.py | 2 +- synapse/storage/data_stores/main/end_to_end_keys.py | 6 +++--- synapse/storage/data_stores/main/events.py | 8 +++----- synapse/storage/data_stores/main/filtering.py | 2 +- synapse/storage/data_stores/main/media_repository.py | 6 +++--- synapse/storage/data_stores/main/registration.py | 4 +--- synapse/storage/data_stores/main/stream.py | 2 +- synapse/storage/data_stores/main/tags.py | 4 +--- synapse/storage/prepare_database.py | 2 +- synapse/streams/config.py | 9 ++++++--- 26 files changed, 43 insertions(+), 46 deletions(-) create mode 100644 changelog.d/6362.misc (limited to 'synapse/storage') diff --git a/changelog.d/6362.misc b/changelog.d/6362.misc new file mode 100644 index 0000000000..b79a5bea99 --- /dev/null +++ b/changelog.d/6362.misc @@ -0,0 +1 @@ +Clean up some unnecessary quotation marks around the codebase. \ No newline at end of file diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 139221ad34..448e45e00f 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -69,7 +69,7 @@ class FederationSenderSlaveStore( self.federation_out_pos_startup = self._get_federation_out_pos(db_conn) def _get_federation_out_pos(self, db_conn): - sql = "SELECT stream_id FROM federation_stream_position" " WHERE type = ?" + sql = "SELECT stream_id FROM federation_stream_position WHERE type = ?" sql = self.database_engine.convert_param_style(sql) txn = db_conn.cursor() diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 3e25bf5747..57174da021 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -185,7 +185,7 @@ class ApplicationServiceApi(SimpleHttpClient): if not _is_valid_3pe_metadata(info): logger.warning( - "query_3pe_protocol to %s did not return a" " valid result", uri + "query_3pe_protocol to %s did not return a valid result", uri ) return None diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index e77d3387ff..ca43e96bd1 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -134,7 +134,7 @@ def _load_appservice(hostname, as_info, config_filename): for regex_obj in as_info["namespaces"][ns]: if not isinstance(regex_obj, dict): raise ValueError( - "Expected namespace entry in %s to be an object," " but got %s", + "Expected namespace entry in %s to be an object, but got %s", ns, regex_obj, ) diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 7c9f05bde4..7ac7699676 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -170,7 +170,7 @@ class _RoomDirectoryRule(object): self.action = action else: raise ConfigError( - "%s rules can only have action of 'allow'" " or 'deny'" % (option_name,) + "%s rules can only have action of 'allow' or 'deny'" % (option_name,) ) self._alias_matches_all = alias == "*" diff --git a/synapse/config/server.py b/synapse/config/server.py index 00d01c43af..11336d7549 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -223,7 +223,7 @@ class ServerConfig(Config): self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) except Exception as e: raise ConfigError( - "Invalid range(s) provided in " "federation_ip_range_blacklist: %s" % e + "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e ) if self.public_baseurl is not None: @@ -787,14 +787,14 @@ class ServerConfig(Config): "--print-pidfile", action="store_true", default=None, - help="Print the path to the pidfile just" " before daemonizing", + help="Print the path to the pidfile just before daemonizing", ) server_group.add_argument( "--manhole", metavar="PORT", dest="manhole", type=int, - help="Turn on the twisted telnet manhole" " service on the given port.", + help="Turn on the twisted telnet manhole service on the given port.", ) diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py index 44edcabed4..d68b4bd670 100644 --- a/synapse/federation/persistence.py +++ b/synapse/federation/persistence.py @@ -44,7 +44,7 @@ class TransactionActions(object): response code and response body. """ if not transaction.transaction_id: - raise RuntimeError("Cannot persist a transaction with no " "transaction_id") + raise RuntimeError("Cannot persist a transaction with no transaction_id") return self.store.get_received_txn_response(transaction.transaction_id, origin) @@ -56,7 +56,7 @@ class TransactionActions(object): Deferred """ if not transaction.transaction_id: - raise RuntimeError("Cannot persist a transaction with no " "transaction_id") + raise RuntimeError("Cannot persist a transaction with no transaction_id") return self.store.set_received_txn_response( transaction.transaction_id, origin, code, response diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 2b2ee8612a..4ebb0e8bc0 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -49,7 +49,7 @@ sent_pdus_destination_dist_count = Counter( sent_pdus_destination_dist_total = Counter( "synapse_federation_client_sent_pdu_destinations:total", - "" "Total number of PDUs queued for sending across all destinations", + "Total number of PDUs queued for sending across all destinations", ) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 67b3e1ab6e..5fed626d5b 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -84,7 +84,7 @@ class TransactionManager(object): txn_id = str(self._next_txn_id) logger.debug( - "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", + "TX [%s] {%s} Attempting new transaction (pdus: %d, edus: %d)", destination, txn_id, len(pdus), @@ -103,7 +103,7 @@ class TransactionManager(object): self._next_txn_id += 1 logger.info( - "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", + "TX [%s] {%s} Sending transaction [%s], (PDUs: %d, EDUs: %d)", destination, txn_id, transaction.transaction_id, diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 69051101a6..a07d2f1a17 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -119,7 +119,7 @@ class DirectoryHandler(BaseHandler): if not service.is_interested_in_alias(room_alias.to_string()): raise SynapseError( 400, - "This application service has not reserved" " this kind of alias.", + "This application service has not reserved this kind of alias.", errcode=Codes.EXCLUSIVE, ) else: diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index e9a5e46ced..13fcb408a6 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -96,7 +96,7 @@ def parse_boolean_from_args(args, name, default=None, required=False): return {b"true": True, b"false": False}[args[name][0]] except Exception: message = ( - "Boolean query parameter %r must be one of" " ['true', 'false']" + "Boolean query parameter %r must be one of ['true', 'false']" ) % (name,) raise SynapseError(400, message) else: diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index e994037be6..d0879b0490 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -246,7 +246,7 @@ class HttpPusher(object): # fixed, we don't suddenly deliver a load # of old notifications. logger.warning( - "Giving up on a notification to user %s, " "pushkey %s", + "Giving up on a notification to user %s, pushkey %s", self.user_id, self.pushkey, ) @@ -299,8 +299,7 @@ class HttpPusher(object): # for sanity, we only remove the pushkey if it # was the one we actually sent... logger.warning( - ("Ignoring rejected pushkey %s because we" " didn't send it"), - pk, + ("Ignoring rejected pushkey %s because we didn't send it"), pk, ) else: logger.info("Pushkey %s was rejected: removing", pk) diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 1d15a06a58..b13b646bfd 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -43,7 +43,7 @@ logger = logging.getLogger(__name__) MESSAGE_FROM_PERSON_IN_ROOM = ( - "You have a message on %(app)s from %(person)s " "in the %(room)s room..." + "You have a message on %(app)s from %(person)s in the %(room)s room..." ) MESSAGE_FROM_PERSON = "You have a message on %(app)s from %(person)s..." MESSAGES_FROM_PERSON = "You have messages on %(app)s from %(person)s..." @@ -55,7 +55,7 @@ MESSAGES_FROM_PERSON_AND_OTHERS = ( "You have messages on %(app)s from %(person)s and others..." ) INVITE_FROM_PERSON_TO_ROOM = ( - "%(person)s has invited you to join the " "%(room)s room on %(app)s..." + "%(person)s has invited you to join the %(room)s room on %(app)s..." ) INVITE_FROM_PERSON = "%(person)s has invited you to chat on %(app)s..." diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 15c15a12f5..a23d6f5c75 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -122,7 +122,7 @@ class PreviewUrlResource(DirectServeResource): pattern = entry[attrib] value = getattr(url_tuple, attrib) logger.debug( - "Matching attrib '%s' with value '%s' against" " pattern '%s'", + "Matching attrib '%s' with value '%s' against pattern '%s'", attrib, value, pattern, diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 415e9c17d8..5736c56032 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -54,7 +54,7 @@ class ConsentServerNotices(object): ) if "body" not in self._server_notice_content: raise ConfigError( - "user_consent server_notice_consent must contain a 'body' " "key." + "user_consent server_notice_consent must contain a 'body' key." ) self._consent_uri_builder = ConsentURIBuilder(hs.config) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index ab596fa68d..6b8a9cd89a 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -851,7 +851,7 @@ class SQLBaseStore(object): allvalues.update(values) latter = "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) - sql = ("INSERT INTO %s (%s) VALUES (%s) " "ON CONFLICT (%s) DO %s") % ( + sql = ("INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s") % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), diff --git a/synapse/storage/data_stores/main/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py index 96cd0fb77a..a23744f11c 100644 --- a/synapse/storage/data_stores/main/deviceinbox.py +++ b/synapse/storage/data_stores/main/deviceinbox.py @@ -380,7 +380,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) devices = list(messages_by_device.keys()) if len(devices) == 1 and devices[0] == "*": # Handle wildcard device_ids. - sql = "SELECT device_id FROM devices" " WHERE user_id = ?" + sql = "SELECT device_id FROM devices WHERE user_id = ?" txn.execute(sql, (user_id,)) message_json = json.dumps(messages_by_device["*"]) for row in txn: diff --git a/synapse/storage/data_stores/main/end_to_end_keys.py b/synapse/storage/data_stores/main/end_to_end_keys.py index 073412a78d..d8ad59ad93 100644 --- a/synapse/storage/data_stores/main/end_to_end_keys.py +++ b/synapse/storage/data_stores/main/end_to_end_keys.py @@ -138,9 +138,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): result.setdefault(user_id, {})[device_id] = None # get signatures on the device - signature_sql = ( - "SELECT * " " FROM e2e_cross_signing_signatures " " WHERE %s" - ) % (" OR ".join("(" + q + ")" for q in signature_query_clauses)) + signature_sql = ("SELECT * FROM e2e_cross_signing_signatures WHERE %s") % ( + " OR ".join("(" + q + ")" for q in signature_query_clauses) + ) txn.execute(signature_sql, signature_query_params) rows = self.cursor_to_dict(txn) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 878f7568a6..627c0b67f1 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -713,9 +713,7 @@ class EventsStore( metadata_json = encode_json(event.internal_metadata.get_dict()) - sql = ( - "UPDATE event_json SET internal_metadata = ?" " WHERE event_id = ?" - ) + sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?" txn.execute(sql, (metadata_json, event.event_id)) # Add an entry to the ex_outlier_stream table to replicate the @@ -732,7 +730,7 @@ class EventsStore( }, ) - sql = "UPDATE events SET outlier = ?" " WHERE event_id = ?" + sql = "UPDATE events SET outlier = ? WHERE event_id = ?" txn.execute(sql, (False, event.event_id)) # Update the event_backward_extremities table now that this @@ -1479,7 +1477,7 @@ class EventsStore( # We do joins against events_to_purge for e.g. calculating state # groups to purge, etc., so lets make an index. - txn.execute("CREATE INDEX events_to_purge_id" " ON events_to_purge(event_id)") + txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)") txn.execute("SELECT event_id, should_delete FROM events_to_purge") event_rows = txn.fetchall() diff --git a/synapse/storage/data_stores/main/filtering.py b/synapse/storage/data_stores/main/filtering.py index a2a2a67927..f05ace299a 100644 --- a/synapse/storage/data_stores/main/filtering.py +++ b/synapse/storage/data_stores/main/filtering.py @@ -55,7 +55,7 @@ class FilteringStore(SQLBaseStore): if filter_id_response is not None: return filter_id_response[0] - sql = "SELECT MAX(filter_id) FROM user_filters " "WHERE user_id = ?" + sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?" txn.execute(sql, (user_localpart,)) max_id = txn.fetchone()[0] if max_id is None: diff --git a/synapse/storage/data_stores/main/media_repository.py b/synapse/storage/data_stores/main/media_repository.py index 84b5f3ad5e..0f2887bdce 100644 --- a/synapse/storage/data_stores/main/media_repository.py +++ b/synapse/storage/data_stores/main/media_repository.py @@ -337,7 +337,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): if len(media_ids) == 0: return - sql = "DELETE FROM local_media_repository_url_cache" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?" def _delete_url_cache_txn(txn): txn.executemany(sql, [(media_id,) for media_id in media_ids]) @@ -365,11 +365,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): return def _delete_url_cache_media_txn(txn): - sql = "DELETE FROM local_media_repository" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository WHERE media_id = ?" txn.executemany(sql, [(media_id,) for media_id in media_ids]) - sql = "DELETE FROM local_media_repository_thumbnails" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?" txn.executemany(sql, [(media_id,) for media_id in media_ids]) diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index ee1b2b2bbf..6a594c160c 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -377,9 +377,7 @@ class RegistrationWorkerStore(SQLBaseStore): """ def f(txn): - sql = ( - "SELECT name, password_hash FROM users" " WHERE lower(name) = lower(?)" - ) + sql = "SELECT name, password_hash FROM users WHERE lower(name) = lower(?)" txn.execute(sql, (user_id,)) return dict(txn) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 8780fdd989..9ae4a913a1 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -616,7 +616,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): def _get_max_topological_txn(self, txn, room_id): txn.execute( - "SELECT MAX(topological_ordering) FROM events" " WHERE room_id = ?", + "SELECT MAX(topological_ordering) FROM events WHERE room_id = ?", (room_id,), ) diff --git a/synapse/storage/data_stores/main/tags.py b/synapse/storage/data_stores/main/tags.py index 10d1887f75..aa24339717 100644 --- a/synapse/storage/data_stores/main/tags.py +++ b/synapse/storage/data_stores/main/tags.py @@ -83,9 +83,7 @@ class TagsWorkerStore(AccountDataWorkerStore): ) def get_tag_content(txn, tag_ids): - sql = ( - "SELECT tag, content" " FROM room_tags" " WHERE user_id=? AND room_id=?" - ) + sql = "SELECT tag, content FROM room_tags WHERE user_id=? AND room_id=?" results = [] for stream_id, user_id, room_id in tag_ids: txn.execute(sql, (user_id, room_id)) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 2e7753820e..731e1c9d9c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -447,7 +447,7 @@ def _apply_module_schema_files(cur, database_engine, modname, names_and_streams) # Mark as done. cur.execute( database_engine.convert_param_style( - "INSERT INTO applied_module_schemas (module_name, file)" " VALUES (?,?)" + "INSERT INTO applied_module_schemas (module_name, file) VALUES (?,?)" ), (modname, name), ) diff --git a/synapse/streams/config.py b/synapse/streams/config.py index 02994ab2a5..cd56cd91ed 100644 --- a/synapse/streams/config.py +++ b/synapse/streams/config.py @@ -88,9 +88,12 @@ class PaginationConfig(object): raise SynapseError(400, "Invalid request.") def __repr__(self): - return ( - "PaginationConfig(from_tok=%r, to_tok=%r," " direction=%r, limit=%r)" - ) % (self.from_token, self.to_token, self.direction, self.limit) + return ("PaginationConfig(from_tok=%r, to_tok=%r, direction=%r, limit=%r)") % ( + self.from_token, + self.to_token, + self.direction, + self.limit, + ) def get_source_config(self, source_name): keyname = "%s_key" % source_name -- cgit 1.5.1 From b7367c339db153824fb47728d3eebe2f944530e6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 25 Nov 2019 13:26:59 +0000 Subject: Fix exceptions from background database update for event labels. (#6407) Add some exception handling here so that events whose json cannot be parsed are ignored rather than getting us stuck in a loop. Fixes #6404. --- changelog.d/6407.bugfix | 1 + .../storage/data_stores/main/events_bg_updates.py | 43 +++++++++++++--------- 2 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 changelog.d/6407.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6407.bugfix b/changelog.d/6407.bugfix new file mode 100644 index 0000000000..0fdbf2a781 --- /dev/null +++ b/changelog.d/6407.bugfix @@ -0,0 +1 @@ +Fix a bug which could cause the background database update hander for event labels to get stuck in a loop raising exceptions. diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 0ed59ef48e..aa87f9abc5 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -530,24 +530,31 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): nbrows = 0 last_row_event_id = "" for (event_id, event_json_raw) in results: - event_json = json.loads(event_json_raw) - - self._simple_insert_many_txn( - txn=txn, - table="event_labels", - values=[ - { - "event_id": event_id, - "label": label, - "room_id": event_json["room_id"], - "topological_ordering": event_json["depth"], - } - for label in event_json["content"].get( - EventContentFields.LABELS, [] - ) - if isinstance(label, str) - ], - ) + try: + event_json = json.loads(event_json_raw) + + self._simple_insert_many_txn( + txn=txn, + table="event_labels", + values=[ + { + "event_id": event_id, + "label": label, + "room_id": event_json["room_id"], + "topological_ordering": event_json["depth"], + } + for label in event_json["content"].get( + EventContentFields.LABELS, [] + ) + if isinstance(label, str) + ], + ) + except Exception as e: + logger.warning( + "Unable to load event %s (no labels will be imported): %s", + event_id, + e, + ) nbrows += 1 last_row_event_id = event_id -- cgit 1.5.1 From c01d5435843ad4af3d520851e86d9938b47b2d12 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 25 Nov 2019 21:03:17 +0000 Subject: Make sure that we close cursors before returning from a query (#6408) There are lots of words in the comment as to why this is a good idea. Fixes #6403. --- changelog.d/6408.bugfix | 1 + synapse/storage/_base.py | 51 +++++++++++++++++++++++----- synapse/storage/data_stores/main/receipts.py | 2 +- 3 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6408.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/6408.bugfix b/changelog.d/6408.bugfix new file mode 100644 index 0000000000..c9babe599b --- /dev/null +++ b/changelog.d/6408.bugfix @@ -0,0 +1 @@ +Fix an intermittent exception when handling read-receipts. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 6b8a9cd89a..459901ac60 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -409,16 +409,15 @@ class SQLBaseStore(object): i = 0 N = 5 while True: + cursor = LoggingTransaction( + conn.cursor(), + name, + self.database_engine, + after_callbacks, + exception_callbacks, + ) try: - txn = conn.cursor() - txn = LoggingTransaction( - txn, - name, - self.database_engine, - after_callbacks, - exception_callbacks, - ) - r = func(txn, *args, **kwargs) + r = func(cursor, *args, **kwargs) conn.commit() return r except self.database_engine.module.OperationalError as e: @@ -456,6 +455,40 @@ class SQLBaseStore(object): ) continue raise + finally: + # we're either about to retry with a new cursor, or we're about to + # release the connection. Once we release the connection, it could + # get used for another query, which might do a conn.rollback(). + # + # In the latter case, even though that probably wouldn't affect the + # results of this transaction, python's sqlite will reset all + # statements on the connection [1], which will make our cursor + # invalid [2]. + # + # In any case, continuing to read rows after commit()ing seems + # dubious from the PoV of ACID transactional semantics + # (sqlite explicitly says that once you commit, you may see rows + # from subsequent updates.) + # + # In psycopg2, cursors are essentially a client-side fabrication - + # all the data is transferred to the client side when the statement + # finishes executing - so in theory we could go on streaming results + # from the cursor, but attempting to do so would make us + # incompatible with sqlite, so let's make sure we're not doing that + # by closing the cursor. + # + # (*named* cursors in psycopg2 are different and are proper server- + # side things, but (a) we don't use them and (b) they are implicitly + # closed by ending the transaction anyway.) + # + # In short, if we haven't finished with the cursor yet, that's a + # problem waiting to bite us. + # + # TL;DR: we're done with the cursor, so we can close it. + # + # [1]: https://github.com/python/cpython/blob/v3.8.0/Modules/_sqlite/connection.c#L465 + # [2]: https://github.com/python/cpython/blob/v3.8.0/Modules/_sqlite/cursor.c#L236 + cursor.close() except Exception as e: logger.debug("[TXN FAIL] {%s} %s", name, e) raise diff --git a/synapse/storage/data_stores/main/receipts.py b/synapse/storage/data_stores/main/receipts.py index 0c24430f28..8b17334ff4 100644 --- a/synapse/storage/data_stores/main/receipts.py +++ b/synapse/storage/data_stores/main/receipts.py @@ -280,7 +280,7 @@ class ReceiptsWorkerStore(SQLBaseStore): args.append(limit) txn.execute(sql, args) - return (r[0:5] + (json.loads(r[5]),) for r in txn) + return list(r[0:5] + (json.loads(r[5]),) for r in txn) return self.runInteraction( "get_all_updated_receipts", get_all_updated_receipts_txn -- cgit 1.5.1 From 8bb7b15894462c6a0ba81f7198f23a140100331d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Nov 2019 15:50:17 +0000 Subject: Fix find_next_generated_user_id_localpart --- synapse/storage/data_stores/main/registration.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index 6a594c160c..c124bbb88b 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -19,7 +19,6 @@ import logging import re from six import iterkeys -from six.moves import range from twisted.internet import defer from twisted.internet.defer import Deferred @@ -482,12 +481,8 @@ class RegistrationWorkerStore(SQLBaseStore): """ Gets the localpart of the next generated user ID. - Generated user IDs are integers, and we aim for them to be as small as - we can. Unfortunately, it's possible some of them are already taken by - existing users, and there may be gaps in the already taken range. This - function returns the start of the first allocatable gap. This is to - avoid the case of ID 1000 being pre-allocated and starting at 1001 while - 0-999 are available. + Generated user IDs are integers, so we find the largest integer user ID + already taken and return that plus one. """ def _find_next_generated_user_id(txn): @@ -503,9 +498,11 @@ class RegistrationWorkerStore(SQLBaseStore): match = regex.search(user_id) if match: found.add(int(match.group(1))) - for i in range(len(found) + 1): - if i not in found: - return i + + if not found: + return 1 + + return max(found) + 1 return ( ( -- cgit 1.5.1 From f8f14ba466a18ee38827629b8e9ab2d5931e5713 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Nov 2019 16:06:41 +0000 Subject: Don't construct a set --- synapse/storage/data_stores/main/registration.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index c124bbb88b..0a3c1f0510 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -492,17 +492,14 @@ class RegistrationWorkerStore(SQLBaseStore): regex = re.compile(r"^@(\d+):") - found = set() + max_found = 0 for (user_id,) in txn: match = regex.search(user_id) if match: - found.add(int(match.group(1))) + max_found = max(int(match.group(1)), max_found) - if not found: - return 1 - - return max(found) + 1 + return max_found + 1 return ( ( -- cgit 1.5.1