From f58d202e3fc2c17bbe4ee24dd07f09888f73ef23 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 3 Sep 2021 10:59:25 +0100 Subject: Fix bug with reusing 'txn' when persisting event. (#10743) This will only happen when a server has multiple out of band membership events in a single room. --- synapse/storage/databases/main/events.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'synapse/storage/databases/main/events.py') diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 40b53274fb..096ae28788 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -575,7 +575,13 @@ class PersistEventsStore: missing_auth_chains.clear() - for auth_id, event_type, state_key, chain_id, sequence_number in txn: + for ( + auth_id, + event_type, + state_key, + chain_id, + sequence_number, + ) in txn.fetchall(): event_to_types[auth_id] = (event_type, state_key) if chain_id is None: -- cgit 1.5.1 From 1ca70fd312a860a6b486406de3b38ef60ac4abe6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 4 Sep 2021 00:58:49 -0500 Subject: Allow room creator to send MSC2716 related events in existing room versions (#10566) * Allow room creator to send MSC2716 related events in existing room versions Discussed at https://github.com/matrix-org/matrix-doc/pull/2716/#discussion_r682474869 Restoring `get_create_event_for_room_txn` from, https://github.com/matrix-org/synapse/pull/10245/commits/44bb3f0cf5cb365ef9281554daceeecfb17cc94d * Add changelog * Stop people from trying to redact MSC2716 events in unsupported room versions * Populate rooms.creator column for easy lookup > From some [out of band discussion](https://matrix.to/#/!UytJQHLQYfvYWsGrGY:jki.re/$p2fKESoFst038x6pOOmsY0C49S2gLKMr0jhNMz_JJz0?via=jki.re&via=matrix.org), my plan is to use `rooms.creator`. But currently, we don't fill in `creator` for remote rooms when a user is invited to a room for example. So we need to add some code to fill in `creator` wherever we add to the `rooms` table. And also add a background update to fill in the rows missing `creator` (we can use the same logic that `get_create_event_for_room_txn` is doing by looking in the state events to get the `creator`). > > https://github.com/matrix-org/synapse/pull/10566#issuecomment-901616642 * Remove and switch away from get_create_event_for_room_txn * Fix no create event being found because no state events persisted yet * Fix and add tests for rooms creator bg update * Populate rooms.creator field for easy lookup Part of https://github.com/matrix-org/synapse/pull/10566 - Fill in creator whenever we insert into the rooms table - Add background update to backfill any missing creator values * Add changelog * Fix usage * Remove extra delta already included in #10697 * Don't worry about setting creator for invite * Only iterate over rows missing the creator See https://github.com/matrix-org/synapse/pull/10697#discussion_r695940898 * Use constant to fetch room creator field See https://github.com/matrix-org/synapse/pull/10697#discussion_r696803029 * More protection from other random types See https://github.com/matrix-org/synapse/pull/10697#discussion_r696806853 * Move new background update to end of list See https://github.com/matrix-org/synapse/pull/10697#discussion_r696814181 * Fix query casing * Fix ambiguity iterating over cursor instead of list Fix `psycopg2.ProgrammingError: no results to fetch` error when tests run with Postgres. ``` SYNAPSE_POSTGRES=1 SYNAPSE_TEST_LOG_LEVEL=INFO python -m twisted.trial tests.storage.databases.main.test_room ``` --- We use `txn.fetchall` because it will return the results as a list or an empty list when there are no results. Docs: > `cursor` objects are iterable, so, instead of calling explicitly fetchone() in a loop, the object itself can be used: > > https://www.psycopg.org/docs/cursor.html#cursor-iterable And I'm guessing iterating over a raw cursor does something weird when there are no results. --- Test CI failure: https://github.com/matrix-org/synapse/pull/10697/checks?check_run_id=3468916530 ``` tests.test_visibility.FilterEventsForServerTestCase.test_large_room =============================================================================== [FAIL] Traceback (most recent call last): File "/home/runner/work/synapse/synapse/tests/storage/databases/main/test_room.py", line 85, in test_background_populate_rooms_creator_column self.get_success( File "/home/runner/work/synapse/synapse/tests/unittest.py", line 500, in get_success return self.successResultOf(d) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/trial/_synctest.py", line 700, in successResultOf self.fail( twisted.trial.unittest.FailTest: Success result expected on , found failure result instead: Traceback (most recent call last): File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 701, in errback self._startRunCallbacks(fail) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 764, in _startRunCallbacks self._runCallbacks() File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 858, in _runCallbacks current.result = callback( # type: ignore[misc] File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 1751, in gotResult current_context.run(_inlineCallbacks, r, gen, status) --- --- File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 1657, in _inlineCallbacks result = current_context.run( File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/failure.py", line 500, in throwExceptionIntoGenerator return g.throw(self.type, self.value, self.tb) File "/home/runner/work/synapse/synapse/synapse/storage/background_updates.py", line 224, in do_next_background_update await self._do_background_update(desired_duration_ms) File "/home/runner/work/synapse/synapse/synapse/storage/background_updates.py", line 261, in _do_background_update items_updated = await update_handler(progress, batch_size) File "/home/runner/work/synapse/synapse/synapse/storage/databases/main/room.py", line 1399, in _background_populate_rooms_creator_column end = await self.db_pool.runInteraction( File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 686, in runInteraction result = await self.runWithConnection( File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 791, in runWithConnection return await make_deferred_yieldable( File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 858, in _runCallbacks current.result = callback( # type: ignore[misc] File "/home/runner/work/synapse/synapse/tests/server.py", line 425, in d.addCallback(lambda x: function(*args, **kwargs)) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 293, in _runWithConnection compat.reraise(excValue, excTraceback) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/deprecate.py", line 298, in deprecatedFunction return function(*args, **kwargs) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/compat.py", line 404, in reraise raise exception.with_traceback(traceback) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 284, in _runWithConnection result = func(conn, *args, **kw) File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 786, in inner_func return func(db_conn, *args, **kwargs) File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 554, in new_transaction r = func(cursor, *args, **kwargs) File "/home/runner/work/synapse/synapse/synapse/storage/databases/main/room.py", line 1375, in _background_populate_rooms_creator_column_txn for room_id, event_json in txn: psycopg2.ProgrammingError: no results to fetch ``` * Move code not under the MSC2716 room version underneath an experimental config option See https://github.com/matrix-org/synapse/pull/10566#issuecomment-906437909 * Add ordering to rooms creator background update See https://github.com/matrix-org/synapse/pull/10697#discussion_r696815277 * Add comment to better document constant See https://github.com/matrix-org/synapse/pull/10697#discussion_r699674458 * Use constant field --- changelog.d/10566.feature | 1 + synapse/handlers/federation_event.py | 10 ++++++++-- synapse/handlers/message.py | 28 +++++++++++++++++++++++++--- synapse/storage/databases/main/events.py | 32 +++++++++++++++++++++++++++----- 4 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 changelog.d/10566.feature (limited to 'synapse/storage/databases/main/events.py') diff --git a/changelog.d/10566.feature b/changelog.d/10566.feature new file mode 100644 index 0000000000..04575d76a9 --- /dev/null +++ b/changelog.d/10566.feature @@ -0,0 +1 @@ +Allow room creators to send historical events specified by [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) in existing room versions. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9f055f00cf..b622e3ae2d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1023,9 +1023,15 @@ class FederationEventHandler(BaseHandler): return # Skip processing a marker event if the room version doesn't - # support it. + # support it or the event is not from the room creator. room_version = await self.store.get_room_version(marker_event.room_id) - if not room_version.msc2716_historical: + create_event = await self.store.get_create_event_for_room(marker_event.room_id) + room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or marker_event.sender != room_creator + ): return logger.debug("_handle_marker_event: received %s", marker_event) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 101a29c6d3..9d2c897341 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1393,6 +1393,9 @@ class EventCreationHandler: allow_none=True, ) + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + # we can make some additional checks now if we have the original event. if original_event: if original_event.type == EventTypes.Create: @@ -1404,6 +1407,28 @@ class EventCreationHandler: if original_event.type == EventTypes.ServerACL: raise AuthError(403, "Redacting server ACL events is not permitted") + # Add a little safety stop-gap to prevent people from trying to + # redact MSC2716 related events when they're in a room version + # which does not support it yet. We allow people to use MSC2716 + # events in existing room versions but only from the room + # creator since it does not require any changes to the auth + # rules and in effect, the redaction algorithm . In the + # supported room version, we add the `historical` power level to + # auth the MSC2716 related events and adjust the redaction + # algorthim to keep the `historical` field around (redacting an + # event should only strip fields which don't affect the + # structural protocol level). + is_msc2716_event = ( + original_event.type == EventTypes.MSC2716_INSERTION + or original_event.type == EventTypes.MSC2716_CHUNK + or original_event.type == EventTypes.MSC2716_MARKER + ) + if not room_version_obj.msc2716_historical and is_msc2716_event: + raise AuthError( + 403, + "Redacting MSC2716 events is not supported in this room version", + ) + prev_state_ids = await context.get_prev_state_ids() auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True @@ -1411,9 +1436,6 @@ class EventCreationHandler: auth_events_map = await self.store.get_events(auth_events_ids) auth_events = {(e.type, e.state_key): e for e in auth_events_map.values()} - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - if event_auth.check_redaction( room_version_obj, event, auth_events=auth_events ): diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 096ae28788..c0ff4c0633 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1776,10 +1776,21 @@ class PersistEventsStore: # Not a insertion event return - # Skip processing a insertion event if the room version doesn't - # support it. + # Skip processing an insertion event if the room version doesn't + # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - if not room_version.msc2716_historical: + room_creator = self.db_pool.simple_select_one_onecol_txn( + txn, + table="rooms", + keyvalues={"room_id": event.room_id}, + retcol="creator", + allow_none=True, + ) + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator + ): return next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) @@ -1828,9 +1839,20 @@ class PersistEventsStore: return # Skip processing a chunk event if the room version doesn't - # support it. + # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - if not room_version.msc2716_historical: + room_creator = self.db_pool.simple_select_one_onecol_txn( + txn, + table="rooms", + keyvalues={"room_id": event.room_id}, + retcol="creator", + allow_none=True, + ) + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator + ): return chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) -- cgit 1.5.1 From 2ca0d64854b0d369785f1cb76915a480df445792 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Sep 2021 10:14:07 +0100 Subject: Speed up persisting redacted events (#10756) --- changelog.d/10756.misc | 1 + synapse/storage/databases/main/events.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 changelog.d/10756.misc (limited to 'synapse/storage/databases/main/events.py') diff --git a/changelog.d/10756.misc b/changelog.d/10756.misc new file mode 100644 index 0000000000..3b7acff03f --- /dev/null +++ b/changelog.d/10756.misc @@ -0,0 +1 @@ +Minor speed ups when joining large rooms over federation. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c0ff4c0633..f07e288056 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1385,18 +1385,18 @@ class PersistEventsStore: # If we're persisting an unredacted event we go and ensure # that we mark any redactions that reference this event as # requiring censoring. - sql = "UPDATE redactions SET have_censored = ? WHERE redacts = ?" - txn.execute_batch( - sql, - ( - ( - False, - event.event_id, - ) - for event, _ in events_and_contexts - if not event.internal_metadata.is_redacted() - ), + unredacted_events = [ + event.event_id + for event, _ in events_and_contexts + if not event.internal_metadata.is_redacted() + ] + sql = "UPDATE redactions SET have_censored = ? WHERE " + clause, args = make_in_list_sql_clause( + self.database_engine, + "redacts", + unredacted_events, ) + txn.execute(sql + clause, [False] + args) state_events_and_contexts = [ ec for ec in events_and_contexts if ec[0].is_state() -- cgit 1.5.1 From 74f01e11c9e762101b834493a52da12538477e75 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 8 Sep 2021 15:18:35 +0100 Subject: Skip handling of push actions for outlier events (#10780) Outlier events don't ever have push actions associated with them, so we can skip some expensive queries during event persistence. --- changelog.d/10780.misc | 1 + synapse/storage/databases/main/events.py | 21 +++++++++++++++++---- tests/storage/test_event_push_actions.py | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 changelog.d/10780.misc (limited to 'synapse/storage/databases/main/events.py') diff --git a/changelog.d/10780.misc b/changelog.d/10780.misc new file mode 100644 index 0000000000..3b7acff03f --- /dev/null +++ b/changelog.d/10780.misc @@ -0,0 +1 @@ +Minor speed ups when joining large rooms over federation. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f07e288056..14ada8a8b3 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1990,6 +1990,15 @@ class PersistEventsStore: events_and_context. """ + # Only non outlier events will have push actions associated with them, + # so let's filter them out. (This makes joining large rooms faster, as + # these queries took seconds to process all the state events). + non_outlier_events = [ + event + for event, _ in events_and_contexts + if not event.internal_metadata.is_outlier() + ] + sql = """ INSERT INTO event_push_actions ( room_id, event_id, user_id, actions, stream_ordering, @@ -2000,7 +2009,7 @@ class PersistEventsStore: WHERE event_id = ? """ - if events_and_contexts: + if non_outlier_events: txn.execute_batch( sql, ( @@ -2010,12 +2019,12 @@ class PersistEventsStore: event.depth, event.event_id, ) - for event, _ in events_and_contexts + for event in non_outlier_events ), ) room_to_event_ids: Dict[str, List[str]] = {} - for e, _ in events_and_contexts: + for e in non_outlier_events: room_to_event_ids.setdefault(e.room_id, []).append(e.event_id) for room_id, event_ids in room_to_event_ids.items(): @@ -2040,7 +2049,11 @@ class PersistEventsStore: # persisted. txn.execute_batch( "DELETE FROM event_push_actions_staging WHERE event_id = ?", - ((event.event_id,) for event, _ in all_events_and_contexts), + ( + (event.event_id,) + for event, _ in all_events_and_contexts + if not event.internal_metadata.is_outlier() + ), ) def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 1930b37eda..bb5939ba4a 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -69,6 +69,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): event.room_id = room_id event.event_id = "$test:example.com" event.internal_metadata.stream_ordering = stream + event.internal_metadata.is_outlier.return_value = False event.depth = stream self.get_success( -- cgit 1.5.1 From 7f0565e029791e336933806522d0e87bce870f64 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Sep 2021 10:16:52 +0100 Subject: Don't needlessly batch in `add_event_to_cache` (#10784) We've already batched up the events previously, and assume in other places in the events.py file that we have. Removing this makes it easier to adjust the batch sizes in one place. --- changelog.d/10784.misc | 1 + synapse/storage/databases/main/events.py | 51 +++++++++++++++----------------- 2 files changed, 25 insertions(+), 27 deletions(-) create mode 100644 changelog.d/10784.misc (limited to 'synapse/storage/databases/main/events.py') diff --git a/changelog.d/10784.misc b/changelog.d/10784.misc new file mode 100644 index 0000000000..3b7acff03f --- /dev/null +++ b/changelog.d/10784.misc @@ -0,0 +1 @@ +Minor speed ups when joining large rooms over federation. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 14ada8a8b3..8e691678e5 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1547,35 +1547,32 @@ class PersistEventsStore: to_prefill = [] rows = [] - N = 200 - for i in range(0, len(events_and_contexts), N): - ev_map = {e[0].event_id: e[0] for e in events_and_contexts[i : i + N]} - if not ev_map: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM events as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE " - ) - clause, args = make_in_list_sql_clause( - self.database_engine, "e.event_id", list(ev_map) - ) + ev_map = {e.event_id: e for e, _ in events_and_contexts} + if not ev_map: + return - txn.execute(sql + clause, args) - rows = self.db_pool.cursor_to_dict(txn) - for row in rows: - event = ev_map[row["event_id"]] - if not row["rejects"] and not row["redacts"]: - to_prefill.append( - _EventCacheEntry(event=event, redacted_event=None) - ) + sql = ( + "SELECT " + " e.event_id as event_id, " + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM events as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE " + ) + + clause, args = make_in_list_sql_clause( + self.database_engine, "e.event_id", list(ev_map) + ) + + txn.execute(sql + clause, args) + rows = self.db_pool.cursor_to_dict(txn) + for row in rows: + event = ev_map[row["event_id"]] + if not row["rejects"] and not row["redacts"]: + to_prefill.append(_EventCacheEntry(event=event, redacted_event=None)) def prefill(): for cache_entry in to_prefill: -- cgit 1.5.1