From 02086e1da0e3fa3d5002bf2eb7560c043ad47187 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Oct 2022 16:13:32 +0100 Subject: Fix rotating existing notifications in push summary (#14138) Broke by #14045. Fixes #14120. Introduced in v1.69.0rc2. --- synapse/storage/databases/main/event_push_actions.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'synapse/storage/databases/main/event_push_actions.py') diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index c9724d7345..87d07f7d9b 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1104,11 +1104,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) # First ensure that the existing rows have an updated thread_id field. - self.db_pool.simple_update_txn( - txn, - table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id, "thread_id": None}, - updatevalues={"thread_id": "main"}, + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + ("main", room_id, user_id), ) # Replace the previous summary with the new counts. @@ -1272,6 +1274,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications, handling %d rows", len(summaries)) # Ensure that any updated threads have an updated thread_id. + txn.execute_batch( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + [("main", room_id, user_id) for user_id, room_id in summaries], + ) self.db_pool.simple_update_many_txn( txn, table="event_push_summary", -- cgit 1.5.1 From 16c5d95b594e4fe146947c4848057ebe0b9f900b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 13 Oct 2022 18:32:16 +0100 Subject: Optimise the event_push_backfill_thread_id bg job (#14172) Co-authored-by: Erik Johnston --- changelog.d/14172.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14172.bugfix (limited to 'synapse/storage/databases/main/event_push_actions.py') diff --git a/changelog.d/14172.bugfix b/changelog.d/14172.bugfix new file mode 100644 index 0000000000..36521c670c --- /dev/null +++ b/changelog.d/14172.bugfix @@ -0,0 +1 @@ +Fix poor performance of the `event_push_backfill_thread_id` background update, which was introduced in Synapse 1.68.0rc1. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 87d07f7d9b..7f7bcb7094 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -297,9 +297,15 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas sql = f""" UPDATE {table_name} SET thread_id = 'main' - WHERE stream_ordering <= ? AND thread_id IS NULL + WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL """ - txn.execute(sql, (max_stream_ordering,)) + txn.execute( + sql, + ( + start_stream_ordering, + max_stream_ordering, + ), + ) # Update progress. processed_rows = txn.rowcount -- cgit 1.5.1 From d241a1350d5b0e1cf8262114f0cb34325cb91a26 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Oct 2022 14:46:23 +0100 Subject: Fix background update to use an index (#14181) --- changelog.d/14181.bugfix | 1 + .../storage/databases/main/event_push_actions.py | 62 ++++++++++++++++++---- 2 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 changelog.d/14181.bugfix (limited to 'synapse/storage/databases/main/event_push_actions.py') diff --git a/changelog.d/14181.bugfix b/changelog.d/14181.bugfix new file mode 100644 index 0000000000..36521c670c --- /dev/null +++ b/changelog.d/14181.bugfix @@ -0,0 +1 @@ +Fix poor performance of the `event_push_backfill_thread_id` background update, which was introduced in Synapse 1.68.0rc1. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 7f7bcb7094..72cf91eb39 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -269,11 +269,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas event_push_actions_done = progress.get("event_push_actions_done", False) def add_thread_id_txn( - txn: LoggingTransaction, table_name: str, start_stream_ordering: int + txn: LoggingTransaction, start_stream_ordering: int ) -> int: - sql = f""" + sql = """ SELECT stream_ordering - FROM {table_name} + FROM event_push_actions WHERE thread_id IS NULL AND stream_ordering > ? @@ -285,7 +285,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # No more rows to process. rows = txn.fetchall() if not rows: - progress[f"{table_name}_done"] = True + progress["event_push_actions_done"] = True self.db_pool.updates._background_update_progress_txn( txn, "event_push_backfill_thread_id", progress ) @@ -294,8 +294,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Update the thread ID for any of those rows. max_stream_ordering = rows[-1][0] - sql = f""" - UPDATE {table_name} + sql = """ + UPDATE event_push_actions SET thread_id = 'main' WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL """ @@ -309,7 +309,50 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Update progress. processed_rows = txn.rowcount - progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering + progress["max_event_push_actions_stream_ordering"] = max_stream_ordering + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + + return processed_rows + + def add_thread_id_summary_txn(txn: LoggingTransaction) -> int: + min_user_id = progress.get("max_summary_user_id", "") + min_room_id = progress.get("max_summary_room_id", "") + + # Slightly overcomplicated query for getting the Nth user ID / room + # ID tuple, or the last if there are less than N remaining. + sql = """ + SELECT user_id, room_id FROM ( + SELECT user_id, room_id FROM event_push_summary + WHERE (user_id, room_id) > (?, ?) + AND thread_id IS NULL + ORDER BY user_id, room_id + LIMIT ? + ) AS e + ORDER BY user_id DESC, room_id DESC + LIMIT 1 + """ + + txn.execute(sql, (min_user_id, min_room_id, batch_size)) + row = txn.fetchone() + if not row: + return 0 + + max_user_id, max_room_id = row + + sql = """ + UPDATE event_push_summary + SET thread_id = 'main' + WHERE + (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?) + AND thread_id IS NULL + """ + txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id)) + processed_rows = txn.rowcount + + progress["max_summary_user_id"] = max_user_id + progress["max_summary_room_id"] = max_room_id self.db_pool.updates._background_update_progress_txn( txn, "event_push_backfill_thread_id", progress ) @@ -325,15 +368,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas result = await self.db_pool.runInteraction( "event_push_backfill_thread_id", add_thread_id_txn, - "event_push_actions", progress.get("max_event_push_actions_stream_ordering", 0), ) else: result = await self.db_pool.runInteraction( "event_push_backfill_thread_id", - add_thread_id_txn, - "event_push_summary", - progress.get("max_event_push_summary_stream_ordering", 0), + add_thread_id_summary_txn, ) # Only done after the event_push_summary table is done. -- cgit 1.5.1 From dbf18f514ea5d2539ba3148049eae5a6793f1d60 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 18 Oct 2022 10:55:41 -0400 Subject: Update the thread_id right before use (in case the bg update hasn't finished) (#14222) This avoids running a forced-update of a null thread_id rows. An index is added (in the background) to hopefully make this easier in the future. --- changelog.d/14222.feature | 1 + .../storage/databases/main/event_push_actions.py | 103 +++++++++++++++++++++ .../delta/73/06thread_notifications_backfill.sql | 29 ------ .../73/06thread_notifications_thread_id_idx.sql | 23 +++++ .../07thread_notifications_not_null.sql.postgres | 19 ---- .../73/07thread_notifications_not_null.sql.sqlite | 101 -------------------- 6 files changed, 127 insertions(+), 149 deletions(-) create mode 100644 changelog.d/14222.feature delete mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite (limited to 'synapse/storage/databases/main/event_push_actions.py') diff --git a/changelog.d/14222.feature b/changelog.d/14222.feature new file mode 100644 index 0000000000..5d0ae16e13 --- /dev/null +++ b/changelog.d/14222.feature @@ -0,0 +1 @@ +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f070e6e88a..b283ab0f9c 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -294,6 +294,44 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self._background_backfill_thread_id, ) + # Indexes which will be used to quickly make the thread_id column non-null. + self.db_pool.updates.register_background_index_update( + "event_push_actions_thread_id_null", + index_name="event_push_actions_thread_id_null", + table="event_push_actions", + columns=["thread_id"], + where_clause="thread_id IS NULL", + ) + self.db_pool.updates.register_background_index_update( + "event_push_summary_thread_id_null", + index_name="event_push_summary_thread_id_null", + table="event_push_summary", + columns=["thread_id"], + where_clause="thread_id IS NULL", + ) + + # Check ASAP (and then later, every 1s) to see if we have finished + # background updates the event_push_actions and event_push_summary tables. + self._clock.call_later(0.0, self._check_event_push_backfill_thread_id) + self._event_push_backfill_thread_id_done = False + + @wrap_as_background_process("check_event_push_backfill_thread_id") + async def _check_event_push_backfill_thread_id(self) -> None: + """ + Has thread_id finished backfilling? + + If not, we need to just-in-time update it so the queries work. + """ + done = await self.db_pool.updates.has_completed_background_update( + "event_push_backfill_thread_id" + ) + + if done: + self._event_push_backfill_thread_id_done = True + else: + # Reschedule to run. + self._clock.call_later(15.0, self._check_event_push_backfill_thread_id) + async def _background_backfill_thread_id( self, progress: JsonDict, batch_size: int ) -> int: @@ -526,6 +564,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), ) + # First ensure that the existing rows have an updated thread_id field. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + # First we pull the counts from the summary table. # # We check that `last_receipt_stream_ordering` matches the stream ordering of the @@ -1341,6 +1398,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (room_id, user_id, stream_ordering, *thread_args), ) + # First ensure that the existing rows have an updated thread_id field. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + (MAIN_TIMELINE, room_id, user_id), + ) + # Fetch the notification counts between the stream ordering of the # latest receipt and what was previously summarised. unread_counts = self._get_notif_unread_count_for_user_room( @@ -1475,6 +1551,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas rotate_to_stream_ordering: The new maximum event stream ordering to summarise. """ + # Ensure that any new actions have an updated thread_id. + if not self._event_push_backfill_thread_id_done: + txn.execute( + """ + UPDATE event_push_actions + SET thread_id = ? + WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL + """, + (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering), + ) + + # XXX Do we need to update summaries here too? + # Calculate the new counts that should be upserted into event_push_summary sql = """ SELECT user_id, room_id, thread_id, @@ -1537,6 +1626,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications, handling %d rows", len(summaries)) + # Ensure that any updated threads have the proper thread_id. + if not self._event_push_backfill_thread_id_done: + txn.execute_batch( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + [ + (MAIN_TIMELINE, room_id, user_id) + for user_id, room_id, _ in summaries + ], + ) + self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql deleted file mode 100644 index 0ffde9bbeb..0000000000 --- a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- Forces the background updates from 06thread_notifications.sql to run in the --- foreground as code will now require those to be "done". - -DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id'; - --- Overwrite any null thread_id columns. -UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL; -UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL; -UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL; - --- Do not run the event_push_summary_unique_index job if it is pending; the --- thread_id field will be made required. -DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index'; -DROP INDEX IF EXISTS event_push_summary_unique_index; diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql new file mode 100644 index 0000000000..8b3c636594 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Allow there to be multiple summaries per user/room. +DROP INDEX IF EXISTS event_push_summary_unique_index; + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7306, 'event_push_actions_thread_id_null', '{}', 'event_push_backfill_thread_id'); + +INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES + (7306, 'event_push_summary_thread_id_null', '{}', 'event_push_backfill_thread_id'); diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres deleted file mode 100644 index 33674f8c62..0000000000 --- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- The columns can now be made non-nullable. -ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL; -ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL; -ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite deleted file mode 100644 index 5322ad77a4..0000000000 --- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2022 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- SQLite doesn't support modifying columns to an existing table, so it must --- be recreated. - --- Create the new tables. -CREATE TABLE event_push_actions_staging_new ( - event_id TEXT NOT NULL, - user_id TEXT NOT NULL, - actions TEXT NOT NULL, - notif SMALLINT NOT NULL, - highlight SMALLINT NOT NULL, - unread SMALLINT, - thread_id TEXT NOT NULL, - inserted_ts BIGINT -); - -CREATE TABLE event_push_actions_new ( - room_id TEXT NOT NULL, - event_id TEXT NOT NULL, - user_id TEXT NOT NULL, - profile_tag VARCHAR(32), - actions TEXT NOT NULL, - topological_ordering BIGINT, - stream_ordering BIGINT, - notif SMALLINT, - highlight SMALLINT, - unread SMALLINT, - thread_id TEXT NOT NULL, - CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) -); - -CREATE TABLE event_push_summary_new ( - user_id TEXT NOT NULL, - room_id TEXT NOT NULL, - notif_count BIGINT NOT NULL, - stream_ordering BIGINT NOT NULL, - unread_count BIGINT, - last_receipt_stream_ordering BIGINT, - thread_id TEXT NOT NULL -); - --- Swap the indexes. -DROP INDEX IF EXISTS event_push_actions_staging_id; -CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id); - -DROP INDEX IF EXISTS event_push_actions_room_id_user_id; -DROP INDEX IF EXISTS event_push_actions_rm_tokens; -DROP INDEX IF EXISTS event_push_actions_stream_ordering; -DROP INDEX IF EXISTS event_push_actions_u_highlight; -DROP INDEX IF EXISTS event_push_actions_highlights_index; -CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id); -CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering ); -CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id ); -CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering); -CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering); - --- Copy the data. -INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts) - SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts - FROM event_push_actions_staging; - -INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id) - SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id - FROM event_push_actions; - -INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id) - SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id - FROM event_push_summary; - --- Drop the old tables. -DROP TABLE event_push_actions_staging; -DROP TABLE event_push_actions; -DROP TABLE event_push_summary; - --- Rename the tables. -ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging; -ALTER TABLE event_push_actions_new RENAME TO event_push_actions; -ALTER TABLE event_push_summary_new RENAME TO event_push_summary; - --- Re-run background updates from 72/02event_push_actions_index.sql and --- 72/06thread_notifications.sql. -INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (7307, 'event_push_summary_unique_index2', '{}') - ON CONFLICT (update_name) DO NOTHING; -INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (7307, 'event_push_actions_stream_highlight_index', '{}') - ON CONFLICT (update_name) DO NOTHING; -- cgit 1.5.1 From e8bce8999f21d30affc459755e304a1f4732165c Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 30 Nov 2022 13:45:06 +0000 Subject: Aggregate unread notif count query for badge count calculation (#14255) Fetch the unread notification counts used by the badge counts in push notifications for all rooms at once (instead of fetching them per room). --- changelog.d/14255.misc | 1 + synapse/push/push_tools.py | 28 ++-- .../storage/databases/main/event_push_actions.py | 149 +++++++++++++++++++++ tests/storage/test_event_push_actions.py | 47 +++++-- 4 files changed, 198 insertions(+), 27 deletions(-) create mode 100644 changelog.d/14255.misc (limited to 'synapse/storage/databases/main/event_push_actions.py') diff --git a/changelog.d/14255.misc b/changelog.d/14255.misc new file mode 100644 index 0000000000..39924659c7 --- /dev/null +++ b/changelog.d/14255.misc @@ -0,0 +1 @@ +Optimise push badge count calculations. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index edeba27a45..7ee07e4bee 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -17,7 +17,6 @@ from synapse.events import EventBase from synapse.push.presentable_names import calculate_room_name, name_from_member_event from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore -from synapse.util.async_helpers import concurrently_execute async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int: @@ -26,23 +25,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - badge = len(invites) - room_notifs = [] - - async def get_room_unread_count(room_id: str) -> None: - room_notifs.append( - await store.get_unread_event_push_actions_by_room_for_user( - room_id, - user_id, - ) - ) - - await concurrently_execute(get_room_unread_count, joins, 10) - - for notifs in room_notifs: - # Combine the counts from all the threads. - notify_count = notifs.main_timeline.notify_count + sum( - n.notify_count for n in notifs.threads.values() - ) + room_to_count = await store.get_unread_counts_by_room_for_user(user_id) + for room_id, notify_count in room_to_count.items(): + # room_to_count may include rooms which the user has left, + # ignore those. + if room_id not in joins: + continue if notify_count == 0: continue @@ -51,8 +39,10 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - # return one badge count per conversation badge += 1 else: - # increment the badge count by the number of unread messages in the room + # Increase badge by number of notifications in room + # NOTE: this includes threaded and unthreaded notifications. badge += notify_count + return badge diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index b283ab0f9c..7ebe34f773 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -74,6 +74,7 @@ receipt. """ import logging +from collections import defaultdict from typing import ( TYPE_CHECKING, Collection, @@ -95,6 +96,7 @@ from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, LoggingTransaction, + PostgresEngine, ) from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore @@ -463,6 +465,153 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return result + async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]: + """Get the notification count by room for a user. Only considers notifications, + not highlight or unread counts, and threads are currently aggregated under their room. + + This function is intentionally not cached because it is called to calculate the + unread badge for push notifications and thus the result is expected to change. + + Note that this function assumes the user is a member of the room. Because + summary rows are not removed when a user leaves a room, the caller must + filter out those results from the result. + + Returns: + A map of room ID to notification counts for the given user. + """ + return await self.db_pool.runInteraction( + "get_unread_counts_by_room_for_user", + self._get_unread_counts_by_room_for_user_txn, + user_id, + ) + + def _get_unread_counts_by_room_for_user_txn( + self, txn: LoggingTransaction, user_id: str + ) -> Dict[str, int]: + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), + ) + args.extend([user_id, user_id]) + + receipts_cte = f""" + WITH all_receipts AS ( + SELECT room_id, thread_id, MAX(event_stream_ordering) AS max_receipt_stream_ordering + FROM receipts_linearized + LEFT JOIN events USING (room_id, event_id) + WHERE + {receipt_types_clause} + AND user_id = ? + GROUP BY room_id, thread_id + ) + """ + + receipts_joins = """ + LEFT JOIN ( + SELECT room_id, thread_id, + max_receipt_stream_ordering AS threaded_receipt_stream_ordering + FROM all_receipts + WHERE thread_id IS NOT NULL + ) AS threaded_receipts USING (room_id, thread_id) + LEFT JOIN ( + SELECT room_id, thread_id, + max_receipt_stream_ordering AS unthreaded_receipt_stream_ordering + FROM all_receipts + WHERE thread_id IS NULL + ) AS unthreaded_receipts USING (room_id) + """ + + # First get summary counts by room / thread for the user. We use the max receipt + # stream ordering of both threaded & unthreaded receipts to compare against the + # summary table. + # + # PostgreSQL and SQLite differ in comparing scalar numerics. + if isinstance(self.database_engine, PostgresEngine): + # GREATEST ignores NULLs. + max_clause = """GREATEST( + threaded_receipt_stream_ordering, + unthreaded_receipt_stream_ordering + )""" + else: + # MAX returns NULL if any are NULL, so COALESCE to 0 first. + max_clause = """MAX( + COALESCE(threaded_receipt_stream_ordering, 0), + COALESCE(unthreaded_receipt_stream_ordering, 0) + )""" + + sql = f""" + {receipts_cte} + SELECT eps.room_id, eps.thread_id, notif_count + FROM event_push_summary AS eps + {receipts_joins} + WHERE user_id = ? + AND notif_count != 0 + AND ( + (last_receipt_stream_ordering IS NULL AND stream_ordering > {max_clause}) + OR last_receipt_stream_ordering = {max_clause} + ) + """ + txn.execute(sql, args) + + seen_thread_ids = set() + room_to_count: Dict[str, int] = defaultdict(int) + + for room_id, thread_id, notif_count in txn: + room_to_count[room_id] += notif_count + seen_thread_ids.add(thread_id) + + # Now get any event push actions that haven't been rotated using the same OR + # join and filter by receipt and event push summary rotated up to stream ordering. + sql = f""" + {receipts_cte} + SELECT epa.room_id, epa.thread_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count + FROM event_push_actions AS epa + {receipts_joins} + WHERE user_id = ? + AND epa.notif = 1 + AND stream_ordering > (SELECT stream_ordering FROM event_push_summary_stream_ordering) + AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering) + AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering) + GROUP BY epa.room_id, epa.thread_id + """ + txn.execute(sql, args) + + for room_id, thread_id, notif_count in txn: + # Note: only count push actions we have valid summaries for with up to date receipt. + if thread_id not in seen_thread_ids: + continue + room_to_count[room_id] += notif_count + + thread_id_clause, thread_ids_args = make_in_list_sql_clause( + self.database_engine, "epa.thread_id", seen_thread_ids + ) + + # Finally re-check event_push_actions for any rooms not in the summary, ignoring + # the rotated up-to position. This handles the case where a read receipt has arrived + # but not been rotated meaning the summary table is out of date, so we go back to + # the push actions table. + sql = f""" + {receipts_cte} + SELECT epa.room_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count + FROM event_push_actions AS epa + {receipts_joins} + WHERE user_id = ? + AND NOT {thread_id_clause} + AND epa.notif = 1 + AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering) + AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering) + GROUP BY epa.room_id + """ + + args.extend(thread_ids_args) + txn.execute(sql, args) + + for room_id, notif_count in txn: + room_to_count[room_id] += notif_count + + return room_to_count + @cached(tree=True, max_entries=5000, iterable=True) async def get_unread_event_push_actions_by_room_for_user( self, diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index ee48920f84..5fa8bd2d98 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -156,7 +156,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str - def _assert_counts(noitf_count: int, highlight_count: int) -> None: + def _assert_counts(notif_count: int, highlight_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "get-unread-counts", @@ -168,13 +168,22 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), ) self.assertEqual(counts.threads, {}) + aggregate_counts = self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual(aggregate_counts[room_id], notif_count) + def _create_event(highlight: bool = False) -> str: result = self.helper.send_event( room_id, @@ -283,7 +292,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str def _assert_counts( - noitf_count: int, + notif_count: int, highlight_count: int, thread_notif_count: int, thread_highlight_count: int, @@ -299,7 +308,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), @@ -318,6 +327,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): else: self.assertEqual(counts.threads, {}) + aggregate_counts = self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual( + aggregate_counts[room_id], notif_count + thread_notif_count + ) + def _create_event( highlight: bool = False, thread_id: Optional[str] = None ) -> str: @@ -454,7 +474,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str def _assert_counts( - noitf_count: int, + notif_count: int, highlight_count: int, thread_notif_count: int, thread_highlight_count: int, @@ -470,7 +490,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), @@ -489,6 +509,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): else: self.assertEqual(counts.threads, {}) + aggregate_counts = self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual( + aggregate_counts[room_id], notif_count + thread_notif_count + ) + def _create_event( highlight: bool = False, thread_id: Optional[str] = None ) -> str: @@ -646,7 +677,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) return result["event_id"] - def _assert_counts(noitf_count: int, thread_notif_count: int) -> None: + def _assert_counts(notif_count: int, thread_notif_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "get-unread-counts", @@ -658,7 +689,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, unread_count=0, highlight_count=0 + notify_count=notif_count, unread_count=0, highlight_count=0 ), ) if thread_notif_count: -- cgit 1.5.1