diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index dd2627037c..b283ab0f9c 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -12,14 +12,85 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+"""Responsible for storing and fetching push actions / notifications.
+
+There are two main uses for push actions:
+ 1. Sending out push to a user's device; and
+ 2. Tracking per-room per-user notification counts (used in sync requests).
+
+For the former we simply use the `event_push_actions` table, which contains all
+the calculated actions for a given user (as calculated by the
+`BulkPushRuleEvaluator`).
+
+For the latter we could simply count the number of rows in the
+`event_push_actions` table for a given room/user, but in practice this is
+*very* heavyweight when there are a large number of notifications (e.g. due to
+the user never reading a room). Plus, keeping all push actions indefinitely
+uses a lot of disk space.
+
+To fix these issues, we add a new table `event_push_summary` that tracks
+per-user per-room counts of all notifications that happened before a stream
+ordering S. Thus, to get the notification count for a user / room we can simply
+query a single row in `event_push_summary` and count the number of rows in
+`event_push_actions` with a stream ordering larger than S (and as long as S is
+"recent", the number of rows needing to be scanned will be small).
+
+The `event_push_summary` table is updated via a background job that periodically
+chooses a new stream ordering S' (usually the latest stream ordering), counts
+all notifications in `event_push_actions` between the existing S and S', and
+adds them to the existing counts in `event_push_summary`.
+
+This allows us to delete old rows from `event_push_actions` once those rows have
+been counted and added to `event_push_summary` (we call this process
+"rotation").
+
+
+We also need to handle a user sending a read receipt to a room. Again this is
+done as a background process. For each receipt we clear the row in
+`event_push_summary`, count the number of notifications in
+`event_push_actions` that happened after the receipt but before S, and insert
+that count into `event_push_summary`. (If the receipt happened *after* S then
+we simply clear the row in `event_push_summary`.)
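+
+For example (illustrative numbers): with S = 100, a read receipt at stream
+ordering 80 resets the summary row to the count of notifications with stream
+orderings in (80, 100], while a receipt at stream ordering 120 (i.e. after S)
+simply clears the row.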
+
+Note that it's possible, if the read receipt is for an old event, that the
+relevant `event_push_actions` rows will already have been rotated, in which
+case we get the wrong count (it'll be too low). We accept this as a rare edge
+case that is unlikely to impact the user much (since the vast majority of read
+receipts will be for the latest event).
+
+The last complication is handling the race where we request the notification
+counts after a user sends a read receipt into the room, but *before* the
+background update handles the receipt (without any special handling the counts
+would be outdated). We fix this by storing in `event_push_summary` the read
+receipt we used when updating it, and every time we query the table we check
+whether that matches the most recent read receipt in the room. If it does, we
+continue as above; if not, we simply query the `event_push_actions` table
+directly.
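+
+For instance (illustrative numbers): if the summary row was computed for a
+receipt at stream ordering 90 but the user's most recent receipt is now at 95,
+the summary is ignored and `event_push_actions` is scanned from 95 directly.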
+
+Since read receipts are almost always for recent events, scanning the
+`event_push_actions` table in this case is unlikely to be a problem. Even if it
+is a problem, it is temporary until the background job handles the new read
+receipt.
+"""
+
import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, cast
+from typing import (
+ TYPE_CHECKING,
+ Collection,
+ Dict,
+ List,
+ Mapping,
+ Optional,
+ Tuple,
+ Union,
+ cast,
+)
import attr
-from synapse.api.constants import ReceiptTypes
+from synapse.api.constants import MAIN_TIMELINE, ReceiptTypes
from synapse.metrics.background_process_metrics import wrap_as_background_process
-from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
@@ -27,6 +98,7 @@ from synapse.storage.database import (
)
from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
from synapse.storage.databases.main.stream import StreamWorkerStore
+from synapse.types import JsonDict
from synapse.util import json_encoder
from synapse.util.caches.descriptors import cached
@@ -47,6 +119,32 @@ DEFAULT_HIGHLIGHT_ACTION: List[Union[dict, str]] = [
]
+@attr.s(slots=True, auto_attribs=True)
+class _RoomReceipt:
+ """
+ HttpPushAction instances include the information used to generate HTTP
+ requests to a push gateway.
+ """
+
+ unthreaded_stream_ordering: int = 0
+ # threaded_stream_ordering includes the main pseudo-thread.
+ threaded_stream_ordering: Dict[str, int] = attr.Factory(dict)
+
+ def is_unread(self, thread_id: str, stream_ordering: int) -> bool:
+ """Returns True if the stream ordering is unread according to the receipt information."""
+
+ # Only include push actions with a stream ordering after both the unthreaded
+ # and threaded receipt. Properly handles a user without any receipts present.
+ return (
+ self.unthreaded_stream_ordering < stream_ordering
+ and self.threaded_stream_ordering.get(thread_id, 0) < stream_ordering
+ )
+
+
+# A _RoomReceipt with no receipts in it.
+MISSING_ROOM_RECEIPT = _RoomReceipt()
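+
+# An illustrative sketch of how `_RoomReceipt.is_unread` behaves (the values
+# below are made up):
+#
+#   receipt = _RoomReceipt(
+#       unthreaded_stream_ordering=10,
+#       threaded_stream_ordering={"$thread_root": 15},
+#   )
+#   receipt.is_unread("$thread_root", 12)  # False: covered by the threaded receipt.
+#   receipt.is_unread(MAIN_TIMELINE, 12)   # True: past the unthreaded receipt,
+#                                          # no receipt for the main timeline.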
+
+
@attr.s(slots=True, frozen=True, auto_attribs=True)
class HttpPushAction:
"""
@@ -85,7 +183,7 @@ class UserPushAction(EmailPushAction):
@attr.s(slots=True, auto_attribs=True)
class NotifCounts:
"""
- The per-user, per-room count of notifications. Used by sync and push.
+ The per-user, per-room, per-thread count of notifications. Used by sync and push.
"""
notify_count: int = 0
@@ -93,7 +191,24 @@ class NotifCounts:
highlight_count: int = 0
-def _serialize_action(actions: List[Union[dict, str]], is_highlight: bool) -> str:
+@attr.s(slots=True, auto_attribs=True)
+class RoomNotifCounts:
+ """
+ The per-user, per-room count of notifications. Used by sync and push.
+ """
+
+ main_timeline: NotifCounts
+ # Map of thread ID to the notification counts.
+ threads: Dict[str, NotifCounts]
+
+ def __len__(self) -> int:
+        # To properly account for the amount of space in any caches: one entry
+        # per thread, plus one for the main timeline.
+ return len(self.threads) + 1
+
+
+def _serialize_action(
+ actions: Collection[Union[Mapping, str]], is_highlight: bool
+) -> str:
"""Custom serializer for actions. This allows us to "compress" common actions.
We use the fact that most users have the same actions for notifs (and for
@@ -131,6 +246,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
):
super().__init__(database, db_conn, hs)
+ # Track when the process started.
+ self._started_ts = self._clock.time_msec()
+
# These get correctly set by _find_stream_orderings_for_times_txn
self.stream_ordering_month_ago: Optional[int] = None
self.stream_ordering_day_ago: Optional[int] = None
@@ -150,6 +268,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
self._rotate_notifs, 30 * 1000
)
+ self._clear_old_staging_loop = self._clock.looping_call(
+ self._clear_old_push_actions_staging, 30 * 60 * 1000
+ )
+
self.db_pool.updates.register_background_index_update(
"event_push_summary_unique_index",
index_name="event_push_summary_unique_index",
@@ -159,14 +281,196 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
replaces_index="event_push_summary_user_rm",
)
- @cached(tree=True, max_entries=5000)
+ self.db_pool.updates.register_background_index_update(
+ "event_push_summary_unique_index2",
+ index_name="event_push_summary_unique_index2",
+ table="event_push_summary",
+ columns=["user_id", "room_id", "thread_id"],
+ unique=True,
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "event_push_backfill_thread_id",
+ self._background_backfill_thread_id,
+ )
+
+ # Indexes which will be used to quickly make the thread_id column non-null.
+ self.db_pool.updates.register_background_index_update(
+ "event_push_actions_thread_id_null",
+ index_name="event_push_actions_thread_id_null",
+ table="event_push_actions",
+ columns=["thread_id"],
+ where_clause="thread_id IS NULL",
+ )
+ self.db_pool.updates.register_background_index_update(
+ "event_push_summary_thread_id_null",
+ index_name="event_push_summary_thread_id_null",
+ table="event_push_summary",
+ columns=["thread_id"],
+ where_clause="thread_id IS NULL",
+ )
+
+        # Check ASAP (and then every 15s) to see if we have finished the
+        # background updates for the event_push_actions and event_push_summary
+        # tables.
+ self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
+ self._event_push_backfill_thread_id_done = False
+
+ @wrap_as_background_process("check_event_push_backfill_thread_id")
+ async def _check_event_push_backfill_thread_id(self) -> None:
+ """
+ Has thread_id finished backfilling?
+
+ If not, we need to just-in-time update it so the queries work.
+ """
+ done = await self.db_pool.updates.has_completed_background_update(
+ "event_push_backfill_thread_id"
+ )
+
+ if done:
+ self._event_push_backfill_thread_id_done = True
+ else:
+ # Reschedule to run.
+ self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
+
+ async def _background_backfill_thread_id(
+ self, progress: JsonDict, batch_size: int
+ ) -> int:
+ """
+ Fill in the thread_id field for event_push_actions and event_push_summary.
+
+ This is preparatory so that it can be made non-nullable in the future.
+
+        Because all current (NULL) data was created in an unthreaded manner,
+        this simply assumes it is on the "main" timeline. Since
+        event_push_actions is periodically cleared it is not possible to
+        correctly re-calculate the thread_id.
+ """
+ event_push_actions_done = progress.get("event_push_actions_done", False)
+
+ def add_thread_id_txn(
+ txn: LoggingTransaction, start_stream_ordering: int
+ ) -> int:
+ sql = """
+ SELECT stream_ordering
+ FROM event_push_actions
+ WHERE
+ thread_id IS NULL
+ AND stream_ordering > ?
+ ORDER BY stream_ordering
+ LIMIT ?
+ """
+ txn.execute(sql, (start_stream_ordering, batch_size))
+
+ # No more rows to process.
+ rows = txn.fetchall()
+ if not rows:
+ progress["event_push_actions_done"] = True
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "event_push_backfill_thread_id", progress
+ )
+ return 0
+
+ # Update the thread ID for any of those rows.
+ max_stream_ordering = rows[-1][0]
+
+ sql = """
+ UPDATE event_push_actions
+ SET thread_id = 'main'
+ WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+ """
+ txn.execute(
+ sql,
+ (
+ start_stream_ordering,
+ max_stream_ordering,
+ ),
+ )
+
+ # Update progress.
+ processed_rows = txn.rowcount
+ progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "event_push_backfill_thread_id", progress
+ )
+
+ return processed_rows
+
+ def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
+ min_user_id = progress.get("max_summary_user_id", "")
+ min_room_id = progress.get("max_summary_room_id", "")
+
+            # Slightly overcomplicated query for getting the Nth user ID / room
+            # ID tuple, or the last if there are fewer than N remaining.
+ sql = """
+ SELECT user_id, room_id FROM (
+ SELECT user_id, room_id FROM event_push_summary
+ WHERE (user_id, room_id) > (?, ?)
+ AND thread_id IS NULL
+ ORDER BY user_id, room_id
+ LIMIT ?
+ ) AS e
+ ORDER BY user_id DESC, room_id DESC
+ LIMIT 1
+ """
+
+ txn.execute(sql, (min_user_id, min_room_id, batch_size))
+ row = txn.fetchone()
+ if not row:
+ return 0
+
+ max_user_id, max_room_id = row
+
+ sql = """
+ UPDATE event_push_summary
+ SET thread_id = 'main'
+ WHERE
+ (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
+ AND thread_id IS NULL
+ """
+ txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
+ processed_rows = txn.rowcount
+
+ progress["max_summary_user_id"] = max_user_id
+ progress["max_summary_room_id"] = max_room_id
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "event_push_backfill_thread_id", progress
+ )
+
+ return processed_rows
+
+ # First update the event_push_actions table, then the event_push_summary table.
+ #
+ # Note that the event_push_actions_staging table is ignored since it is
+ # assumed that items in that table will only exist for a short period of
+ # time.
+ if not event_push_actions_done:
+ result = await self.db_pool.runInteraction(
+ "event_push_backfill_thread_id",
+ add_thread_id_txn,
+ progress.get("max_event_push_actions_stream_ordering", 0),
+ )
+ else:
+ result = await self.db_pool.runInteraction(
+ "event_push_backfill_thread_id",
+ add_thread_id_summary_txn,
+ )
+
+            # Only done after the event_push_summary table is done.
+            if not result:
+                await self.db_pool.updates._end_background_update(
+                    "event_push_backfill_thread_id"
+                )
+
+ return result
+
+ @cached(tree=True, max_entries=5000, iterable=True)
async def get_unread_event_push_actions_by_room_for_user(
self,
room_id: str,
user_id: str,
- ) -> NotifCounts:
+ ) -> RoomNotifCounts:
"""Get the notification count, the highlight count and the unread message count
- for a given user in a given room after the given read receipt.
+ for a given user in a given room after their latest read receipt.
Note that this function assumes the user to be a current member of the room,
since it's either called by the sync handler to handle joined room entries, or by
@@ -177,9 +481,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
user_id: The user to retrieve the counts for.
Returns
- A dict containing the counts mentioned earlier in this docstring,
- respectively under the keys "notify_count", "highlight_count" and
- "unread_count".
+ A RoomNotifCounts object containing the notification count, the
+ highlight count and the unread message count for both the main timeline
+ and threads.
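+
+            For example (illustrative values):
+                RoomNotifCounts(
+                    main_timeline=NotifCounts(notify_count=2, unread_count=2, highlight_count=0),
+                    threads={"$thread_root": NotifCounts(notify_count=1, unread_count=1, highlight_count=0)},
+                )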
"""
return await self.db_pool.runInteraction(
"get_unread_event_push_actions_by_room",
@@ -193,21 +497,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
txn: LoggingTransaction,
room_id: str,
user_id: str,
- ) -> NotifCounts:
- result = self.get_last_receipt_for_user_txn(
+ ) -> RoomNotifCounts:
+ # Get the stream ordering of the user's latest receipt in the room.
+ result = self.get_last_unthreaded_receipt_for_user_txn(
txn,
user_id,
room_id,
receipt_types=(ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
)
- stream_ordering = None
if result:
_, stream_ordering = result
- if stream_ordering is None:
- # Either last_read_event_id is None, or it's an event we don't have (e.g.
- # because it's been purged), in which case retrieve the stream ordering for
+ else:
+ # If the user has no receipts in the room, retrieve the stream ordering for
# the latest membership event from this user in this room (which we assume is
# a join).
event_id = self.db_pool.simple_select_one_onecol_txn(
@@ -224,72 +527,236 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
)
def _get_unread_counts_by_pos_txn(
- self, txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int
- ) -> NotifCounts:
+ self,
+ txn: LoggingTransaction,
+ room_id: str,
+ user_id: str,
+ unthreaded_receipt_stream_ordering: int,
+ ) -> RoomNotifCounts:
"""Get the number of unread messages for a user/room that have happened
since the given stream ordering.
+
+ Args:
+ txn: The database transaction.
+ room_id: The room ID to get unread counts for.
+ user_id: The user ID to get unread counts for.
+ unthreaded_receipt_stream_ordering: The stream ordering of the user's latest
+ unthreaded receipt in the room. If there are no unthreaded receipts,
+ the stream ordering of the user's join event.
+
+ Returns:
+ A RoomNotifCounts object containing the notification count, the
+ highlight count and the unread message count for both the main timeline
+ and threads.
"""
- counts = NotifCounts()
+ main_counts = NotifCounts()
+ thread_counts: Dict[str, NotifCounts] = {}
+
+ def _get_thread(thread_id: str) -> NotifCounts:
+ if thread_id == MAIN_TIMELINE:
+ return main_counts
+ return thread_counts.setdefault(thread_id, NotifCounts())
+
+ receipt_types_clause, receipts_args = make_in_list_sql_clause(
+ self.database_engine,
+ "receipt_type",
+ (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
+ )
+
+ # First ensure that the existing rows have an updated thread_id field.
+ if not self._event_push_backfill_thread_id_done:
+ txn.execute(
+ """
+ UPDATE event_push_summary
+ SET thread_id = ?
+ WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+ """,
+ (MAIN_TIMELINE, room_id, user_id),
+ )
+ txn.execute(
+ """
+ UPDATE event_push_actions
+ SET thread_id = ?
+ WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+ """,
+ (MAIN_TIMELINE, room_id, user_id),
+ )
# First we pull the counts from the summary table.
#
- # We check that `last_receipt_stream_ordering` matches the stream
- # ordering given. If it doesn't match then a new read receipt has arrived and
- # we haven't yet updated the counts in `event_push_summary` to reflect
- # that; in that case we simply ignore `event_push_summary` counts
- # and do a manual count of all of the rows in the `event_push_actions` table
- # for this user/room.
+ # We check that `last_receipt_stream_ordering` matches the stream ordering of the
+ # latest receipt for the thread (which may be either the unthreaded read receipt
+ # or the threaded read receipt).
+ #
+ # If it doesn't match then a new read receipt has arrived and we haven't yet
+ # updated the counts in `event_push_summary` to reflect that; in that case we
+ # simply ignore `event_push_summary` counts.
+ #
+ # We then do a manual count of all the rows in the `event_push_actions` table
+ # for any user/room/thread which did not have a valid summary found.
#
- # If `last_receipt_stream_ordering` is null then that means it's up to
- # date (as the row was written by an older version of Synapse that
+ # If `last_receipt_stream_ordering` is null then that means it's up-to-date
+ # (as the row was written by an older version of Synapse that
# updated `event_push_summary` synchronously when persisting a new read
# receipt).
txn.execute(
- """
- SELECT stream_ordering, notif_count, COALESCE(unread_count, 0)
+ f"""
+ SELECT notif_count, COALESCE(unread_count, 0), thread_id
FROM event_push_summary
+ LEFT JOIN (
+ SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+ FROM receipts_linearized
+ LEFT JOIN events USING (room_id, event_id)
+ WHERE
+ user_id = ?
+ AND room_id = ?
+ AND stream_ordering > ?
+ AND {receipt_types_clause}
+ GROUP BY thread_id
+ ) AS receipts USING (thread_id)
WHERE room_id = ? AND user_id = ?
AND (
- (last_receipt_stream_ordering IS NULL AND stream_ordering > ?)
- OR last_receipt_stream_ordering = ?
- )
+ (last_receipt_stream_ordering IS NULL AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?))
+ OR last_receipt_stream_ordering = COALESCE(threaded_receipt_stream_ordering, ?)
+ ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0)
""",
- (room_id, user_id, stream_ordering, stream_ordering),
+ (
+ user_id,
+ room_id,
+ unthreaded_receipt_stream_ordering,
+ *receipts_args,
+ room_id,
+ user_id,
+ unthreaded_receipt_stream_ordering,
+ unthreaded_receipt_stream_ordering,
+ ),
)
- row = txn.fetchone()
-
- summary_stream_ordering = 0
- if row:
- summary_stream_ordering = row[0]
- counts.notify_count += row[1]
- counts.unread_count += row[2]
-
- # Next we need to count highlights, which aren't summarized
- sql = """
- SELECT COUNT(*) FROM event_push_actions
+ summarised_threads = set()
+ for notif_count, unread_count, thread_id in txn:
+ summarised_threads.add(thread_id)
+ counts = _get_thread(thread_id)
+ counts.notify_count += notif_count
+ counts.unread_count += unread_count
+
+ # Next we need to count highlights, which aren't summarised
+ sql = f"""
+ SELECT COUNT(*), thread_id FROM event_push_actions
+ LEFT JOIN (
+ SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+ FROM receipts_linearized
+ LEFT JOIN events USING (room_id, event_id)
+ WHERE
+ user_id = ?
+ AND room_id = ?
+ AND stream_ordering > ?
+ AND {receipt_types_clause}
+ GROUP BY thread_id
+ ) AS receipts USING (thread_id)
WHERE user_id = ?
AND room_id = ?
- AND stream_ordering > ?
+ AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?)
AND highlight = 1
+ GROUP BY thread_id
"""
- txn.execute(sql, (user_id, room_id, stream_ordering))
- row = txn.fetchone()
- if row:
- counts.highlight_count += row[0]
+ txn.execute(
+ sql,
+ (
+ user_id,
+ room_id,
+ unthreaded_receipt_stream_ordering,
+ *receipts_args,
+ user_id,
+ room_id,
+ unthreaded_receipt_stream_ordering,
+ ),
+ )
+ for highlight_count, thread_id in txn:
+ _get_thread(thread_id).highlight_count += highlight_count
- # Finally we need to count push actions that aren't included in the
- # summary returned above, e.g. recent events that haven't been
- # summarized yet, or the summary is empty due to a recent read receipt.
- stream_ordering = max(stream_ordering, summary_stream_ordering)
- notify_count, unread_count = self._get_notif_unread_count_for_user_room(
- txn, room_id, user_id, stream_ordering
+ # For threads which were summarised we need to count actions since the last
+ # rotation.
+ thread_id_clause, thread_id_args = make_in_list_sql_clause(
+ self.database_engine, "thread_id", summarised_threads
)
- counts.notify_count += notify_count
- counts.unread_count += unread_count
+ # The (inclusive) event stream ordering that was previously summarised.
+ rotated_upto_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="event_push_summary_stream_ordering",
+ keyvalues={},
+ retcol="stream_ordering",
+ )
- return counts
+ unread_counts = self._get_notif_unread_count_for_user_room(
+ txn, room_id, user_id, rotated_upto_stream_ordering
+ )
+ for notif_count, unread_count, thread_id in unread_counts:
+ if thread_id not in summarised_threads:
+ continue
+
+ if thread_id == MAIN_TIMELINE:
+                main_counts.notify_count += notif_count
+                main_counts.unread_count += unread_count
+ elif thread_id in thread_counts:
+ thread_counts[thread_id].notify_count += notif_count
+ thread_counts[thread_id].unread_count += unread_count
+ else:
+ # Previous thread summaries of 0 are discarded above.
+ #
+ # TODO If empty summaries are deleted this can be removed.
+ thread_counts[thread_id] = NotifCounts(
+ notify_count=notif_count,
+ unread_count=unread_count,
+ highlight_count=0,
+ )
+
+ # Finally we need to count push actions that aren't included in the
+ # summary returned above. This might be due to recent events that haven't
+ # been summarised yet or the summary is out of date due to a recent read
+ # receipt.
+ sql = f"""
+ SELECT
+ COUNT(CASE WHEN notif = 1 THEN 1 END),
+ COUNT(CASE WHEN unread = 1 THEN 1 END),
+ thread_id
+ FROM event_push_actions
+ LEFT JOIN (
+ SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+ FROM receipts_linearized
+ LEFT JOIN events USING (room_id, event_id)
+ WHERE
+ user_id = ?
+ AND room_id = ?
+ AND stream_ordering > ?
+ AND {receipt_types_clause}
+ GROUP BY thread_id
+ ) AS receipts USING (thread_id)
+ WHERE user_id = ?
+ AND room_id = ?
+ AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?)
+ AND NOT {thread_id_clause}
+ GROUP BY thread_id
+ """
+ txn.execute(
+ sql,
+ (
+ user_id,
+ room_id,
+ unthreaded_receipt_stream_ordering,
+ *receipts_args,
+ user_id,
+ room_id,
+ unthreaded_receipt_stream_ordering,
+ *thread_id_args,
+ ),
+ )
+ for notif_count, unread_count, thread_id in txn:
+ counts = _get_thread(thread_id)
+ counts.notify_count += notif_count
+ counts.unread_count += unread_count
+
+ return RoomNotifCounts(main_counts, thread_counts)
def _get_notif_unread_count_for_user_room(
self,
@@ -298,48 +765,70 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
user_id: str,
stream_ordering: int,
max_stream_ordering: Optional[int] = None,
- ) -> Tuple[int, int]:
+ thread_id: Optional[str] = None,
+ ) -> List[Tuple[int, int, str]]:
"""Returns the notify and unread counts from `event_push_actions` for
the given user/room in the given range.
Does not consult `event_push_summary` table, which may include push
actions that have been deleted from `event_push_actions` table.
+
+ Args:
+ txn: The database transaction.
+ room_id: The room ID to get unread counts for.
+ user_id: The user ID to get unread counts for.
+ stream_ordering: The (exclusive) minimum stream ordering to consider.
+ max_stream_ordering: The (inclusive) maximum stream ordering to consider.
+ If this is not given, then no maximum is applied.
+            thread_id: The thread ID to fetch unread counts for. If this is not provided
+                then the results for *all* threads are returned.
+
+                Note that if this is provided the resulting list will only have 0 or
+                1 tuples in it.
+
+        Returns:
+            A list of tuples of the notif count, unread count and thread ID
+            for each thread with any counts in the given range.
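+
+            For example (illustrative values): [(2, 0, "main"), (1, 1, "$t")]
+            means two notifications on the main timeline and one notification
+            (also counted as unread) in the thread with ID "$t".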
"""
# If there have been no events in the room since the stream ordering,
# there can't be any push actions either.
if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering):
- return 0, 0
+ return []
- clause = ""
+ stream_ordering_clause = ""
args = [user_id, room_id, stream_ordering]
if max_stream_ordering is not None:
- clause = "AND ea.stream_ordering <= ?"
+ stream_ordering_clause = "AND ea.stream_ordering <= ?"
args.append(max_stream_ordering)
# If the max stream ordering is less than the min stream ordering,
# then obviously there are zero push actions in that range.
if max_stream_ordering <= stream_ordering:
- return 0, 0
+ return []
+
+ # Either limit the results to a specific thread or fetch all threads.
+ thread_id_clause = ""
+ if thread_id is not None:
+ thread_id_clause = "AND thread_id = ?"
+ args.append(thread_id)
sql = f"""
SELECT
COUNT(CASE WHEN notif = 1 THEN 1 END),
- COUNT(CASE WHEN unread = 1 THEN 1 END)
- FROM event_push_actions ea
- WHERE user_id = ?
+ COUNT(CASE WHEN unread = 1 THEN 1 END),
+ thread_id
+ FROM event_push_actions ea
+ WHERE user_id = ?
AND room_id = ?
AND ea.stream_ordering > ?
- {clause}
+ {stream_ordering_clause}
+ {thread_id_clause}
+ GROUP BY thread_id
"""
txn.execute(sql, args)
- row = txn.fetchone()
-
- if row:
- return cast(Tuple[int, int], row)
-
- return 0, 0
+ return cast(List[Tuple[int, int, str]], txn.fetchall())
async def get_push_action_users_in_range(
self, min_stream_ordering: int, max_stream_ordering: int
@@ -354,6 +843,49 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
return await self.db_pool.runInteraction("get_push_action_users_in_range", f)
+ def _get_receipts_by_room_txn(
+ self, txn: LoggingTransaction, user_id: str
+ ) -> Dict[str, _RoomReceipt]:
+ """
+ Generate a map of room ID to the latest stream ordering that has been
+ read by the given user.
+
+ Args:
+ txn:
+ user_id: The user to fetch receipts for.
+
+        Returns:
+            A map of room ID to a _RoomReceipt instance, covering every room
+            in which the user has sent a receipt.
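+
+            For example (illustrative values): a user with an unthreaded
+            receipt at stream ordering 10 and a receipt in thread "$t" at
+            stream ordering 15, both in room "!r", yields:
+                {"!r": _RoomReceipt(10, {"$t": 15})}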
+ """
+ receipt_types_clause, args = make_in_list_sql_clause(
+ self.database_engine,
+ "receipt_type",
+ (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
+ )
+
+ sql = f"""
+ SELECT room_id, thread_id, MAX(stream_ordering)
+ FROM receipts_linearized
+ INNER JOIN events USING (room_id, event_id)
+ WHERE {receipt_types_clause}
+ AND user_id = ?
+ GROUP BY room_id, thread_id
+ """
+
+ args.extend((user_id,))
+ txn.execute(sql, args)
+
+ result: Dict[str, _RoomReceipt] = {}
+ for room_id, thread_id, stream_ordering in txn:
+ room_receipt = result.setdefault(room_id, _RoomReceipt())
+ if thread_id is None:
+ room_receipt.unthreaded_stream_ordering = stream_ordering
+ else:
+ room_receipt.threaded_stream_ordering[thread_id] = stream_ordering
+
+ return result
+
async def get_unread_push_actions_for_user_in_range_for_http(
self,
user_id: str,
@@ -376,81 +908,45 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
The list will be ordered by ascending stream_ordering.
The list will have between 0~limit entries.
"""
- # find rooms that have a read receipt in them and return the next
- # push actions
- def get_after_receipt(
- txn: LoggingTransaction,
- ) -> List[Tuple[str, str, int, str, bool]]:
- # find rooms that have a read receipt in them and return the next
- # push actions
- sql = (
- "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
- " ep.highlight "
- " FROM ("
- " SELECT room_id,"
- " MAX(stream_ordering) as stream_ordering"
- " FROM events"
- " INNER JOIN receipts_linearized USING (room_id, event_id)"
- " WHERE receipt_type = 'm.read' AND user_id = ?"
- " GROUP BY room_id"
- ") AS rl,"
- " event_push_actions AS ep"
- " WHERE"
- " ep.room_id = rl.room_id"
- " AND ep.stream_ordering > rl.stream_ordering"
- " AND ep.user_id = ?"
- " AND ep.stream_ordering > ?"
- " AND ep.stream_ordering <= ?"
- " AND ep.notif = 1"
- " ORDER BY ep.stream_ordering ASC LIMIT ?"
- )
- args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
- txn.execute(sql, args)
- return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall())
- after_read_receipt = await self.db_pool.runInteraction(
- "get_unread_push_actions_for_user_in_range_http_arr", get_after_receipt
+ receipts_by_room = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_http_receipts",
+ self._get_receipts_by_room_txn,
+ user_id=user_id,
)
- # There are rooms with push actions in them but you don't have a read receipt in
- # them e.g. rooms you've been invited to, so get push actions for rooms which do
- # not have read receipts in them too.
- def get_no_receipt(
+ def get_push_actions_txn(
txn: LoggingTransaction,
- ) -> List[Tuple[str, str, int, str, bool]]:
- sql = (
- "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
- " ep.highlight "
- " FROM event_push_actions AS ep"
- " INNER JOIN events AS e USING (room_id, event_id)"
- " WHERE"
- " ep.room_id NOT IN ("
- " SELECT room_id FROM receipts_linearized"
- " WHERE receipt_type = 'm.read' AND user_id = ?"
- " GROUP BY room_id"
- " )"
- " AND ep.user_id = ?"
- " AND ep.stream_ordering > ?"
- " AND ep.stream_ordering <= ?"
- " AND ep.notif = 1"
- " ORDER BY ep.stream_ordering ASC LIMIT ?"
- )
- args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
- txn.execute(sql, args)
- return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall())
+ ) -> List[Tuple[str, str, str, int, str, bool]]:
+ sql = """
+ SELECT ep.event_id, ep.room_id, ep.thread_id, ep.stream_ordering,
+ ep.actions, ep.highlight
+ FROM event_push_actions AS ep
+ WHERE
+ ep.user_id = ?
+ AND ep.stream_ordering > ?
+ AND ep.stream_ordering <= ?
+ AND ep.notif = 1
+ ORDER BY ep.stream_ordering ASC LIMIT ?
+ """
+ txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit))
+ return cast(List[Tuple[str, str, str, int, str, bool]], txn.fetchall())
- no_read_receipt = await self.db_pool.runInteraction(
- "get_unread_push_actions_for_user_in_range_http_nrr", get_no_receipt
+ push_actions = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_http", get_push_actions_txn
)
notifs = [
HttpPushAction(
- event_id=row[0],
- room_id=row[1],
- stream_ordering=row[2],
- actions=_deserialize_action(row[3], row[4]),
+ event_id=event_id,
+ room_id=room_id,
+ stream_ordering=stream_ordering,
+ actions=_deserialize_action(actions, highlight),
+ )
+ for event_id, room_id, thread_id, stream_ordering, actions, highlight in push_actions
+ if receipts_by_room.get(room_id, MISSING_ROOM_RECEIPT).is_unread(
+ thread_id, stream_ordering
)
- for row in after_read_receipt + no_read_receipt
]
# Now sort it so it's ordered correctly, since currently it will
@@ -485,82 +981,48 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
The list will be ordered by descending received_ts.
The list will have between 0~limit entries.
"""
- # find rooms that have a read receipt in them and return the most recent
- # push actions
- def get_after_receipt(
- txn: LoggingTransaction,
- ) -> List[Tuple[str, str, int, str, bool, int]]:
- sql = (
- "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
- " ep.highlight, e.received_ts"
- " FROM ("
- " SELECT room_id,"
- " MAX(stream_ordering) as stream_ordering"
- " FROM events"
- " INNER JOIN receipts_linearized USING (room_id, event_id)"
- " WHERE receipt_type = 'm.read' AND user_id = ?"
- " GROUP BY room_id"
- ") AS rl,"
- " event_push_actions AS ep"
- " INNER JOIN events AS e USING (room_id, event_id)"
- " WHERE"
- " ep.room_id = rl.room_id"
- " AND ep.stream_ordering > rl.stream_ordering"
- " AND ep.user_id = ?"
- " AND ep.stream_ordering > ?"
- " AND ep.stream_ordering <= ?"
- " AND ep.notif = 1"
- " ORDER BY ep.stream_ordering DESC LIMIT ?"
- )
- args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
- txn.execute(sql, args)
- return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall())
- after_read_receipt = await self.db_pool.runInteraction(
- "get_unread_push_actions_for_user_in_range_email_arr", get_after_receipt
+ receipts_by_room = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_email_receipts",
+ self._get_receipts_by_room_txn,
+ user_id=user_id,
)
- # There are rooms with push actions in them but you don't have a read receipt in
- # them e.g. rooms you've been invited to, so get push actions for rooms which do
- # not have read receipts in them too.
- def get_no_receipt(
+ def get_push_actions_txn(
txn: LoggingTransaction,
- ) -> List[Tuple[str, str, int, str, bool, int]]:
- sql = (
- "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
- " ep.highlight, e.received_ts"
- " FROM event_push_actions AS ep"
- " INNER JOIN events AS e USING (room_id, event_id)"
- " WHERE"
- " ep.room_id NOT IN ("
- " SELECT room_id FROM receipts_linearized"
- " WHERE receipt_type = 'm.read' AND user_id = ?"
- " GROUP BY room_id"
- " )"
- " AND ep.user_id = ?"
- " AND ep.stream_ordering > ?"
- " AND ep.stream_ordering <= ?"
- " AND ep.notif = 1"
- " ORDER BY ep.stream_ordering DESC LIMIT ?"
- )
- args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
- txn.execute(sql, args)
- return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall())
+ ) -> List[Tuple[str, str, str, int, str, bool, int]]:
+ sql = """
+ SELECT ep.event_id, ep.room_id, ep.thread_id, ep.stream_ordering,
+ ep.actions, ep.highlight, e.received_ts
+ FROM event_push_actions AS ep
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ ep.user_id = ?
+ AND ep.stream_ordering > ?
+ AND ep.stream_ordering <= ?
+ AND ep.notif = 1
+ ORDER BY ep.stream_ordering DESC LIMIT ?
+ """
+ txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit))
+ return cast(List[Tuple[str, str, str, int, str, bool, int]], txn.fetchall())
- no_read_receipt = await self.db_pool.runInteraction(
- "get_unread_push_actions_for_user_in_range_email_nrr", get_no_receipt
+ push_actions = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_email", get_push_actions_txn
)
# Make a list of dicts from the two sets of results.
notifs = [
EmailPushAction(
- event_id=row[0],
- room_id=row[1],
- stream_ordering=row[2],
- actions=_deserialize_action(row[3], row[4]),
- received_ts=row[5],
+ event_id=event_id,
+ room_id=room_id,
+ stream_ordering=stream_ordering,
+ actions=_deserialize_action(actions, highlight),
+ received_ts=received_ts,
+ )
+ for event_id, room_id, thread_id, stream_ordering, actions, highlight, received_ts in push_actions
+ if receipts_by_room.get(room_id, MISSING_ROOM_RECEIPT).is_unread(
+ thread_id, stream_ordering
)
- for row in after_read_receipt + no_read_receipt
]
# Now sort it so it's ordered correctly, since currently it will
@@ -606,8 +1068,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
async def add_push_actions_to_staging(
self,
event_id: str,
- user_id_actions: Dict[str, List[Union[dict, str]]],
+ user_id_actions: Dict[str, Collection[Union[Mapping, str]]],
count_as_unread: bool,
+ thread_id: str,
) -> None:
"""Add the push actions for the event to the push action staging area.
@@ -616,6 +1079,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
user_id_actions: A mapping of user_id to list of push actions, where
an action can either be a string or dict.
count_as_unread: Whether this event should increment unread counts.
+            thread_id: The thread this event is part of, if applicable.
"""
if not user_id_actions:
return
@@ -623,8 +1087,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
# This is a helper function for generating the necessary tuple that
# can be used to insert into the `event_push_actions_staging` table.
def _gen_entry(
- user_id: str, actions: List[Union[dict, str]]
- ) -> Tuple[str, str, str, int, int, int]:
+ user_id: str, actions: Collection[Union[Mapping, str]]
+ ) -> Tuple[str, str, str, int, int, int, str, int]:
is_highlight = 1 if _action_has_highlight(actions) else 0
notif = 1 if "notify" in actions else 0
return (
@@ -634,28 +1098,27 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
notif, # notif column
is_highlight, # highlight column
int(count_as_unread), # unread column
+ thread_id, # thread_id column
+ self._clock.time_msec(), # inserted_ts column
)
- def _add_push_actions_to_staging_txn(txn: LoggingTransaction) -> None:
- # We don't use simple_insert_many here to avoid the overhead
- # of generating lists of dicts.
-
- sql = """
- INSERT INTO event_push_actions_staging
- (event_id, user_id, actions, notif, highlight, unread)
- VALUES (?, ?, ?, ?, ?, ?)
- """
-
- txn.execute_batch(
- sql,
- (
- _gen_entry(user_id, actions)
- for user_id, actions in user_id_actions.items()
- ),
- )
-
- return await self.db_pool.runInteraction(
- "add_push_actions_to_staging", _add_push_actions_to_staging_txn
+ await self.db_pool.simple_insert_many(
+ "event_push_actions_staging",
+ keys=(
+ "event_id",
+ "user_id",
+ "actions",
+ "notif",
+ "highlight",
+ "unread",
+ "thread_id",
+ "inserted_ts",
+ ),
+ values=[
+ _gen_entry(user_id, actions)
+ for user_id, actions in user_id_actions.items()
+ ],
+ desc="add_push_actions_to_staging",
)
async def remove_push_actions_from_staging(self, event_id: str) -> None:
@@ -769,12 +1232,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
# [10, <none>, 20], we should treat this as being equivalent to
# [10, 10, 20].
#
- sql = (
- "SELECT received_ts FROM events"
- " WHERE stream_ordering <= ?"
- " ORDER BY stream_ordering DESC"
- " LIMIT 1"
- )
+ sql = """
+ SELECT received_ts FROM events
+ WHERE stream_ordering <= ?
+ ORDER BY stream_ordering DESC
+ LIMIT 1
+ """
while range_end - range_start > 0:
middle = (range_end + range_start) // 2
@@ -802,14 +1265,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
self, stream_ordering: int
) -> Optional[int]:
def f(txn: LoggingTransaction) -> Optional[Tuple[int]]:
- sql = (
- "SELECT e.received_ts"
- " FROM event_push_actions AS ep"
- " JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id"
- " WHERE ep.stream_ordering > ? AND notif = 1"
- " ORDER BY ep.stream_ordering ASC"
- " LIMIT 1"
- )
+ sql = """
+ SELECT e.received_ts
+ FROM event_push_actions AS ep
+ JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id
+ WHERE ep.stream_ordering > ? AND notif = 1
+ ORDER BY ep.stream_ordering ASC
+ LIMIT 1
+ """
txn.execute(sql, (stream_ordering,))
return cast(Optional[Tuple[int]], txn.fetchone())
@@ -858,10 +1321,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
Any push actions which predate the user's most recent read receipt are
now redundant, so we can remove them from `event_push_actions` and
update `event_push_summary`.
+
+ Returns true if all new receipts have been processed.
"""
limit = 100
+        # The (inclusive) receipt stream ID that was previously processed.
min_receipts_stream_id = self.db_pool.simple_select_one_onecol_txn(
txn,
table="event_push_summary_last_receipt_stream_id",
@@ -871,8 +1337,16 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
max_receipts_stream_id = self._receipts_id_gen.get_current_token()
+ # The (inclusive) event stream ordering that was previously summarised.
+ old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="event_push_summary_stream_ordering",
+ keyvalues={},
+ retcol="stream_ordering",
+ )
+
sql = """
- SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering
+ SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, e.stream_ordering
FROM receipts_linearized AS r
INNER JOIN events AS e USING (event_id)
WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ?
@@ -893,73 +1367,133 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
limit,
),
)
- rows = txn.fetchall()
-
- old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
- txn,
- table="event_push_summary_stream_ordering",
- keyvalues={},
- retcol="stream_ordering",
- )
+ rows = cast(List[Tuple[int, str, str, Optional[str], int]], txn.fetchall())
# For each new read receipt we delete push actions from before it and
# recalculate the summary.
- for _, room_id, user_id, stream_ordering in rows:
+ #
+        # Care must be taken over whether it is a threaded or unthreaded receipt.
+ for _, room_id, user_id, thread_id, stream_ordering in rows:
# Only handle our own read receipts.
if not self.hs.is_mine_id(user_id):
continue
+ thread_clause = ""
+ thread_args: Tuple = ()
+ if thread_id is not None:
+ thread_clause = "AND thread_id = ?"
+ thread_args = (thread_id,)
+
+            # Delete the push actions from before the receipt; the summary for
+            # them is recalculated below.
txn.execute(
- """
+ f"""
DELETE FROM event_push_actions
WHERE room_id = ?
AND user_id = ?
AND stream_ordering <= ?
AND highlight = 0
+ {thread_clause}
""",
- (room_id, user_id, stream_ordering),
+ (room_id, user_id, stream_ordering, *thread_args),
)
- notif_count, unread_count = self._get_notif_unread_count_for_user_room(
- txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering
+ # First ensure that the existing rows have an updated thread_id field.
+ if not self._event_push_backfill_thread_id_done:
+ txn.execute(
+ """
+ UPDATE event_push_summary
+ SET thread_id = ?
+ WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+ """,
+ (MAIN_TIMELINE, room_id, user_id),
+ )
+ txn.execute(
+ """
+ UPDATE event_push_actions
+ SET thread_id = ?
+ WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+ """,
+ (MAIN_TIMELINE, room_id, user_id),
+ )
+
+ # Fetch the notification counts between the stream ordering of the
+ # latest receipt and what was previously summarised.
+ unread_counts = self._get_notif_unread_count_for_user_room(
+ txn,
+ room_id,
+ user_id,
+ stream_ordering,
+ old_rotate_stream_ordering,
+ thread_id,
)
- self.db_pool.simple_upsert_txn(
+ # For an unthreaded receipt, mark the summary for all threads in the room
+ # as cleared.
+ if thread_id is None:
+ self.db_pool.simple_update_txn(
+ txn,
+ table="event_push_summary",
+ keyvalues={"user_id": user_id, "room_id": room_id},
+ updatevalues={
+ "notif_count": 0,
+ "unread_count": 0,
+ "stream_ordering": old_rotate_stream_ordering,
+ "last_receipt_stream_ordering": stream_ordering,
+ },
+ )
+
+            # For a threaded receipt, we *always* want to update that receipt,
+            # even if there are no new notifications in that thread. This ensures
+            # the stream_ordering & last_receipt_stream_ordering are updated.
+ elif not unread_counts:
+ unread_counts = [(0, 0, thread_id)]
+
+ # Then any updated threads get their notification count and unread
+ # count updated.
+ self.db_pool.simple_update_many_txn(
txn,
table="event_push_summary",
- keyvalues={"room_id": room_id, "user_id": user_id},
- values={
- "notif_count": notif_count,
- "unread_count": unread_count,
- "stream_ordering": old_rotate_stream_ordering,
- "last_receipt_stream_ordering": stream_ordering,
- },
+ key_names=("room_id", "user_id", "thread_id"),
+ key_values=[(room_id, user_id, row[2]) for row in unread_counts],
+ value_names=(
+ "notif_count",
+ "unread_count",
+ "stream_ordering",
+ "last_receipt_stream_ordering",
+ ),
+ value_values=[
+ (row[0], row[1], old_rotate_stream_ordering, stream_ordering)
+ for row in unread_counts
+ ],
)
# We always update `event_push_summary_last_receipt_stream_id` to
# ensure that we don't rescan the same receipts for remote users.
- upper_limit = max_receipts_stream_id
+ receipts_last_processed_stream_id = max_receipts_stream_id
if len(rows) >= limit:
# If we pulled out a limited number of rows we only update the
# position to the last receipt we processed, so we continue
# processing the rest next iteration.
- upper_limit = rows[-1][0]
+ receipts_last_processed_stream_id = rows[-1][0]
self.db_pool.simple_update_txn(
txn,
table="event_push_summary_last_receipt_stream_id",
keyvalues={},
- updatevalues={"stream_id": upper_limit},
+ updatevalues={"stream_id": receipts_last_processed_stream_id},
)
return len(rows) < limit
def _rotate_notifs_txn(self, txn: LoggingTransaction) -> bool:
- """Archives older notifications into event_push_summary. Returns whether
- the archiving process has caught up or not.
+ """Archives older notifications (from event_push_actions) into event_push_summary.
+
+ Returns whether the archiving process has caught up or not.
"""
+ # The (inclusive) event stream ordering that was previously summarised.
old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
txn,
table="event_push_summary_stream_ordering",
@@ -974,7 +1508,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
SELECT stream_ordering FROM event_push_actions
WHERE stream_ordering > ?
ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?
- """,
+ """,
(old_rotate_stream_ordering, self._rotate_count),
)
stream_row = txn.fetchone()
@@ -993,39 +1527,62 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
logger.info("Rotating notifications up to: %s", rotate_to_stream_ordering)
- self._rotate_notifs_before_txn(txn, rotate_to_stream_ordering)
+ self._rotate_notifs_before_txn(
+ txn, old_rotate_stream_ordering, rotate_to_stream_ordering
+ )
return caught_up
def _rotate_notifs_before_txn(
- self, txn: LoggingTransaction, rotate_to_stream_ordering: int
+ self,
+ txn: LoggingTransaction,
+ old_rotate_stream_ordering: int,
+ rotate_to_stream_ordering: int,
) -> None:
- old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
- txn,
- table="event_push_summary_stream_ordering",
- keyvalues={},
- retcol="stream_ordering",
- )
+ """Archives older notifications (from event_push_actions) into event_push_summary.
+
+ Any event_push_actions between old_rotate_stream_ordering (exclusive) and
+ rotate_to_stream_ordering (inclusive) will be added to the event_push_summary
+ table.
+
+ Args:
+ txn: The database transaction.
+ old_rotate_stream_ordering: The previous maximum event stream ordering.
+ rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
+ """
+
+ # Ensure that any new actions have an updated thread_id.
+ if not self._event_push_backfill_thread_id_done:
+ txn.execute(
+ """
+ UPDATE event_push_actions
+ SET thread_id = ?
+ WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+ """,
+ (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
+ )
+
+ # XXX Do we need to update summaries here too?
# Calculate the new counts that should be upserted into event_push_summary
sql = """
- SELECT user_id, room_id,
+ SELECT user_id, room_id, thread_id,
coalesce(old.%s, 0) + upd.cnt,
upd.stream_ordering
FROM (
- SELECT user_id, room_id, count(*) as cnt,
+ SELECT user_id, room_id, thread_id, count(*) as cnt,
max(ea.stream_ordering) as stream_ordering
FROM event_push_actions AS ea
- LEFT JOIN event_push_summary AS old USING (user_id, room_id)
+ LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id)
WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ?
AND (
old.last_receipt_stream_ordering IS NULL
OR old.last_receipt_stream_ordering < ea.stream_ordering
)
AND %s = 1
- GROUP BY user_id, room_id
+ GROUP BY user_id, room_id, thread_id
) AS upd
- LEFT JOIN event_push_summary AS old USING (user_id, room_id)
+ LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id)
"""
# First get the count of unread messages.
@@ -1039,11 +1596,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
# object because we might not have the same amount of rows in each of them. To do
# this, we use a dict indexed on the user ID and room ID to make it easier to
# populate.
- summaries: Dict[Tuple[str, str], _EventPushSummary] = {}
+ summaries: Dict[Tuple[str, str, str], _EventPushSummary] = {}
for row in txn:
- summaries[(row[0], row[1])] = _EventPushSummary(
- unread_count=row[2],
- stream_ordering=row[3],
+ summaries[(row[0], row[1], row[2])] = _EventPushSummary(
+ unread_count=row[3],
+ stream_ordering=row[4],
notif_count=0,
)
@@ -1054,26 +1611,43 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
)
for row in txn:
- if (row[0], row[1]) in summaries:
- summaries[(row[0], row[1])].notif_count = row[2]
+ if (row[0], row[1], row[2]) in summaries:
+ summaries[(row[0], row[1], row[2])].notif_count = row[3]
else:
# Because the rules on notifying are different than the rules on marking
# a message unread, we might end up with messages that notify but aren't
# marked unread, so we might not have a summary for this (user, room)
# tuple to complete.
- summaries[(row[0], row[1])] = _EventPushSummary(
+ summaries[(row[0], row[1], row[2])] = _EventPushSummary(
unread_count=0,
- stream_ordering=row[3],
- notif_count=row[2],
+ stream_ordering=row[4],
+ notif_count=row[3],
)
logger.info("Rotating notifications, handling %d rows", len(summaries))
+ # Ensure that any updated threads have the proper thread_id.
+ if not self._event_push_backfill_thread_id_done:
+ txn.execute_batch(
+ """
+ UPDATE event_push_summary
+ SET thread_id = ?
+ WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+ """,
+ [
+ (MAIN_TIMELINE, room_id, user_id)
+ for user_id, room_id, _ in summaries
+ ],
+ )
+
self.db_pool.simple_upsert_many_txn(
txn,
table="event_push_summary",
- key_names=("user_id", "room_id"),
- key_values=[(user_id, room_id) for user_id, room_id in summaries],
+ key_names=("user_id", "room_id", "thread_id"),
+ key_values=[
+ (user_id, room_id, thread_id)
+ for user_id, room_id, thread_id in summaries
+ ],
value_names=("notif_count", "unread_count", "stream_ordering"),
value_values=[
(
@@ -1090,12 +1664,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
(rotate_to_stream_ordering,),
)
- async def _remove_old_push_actions_that_have_rotated(
- self,
- ) -> None:
- """Clear out old push actions that have been summarized."""
+ async def _remove_old_push_actions_that_have_rotated(self) -> None:
+ """
+        Clear out old push actions that have been summarised (and are more
+        than a day old).
+ """
- # We want to clear out anything that older than a day that *has* already
+ # We want to clear out anything that is older than a day that *has* already
# been rotated.
rotated_upto_stream_ordering = await self.db_pool.simple_select_one_onecol(
table="event_push_summary_stream_ordering",
@@ -1119,7 +1694,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
SELECT stream_ordering FROM event_push_actions
WHERE stream_ordering <= ? AND highlight = 0
ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?
- """,
+ """,
(
max_stream_ordering_to_delete,
batch_size,
@@ -1154,6 +1729,53 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
if done:
break
+ @wrap_as_background_process("_clear_old_push_actions_staging")
+ async def _clear_old_push_actions_staging(self) -> None:
+ """Clear out any old event push actions from the staging table for
+ events that we failed to persist.
+ """
+
+ # We delete anything more than an hour old, on the assumption that we'll
+ # never take more than an hour to persist an event.
+ delete_before_ts = self._clock.time_msec() - 60 * 60 * 1000
+
+ if self._started_ts > delete_before_ts:
+            # We need to wait at least an hour after startup before we start
+            # deleting, so that we know it's safe to delete rows with NULL
+            # `inserted_ts`.
+ return
+
+ # We don't have an index on `inserted_ts`, instead we assume that the
+ # number of "live" rows in `event_push_actions_staging` is small enough
+ # that an infrequent periodic scan won't cause a problem.
+ #
+        # Note: we also delete any rows with a NULL `inserted_ts`; this is safe
+        # as we added a default value to new rows and so they must be at least
+        # an hour old.
+ limit = 1000
+ sql = """
+ DELETE FROM event_push_actions_staging WHERE event_id IN (
+ SELECT event_id FROM event_push_actions_staging WHERE
+ inserted_ts < ? OR inserted_ts IS NULL
+ LIMIT ?
+ )
+ """
+
+ def _clear_old_push_actions_staging_txn(txn: LoggingTransaction) -> bool:
+ txn.execute(sql, (delete_before_ts, limit))
+ return txn.rowcount >= limit
+
+ while True:
+            # The transaction returns true if there may be more rows to delete.
+ deleted = await self.db_pool.runInteraction(
+ "_clear_old_push_actions_staging", _clear_old_push_actions_staging_txn
+ )
+
+ if not deleted:
+ return
+
+ # We sleep to ensure that we don't overwhelm the DB.
+ await self._clock.sleep(1.0)
+
class EventPushActionsStore(EventPushActionsWorkerStore):
EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
@@ -1188,7 +1810,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore):
table="event_push_actions",
columns=["highlight", "stream_ordering"],
where_clause="highlight=0",
- psql_only=True,
)
async def get_push_actions_for_user(
@@ -1215,16 +1836,18 @@ class EventPushActionsStore(EventPushActionsWorkerStore):
# NB. This assumes event_ids are globally unique since
# it makes the query easier to index
- sql = (
- "SELECT epa.event_id, epa.room_id,"
- " epa.stream_ordering, epa.topological_ordering,"
- " epa.actions, epa.highlight, epa.profile_tag, e.received_ts"
- " FROM event_push_actions epa, events e"
- " WHERE epa.event_id = e.event_id"
- " AND epa.user_id = ? %s"
- " AND epa.notif = 1"
- " ORDER BY epa.stream_ordering DESC"
- " LIMIT ?" % (before_clause,)
+ sql = """
+ SELECT epa.event_id, epa.room_id,
+ epa.stream_ordering, epa.topological_ordering,
+ epa.actions, epa.highlight, epa.profile_tag, e.received_ts
+ FROM event_push_actions epa, events e
+ WHERE epa.event_id = e.event_id
+ AND epa.user_id = ? %s
+ AND epa.notif = 1
+ ORDER BY epa.stream_ordering DESC
+ LIMIT ?
+ """ % (
+ before_clause,
)
txn.execute(sql, args)
return cast(
@@ -1247,7 +1870,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore):
]
-def _action_has_highlight(actions: List[Union[dict, str]]) -> bool:
+def _action_has_highlight(actions: Collection[Union[Mapping, str]]) -> bool:
for action in actions:
if not isinstance(action, dict):
continue
|