diff --git a/changelog.d/17683.misc b/changelog.d/17683.misc
new file mode 100644
index 0000000000..11a10ff854
--- /dev/null
+++ b/changelog.d/17683.misc
@@ -0,0 +1 @@
+Speed up sliding sync by reducing amount of data pulled out of the database for large rooms.
diff --git a/synapse/handlers/sliding_sync/__init__.py b/synapse/handlers/sliding_sync/__init__.py
index b097ac57a2..04493494a6 100644
--- a/synapse/handlers/sliding_sync/__init__.py
+++ b/synapse/handlers/sliding_sync/__init__.py
@@ -784,32 +784,10 @@ class SlidingSyncHandler:
):
avatar_changed = True
+ # We only need the room summary for calculating heroes, however if we do
+ # fetch it then we can use it to calculate `joined_count` and
+ # `invited_count`.
room_membership_summary: Optional[Mapping[str, MemberSummary]] = None
- empty_membership_summary = MemberSummary([], 0)
- # We need the room summary for:
- # - Always for initial syncs (or the first time we send down the room)
- # - When the room has no name, we need `heroes`
- # - When the membership has changed so we need to give updated `heroes` and
- # `joined_count`/`invited_count`.
- #
- # Ideally, instead of just looking at `name_changed`, we'd check if the room
- # name is not set but this is a good enough approximation that saves us from
- # having to pull out the full event. This just means, we're generating the
- # summary whenever the room name changes instead of only when it changes to
- # `None`.
- if initial or name_changed or membership_changed:
- # We can't trace the function directly because it's cached and the `@cached`
- # decorator doesn't mix with `@trace` yet.
- with start_active_span("get_room_summary"):
- if room_membership_for_user_at_to_token.membership in (
- Membership.LEAVE,
- Membership.BAN,
- ):
- # TODO: Figure out how to get the membership summary for left/banned rooms
- room_membership_summary = {}
- else:
- room_membership_summary = await self.store.get_room_summary(room_id)
- # TODO: Reverse/rewind back to the `to_token`
# `heroes` are required if the room name is not set.
#
@@ -828,11 +806,45 @@ class SlidingSyncHandler:
# get them on initial syncs (or the first time we send down the room) or if the
# membership has changed which may change the heroes.
if name_event_id is None and (initial or (not initial and membership_changed)):
- assert room_membership_summary is not None
+ # We need the room summary to extract the heroes from
+ if room_membership_for_user_at_to_token.membership != Membership.JOIN:
+ # TODO: Figure out how to get the membership summary for left/banned rooms
+ # For invite/knock rooms we don't include the information.
+ room_membership_summary = {}
+ else:
+ room_membership_summary = await self.store.get_room_summary(room_id)
+ # TODO: Reverse/rewind back to the `to_token`
+
hero_user_ids = extract_heroes_from_room_summary(
room_membership_summary, me=user.to_string()
)
+ # Fetch the membership counts for rooms we're joined to.
+ #
+ # Similarly to other metadata, we only need to calculate the member
+ # counts if this is an initial sync or the memberships have changed.
+ joined_count: Optional[int] = None
+ invited_count: Optional[int] = None
+ if (
+ initial or membership_changed
+ ) and room_membership_for_user_at_to_token.membership == Membership.JOIN:
+ # If we have the room summary (because we calculated heroes above)
+ # then we can simply pull the counts from there.
+ if room_membership_summary is not None:
+ empty_membership_summary = MemberSummary([], 0)
+
+ joined_count = room_membership_summary.get(
+ Membership.JOIN, empty_membership_summary
+ ).count
+
+ invited_count = room_membership_summary.get(
+ Membership.INVITE, empty_membership_summary
+ ).count
+ else:
+ member_counts = await self.store.get_member_counts(room_id)
+ joined_count = member_counts.get(Membership.JOIN, 0)
+ invited_count = member_counts.get(Membership.INVITE, 0)
+
# Fetch the `required_state` for the room
#
# No `required_state` for invite/knock rooms (just `stripped_state`)
@@ -1090,20 +1102,6 @@ class SlidingSyncHandler:
set_tag(SynapseTags.RESULT_PREFIX + "initial", initial)
- joined_count: Optional[int] = None
- if initial or membership_changed:
- assert room_membership_summary is not None
- joined_count = room_membership_summary.get(
- Membership.JOIN, empty_membership_summary
- ).count
-
- invited_count: Optional[int] = None
- if initial or membership_changed:
- assert room_membership_summary is not None
- invited_count = room_membership_summary.get(
- Membership.INVITE, empty_membership_summary
- ).count
-
return SlidingSyncResult.RoomResult(
name=room_name,
avatar=room_avatar,
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index d22160b85c..d6deb077c8 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -112,6 +112,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache(
"get_number_joined_users_in_room", (room_id,)
)
+ self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
# There's no easy way of invalidating this cache for just the users
@@ -153,6 +154,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,))
+ self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None)
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 8df760e8a6..db03729cfe 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -312,18 +312,10 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
# We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats
- # Note, rejected events will have a null membership field, so
- # we we manually filter them out.
- sql = """
- SELECT count(*), membership FROM current_state_events
- WHERE type = 'm.room.member' AND room_id = ?
- AND membership IS NOT NULL
- GROUP BY membership
- """
+ counts = self._get_member_counts_txn(txn, room_id)
- txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {}
- for count, membership in txn:
+ for membership, count in counts.items():
res.setdefault(membership, MemberSummary([], count))
# Order by membership (joins -> invites -> leave (former insiders) ->
@@ -370,6 +362,31 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
)
@cached()
+ async def get_member_counts(self, room_id: str) -> Mapping[str, int]:
+ """Get a mapping of number of users by membership"""
+
+ return await self.db_pool.runInteraction(
+ "get_member_counts", self._get_member_counts_txn, room_id
+ )
+
+ def _get_member_counts_txn(
+ self, txn: LoggingTransaction, room_id: str
+ ) -> Dict[str, int]:
+ """Get a mapping of number of users by membership"""
+
+ # Note, rejected events will have a null membership field, so
+ # we we manually filter them out.
+ sql = """
+ SELECT count(*), membership FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ GROUP BY membership
+ """
+
+ txn.execute(sql, (room_id,))
+ return {membership: count for count, membership in txn}
+
+ @cached()
async def get_number_joined_users_in_room(self, room_id: str) -> int:
return await self.db_pool.simple_select_one_onecol(
table="current_state_events",
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index c5caaf56b0..ca31122ad3 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -736,6 +736,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
+ MEMBERS_CURRENT_STATE_UPDATE_NAME = "current_state_events_members_room_index"
def __init__(
self,
@@ -764,6 +765,13 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
self.DELETE_CURRENT_STATE_UPDATE_NAME,
self._background_remove_left_rooms,
)
+ self.db_pool.updates.register_background_index_update(
+ self.MEMBERS_CURRENT_STATE_UPDATE_NAME,
+ index_name="current_state_events_members_room_index",
+ table="current_state_events",
+ columns=["room_id", "membership"],
+ where_clause="type='m.room.member'",
+ )
async def _background_remove_left_rooms(
self, progress: JsonDict, batch_size: int
diff --git a/synapse/storage/schema/main/delta/87/03_current_state_index.sql b/synapse/storage/schema/main/delta/87/03_current_state_index.sql
new file mode 100644
index 0000000000..76b974271c
--- /dev/null
+++ b/synapse/storage/schema/main/delta/87/03_current_state_index.sql
@@ -0,0 +1,19 @@
+--
+-- This file is licensed under the Affero General Public License (AGPL) version 3.
+--
+-- Copyright (C) 2024 New Vector, Ltd
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- See the GNU Affero General Public License for more details:
+-- <https://www.gnu.org/licenses/agpl-3.0.html>.
+
+
+-- Add a background updates to add a new index:
+-- `current_state_events(room_id, membership) WHERE type = 'm.room.member'
+-- This makes counting membership in rooms (for syncs) much faster
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+ (8701, 'current_state_events_members_room_index', '{}');
diff --git a/tests/rest/client/sliding_sync/test_rooms_meta.py b/tests/rest/client/sliding_sync/test_rooms_meta.py
index 6d2742e25f..6dbce7126f 100644
--- a/tests/rest/client/sliding_sync/test_rooms_meta.py
+++ b/tests/rest/client/sliding_sync/test_rooms_meta.py
@@ -371,14 +371,17 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://UPDATED_DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["joined_count"],
- 1,
+
+ # We don't give extra room information to invitees
+ self.assertNotIn(
+ "joined_count",
+ response_body["rooms"][room_id1],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["invited_count"],
- 1,
+ self.assertNotIn(
+ "invited_count",
+ response_body["rooms"][room_id1],
)
+
self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
)
@@ -450,15 +453,16 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://DUMMY_MEDIA_ID",
response_body["rooms"][room_id1],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["joined_count"],
- # FIXME: The actual number should be "1" (user2) but we currently don't
- # support this for rooms where the user has left/been banned.
- 0,
+
+ # FIXME: We possibly want to return joined and invited counts for rooms
+ # you're banned form
+ self.assertNotIn(
+ "joined_count",
+ response_body["rooms"][room_id1],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["invited_count"],
- 0,
+ self.assertNotIn(
+ "invited_count",
+ response_body["rooms"][room_id1],
)
self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"),
@@ -692,19 +696,15 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
[],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["joined_count"],
- # FIXME: The actual number should be "1" (user2) but we currently don't
- # support this for rooms where the user has left/been banned.
- 0,
+ # FIXME: We possibly want to return joined and invited counts for rooms
+ # you're banned form
+ self.assertNotIn(
+ "joined_count",
+ response_body["rooms"][room_id1],
)
- self.assertEqual(
- response_body["rooms"][room_id1]["invited_count"],
- # We shouldn't see user5 since they were invited after user1 was banned.
- #
- # FIXME: The actual number should be "1" (user3) but we currently don't
- # support this for rooms where the user has left/been banned.
- 0,
+ self.assertNotIn(
+ "invited_count",
+ response_body["rooms"][room_id1],
)
def test_rooms_meta_heroes_incremental_sync_no_change(self) -> None:
|