summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erikj@element.io>2024-09-06 11:12:54 +0100
committerGitHub <noreply@github.com>2024-09-06 11:12:54 +0100
commit786de8570bf7ff14a8b98dbad6b9b9c18b05faf7 (patch)
tree283154cbc8b8269229ec329bf40e21d4ea94a388
parentSpeed up sliding sync by avoiding copies (#17670) (diff)
downloadsynapse-786de8570bf7ff14a8b98dbad6b9b9c18b05faf7.tar.xz
Speed up fetching partial-state rooms on sliding sync (#17666)
Instead of having a large cache of `room_id -> bool` about whether a
room is partially stated, replace it with a "fetch the rooms the user is in
which are partially stated" query. This is a lot faster as the set of partially
stated rooms at any point across the whole server is small, and so such
a query is fast.

The main issue with the bulk cache lookup is the CPU time spent looking
all the rooms up in the cache.
-rw-r--r--changelog.d/17666.misc1
-rw-r--r--synapse/handlers/sliding_sync/room_lists.py28
-rw-r--r--synapse/storage/databases/main/room.py26
3 files changed, 35 insertions, 20 deletions
diff --git a/changelog.d/17666.misc b/changelog.d/17666.misc
new file mode 100644

index 0000000000..3550679247 --- /dev/null +++ b/changelog.d/17666.misc
@@ -0,0 +1 @@ +Small performance improvement in speeding up sliding sync. diff --git a/synapse/handlers/sliding_sync/room_lists.py b/synapse/handlers/sliding_sync/room_lists.py
index a77b7ef2c3..8d6d8be44f 100644 --- a/synapse/handlers/sliding_sync/room_lists.py +++ b/synapse/handlers/sliding_sync/room_lists.py
@@ -333,11 +333,7 @@ class SlidingSyncRoomLists: # Find which rooms are partially stated and may need to be filtered out # depending on the `required_state` requested (see below). - partial_state_room_map = ( - await self.store.is_partial_state_room_batched( - filtered_sync_room_map.keys() - ) - ) + partial_state_rooms = await self.store.get_partial_rooms() # Since creating the `RoomSyncConfig` takes some work, let's just do it # once. @@ -349,7 +345,7 @@ class SlidingSyncRoomLists: filtered_sync_room_map = { room_id: room for room_id, room in filtered_sync_room_map.items() - if not partial_state_room_map.get(room_id) + if room_id not in partial_state_rooms } all_rooms.update(filtered_sync_room_map) @@ -409,9 +405,7 @@ class SlidingSyncRoomLists: with start_active_span("assemble_room_subscriptions"): # Find which rooms are partially stated and may need to be filtered out # depending on the `required_state` requested (see below). - partial_state_room_map = await self.store.is_partial_state_room_batched( - sync_config.room_subscriptions.keys() - ) + partial_state_rooms = await self.store.get_partial_rooms() for ( room_id, @@ -431,7 +425,7 @@ class SlidingSyncRoomLists: # Exclude partially-stated rooms if we must wait for the room to be # fully-stated if room_sync_config.must_await_full_state(self.is_mine_id): - if partial_state_room_map.get(room_id): + if room_id in partial_state_rooms: continue all_rooms.add(room_id) @@ -514,11 +508,7 @@ class SlidingSyncRoomLists: # Find which rooms are partially stated and may need to be filtered out # depending on the `required_state` requested (see below). - partial_state_room_map = ( - await self.store.is_partial_state_room_batched( - filtered_sync_room_map.keys() - ) - ) + partial_state_rooms = await self.store.get_partial_rooms() # Since creating the `RoomSyncConfig` takes some work, let's just do it # once. 
@@ -530,7 +520,7 @@ class SlidingSyncRoomLists: filtered_sync_room_map = { room_id: room for room_id, room in filtered_sync_room_map.items() - if not partial_state_room_map.get(room_id) + if room_id not in partial_state_rooms } all_rooms.update(filtered_sync_room_map) @@ -590,9 +580,7 @@ class SlidingSyncRoomLists: with start_active_span("assemble_room_subscriptions"): # Find which rooms are partially stated and may need to be filtered out # depending on the `required_state` requested (see below). - partial_state_room_map = await self.store.is_partial_state_room_batched( - sync_config.room_subscriptions.keys() - ) + partial_state_rooms = await self.store.get_partial_rooms() for ( room_id, @@ -624,7 +612,7 @@ class SlidingSyncRoomLists: # Exclude partially-stated rooms if we must wait for the room to be # fully-stated if room_sync_config.must_await_full_state(self.is_mine_id): - if partial_state_room_map.get(room_id): + if room_id in partial_state_rooms: continue all_rooms.add(room_id) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 68b0806041..e0b7b7e194 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py
@@ -1382,6 +1382,30 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): partial_state_rooms = {row[0] for row in rows} return {room_id: room_id in partial_state_rooms for room_id in room_ids} + @cached(max_entries=10000, iterable=True) + async def get_partial_rooms(self) -> AbstractSet[str]: + """Get any "partial-state" rooms which the user is in. + + This is fast as the set of partially stated rooms at any point across + the whole server is small, and so such a query is fast. This is also + faster than looking up whether a set of room ID's are partially stated + via `is_partial_state_room_batched(...)` because of the sheer amount of + CPU time looking all the rooms up in the cache. + """ + + def _get_partial_rooms_for_user_txn( + txn: LoggingTransaction, + ) -> AbstractSet[str]: + sql = """ + SELECT room_id FROM partial_state_rooms + """ + txn.execute(sql) + return {room_id for (room_id,) in txn} + + return await self.db_pool.runInteraction( + "get_partial_rooms_for_user", _get_partial_rooms_for_user_txn + ) + async def get_join_event_id_and_device_lists_stream_id_for_partial_state( self, room_id: str ) -> Tuple[str, int]: @@ -2341,6 +2365,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): self._invalidate_cache_and_stream( txn, self._get_partial_state_servers_at_join, (room_id,) ) + self._invalidate_all_cache_and_stream(txn, self.get_partial_rooms) async def write_partial_state_rooms_join_event_id( self, @@ -2562,6 +2587,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): self._invalidate_cache_and_stream( txn, self._get_partial_state_servers_at_join, (room_id,) ) + self._invalidate_all_cache_and_stream(txn, self.get_partial_rooms) DatabasePool.simple_insert_txn( txn,