summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2023-01-26 12:15:36 -0500
committerGitHub <noreply@github.com>2023-01-26 17:15:36 +0000
commit8a05d5de21888cdd0b53870fead3a1eae64f0b17 (patch)
tree2734c2f7cdb704d4090658d049f3691cdd479dc1 /synapse
parentFix initialization of `_device_list_id_gen` (#14914) (diff)
downloadsynapse-8a05d5de21888cdd0b53870fead3a1eae64f0b17.tar.xz
Batch look-ups to see if rooms are partial stated. (#14917)
* Batch look-ups to see if rooms are partial stated.

* Fix issues found in linting.

* Fix typo.

* Apply suggestions from code review

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Clarify comments.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Also improve the cache size while we're at it

* is_partial_state_rooms -> is_partial_state_room_batched

* Run `black`

* Improve annotation for `simple_select_many_batch`

* Fix is_partial_state_room_batched impl

* Okay, _actually_ fix impl

* Update description.

* Update synapse/storage/databases/main/room.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Run black.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
Co-authored-by: David Robertson <davidr@element.io>
Diffstat (limited to 'synapse')
-rw-r--r--synapse/handlers/sync.py24
-rw-r--r--synapse/storage/database.py2
-rw-r--r--synapse/storage/databases/main/room.py27
3 files changed, 42 insertions, 11 deletions
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index ee11764567..5ebd3ea855 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1383,16 +1383,21 @@ class SyncHandler:
         if not sync_config.filter_collection.lazy_load_members():
             # Non-lazy syncs should never include partially stated rooms.
             # Exclude all partially stated rooms from this sync.
-            for room_id in mutable_joined_room_ids:
-                if await self.store.is_partial_state_room(room_id):
-                    mutable_rooms_to_exclude.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                mutable_joined_room_ids
+            )
+            mutable_rooms_to_exclude.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if is_partial_state
+            )
 
         # Incremental eager syncs should additionally include rooms that
         # - we are joined to
         # - are full-stated
         # - became fully-stated at some point during the sync period
         #   (These rooms will have been omitted during a previous eager sync.)
-        forced_newly_joined_room_ids = set()
+        forced_newly_joined_room_ids: Set[str] = set()
         if since_token and not sync_config.filter_collection.lazy_load_members():
             un_partial_stated_rooms = (
                 await self.store.get_un_partial_stated_rooms_between(
@@ -1401,9 +1406,14 @@ class SyncHandler:
                     mutable_joined_room_ids,
                 )
             )
-            for room_id in un_partial_stated_rooms:
-                if not await self.store.is_partial_state_room(room_id):
-                    forced_newly_joined_room_ids.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                un_partial_stated_rooms
+            )
+            forced_newly_joined_room_ids.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if not is_partial_state
+            )
 
         # Now we have our list of joined room IDs, exclude as configured and freeze
         joined_room_ids = frozenset(
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 88479a16db..e20c5c5302 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1819,7 +1819,7 @@ class DatabasePool:
         keyvalues: Optional[Dict[str, Any]] = None,
         desc: str = "simple_select_many_batch",
         batch_size: int = 100,
-    ) -> List[Any]:
+    ) -> List[Dict[str, Any]]:
         """Executes a SELECT query on the named table, which may return zero or
         more rows, returning the result as a list of dicts.
 
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 3aa7b94560..fbbc018887 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
+from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID
 from synapse.util import json_encoder
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.stringutils import MXC_REGEX
 
 if TYPE_CHECKING:
@@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return room_servers
 
-    @cached()
+    @cached(max_entries=10000)
     async def is_partial_state_room(self, room_id: str) -> bool:
         """Checks if this room has partial state.
 
@@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return entry is not None
 
+    @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids")
+    async def is_partial_state_room_batched(
+        self, room_ids: StrCollection
+    ) -> Mapping[str, bool]:
+        """Checks if the given rooms have partial state.
+
+        Returns true for "partial-state" rooms, which means that the state
+        at events in the room, and `current_state_events`, may not yet be
+        complete.
+        """
+
+        rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch(
+            table="partial_state_rooms",
+            column="room_id",
+            iterable=room_ids,
+            retcols=("room_id",),
+            desc="is_partial_state_room_batched",
+        )
+        partial_state_rooms = {row_dict["room_id"] for row_dict in rows}
+        return {room_id: room_id in partial_state_rooms for room_id in room_ids}
+
     async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
         self, room_id: str
     ) -> Tuple[str, int]: