Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. (#16759)

The idea here being that the directory server shouldn't advertise rooms to a requesting server is the requesting server would not be allowed to join or participate in the room.     Base: `develop`    Original commit schedule, with full messages: <ol> <li> Pass `from_federation_origin` down into room list retrieval code </li> <li> Don't cache /publicRooms response for inbound federated requests </li> <li> fixup! Don't cache /publicRooms response for inbound federated requests </li> <li> Cap the number of /publicRooms entries to 100 </li> <li> Simplify code now that you can't request unlimited rooms </li> <li> Filter out rooms from federated requests that don't have the correct ACL </li> <li> Request a handful more when filtering ACLs so that we can try to avoid shortchanging the requester </li> </ol> --------- Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
author: reivilibre <oliverw@matrix.org> 2024-01-08 17:24:20 +0000
committer: GitHub <noreply@github.com> 2024-01-08 17:24:20 +0000
commit: a83a337c4dd908db82ed555e8340ee3b52f34e9e (patch)
tree: 0df9de93a9ad38e44711ef2e87d8b713da154a0f /synapse/handlers
parent: Port `EventInternalMetadata` class to Rust (#16782) (diff)
download: synapse-a83a337c4dd908db82ed555e8340ee3b52f34e9e.tar.xz
1 files changed, 127 insertions, 50 deletions
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index bfd205db49..5bad133f6b 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -19,7 +19,7 @@
 #
 
 import logging
-from typing import TYPE_CHECKING, Any, Optional, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple
 
 import attr
 import msgpack
@@ -54,6 +54,9 @@ REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
 # This is used to indicate we should only return rooms published to the main list.
 EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
 
+# Maximum number of local public rooms returned over the CS or SS API
+MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
+
 
 class RoomListHandler:
     def __init__(self, hs: "HomeServer"):
@@ -74,7 +77,7 @@ class RoomListHandler:
         since_token: Optional[str] = None,
         search_filter: Optional[dict] = None,
         network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
-        from_federation: bool = False,
+        from_federation_origin: Optional[str] = None,
     ) -> JsonDict:
         """Generate a local public room list.
 
@@ -89,7 +92,8 @@ class RoomListHandler:
                 This can be (None, None) to indicate the main list, or a particular
                 appservice and network id to use an appservice specific one.
                 Setting to None returns all public rooms across all lists.
-            from_federation: true iff the request comes from the federation API
+            from_federation_origin: the server name of the requester, or None
+                if the request is not from federation.
         """
         if not self.enable_room_list_search:
             return {"chunk": [], "total_room_count_estimate": 0}
@@ -102,36 +106,43 @@ class RoomListHandler:
             network_tuple,
         )
 
-        if search_filter:
+        capped_limit: int = (
+            MAX_PUBLIC_ROOMS_IN_RESPONSE
+            if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
+            else limit
+        )
+
+        if search_filter or from_federation_origin is not None:
             # We explicitly don't bother caching searches or requests for
             # appservice specific lists.
-            logger.info("Bypassing cache as search request.")
+            # We also don't bother caching requests from federated homeservers.
+            logger.debug("Bypassing cache as search or federation request.")
 
             return await self._get_public_room_list(
-                limit,
+                capped_limit,
                 since_token,
                 search_filter,
                 network_tuple=network_tuple,
-                from_federation=from_federation,
+                from_federation_origin=from_federation_origin,
             )
 
-        key = (limit, since_token, network_tuple)
+        key = (capped_limit, since_token, network_tuple)
         return await self.response_cache.wrap(
             key,
             self._get_public_room_list,
-            limit,
+            capped_limit,
             since_token,
             network_tuple=network_tuple,
-            from_federation=from_federation,
+            from_federation_origin=from_federation_origin,
         )
 
     async def _get_public_room_list(
         self,
-        limit: Optional[int] = None,
+        limit: int,
         since_token: Optional[str] = None,
         search_filter: Optional[dict] = None,
         network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
-        from_federation: bool = False,
+        from_federation_origin: Optional[str] = None,
     ) -> JsonDict:
         """Generate a public room list.
         Args:
@@ -142,8 +153,8 @@ class RoomListHandler:
                 This can be (None, None) to indicate the main list, or a particular
                 appservice and network id to use an appservice specific one.
                 Setting to None returns all public rooms across all lists.
-            from_federation: Whether this request originated from a
-                federating server or a client. Used for room filtering.
+            from_federation_origin: the server name of the requester, or None
+                if the request is not from federation.
         """
 
         # Pagination tokens work by storing the room ID sent in the last batch,
@@ -165,8 +176,16 @@ class RoomListHandler:
             forwards = True
             has_batch_token = False
 
-        # we request one more than wanted to see if there are more pages to come
-        probing_limit = limit + 1 if limit is not None else None
+        if from_federation_origin is None:
+            # Client-Server API:
+            # we request one more than wanted to see if there are more pages to come
+            probing_limit = limit + 1
+        else:
+            # Federation API:
+            # we request a handful more in case any get filtered out by ACLs
+            # as a best easy effort attempt to return the full number of entries
+            # specified by `limit`.
+            probing_limit = limit + 10
 
         results = await self.store.get_largest_public_rooms(
             network_tuple,
@@ -174,7 +193,7 @@ class RoomListHandler:
             probing_limit,
             bounds=bounds,
             forwards=forwards,
-            ignore_non_federatable=from_federation,
+            ignore_non_federatable=from_federation_origin is not None,
         )
 
         def build_room_entry(room: LargestRoomStats) -> JsonDict:
@@ -195,59 +214,117 @@ class RoomListHandler:
             # Filter out Nones – rather omit the field altogether
             return {k: v for k, v in entry.items() if v is not None}
 
-        response: JsonDict = {}
+        # Build a list of up to `limit` entries.
+        room_entries: List[JsonDict] = []
+        rooms_iterator = results if forwards else reversed(results)
+
+        # Track the first and last 'considered' rooms so that we can provide correct
+        # next_batch/prev_batch tokens.
+        # This is needed because the loop might finish early when
+        # `len(room_entries) >= limit` and we might be left with rooms we didn't
+        # 'consider' (iterate over) and we should save those rooms for the next
+        # batch.
+        first_considered_room: Optional[LargestRoomStats] = None
+        last_considered_room: Optional[LargestRoomStats] = None
+        cut_off_due_to_limit: bool = False
+
+        for room_result in rooms_iterator:
+            if len(room_entries) >= limit:
+                cut_off_due_to_limit = True
+                break
+
+            if first_considered_room is None:
+                first_considered_room = room_result
+            last_considered_room = room_result
+
+            if from_federation_origin is not None:
+                # If this is a federated request, apply server ACLs if the room has any set
+                acl_evaluator = (
+                    await self._storage_controllers.state.get_server_acl_for_room(
+                        room_result.room_id
+                    )
+                )
+
+                if acl_evaluator is not None:
+                    if not acl_evaluator.server_matches_acl_event(
+                        from_federation_origin
+                    ):
+                        # the requesting server is ACL blocked by the room,
+                        # don't show in directory
+                        continue
+
+            room_entries.append(build_room_entry(room_result))
+
+        if not forwards:
+            # If we are paginating backwards, we still return the chunk in
+            # biggest-first order, so reverse again.
+            room_entries.reverse()
+            # Swap the order of first/last considered rooms.
+            first_considered_room, last_considered_room = (
+                last_considered_room,
+                first_considered_room,
+            )
+
+        response: JsonDict = {
+            "chunk": room_entries,
+        }
         num_results = len(results)
-        if limit is not None:
-            more_to_come = num_results == probing_limit
 
-            # Depending on direction we trim either the front or back.
-            if forwards:
-                results = results[:limit]
+        more_to_come_from_database = num_results == probing_limit
+
+        if forwards and has_batch_token:
+            # If there was a token given then we assume that there
+            # must be previous results, even if there were no results in this batch.
+            if first_considered_room is not None:
+                response["prev_batch"] = RoomListNextBatch(
+                    last_joined_members=first_considered_room.joined_members,
+                    last_room_id=first_considered_room.room_id,
+                    direction_is_forward=False,
+                ).to_token()
             else:
-                results = results[-limit:]
-        else:
-            more_to_come = False
+                # If we didn't find any results this time,
+                # we don't have an actual room ID to put in the token.
+                # But since `first_considered_room` is None, we know that we have
+                # reached the end of the results.
+                # So we can use a token of (0, empty room ID) to paginate from the end
+                # next time.
+                response["prev_batch"] = RoomListNextBatch(
+                    last_joined_members=0,
+                    last_room_id="",
+                    direction_is_forward=False,
+                ).to_token()
 
         if num_results > 0:
-            final_entry = results[-1]
-            initial_entry = results[0]
-
+            assert first_considered_room is not None
+            assert last_considered_room is not None
             if forwards:
-                if has_batch_token:
-                    # If there was a token given then we assume that there
-                    # must be previous results.
-                    response["prev_batch"] = RoomListNextBatch(
-                        last_joined_members=initial_entry.joined_members,
-                        last_room_id=initial_entry.room_id,
-                        direction_is_forward=False,
-                    ).to_token()
-
-                if more_to_come:
+                if more_to_come_from_database or cut_off_due_to_limit:
                     response["next_batch"] = RoomListNextBatch(
-                        last_joined_members=final_entry.joined_members,
-                        last_room_id=final_entry.room_id,
+                        last_joined_members=last_considered_room.joined_members,
+                        last_room_id=last_considered_room.room_id,
                         direction_is_forward=True,
                     ).to_token()
-            else:
+            else:  # backwards
                 if has_batch_token:
                     response["next_batch"] = RoomListNextBatch(
-                        last_joined_members=final_entry.joined_members,
-                        last_room_id=final_entry.room_id,
+                        last_joined_members=last_considered_room.joined_members,
+                        last_room_id=last_considered_room.room_id,
                         direction_is_forward=True,
                     ).to_token()
 
-                if more_to_come:
+                if more_to_come_from_database or cut_off_due_to_limit:
                     response["prev_batch"] = RoomListNextBatch(
-                        last_joined_members=initial_entry.joined_members,
-                        last_room_id=initial_entry.room_id,
+                        last_joined_members=first_considered_room.joined_members,
+                        last_room_id=first_considered_room.room_id,
                         direction_is_forward=False,
                     ).to_token()
 
-        response["chunk"] = [build_room_entry(r) for r in results]
-
+        # We can't efficiently count the total number of rooms that are not
+        # blocked by ACLs, but this is just an estimate so that should be
+        # good enough.
         response["total_room_count_estimate"] = await self.store.count_public_rooms(
             network_tuple,
-            ignore_non_federatable=from_federation,
+            ignore_non_federatable=from_federation_origin is not None,
             search_filter=search_filter,
         )
author	reivilibre <oliverw@matrix.org>	2024-01-08 17:24:20 +0000
committer	GitHub <noreply@github.com>	2024-01-08 17:24:20 +0000
commit	a83a337c4dd908db82ed555e8340ee3b52f34e9e (patch)
tree	0df9de93a9ad38e44711ef2e87d8b713da154a0f /synapse/handlers
parent	Port `EventInternalMetadata` class to Rust (#16782) (diff)
download	synapse-a83a337c4dd908db82ed555e8340ee3b52f34e9e.tar.xz