summary refs log tree commit diff
diff options
context:
space:
mode:
authorreivilibre <oliverw@matrix.org>2024-01-08 17:24:20 +0000
committerGitHub <noreply@github.com>2024-01-08 17:24:20 +0000
commita83a337c4dd908db82ed555e8340ee3b52f34e9e (patch)
tree0df9de93a9ad38e44711ef2e87d8b713da154a0f
parentPort `EventInternalMetadata` class to Rust (#16782) (diff)
downloadsynapse-a83a337c4dd908db82ed555e8340ee3b52f34e9e.tar.xz
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. (#16759)
The idea here being that the directory server shouldn't advertise rooms
to a requesting server is the requesting server would not be allowed to
join or participate in the room.

<!--
Fixes: # <!-- -->
<!--
Supersedes: # <!-- -->
<!--
Follows: # <!-- -->
<!--
Part of: # <!-- -->
Base: `develop` <!-- git-stack-base-branch:develop -->

<!--
This pull request is commit-by-commit review friendly. <!-- -->
<!--
This pull request is intended for commit-by-commit review. <!-- -->

Original commit schedule, with full messages:

<ol>
<li>

Pass `from_federation_origin` down into room list retrieval code 

</li>
<li>

Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

fixup! Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

Cap the number of /publicRooms entries to 100 

</li>
<li>

Simplify code now that you can't request unlimited rooms 

</li>
<li>

Filter out rooms from federated requests that don't have the correct ACL

</li>
<li>

Request a handful more when filtering ACLs so that we can try to avoid
shortchanging the requester

</li>
</ol>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
-rw-r--r--changelog.d/16759.feature1
-rw-r--r--synapse/federation/transport/server/__init__.py7
-rw-r--r--synapse/handlers/room_list.py177
-rw-r--r--tests/handlers/test_room_list.py88
4 files changed, 221 insertions, 52 deletions
diff --git a/changelog.d/16759.feature b/changelog.d/16759.feature
new file mode 100644
index 0000000000..5846e5a9f0
--- /dev/null
+++ b/changelog.d/16759.feature
@@ -0,0 +1 @@
+Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.
\ No newline at end of file
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 839092e4d3..74391e3cb2 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -154,7 +154,10 @@ class PublicRoomList(BaseFederationServlet):
             limit = None
 
         data = await self.handler.get_local_public_room_list(
-            limit, since_token, network_tuple=network_tuple, from_federation=True
+            limit,
+            since_token,
+            network_tuple=network_tuple,
+            from_federation_origin=origin,
         )
         return 200, data
 
@@ -195,7 +198,7 @@ class PublicRoomList(BaseFederationServlet):
             since_token=since_token,
             search_filter=search_filter,
             network_tuple=network_tuple,
-            from_federation=True,
+            from_federation_origin=origin,
         )
 
         return 200, data
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index bfd205db49..5bad133f6b 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -19,7 +19,7 @@
 #
 
 import logging
-from typing import TYPE_CHECKING, Any, Optional, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple
 
 import attr
 import msgpack
@@ -54,6 +54,9 @@ REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
 # This is used to indicate we should only return rooms published to the main list.
 EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
 
+# Maximum number of local public rooms returned over the CS or SS API
+MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
+
 
 class RoomListHandler:
     def __init__(self, hs: "HomeServer"):
@@ -74,7 +77,7 @@ class RoomListHandler:
         since_token: Optional[str] = None,
         search_filter: Optional[dict] = None,
         network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
-        from_federation: bool = False,
+        from_federation_origin: Optional[str] = None,
     ) -> JsonDict:
         """Generate a local public room list.
 
@@ -89,7 +92,8 @@ class RoomListHandler:
                 This can be (None, None) to indicate the main list, or a particular
                 appservice and network id to use an appservice specific one.
                 Setting to None returns all public rooms across all lists.
-            from_federation: true iff the request comes from the federation API
+            from_federation_origin: the server name of the requester, or None
+                if the request is not from federation.
         """
         if not self.enable_room_list_search:
             return {"chunk": [], "total_room_count_estimate": 0}
@@ -102,36 +106,43 @@ class RoomListHandler:
             network_tuple,
         )
 
-        if search_filter:
+        capped_limit: int = (
+            MAX_PUBLIC_ROOMS_IN_RESPONSE
+            if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
+            else limit
+        )
+
+        if search_filter or from_federation_origin is not None:
             # We explicitly don't bother caching searches or requests for
             # appservice specific lists.
-            logger.info("Bypassing cache as search request.")
+            # We also don't bother caching requests from federated homeservers.
+            logger.debug("Bypassing cache as search or federation request.")
 
             return await self._get_public_room_list(
-                limit,
+                capped_limit,
                 since_token,
                 search_filter,
                 network_tuple=network_tuple,
-                from_federation=from_federation,
+                from_federation_origin=from_federation_origin,
             )
 
-        key = (limit, since_token, network_tuple)
+        key = (capped_limit, since_token, network_tuple)
         return await self.response_cache.wrap(
             key,
             self._get_public_room_list,
-            limit,
+            capped_limit,
             since_token,
             network_tuple=network_tuple,
-            from_federation=from_federation,
+            from_federation_origin=from_federation_origin,
         )
 
     async def _get_public_room_list(
         self,
-        limit: Optional[int] = None,
+        limit: int,
         since_token: Optional[str] = None,
         search_filter: Optional[dict] = None,
         network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
-        from_federation: bool = False,
+        from_federation_origin: Optional[str] = None,
     ) -> JsonDict:
         """Generate a public room list.
         Args:
@@ -142,8 +153,8 @@ class RoomListHandler:
                 This can be (None, None) to indicate the main list, or a particular
                 appservice and network id to use an appservice specific one.
                 Setting to None returns all public rooms across all lists.
-            from_federation: Whether this request originated from a
-                federating server or a client. Used for room filtering.
+            from_federation_origin: the server name of the requester, or None
+                if the request is not from federation.
         """
 
         # Pagination tokens work by storing the room ID sent in the last batch,
@@ -165,8 +176,16 @@ class RoomListHandler:
             forwards = True
             has_batch_token = False
 
-        # we request one more than wanted to see if there are more pages to come
-        probing_limit = limit + 1 if limit is not None else None
+        if from_federation_origin is None:
+            # Client-Server API:
+            # we request one more than wanted to see if there are more pages to come
+            probing_limit = limit + 1
+        else:
+            # Federation API:
+            # we request a handful more in case any get filtered out by ACLs
+            # as a best easy effort attempt to return the full number of entries
+            # specified by `limit`.
+            probing_limit = limit + 10
 
         results = await self.store.get_largest_public_rooms(
             network_tuple,
@@ -174,7 +193,7 @@ class RoomListHandler:
             probing_limit,
             bounds=bounds,
             forwards=forwards,
-            ignore_non_federatable=from_federation,
+            ignore_non_federatable=from_federation_origin is not None,
         )
 
         def build_room_entry(room: LargestRoomStats) -> JsonDict:
@@ -195,59 +214,117 @@ class RoomListHandler:
             # Filter out Nones – rather omit the field altogether
             return {k: v for k, v in entry.items() if v is not None}
 
-        response: JsonDict = {}
+        # Build a list of up to `limit` entries.
+        room_entries: List[JsonDict] = []
+        rooms_iterator = results if forwards else reversed(results)
+
+        # Track the first and last 'considered' rooms so that we can provide correct
+        # next_batch/prev_batch tokens.
+        # This is needed because the loop might finish early when
+        # `len(room_entries) >= limit` and we might be left with rooms we didn't
+        # 'consider' (iterate over) and we should save those rooms for the next
+        # batch.
+        first_considered_room: Optional[LargestRoomStats] = None
+        last_considered_room: Optional[LargestRoomStats] = None
+        cut_off_due_to_limit: bool = False
+
+        for room_result in rooms_iterator:
+            if len(room_entries) >= limit:
+                cut_off_due_to_limit = True
+                break
+
+            if first_considered_room is None:
+                first_considered_room = room_result
+            last_considered_room = room_result
+
+            if from_federation_origin is not None:
+                # If this is a federated request, apply server ACLs if the room has any set
+                acl_evaluator = (
+                    await self._storage_controllers.state.get_server_acl_for_room(
+                        room_result.room_id
+                    )
+                )
+
+                if acl_evaluator is not None:
+                    if not acl_evaluator.server_matches_acl_event(
+                        from_federation_origin
+                    ):
+                        # the requesting server is ACL blocked by the room,
+                        # don't show in directory
+                        continue
+
+            room_entries.append(build_room_entry(room_result))
+
+        if not forwards:
+            # If we are paginating backwards, we still return the chunk in
+            # biggest-first order, so reverse again.
+            room_entries.reverse()
+            # Swap the order of first/last considered rooms.
+            first_considered_room, last_considered_room = (
+                last_considered_room,
+                first_considered_room,
+            )
+
+        response: JsonDict = {
+            "chunk": room_entries,
+        }
         num_results = len(results)
-        if limit is not None:
-            more_to_come = num_results == probing_limit
 
-            # Depending on direction we trim either the front or back.
-            if forwards:
-                results = results[:limit]
+        more_to_come_from_database = num_results == probing_limit
+
+        if forwards and has_batch_token:
+            # If there was a token given then we assume that there
+            # must be previous results, even if there were no results in this batch.
+            if first_considered_room is not None:
+                response["prev_batch"] = RoomListNextBatch(
+                    last_joined_members=first_considered_room.joined_members,
+                    last_room_id=first_considered_room.room_id,
+                    direction_is_forward=False,
+                ).to_token()
             else:
-                results = results[-limit:]
-        else:
-            more_to_come = False
+                # If we didn't find any results this time,
+                # we don't have an actual room ID to put in the token.
+                # But since `first_considered_room` is None, we know that we have
+                # reached the end of the results.
+                # So we can use a token of (0, empty room ID) to paginate from the end
+                # next time.
+                response["prev_batch"] = RoomListNextBatch(
+                    last_joined_members=0,
+                    last_room_id="",
+                    direction_is_forward=False,
+                ).to_token()
 
         if num_results > 0:
-            final_entry = results[-1]
-            initial_entry = results[0]
-
+            assert first_considered_room is not None
+            assert last_considered_room is not None
             if forwards:
-                if has_batch_token:
-                    # If there was a token given then we assume that there
-                    # must be previous results.
-                    response["prev_batch"] = RoomListNextBatch(
-                        last_joined_members=initial_entry.joined_members,
-                        last_room_id=initial_entry.room_id,
-                        direction_is_forward=False,
-                    ).to_token()
-
-                if more_to_come:
+                if more_to_come_from_database or cut_off_due_to_limit:
                     response["next_batch"] = RoomListNextBatch(
-                        last_joined_members=final_entry.joined_members,
-                        last_room_id=final_entry.room_id,
+                        last_joined_members=last_considered_room.joined_members,
+                        last_room_id=last_considered_room.room_id,
                         direction_is_forward=True,
                     ).to_token()
-            else:
+            else:  # backwards
                 if has_batch_token:
                     response["next_batch"] = RoomListNextBatch(
-                        last_joined_members=final_entry.joined_members,
-                        last_room_id=final_entry.room_id,
+                        last_joined_members=last_considered_room.joined_members,
+                        last_room_id=last_considered_room.room_id,
                         direction_is_forward=True,
                     ).to_token()
 
-                if more_to_come:
+                if more_to_come_from_database or cut_off_due_to_limit:
                     response["prev_batch"] = RoomListNextBatch(
-                        last_joined_members=initial_entry.joined_members,
-                        last_room_id=initial_entry.room_id,
+                        last_joined_members=first_considered_room.joined_members,
+                        last_room_id=first_considered_room.room_id,
                         direction_is_forward=False,
                     ).to_token()
 
-        response["chunk"] = [build_room_entry(r) for r in results]
-
+        # We can't efficiently count the total number of rooms that are not
+        # blocked by ACLs, but this is just an estimate so that should be
+        # good enough.
         response["total_room_count_estimate"] = await self.store.count_public_rooms(
             network_tuple,
-            ignore_non_federatable=from_federation,
+            ignore_non_federatable=from_federation_origin is not None,
             search_filter=search_filter,
         )
 
diff --git a/tests/handlers/test_room_list.py b/tests/handlers/test_room_list.py
new file mode 100644
index 0000000000..4d22ef98c2
--- /dev/null
+++ b/tests/handlers/test_room_list.py
@@ -0,0 +1,88 @@
+from http import HTTPStatus
+from typing import Optional, Set
+
+from synapse.rest import admin
+from synapse.rest.client import directory, login, room
+from synapse.types import JsonDict
+
+from tests import unittest
+
+
+class RoomListHandlerTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        login.register_servlets,
+        room.register_servlets,
+        directory.register_servlets,
+    ]
+
+    def _create_published_room(
+        self, tok: str, extra_content: Optional[JsonDict] = None
+    ) -> str:
+        room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
+        channel = self.make_request(
+            "PUT",
+            f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
+            content={
+                "visibility": "public",
+            },
+        )
+        assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
+        return room_id
+
+    def test_acls_applied_to_room_directory_results(self) -> None:
+        """
+        Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
+        `test` and `test2` to access it.
+
+        We then simulate `test2` and `test3` requesting the room directory and assert
+        that `test3` does not see room 2, but `test2` sees all 3.
+        """
+        self.register_user("u1", "p1")
+        u1tok = self.login("u1", "p1")
+        room1 = self._create_published_room(u1tok)
+
+        room2 = self._create_published_room(
+            u1tok,
+            extra_content={
+                "initial_state": [
+                    {
+                        "type": "m.room.server_acl",
+                        "content": {
+                            "allow": ["test", "test2"],
+                        },
+                    }
+                ]
+            },
+        )
+
+        room3 = self._create_published_room(u1tok)
+
+        room_list = self.get_success(
+            self.hs.get_room_list_handler().get_local_public_room_list(
+                limit=50, from_federation_origin="test2"
+            )
+        )
+        room_ids_in_test2_list: Set[str] = {
+            entry["room_id"] for entry in room_list["chunk"]
+        }
+
+        room_list = self.get_success(
+            self.hs.get_room_list_handler().get_local_public_room_list(
+                limit=50, from_federation_origin="test3"
+            )
+        )
+        room_ids_in_test3_list: Set[str] = {
+            entry["room_id"] for entry in room_list["chunk"]
+        }
+
+        self.assertEqual(
+            room_ids_in_test2_list,
+            {room1, room2, room3},
+            "test2 should be able to see all 3 rooms",
+        )
+        self.assertEqual(
+            room_ids_in_test3_list,
+            {room1, room3},
+            "test3 should be able to see only 2 rooms",
+        )