summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/10981.bugfix1
-rw-r--r--synapse/storage/databases/main/user_directory.py33
-rw-r--r--tests/storage/test_user_directory.py94
3 files changed, 99 insertions, 29 deletions
diff --git a/changelog.d/10981.bugfix b/changelog.d/10981.bugfix
new file mode 100644
index 0000000000..d7bf660348
--- /dev/null
+++ b/changelog.d/10981.bugfix
@@ -0,0 +1 @@
+Fix a bug that could leak local users' per-room nicknames and avatars when the user directory is rebuilt.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 5f538947ec..5c713a732e 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -228,10 +228,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             is_in_room = await self.is_host_joined(room_id, self.server_name)
 
             if is_in_room:
-                is_public = await self.is_room_world_readable_or_publicly_joinable(
-                    room_id
-                )
-
                 users_with_profile = await self.get_users_in_room_with_profiles(room_id)
                 # Throw away users excluded from the directory.
                 users_with_profile = {
@@ -241,22 +237,33 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                     or await self.should_include_local_user_in_dir(user_id)
                 }
 
-                # Update each user in the user directory.
+                # Upsert a user_directory record for each remote user we see.
                 for user_id, profile in users_with_profile.items():
+                    # Local users are processed separately in
+                    # `_populate_user_directory_users`; there we can read from
+                    # the `profiles` table to ensure we don't leak their per-room
+                    # profiles. It also means we write local users to this table
+                    # exactly once, rather than once for every room they're in.
+                    if self.hs.is_mine_id(user_id):
+                        continue
+                    # TODO `users_with_profile` above reads from the `user_directory`
+                    #   table, meaning that `profile` is bespoke to this room.
+                    #   and this leaks remote users' per-room profiles to the user directory.
                     await self.update_profile_in_user_dir(
                         user_id, profile.display_name, profile.avatar_url
                     )
 
-                to_insert = set()
-
+                # Now update the room sharing tables to include this room.
+                is_public = await self.is_room_world_readable_or_publicly_joinable(
+                    room_id
+                )
                 if is_public:
-                    for user_id in users_with_profile:
-                        to_insert.add(user_id)
-
-                    if to_insert:
-                        await self.add_users_in_public_rooms(room_id, to_insert)
-                        to_insert.clear()
+                    if users_with_profile:
+                        await self.add_users_in_public_rooms(
+                            room_id, users_with_profile.keys()
+                        )
                 else:
+                    to_insert = set()
                     for user_id in users_with_profile:
                         # We want the set of pairs (L, M) where L and M are
                         # in `users_with_profile` and L is local.
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 6884ca9b7a..fddfb8db28 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -11,17 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict, List, Set, Tuple
+from typing import Any, Dict, List, Set, Tuple
+from unittest import mock
 from unittest.mock import Mock, patch
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.constants import UserTypes
+from synapse.api.constants import EventTypes, Membership, UserTypes
 from synapse.appservice import ApplicationService
 from synapse.rest import admin
 from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
 from synapse.storage import DataStore
+from synapse.storage.roommember import ProfileInfo
 from synapse.util import Clock
 
 from tests.test_utils.event_injection import inject_member_event
@@ -52,6 +54,11 @@ class GetUserDirectoryTables:
         return r
 
     async def get_users_in_public_rooms(self) -> List[Tuple[str, str]]:
+        """Fetch the entire `users_in_public_rooms` table.
+
+        Returns a list of tuples (user_id, room_id) where room_id is public and
+        contains the user with the given id.
+        """
         r = await self.store.db_pool.simple_select_list(
             "users_in_public_rooms", None, ("user_id", "room_id")
         )
@@ -62,6 +69,13 @@ class GetUserDirectoryTables:
         return retval
 
     async def get_users_who_share_private_rooms(self) -> List[Dict[str, str]]:
+        """Fetch the entire `users_who_share_private_rooms` table.
+
+        Returns a dict containing "user_id", "other_user_id" and "room_id" keys.
+        The dicts can be flattened to Tuples with the `_compress_shared` method.
+        (This seems a little awkward---maybe we could clean this up.)
+        """
+
         return await self.store.db_pool.simple_select_list(
             "users_who_share_private_rooms",
             None,
@@ -69,6 +83,10 @@ class GetUserDirectoryTables:
         )
 
     async def get_users_in_user_directory(self) -> Set[str]:
+        """Fetch the set of users in the `user_directory` table.
+
+        This is useful when checking we've correctly excluded users from the directory.
+        """
         result = await self.store.db_pool.simple_select_list(
             "user_directory",
             None,
@@ -76,6 +94,25 @@ class GetUserDirectoryTables:
         )
         return {row["user_id"] for row in result}
 
+    async def get_profiles_in_user_directory(self) -> Dict[str, ProfileInfo]:
+        """Fetch users and their profiles from the `user_directory` table.
+
+        This is useful when we want to inspect display names and avatars.
+        It's almost the entire contents of the `user_directory` table: the only
+        thing missing is an unused room_id column.
+        """
+        rows = await self.store.db_pool.simple_select_list(
+            "user_directory",
+            None,
+            ("user_id", "display_name", "avatar_url"),
+        )
+        return {
+            row["user_id"]: ProfileInfo(
+                display_name=row["display_name"], avatar_url=row["avatar_url"]
+            )
+            for row in rows
+        }
+
 
 class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
     """Ensure that rebuilding the directory writes the correct data to the DB.
@@ -201,20 +238,6 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token)
         self.helper.join(private_room, user=u3, tok=u3_token)
 
-        self.get_success(self.store.update_user_directory_stream_pos(None))
-        self.get_success(self.store.delete_all_from_user_dir())
-
-        shares_private = self.get_success(
-            self.user_dir_helper.get_users_who_share_private_rooms()
-        )
-        public_users = self.get_success(
-            self.user_dir_helper.get_users_in_public_rooms()
-        )
-
-        # Nothing updated yet
-        self.assertEqual(shares_private, [])
-        self.assertEqual(public_users, [])
-
         # Do the initial population of the user directory via the background update
         self._purge_and_rebuild_user_dir()
 
@@ -346,6 +369,45 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         # Check the AS user is not in the directory.
         self._check_room_sharing_tables(user, public, private)
 
+    def test_population_conceals_private_nickname(self) -> None:
+        # Make a private room, and set a nickname within
+        user = self.register_user("aaaa", "pass")
+        user_token = self.login(user, "pass")
+        private_room = self.helper.create_room_as(user, is_public=False, tok=user_token)
+        self.helper.send_state(
+            private_room,
+            EventTypes.Member,
+            state_key=user,
+            body={"membership": Membership.JOIN, "displayname": "BBBB"},
+            tok=user_token,
+        )
+
+        # Rebuild the user directory. Make the rescan of the `users` table a no-op
+        # so we only see the effect of scanning the `room_memberships` table.
+        async def mocked_process_users(*args: Any, **kwargs: Any) -> int:
+            await self.store.db_pool.updates._end_background_update(
+                "populate_user_directory_process_users"
+            )
+            return 1
+
+        with mock.patch.dict(
+            self.store.db_pool.updates._background_update_handlers,
+            populate_user_directory_process_users=mocked_process_users,
+        ):
+            self._purge_and_rebuild_user_dir()
+
+        # Local users are ignored by the scan over rooms
+        users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
+        self.assertEqual(users, {})
+
+        # Do a full rebuild including the scan over the `users` table. The local
+        # user should appear with their profile name.
+        self._purge_and_rebuild_user_dir()
+        users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
+        self.assertEqual(
+            users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)}
+        )
+
 
 class UserDirectoryStoreTestCase(HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: