summary refs log tree commit diff
diff options
context:
space:
mode:
authorDavid Robertson <davidr@element.io>2021-09-10 10:54:38 +0100
committerGitHub <noreply@github.com>2021-09-10 10:54:38 +0100
commit318162f5debc595d3381337fe363fa7936cc7843 (patch)
tree2a6451443ea2e9ecd6fe8ff973ab202abe48b42e
parentRemove fixed and flakey tests from the sytest blacklist (#10788) (diff)
downloadsynapse-318162f5debc595d3381337fe363fa7936cc7843.tar.xz
Easy refactors of the user directory (#10789)
No functional changes here. This came out as I was working to tackle #5677 
-rw-r--r--changelog.d/10789.misc1
-rw-r--r--docs/user_directory.md37
-rw-r--r--synapse/handlers/deactivate_account.py2
-rw-r--r--synapse/handlers/state_deltas.py23
-rw-r--r--synapse/handlers/user_directory.py46
-rw-r--r--synapse/storage/databases/main/roommember.py5
-rw-r--r--synapse/storage/databases/main/user_directory.py7
-rw-r--r--tests/handlers/test_user_directory.py6
8 files changed, 90 insertions, 37 deletions
diff --git a/changelog.d/10789.misc b/changelog.d/10789.misc
new file mode 100644
index 0000000000..8a0b54e32a
--- /dev/null
+++ b/changelog.d/10789.misc
@@ -0,0 +1 @@
+Improve internal details of the user directory code.
\ No newline at end of file
diff --git a/docs/user_directory.md b/docs/user_directory.md
index d4f38d2cf1..07fe954891 100644
--- a/docs/user_directory.md
+++ b/docs/user_directory.md
@@ -10,3 +10,40 @@ DB corruption) get stale or out of sync.  If this happens, for now the
 solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql)
 and then restart synapse. This should then start a background task to
 flush the current tables and regenerate the directory.
+
+Data model
+----------
+
+There are five relevant tables that collectively form the "user directory".
+Three of them track a master list of all the users we could search for.
+The last two (collectively called the "search tables") track who can
+see who.
+
+From all of these tables we exclude three types of local user:
+  - support users
+  - appservice users
+  - deactivated users
+
+* `user_directory`. This contains the user_id, display name and avatar we'll
+  return when you search the directory.
+  - Because there's only one directory entry per user, it's important that we only
+    ever put publicly visible names here. Otherwise we might leak a private
+    nickname or avatar used in a private room.
+  - Indexed on rooms. Indexed on users.
+
+* `user_directory_search`. To be joined to `user_directory`. It contains an extra
+  column that enables full text search based on user ids and display names.
+  Different schemas for SQLite and Postgres with different code paths to match.
+  - Indexed on the full text search data. Indexed on users.
+
+* `user_directory_stream_pos`. When the initial background update to populate
+  the directory is complete, we record a stream position here. This indicates
+  that synapse should now listen for room changes and incrementally update
+  the directory where necessary.
+
+* `users_in_public_rooms`. Contains associations between users and the public rooms they're in.
+  Used to determine which users are in public rooms and should be publicly visible in the directory.
+
+* `users_who_share_private_rooms`. Rows are triples `(L, M, room id)` where `L`
+   is a local user and `M` is a local or remote user. `L` and `M` should be
+   different, but this isn't enforced by a constraint.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 45d2404dde..ab22d76359 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -131,7 +131,7 @@ class DeactivateAccountHandler(BaseHandler):
         await self.store.add_user_pending_deactivation(user_id)
 
         # delete from user directory
-        await self.user_directory_handler.handle_user_deactivated(user_id)
+        await self.user_directory_handler.handle_local_user_deactivated(user_id)
 
         # Mark the user as erased, if they asked for that
         if erase_data:
diff --git a/synapse/handlers/state_deltas.py b/synapse/handlers/state_deltas.py
index 077c7c0649..d30ba2b724 100644
--- a/synapse/handlers/state_deltas.py
+++ b/synapse/handlers/state_deltas.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+from enum import Enum, auto
 from typing import TYPE_CHECKING, Optional
 
 if TYPE_CHECKING:
@@ -21,6 +22,12 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+class MatchChange(Enum):
+    no_change = auto()
+    now_true = auto()
+    now_false = auto()
+
+
 class StateDeltasHandler:
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastore()
@@ -31,18 +38,12 @@ class StateDeltasHandler:
         event_id: Optional[str],
         key_name: str,
         public_value: str,
-    ) -> Optional[bool]:
+    ) -> MatchChange:
         """Given two events check if the `key_name` field in content changed
         from not matching `public_value` to doing so.
 
         For example, check if `history_visibility` (`key_name`) changed from
         `shared` to `world_readable` (`public_value`).
-
-        Returns:
-            None if the field in the events either both match `public_value`
-            or if neither do, i.e. there has been no change.
-            True if it didn't match `public_value` but now does
-            False if it did match `public_value` but now doesn't
         """
         prev_event = None
         event = None
@@ -54,7 +55,7 @@ class StateDeltasHandler:
 
         if not event and not prev_event:
             logger.debug("Neither event exists: %r %r", prev_event_id, event_id)
-            return None
+            return MatchChange.no_change
 
         prev_value = None
         value = None
@@ -68,8 +69,8 @@ class StateDeltasHandler:
         logger.debug("prev_value: %r -> value: %r", prev_value, value)
 
         if value == public_value and prev_value != public_value:
-            return True
+            return MatchChange.now_true
         elif value != public_value and prev_value == public_value:
-            return False
+            return MatchChange.now_false
         else:
-            return None
+            return MatchChange.no_change
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 6edb1da50a..6faa1d84be 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import synapse.metrics
 from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership
-from synapse.handlers.state_deltas import StateDeltasHandler
+from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.roommember import ProfileInfo
 from synapse.types import JsonDict
@@ -30,14 +30,26 @@ logger = logging.getLogger(__name__)
 
 
 class UserDirectoryHandler(StateDeltasHandler):
-    """Handles querying of and keeping updated the user_directory.
+    """Handles queries and updates for the user_directory.
 
     N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY
 
-    The user directory is filled with users who this server can see are joined to a
-    world_readable or publicly joinable room. We keep a database table up to date
-    by streaming changes of the current state and recalculating whether users should
-    be in the directory or not when necessary.
+    When a local user searches the user_directory, we report two kinds of users:
+
+    - users this server can see are joined to a world_readable or publicly
+      joinable room, and
+    - users belonging to a private room shared by that local user.
+
+    The two cases are tracked separately in the `users_in_public_rooms` and
+    `users_who_share_private_rooms` tables. Both kinds of users have their
+    username and avatar tracked in a `user_directory` table.
+
+    This handler has three responsibilities:
+    1. Forwarding requests to `/user_directory/search` to the UserDirectoryStore.
+    2. Providing hooks for the application to call when local users are added,
+       removed, or have their profile changed.
+    3. Listening for room state changes that indicate remote users have
+       joined or left a room, or that their profile has changed.
     """
 
     def __init__(self, hs: "HomeServer"):
@@ -130,7 +142,7 @@ class UserDirectoryHandler(StateDeltasHandler):
                 user_id, profile.display_name, profile.avatar_url
             )
 
-    async def handle_user_deactivated(self, user_id: str) -> None:
+    async def handle_local_user_deactivated(self, user_id: str) -> None:
         """Called when a user ID is deactivated"""
         # FIXME(#3714): We should probably do this in the same worker as all
         # the other changes.
@@ -196,7 +208,7 @@ class UserDirectoryHandler(StateDeltasHandler):
                     public_value=Membership.JOIN,
                 )
 
-                if change is False:
+                if change is MatchChange.now_false:
                     # Need to check if the server left the room entirely, if so
                     # we might need to remove all the users in that room
                     is_in_room = await self.store.is_host_joined(
@@ -219,14 +231,14 @@ class UserDirectoryHandler(StateDeltasHandler):
 
                 is_support = await self.store.is_support_user(state_key)
                 if not is_support:
-                    if change is None:
+                    if change is MatchChange.no_change:
                         # Handle any profile changes
                         await self._handle_profile_change(
                             state_key, room_id, prev_event_id, event_id
                         )
                         continue
 
-                    if change:  # The user joined
+                    if change is MatchChange.now_true:  # The user joined
                         event = await self.store.get_event(event_id, allow_none=True)
                         # It isn't expected for this event to not exist, but we
                         # don't want the entire background process to break.
@@ -263,14 +275,14 @@ class UserDirectoryHandler(StateDeltasHandler):
         logger.debug("Handling change for %s: %s", typ, room_id)
 
         if typ == EventTypes.RoomHistoryVisibility:
-            change = await self._get_key_change(
+            publicness = await self._get_key_change(
                 prev_event_id,
                 event_id,
                 key_name="history_visibility",
                 public_value=HistoryVisibility.WORLD_READABLE,
             )
         elif typ == EventTypes.JoinRules:
-            change = await self._get_key_change(
+            publicness = await self._get_key_change(
                 prev_event_id,
                 event_id,
                 key_name="join_rule",
@@ -278,9 +290,7 @@ class UserDirectoryHandler(StateDeltasHandler):
             )
         else:
             raise Exception("Invalid event type")
-        # If change is None, no change. True => become world_readable/public,
-        # False => was world_readable/public
-        if change is None:
+        if publicness is MatchChange.no_change:
             logger.debug("No change")
             return
 
@@ -290,13 +300,13 @@ class UserDirectoryHandler(StateDeltasHandler):
             room_id
         )
 
-        logger.debug("Change: %r, is_public: %r", change, is_public)
+        logger.debug("Change: %r, publicness: %r", publicness, is_public)
 
-        if change and not is_public:
+        if publicness is MatchChange.now_true and not is_public:
             # If we became world readable but room isn't currently public then
             # we ignore the change
             return
-        elif not change and is_public:
+        elif publicness is MatchChange.now_false and is_public:
             # If we stopped being world readable but are still public,
             # ignore the change
             return
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index c58a4b8690..64c18c6f86 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -196,6 +196,11 @@ class RoomMemberWorkerStore(EventsWorkerStore):
     ) -> Dict[str, ProfileInfo]:
         """Get a mapping from user ID to profile information for all users in a given room.
 
+        The profile information comes directly from this room's `m.room.member`
+        events, and so may be specific to this room rather than part of a user's
+        global profile. To avoid privacy leaks, the profile data should only be
+        revealed to users who are already in this room.
+
         Args:
             room_id: The ID of the room to retrieve the users of.
 
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 65dde67ae9..16d9824ec1 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -196,7 +196,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 )
 
                 users_with_profile = await self.get_users_in_room_with_profiles(room_id)
-                user_ids = set(users_with_profile)
 
                 # Update each user in the user directory.
                 for user_id, profile in users_with_profile.items():
@@ -207,7 +206,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 to_insert = set()
 
                 if is_public:
-                    for user_id in user_ids:
+                    for user_id in users_with_profile:
                         if self.get_if_app_services_interested_in_user(user_id):
                             continue
 
@@ -217,14 +216,14 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                         await self.add_users_in_public_rooms(room_id, to_insert)
                         to_insert.clear()
                 else:
-                    for user_id in user_ids:
+                    for user_id in users_with_profile:
                         if not self.hs.is_mine_id(user_id):
                             continue
 
                         if self.get_if_app_services_interested_in_user(user_id):
                             continue
 
-                        for other_user_id in user_ids:
+                        for other_user_id in users_with_profile:
                             if user_id == other_user_id:
                                 continue
 
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index a91d31ce61..ae88ed89aa 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -94,7 +94,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
 
         # deactivate user
         self.get_success(self.store.set_user_deactivated_status(r_user_id, True))
-        self.get_success(self.handler.handle_user_deactivated(r_user_id))
+        self.get_success(self.handler.handle_local_user_deactivated(r_user_id))
 
         # profile is not in directory
         profile = self.get_success(self.store.get_user_in_directory(r_user_id))
@@ -118,7 +118,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         )
 
         self.store.remove_from_user_dir = Mock(return_value=defer.succeed(None))
-        self.get_success(self.handler.handle_user_deactivated(s_user_id))
+        self.get_success(self.handler.handle_local_user_deactivated(s_user_id))
         self.store.remove_from_user_dir.not_called()
 
     def test_handle_user_deactivated_regular_user(self):
@@ -127,7 +127,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
             self.store.register_user(user_id=r_user_id, password_hash=None)
         )
         self.store.remove_from_user_dir = Mock(return_value=defer.succeed(None))
-        self.get_success(self.handler.handle_user_deactivated(r_user_id))
+        self.get_success(self.handler.handle_local_user_deactivated(r_user_id))
         self.store.remove_from_user_dir.called_once_with(r_user_id)
 
     def test_private_room(self):