summary refs log tree commit diff
diff options
context:
space:
mode:
authorAndrew Morgan <1342360+anoadragon453@users.noreply.github.com>2021-02-19 11:02:03 +0000
committerGitHub <noreply@github.com>2021-02-19 11:02:03 +0000
commit13e9029f447e724016b904a1c847de2ad4736d66 (patch)
treec7badc51e4998079c0261499efc84c9f0c6e078a
parentAdd configs to make profile data more private (#9203) (diff)
downloadsynapse-13e9029f447e724016b904a1c847de2ad4736d66.tar.xz
Add a config option to prioritise local users in user directory search results (#9383)
This PR adds a homeserver config option, `user_directory.prefer_local_users`, that when enabled will show local users higher in user directory search results than remote users. This option is off by default.

Note that turning this on doesn't necessarily mean that remote users will always be put below local users, but they should be assuming all other ranking factors (search query match, profile information present etc) are identical.

This is useful for, say, University networks that are openly federating, but want to prioritise local students and staff in the user directory over other random users.
-rw-r--r--changelog.d/9383.feature1
-rw-r--r--docs/sample_config.yaml5
-rw-r--r--synapse/config/user_directory.py9
-rw-r--r--synapse/storage/databases/main/user_directory.py59
-rw-r--r--tests/handlers/test_user_directory.py94
5 files changed, 159 insertions, 9 deletions
diff --git a/changelog.d/9383.feature b/changelog.d/9383.feature
new file mode 100644
index 0000000000..8957c9cc5e
--- /dev/null
+++ b/changelog.d/9383.feature
@@ -0,0 +1 @@
+Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users.
\ No newline at end of file
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index e5615f961b..c660c62620 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -2554,9 +2554,14 @@ spam_checker:
 # rebuild the user_directory search indexes, see
 # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
 #
+# 'prefer_local_users' defines whether to prioritise local users in
+# search query results. If True, local users are more likely to appear above
+# remote users when searching the user directory. Defaults to false.
+#
 #user_directory:
 #  enabled: true
 #  search_all_users: false
+#  prefer_local_users: false
 
 
 # User Consent configuration
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index c8d19c5d6b..89dbebd148 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -26,6 +26,7 @@ class UserDirectoryConfig(Config):
     def read_config(self, config, **kwargs):
         self.user_directory_search_enabled = True
         self.user_directory_search_all_users = False
+        self.user_directory_search_prefer_local_users = False
         user_directory_config = config.get("user_directory", None)
         if user_directory_config:
             self.user_directory_search_enabled = user_directory_config.get(
@@ -34,6 +35,9 @@ class UserDirectoryConfig(Config):
             self.user_directory_search_all_users = user_directory_config.get(
                 "search_all_users", False
             )
+            self.user_directory_search_prefer_local_users = user_directory_config.get(
+                "prefer_local_users", False
+            )
 
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """
@@ -49,7 +53,12 @@ class UserDirectoryConfig(Config):
         # rebuild the user_directory search indexes, see
         # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
         #
+        # 'prefer_local_users' defines whether to prioritise local users in
+        # search query results. If True, local users are more likely to appear above
+        # remote users when searching the user directory. Defaults to false.
+        #
         #user_directory:
         #  enabled: true
         #  search_all_users: false
+        #  prefer_local_users: false
         """
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 63f88eac51..02ee15676c 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -556,6 +556,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
 
+        self._prefer_local_users_in_search = (
+            hs.config.user_directory_search_prefer_local_users
+        )
+        self._server_name = hs.config.server_name
+
     async def remove_from_user_dir(self, user_id: str) -> None:
         def _remove_from_user_dir_txn(txn):
             self.db_pool.simple_delete_txn(
@@ -754,9 +759,24 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 )
             """
 
+        # We allow manipulating the ranking algorithm by injecting statements
+        # based on config options.
+        additional_ordering_statements = []
+        ordering_arguments = ()
+
         if isinstance(self.database_engine, PostgresEngine):
             full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
 
+            # If enabled, this config option will rank local users higher than those on
+            # remote instances.
+            if self._prefer_local_users_in_search:
+                # This statement checks whether a given user's user ID contains a server name
+                # that matches the local server
+                statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
+                additional_ordering_statements.append(statement)
+
+                ordering_arguments += ("%:" + self._server_name,)
+
             # We order by rank and then if they have profile info
             # The ranking algorithm is hand tweaked for "best" results. Broadly
             # the idea is we give a higher weight to exact matches.
@@ -767,7 +787,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 FROM user_directory_search as t
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
-                    %s
+                    %(where_clause)s
                     AND vector @@ to_tsquery('simple', ?)
                 ORDER BY
                     (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
@@ -787,33 +807,54 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                             8
                         )
                     )
+                    %(order_case_statements)s
                     DESC,
                     display_name IS NULL,
                     avatar_url IS NULL
                 LIMIT ?
-            """ % (
-                where_clause,
+            """ % {
+                "where_clause": where_clause,
+                "order_case_statements": " ".join(additional_ordering_statements),
+            }
+            args = (
+                join_args
+                + (full_query, exact_query, prefix_query)
+                + ordering_arguments
+                + (limit + 1,)
             )
-            args = join_args + (full_query, exact_query, prefix_query, limit + 1)
         elif isinstance(self.database_engine, Sqlite3Engine):
             search_query = _parse_query_sqlite(search_term)
 
+            # If enabled, this config option will rank local users higher than those on
+            # remote instances.
+            if self._prefer_local_users_in_search:
+                # This statement checks whether a given user's user ID contains a server name
+                # that matches the local server
+                #
+                # Note that we need to include a comma at the end for valid SQL
+                statement = "user_id LIKE ? DESC,"
+                additional_ordering_statements.append(statement)
+
+                ordering_arguments += ("%:" + self._server_name,)
+
             sql = """
                 SELECT d.user_id AS user_id, display_name, avatar_url
                 FROM user_directory_search as t
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
-                    %s
+                    %(where_clause)s
                     AND value MATCH ?
                 ORDER BY
                     rank(matchinfo(user_directory_search)) DESC,
+                    %(order_statements)s
                     display_name IS NULL,
                     avatar_url IS NULL
                 LIMIT ?
-            """ % (
-                where_clause,
-            )
-            args = join_args + (search_query, limit + 1)
+            """ % {
+                "where_clause": where_clause,
+                "order_statements": " ".join(additional_ordering_statements),
+            }
+            args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 3572e54c5d..8dddc12204 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -18,6 +18,7 @@ from twisted.internet import defer
 
 import synapse.rest.admin
 from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes
+from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.rest.client.v1 import login, room
 from synapse.rest.client.v2_alpha import user_directory
 from synapse.storage.roommember import ProfileInfo
@@ -46,6 +47,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
     def prepare(self, reactor, clock, hs):
         self.store = hs.get_datastore()
         self.handler = hs.get_user_directory_handler()
+        self.event_builder_factory = self.hs.get_event_builder_factory()
+        self.event_creation_handler = self.hs.get_event_creation_handler()
 
     def test_handle_local_profile_change_with_support_user(self):
         support_user_id = "@support:test"
@@ -547,6 +550,97 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         s = self.get_success(self.handler.search_users(u1, u4, 10))
         self.assertEqual(len(s["results"]), 1)
 
+    @override_config(
+        {
+            "user_directory": {
+                "enabled": True,
+                "search_all_users": True,
+                "prefer_local_users": True,
+            }
+        }
+    )
+    def test_prefer_local_users(self):
+        """Tests that local users are shown higher in search results when
+        user_directory.prefer_local_users is True.
+        """
+        # Create a room and few users to test the directory with
+        searching_user = self.register_user("searcher", "password")
+        searching_user_tok = self.login("searcher", "password")
+
+        room_id = self.helper.create_room_as(
+            searching_user,
+            room_version=RoomVersions.V1.identifier,
+            tok=searching_user_tok,
+        )
+
+        # Create a few local users and join them to the room
+        local_user_1 = self.register_user("user_xxxxx", "password")
+        local_user_2 = self.register_user("user_bbbbb", "password")
+        local_user_3 = self.register_user("user_zzzzz", "password")
+
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_1)
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_2)
+        self._add_user_to_room(room_id, RoomVersions.V1, local_user_3)
+
+        # Create a few "remote" users and join them to the room
+        remote_user_1 = "@user_aaaaa:remote_server"
+        remote_user_2 = "@user_yyyyy:remote_server"
+        remote_user_3 = "@user_ccccc:remote_server"
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1)
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2)
+        self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3)
+
+        local_users = [local_user_1, local_user_2, local_user_3]
+        remote_users = [remote_user_1, remote_user_2, remote_user_3]
+
+        # Populate the user directory via background update
+        self._add_background_updates()
+        while not self.get_success(
+            self.store.db_pool.updates.has_completed_background_updates()
+        ):
+            self.get_success(
+                self.store.db_pool.updates.do_next_background_update(100), by=0.1
+            )
+
+        # The local searching user searches for the term "user", which other users have
+        # in their user id
+        results = self.get_success(
+            self.handler.search_users(searching_user, "user", 20)
+        )["results"]
+        received_user_id_ordering = [result["user_id"] for result in results]
+
+        # Typically we'd expect Synapse to return users in lexicographical order,
+        # assuming they have similar User IDs/display names, and profile information.
+
+        # Check that the order of returned results using our module is as we expect,
+        # i.e our local users show up first, despite all users having lexographically mixed
+        # user IDs.
+        [self.assertIn(user, local_users) for user in received_user_id_ordering[:3]]
+        [self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]]
+
+    def _add_user_to_room(
+        self, room_id: str, room_version: RoomVersion, user_id: str,
+    ):
+        # Add a user to the room.
+        builder = self.event_builder_factory.for_room_version(
+            room_version,
+            {
+                "type": "m.room.member",
+                "sender": user_id,
+                "state_key": user_id,
+                "room_id": room_id,
+                "content": {"membership": "join"},
+            },
+        )
+
+        event, context = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder)
+        )
+
+        self.get_success(
+            self.hs.get_storage().persistence.persist_event(event, context)
+        )
+
 
 class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
     user_id = "@test:test"