summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2020-02-14 07:17:54 -0500
committerGitHub <noreply@github.com>2020-02-14 07:17:54 -0500
commit49f877d32efc79cb40b2766cb052cf35bad31de5 (patch)
tree5c8f2b67c64f4852200ecd26f49c412698b08f7d
parentUpdate changelog for #6905 to group it with upcoming PRs (diff)
downloadsynapse-49f877d32efc79cb40b2766cb052cf35bad31de5.tar.xz
Filter the results of user directory searching via the spam checker (#6888)
Add a method to the spam checker to filter the user directory results.
-rw-r--r--changelog.d/6888.feature1
-rw-r--r--docs/spam_checker.md3
-rw-r--r--synapse/events/spamcheck.py27
-rw-r--r--synapse/handlers/user_directory.py14
-rw-r--r--tests/handlers/test_user_directory.py92
5 files changed, 135 insertions, 2 deletions
diff --git a/changelog.d/6888.feature b/changelog.d/6888.feature
new file mode 100644
index 0000000000..1b7ac0c823
--- /dev/null
+++ b/changelog.d/6888.feature
@@ -0,0 +1 @@
+The result of a user directory search can now be filtered via the spam checker.
diff --git a/docs/spam_checker.md b/docs/spam_checker.md
index 97ff17f952..5b5f5000b7 100644
--- a/docs/spam_checker.md
+++ b/docs/spam_checker.md
@@ -54,6 +54,9 @@ class ExampleSpamChecker:
 
     def user_may_publish_room(self, userid, room_id):
         return True  # allow publishing of all rooms
+
+    def check_username_for_spam(self, user_profile):
+        return False  # allow all usernames
 ```
 
 ## Configuration
diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py
index 5a907718d6..0a13fca9a4 100644
--- a/synapse/events/spamcheck.py
+++ b/synapse/events/spamcheck.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 import inspect
+from typing import Dict
 
 from synapse.spam_checker_api import SpamCheckerApi
 
@@ -125,3 +126,29 @@ class SpamChecker(object):
             return True
 
         return self.spam_checker.user_may_publish_room(userid, room_id)
+
+    def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool:
+        """Checks if a user ID or display name are considered "spammy" by this server.
+
+        If the server considers a username spammy, then it will not be included in
+        user directory results.
+
+        Args:
+            user_profile: The user information to check, it contains the keys:
+                * user_id
+                * display_name
+                * avatar_url
+
+        Returns:
+            True if the user is spammy.
+        """
+        if self.spam_checker is None:
+            return False
+
+        # For backwards compatibility, if the method does not exist on the spam checker, fallback to not interfering.
+        checker = getattr(self.spam_checker, "check_username_for_spam", None)
+        if not checker:
+            return False
+        # Make a copy of the user profile object to ensure the spam checker
+        # cannot modify it.
+        return checker(user_profile.copy())
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 81aa58dc8c..722760c59d 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -52,6 +52,7 @@ class UserDirectoryHandler(StateDeltasHandler):
         self.is_mine_id = hs.is_mine_id
         self.update_user_directory = hs.config.update_user_directory
         self.search_all_users = hs.config.user_directory_search_all_users
+        self.spam_checker = hs.get_spam_checker()
         # The current position in the current_state_delta stream
         self.pos = None
 
@@ -65,7 +66,7 @@ class UserDirectoryHandler(StateDeltasHandler):
             # we start populating the user directory
             self.clock.call_later(0, self.notify_new_event)
 
-    def search_users(self, user_id, search_term, limit):
+    async def search_users(self, user_id, search_term, limit):
         """Searches for users in directory
 
         Returns:
@@ -82,7 +83,16 @@ class UserDirectoryHandler(StateDeltasHandler):
                     ]
                 }
         """
-        return self.store.search_user_dir(user_id, search_term, limit)
+        results = await self.store.search_user_dir(user_id, search_term, limit)
+
+        # Remove any spammy users from the results.
+        results["results"] = [
+            user
+            for user in results["results"]
+            if not self.spam_checker.check_username_for_spam(user)
+        ]
+
+        return results
 
     def notify_new_event(self):
         """Called when there may be more deltas to process
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 26071059d2..0a4765fff4 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -147,6 +147,98 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         s = self.get_success(self.handler.search_users(u1, "user3", 10))
         self.assertEqual(len(s["results"]), 0)
 
+    def test_spam_checker(self):
+        """
+        A user which fails to the spam checks will not appear in search results.
+        """
+        u1 = self.register_user("user1", "pass")
+        u1_token = self.login(u1, "pass")
+        u2 = self.register_user("user2", "pass")
+        u2_token = self.login(u2, "pass")
+
+        # We do not add users to the directory until they join a room.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 0)
+
+        room = self.helper.create_room_as(u1, is_public=False, tok=u1_token)
+        self.helper.invite(room, src=u1, targ=u2, tok=u1_token)
+        self.helper.join(room, user=u2, tok=u2_token)
+
+        # Check we have populated the database correctly.
+        shares_private = self.get_users_who_share_private_rooms()
+        public_users = self.get_users_in_public_rooms()
+
+        self.assertEqual(
+            self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)])
+        )
+        self.assertEqual(public_users, [])
+
+        # We get one search result when searching for user2 by user1.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 1)
+
+        # Configure a spam checker that does not filter any users.
+        spam_checker = self.hs.get_spam_checker()
+
+        class AllowAll(object):
+            def check_username_for_spam(self, user_profile):
+                # Allow all users.
+                return False
+
+        spam_checker.spam_checker = AllowAll()
+
+        # The results do not change:
+        # We get one search result when searching for user2 by user1.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 1)
+
+        # Configure a spam checker that filters all users.
+        class BlockAll(object):
+            def check_username_for_spam(self, user_profile):
+                # All users are spammy.
+                return True
+
+        spam_checker.spam_checker = BlockAll()
+
+        # User1 now gets no search results for any of the other users.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 0)
+
+    def test_legacy_spam_checker(self):
+        """
+        A spam checker without the expected method should be ignored.
+        """
+        u1 = self.register_user("user1", "pass")
+        u1_token = self.login(u1, "pass")
+        u2 = self.register_user("user2", "pass")
+        u2_token = self.login(u2, "pass")
+
+        # We do not add users to the directory until they join a room.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 0)
+
+        room = self.helper.create_room_as(u1, is_public=False, tok=u1_token)
+        self.helper.invite(room, src=u1, targ=u2, tok=u1_token)
+        self.helper.join(room, user=u2, tok=u2_token)
+
+        # Check we have populated the database correctly.
+        shares_private = self.get_users_who_share_private_rooms()
+        public_users = self.get_users_in_public_rooms()
+
+        self.assertEqual(
+            self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)])
+        )
+        self.assertEqual(public_users, [])
+
+        # Configure a spam checker.
+        spam_checker = self.hs.get_spam_checker()
+        # The spam checker doesn't need any methods, so create a bare object.
+        spam_checker.spam_checker = object()
+
+        # We get one search result when searching for user2 by user1.
+        s = self.get_success(self.handler.search_users(u1, "user2", 10))
+        self.assertEqual(len(s["results"]), 1)
+
     def _compress_shared(self, shared):
         """
         Compress a list of users who share rooms dicts to a list of tuples.