From 49f877d32efc79cb40b2766cb052cf35bad31de5 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 14 Feb 2020 07:17:54 -0500 Subject: Filter the results of user directory searching via the spam checker (#6888) Add a method to the spam checker to filter the user directory results. --- changelog.d/6888.feature | 1 + docs/spam_checker.md | 3 ++ synapse/events/spamcheck.py | 27 ++++++++++ synapse/handlers/user_directory.py | 14 +++++- tests/handlers/test_user_directory.py | 92 +++++++++++++++++++++++++++++++++++ 5 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6888.feature diff --git a/changelog.d/6888.feature b/changelog.d/6888.feature new file mode 100644 index 0000000000..1b7ac0c823 --- /dev/null +++ b/changelog.d/6888.feature @@ -0,0 +1 @@ +The result of a user directory search can now be filtered via the spam checker. diff --git a/docs/spam_checker.md b/docs/spam_checker.md index 97ff17f952..5b5f5000b7 100644 --- a/docs/spam_checker.md +++ b/docs/spam_checker.md @@ -54,6 +54,9 @@ class ExampleSpamChecker: def user_may_publish_room(self, userid, room_id): return True # allow publishing of all rooms + + def check_username_for_spam(self, user_profile): + return False # allow all usernames ``` ## Configuration diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 5a907718d6..0a13fca9a4 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -15,6 +15,7 @@ # limitations under the License. import inspect +from typing import Dict from synapse.spam_checker_api import SpamCheckerApi @@ -125,3 +126,29 @@ class SpamChecker(object): return True return self.spam_checker.user_may_publish_room(userid, room_id) + + def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool: + """Checks if a user ID or display name are considered "spammy" by this server. + + If the server considers a username spammy, then it will not be included in + user directory results. + + Args: + user_profile: The user information to check, it contains the keys: + * user_id + * display_name + * avatar_url + + Returns: + True if the user is spammy. + """ + if self.spam_checker is None: + return False + + # For backwards compatibility, if the method does not exist on the spam checker, fallback to not interfering. + checker = getattr(self.spam_checker, "check_username_for_spam", None) + if not checker: + return False + # Make a copy of the user profile object to ensure the spam checker + # cannot modify it. + return checker(user_profile.copy()) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 81aa58dc8c..722760c59d 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -52,6 +52,7 @@ class UserDirectoryHandler(StateDeltasHandler): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users + self.spam_checker = hs.get_spam_checker() # The current position in the current_state_delta stream self.pos = None @@ -65,7 +66,7 @@ class UserDirectoryHandler(StateDeltasHandler): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - def search_users(self, user_id, search_term, limit): + async def search_users(self, user_id, search_term, limit): """Searches for users in directory Returns: @@ -82,7 +83,16 @@ class UserDirectoryHandler(StateDeltasHandler): ] } """ - return self.store.search_user_dir(user_id, search_term, limit) + results = await self.store.search_user_dir(user_id, search_term, limit) + + # Remove any spammy users from the results. + results["results"] = [ + user + for user in results["results"] + if not self.spam_checker.check_username_for_spam(user) + ] + + return results def notify_new_event(self): """Called when there may be more deltas to process diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 26071059d2..0a4765fff4 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -147,6 +147,98 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): s = self.get_success(self.handler.search_users(u1, "user3", 10)) self.assertEqual(len(s["results"]), 0) + def test_spam_checker(self): + """ + A user which fails to the spam checks will not appear in search results. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + self.assertEqual(public_users, []) + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # Configure a spam checker that does not filter any users. + spam_checker = self.hs.get_spam_checker() + + class AllowAll(object): + def check_username_for_spam(self, user_profile): + # Allow all users. + return False + + spam_checker.spam_checker = AllowAll() + + # The results do not change: + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # Configure a spam checker that filters all users. + class BlockAll(object): + def check_username_for_spam(self, user_profile): + # All users are spammy. + return True + + spam_checker.spam_checker = BlockAll() + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + def test_legacy_spam_checker(self): + """ + A spam checker without the expected method should be ignored. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + self.assertEqual(public_users, []) + + # Configure a spam checker. + spam_checker = self.hs.get_spam_checker() + # The spam checker doesn't need any methods, so create a bare object. + spam_checker.spam_checker = object() + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + def _compress_shared(self, shared): """ Compress a list of users who share rooms dicts to a list of tuples. -- cgit 1.4.1