From 361de49c90fd1f35adc4a6bca8206e50e7f15454 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 13 Feb 2020 07:40:57 -0500 Subject: Add documentation for the spam checker module (#6906) Add documentation for the spam checker. --- docs/spam_checker.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 docs/spam_checker.md (limited to 'docs/spam_checker.md') diff --git a/docs/spam_checker.md b/docs/spam_checker.md new file mode 100644 index 0000000000..97ff17f952 --- /dev/null +++ b/docs/spam_checker.md @@ -0,0 +1,85 @@ +# Handling spam in Synapse + +Synapse has support to customize spam checking behavior. It can plug into a +variety of events and affect how they are presented to users on your homeserver. + +The spam checking behavior is implemented as a Python class, which must be +able to be imported by the running Synapse. + +## Python spam checker class + +The Python class is instantiated with two objects: + +* Any configuration (see below). +* An instance of `synapse.spam_checker_api.SpamCheckerApi`. + +It then implements methods which return a boolean to alter behavior in Synapse. + +There's a generic method for checking every event (`check_event_for_spam`), as +well as some specific methods: + +* `user_may_invite` +* `user_may_create_room` +* `user_may_create_room_alias` +* `user_may_publish_room` + +The details of the each of these methods (as well as their inputs and outputs) +are documented in the `synapse.events.spamcheck.SpamChecker` class. + +The `SpamCheckerApi` class provides a way for the custom spam checker class to +call back into the homeserver internals. It currently implements the following +methods: + +* `get_state_events_in_room` + +### Example + +```python +class ExampleSpamChecker: + def __init__(self, config, api): + self.config = config + self.api = api + + def check_event_for_spam(self, foo): + return False # allow all events + + def user_may_invite(self, inviter_userid, invitee_userid, room_id): + return True # allow all invites + + def user_may_create_room(self, userid): + return True # allow all room creations + + def user_may_create_room_alias(self, userid, room_alias): + return True # allow all room aliases + + def user_may_publish_room(self, userid, room_id): + return True # allow publishing of all rooms +``` + +## Configuration + +Modify the `spam_checker` section of your `homeserver.yaml` in the following +manner: + +`module` should point to the fully qualified Python class that implements your +custom logic, e.g. `my_module.ExampleSpamChecker`. + +`config` is a dictionary that gets passed to the spam checker class. + +### Example + +This section might look like: + +```yaml +spam_checker: + module: my_module.ExampleSpamChecker + config: + # Enable or disable a specific option in ExampleSpamChecker. + my_custom_option: true +``` + +## Examples + +The [Mjolnir](https://github.com/matrix-org/mjolnir) project is a full fledged +example using the Synapse spam checking API, including a bot for dynamic +configuration. -- cgit 1.5.1 From 49f877d32efc79cb40b2766cb052cf35bad31de5 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 14 Feb 2020 07:17:54 -0500 Subject: Filter the results of user directory searching via the spam checker (#6888) Add a method to the spam checker to filter the user directory results. --- changelog.d/6888.feature | 1 + docs/spam_checker.md | 3 ++ synapse/events/spamcheck.py | 27 ++++++++++ synapse/handlers/user_directory.py | 14 +++++- tests/handlers/test_user_directory.py | 92 +++++++++++++++++++++++++++++++++++ 5 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6888.feature (limited to 'docs/spam_checker.md') diff --git a/changelog.d/6888.feature b/changelog.d/6888.feature new file mode 100644 index 0000000000..1b7ac0c823 --- /dev/null +++ b/changelog.d/6888.feature @@ -0,0 +1 @@ +The result of a user directory search can now be filtered via the spam checker. diff --git a/docs/spam_checker.md b/docs/spam_checker.md index 97ff17f952..5b5f5000b7 100644 --- a/docs/spam_checker.md +++ b/docs/spam_checker.md @@ -54,6 +54,9 @@ class ExampleSpamChecker: def user_may_publish_room(self, userid, room_id): return True # allow publishing of all rooms + + def check_username_for_spam(self, user_profile): + return False # allow all usernames ``` ## Configuration diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 5a907718d6..0a13fca9a4 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -15,6 +15,7 @@ # limitations under the License. import inspect +from typing import Dict from synapse.spam_checker_api import SpamCheckerApi @@ -125,3 +126,29 @@ class SpamChecker(object): return True return self.spam_checker.user_may_publish_room(userid, room_id) + + def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool: + """Checks if a user ID or display name are considered "spammy" by this server. + + If the server considers a username spammy, then it will not be included in + user directory results. + + Args: + user_profile: The user information to check, it contains the keys: + * user_id + * display_name + * avatar_url + + Returns: + True if the user is spammy. + """ + if self.spam_checker is None: + return False + + # For backwards compatibility, if the method does not exist on the spam checker, fallback to not interfering. + checker = getattr(self.spam_checker, "check_username_for_spam", None) + if not checker: + return False + # Make a copy of the user profile object to ensure the spam checker + # cannot modify it. + return checker(user_profile.copy()) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 81aa58dc8c..722760c59d 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -52,6 +52,7 @@ class UserDirectoryHandler(StateDeltasHandler): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users + self.spam_checker = hs.get_spam_checker() # The current position in the current_state_delta stream self.pos = None @@ -65,7 +66,7 @@ class UserDirectoryHandler(StateDeltasHandler): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - def search_users(self, user_id, search_term, limit): + async def search_users(self, user_id, search_term, limit): """Searches for users in directory Returns: @@ -82,7 +83,16 @@ class UserDirectoryHandler(StateDeltasHandler): ] } """ - return self.store.search_user_dir(user_id, search_term, limit) + results = await self.store.search_user_dir(user_id, search_term, limit) + + # Remove any spammy users from the results. + results["results"] = [ + user + for user in results["results"] + if not self.spam_checker.check_username_for_spam(user) + ] + + return results def notify_new_event(self): """Called when there may be more deltas to process diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 26071059d2..0a4765fff4 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -147,6 +147,98 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): s = self.get_success(self.handler.search_users(u1, "user3", 10)) self.assertEqual(len(s["results"]), 0) + def test_spam_checker(self): + """ + A user which fails to the spam checks will not appear in search results. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + self.assertEqual(public_users, []) + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # Configure a spam checker that does not filter any users. + spam_checker = self.hs.get_spam_checker() + + class AllowAll(object): + def check_username_for_spam(self, user_profile): + # Allow all users. + return False + + spam_checker.spam_checker = AllowAll() + + # The results do not change: + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # Configure a spam checker that filters all users. + class BlockAll(object): + def check_username_for_spam(self, user_profile): + # All users are spammy. + return True + + spam_checker.spam_checker = BlockAll() + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + def test_legacy_spam_checker(self): + """ + A spam checker without the expected method should be ignored. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + self.assertEqual(public_users, []) + + # Configure a spam checker. + spam_checker = self.hs.get_spam_checker() + # The spam checker doesn't need any methods, so create a bare object. + spam_checker.spam_checker = object() + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + def _compress_shared(self, shared): """ Compress a list of users who share rooms dicts to a list of tuples. -- cgit 1.5.1 From 67feea8044562764b04c4968ebf159b44eb59218 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 8 May 2020 19:25:48 +0100 Subject: Extend spam checker to allow for multiple modules (#7435) --- changelog.d/7435.feature | 1 + docs/sample_config.yaml | 15 +++++-- docs/spam_checker.md | 19 +++++---- synapse/config/spam_checker.py | 38 +++++++++++++---- synapse/events/spamcheck.py | 78 +++++++++++++++++------------------ tests/handlers/test_user_directory.py | 4 +- 6 files changed, 95 insertions(+), 60 deletions(-) create mode 100644 changelog.d/7435.feature (limited to 'docs/spam_checker.md') diff --git a/changelog.d/7435.feature b/changelog.d/7435.feature new file mode 100644 index 0000000000..399291b13b --- /dev/null +++ b/changelog.d/7435.feature @@ -0,0 +1 @@ +Allow for using more than one spam checker module at once. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 1e397f7734..5abeaf519b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1867,10 +1867,17 @@ password_providers: # include_content: true -#spam_checker: -# module: "my_custom_project.SuperSpamChecker" -# config: -# example_option: 'things' +# Spam checkers are third-party modules that can block specific actions +# of local users, such as creating rooms and registering undesirable +# usernames, as well as remote users by redacting incoming events. +# +spam_checker: + #- module: "my_custom_project.SuperSpamChecker" + # config: + # example_option: 'things' + #- module: "some_other_project.BadEventStopper" + # config: + # example_stop_events_from: ['@bad:example.com'] # Uncomment to allow non-server-admin users to create groups on this server diff --git a/docs/spam_checker.md b/docs/spam_checker.md index 5b5f5000b7..eb10e115f9 100644 --- a/docs/spam_checker.md +++ b/docs/spam_checker.md @@ -64,10 +64,12 @@ class ExampleSpamChecker: Modify the `spam_checker` section of your `homeserver.yaml` in the following manner: -`module` should point to the fully qualified Python class that implements your -custom logic, e.g. `my_module.ExampleSpamChecker`. +Create a list entry with the keys `module` and `config`. -`config` is a dictionary that gets passed to the spam checker class. +* `module` should point to the fully qualified Python class that implements your + custom logic, e.g. `my_module.ExampleSpamChecker`. + +* `config` is a dictionary that gets passed to the spam checker class. ### Example @@ -75,12 +77,15 @@ This section might look like: ```yaml spam_checker: - module: my_module.ExampleSpamChecker - config: - # Enable or disable a specific option in ExampleSpamChecker. - my_custom_option: true + - module: my_module.ExampleSpamChecker + config: + # Enable or disable a specific option in ExampleSpamChecker. + my_custom_option: true ``` +More spam checkers can be added in tandem by appending more items to the list. An +action is blocked when at least one of the configured spam checkers flags it. + ## Examples The [Mjolnir](https://github.com/matrix-org/mjolnir) project is a full fledged diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py index 36e0ddab5c..3d067d29db 100644 --- a/synapse/config/spam_checker.py +++ b/synapse/config/spam_checker.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict, List, Tuple + +from synapse.config import ConfigError from synapse.util.module_loader import load_module from ._base import Config @@ -22,16 +25,35 @@ class SpamCheckerConfig(Config): section = "spamchecker" def read_config(self, config, **kwargs): - self.spam_checker = None + self.spam_checkers = [] # type: List[Tuple[Any, Dict]] + + spam_checkers = config.get("spam_checker") or [] + if isinstance(spam_checkers, dict): + # The spam_checker config option used to only support one + # spam checker, and thus was simply a dictionary with module + # and config keys. Support this old behaviour by checking + # to see if the option resolves to a dictionary + self.spam_checkers.append(load_module(spam_checkers)) + elif isinstance(spam_checkers, list): + for spam_checker in spam_checkers: + if not isinstance(spam_checker, dict): + raise ConfigError("spam_checker syntax is incorrect") - provider = config.get("spam_checker", None) - if provider is not None: - self.spam_checker = load_module(provider) + self.spam_checkers.append(load_module(spam_checker)) + else: + raise ConfigError("spam_checker syntax is incorrect") def generate_config_section(self, **kwargs): return """\ - #spam_checker: - # module: "my_custom_project.SuperSpamChecker" - # config: - # example_option: 'things' + # Spam checkers are third-party modules that can block specific actions + # of local users, such as creating rooms and registering undesirable + # usernames, as well as remote users by redacting incoming events. + # + spam_checker: + #- module: "my_custom_project.SuperSpamChecker" + # config: + # example_option: 'things' + #- module: "some_other_project.BadEventStopper" + # config: + # example_stop_events_from: ['@bad:example.com'] """ diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index a23b6b7b61..1ffc9525d1 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -15,7 +15,7 @@ # limitations under the License. import inspect -from typing import Dict +from typing import Any, Dict, List from synapse.spam_checker_api import SpamCheckerApi @@ -26,24 +26,17 @@ if MYPY: class SpamChecker(object): def __init__(self, hs: "synapse.server.HomeServer"): - self.spam_checker = None + self.spam_checkers = [] # type: List[Any] - module = None - config = None - try: - module, config = hs.config.spam_checker - except Exception: - pass - - if module is not None: + for module, config in hs.config.spam_checkers: # Older spam checkers don't accept the `api` argument, so we # try and detect support. spam_args = inspect.getfullargspec(module) if "api" in spam_args.args: api = SpamCheckerApi(hs) - self.spam_checker = module(config=config, api=api) + self.spam_checkers.append(module(config=config, api=api)) else: - self.spam_checker = module(config=config) + self.spam_checkers.append(module(config=config)) def check_event_for_spam(self, event: "synapse.events.EventBase") -> bool: """Checks if a given event is considered "spammy" by this server. @@ -58,10 +51,11 @@ class SpamChecker(object): Returns: True if the event is spammy. """ - if self.spam_checker is None: - return False + for spam_checker in self.spam_checkers: + if spam_checker.check_event_for_spam(event): + return True - return self.spam_checker.check_event_for_spam(event) + return False def user_may_invite( self, inviter_userid: str, invitee_userid: str, room_id: str @@ -78,12 +72,14 @@ class SpamChecker(object): Returns: True if the user may send an invite, otherwise False """ - if self.spam_checker is None: - return True + for spam_checker in self.spam_checkers: + if ( + spam_checker.user_may_invite(inviter_userid, invitee_userid, room_id) + is False + ): + return False - return self.spam_checker.user_may_invite( - inviter_userid, invitee_userid, room_id - ) + return True def user_may_create_room(self, userid: str) -> bool: """Checks if a given user may create a room @@ -96,10 +92,11 @@ class SpamChecker(object): Returns: True if the user may create a room, otherwise False """ - if self.spam_checker is None: - return True + for spam_checker in self.spam_checkers: + if spam_checker.user_may_create_room(userid) is False: + return False - return self.spam_checker.user_may_create_room(userid) + return True def user_may_create_room_alias(self, userid: str, room_alias: str) -> bool: """Checks if a given user may create a room alias @@ -113,10 +110,11 @@ class SpamChecker(object): Returns: True if the user may create a room alias, otherwise False """ - if self.spam_checker is None: - return True + for spam_checker in self.spam_checkers: + if spam_checker.user_may_create_room_alias(userid, room_alias) is False: + return False - return self.spam_checker.user_may_create_room_alias(userid, room_alias) + return True def user_may_publish_room(self, userid: str, room_id: str) -> bool: """Checks if a given user may publish a room to the directory @@ -130,10 +128,11 @@ class SpamChecker(object): Returns: True if the user may publish the room, otherwise False """ - if self.spam_checker is None: - return True + for spam_checker in self.spam_checkers: + if spam_checker.user_may_publish_room(userid, room_id) is False: + return False - return self.spam_checker.user_may_publish_room(userid, room_id) + return True def check_username_for_spam(self, user_profile: Dict[str, str]) -> bool: """Checks if a user ID or display name are considered "spammy" by this server. @@ -150,13 +149,14 @@ class SpamChecker(object): Returns: True if the user is spammy. """ - if self.spam_checker is None: - return False - - # For backwards compatibility, if the method does not exist on the spam checker, fallback to not interfering. - checker = getattr(self.spam_checker, "check_username_for_spam", None) - if not checker: - return False - # Make a copy of the user profile object to ensure the spam checker - # cannot modify it. - return checker(user_profile.copy()) + for spam_checker in self.spam_checkers: + # For backwards compatibility, only run if the method exists on the + # spam checker + checker = getattr(spam_checker, "check_username_for_spam", None) + if checker: + # Make a copy of the user profile object to ensure the spam checker + # cannot modify it. + if checker(user_profile.copy()): + return True + + return False diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 7b92bdbc47..572df8d80b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -185,7 +185,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Allow all users. return False - spam_checker.spam_checker = AllowAll() + spam_checker.spam_checkers = [AllowAll()] # The results do not change: # We get one search result when searching for user2 by user1. @@ -198,7 +198,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # All users are spammy. return True - spam_checker.spam_checker = BlockAll() + spam_checker.spam_checkers = [BlockAll()] # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) -- cgit 1.5.1