From 5ed109d59f46c5185395f7c76050274fdd6abc15 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 19 Sep 2017 12:20:11 +0100 Subject: PoC for filtering spammy events (#2456) Demonstration of how you might add some hooks to filter out spammy events. --- synapse/federation/federation_base.py | 40 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 2339cc9034..28eaab2cef 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -12,21 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - -from twisted.internet import defer - -from synapse.events.utils import prune_event - -from synapse.crypto.event_signing import check_event_content_hash +import logging from synapse.api.errors import SynapseError - +from synapse.crypto.event_signing import check_event_content_hash +from synapse.events import spamcheck +from synapse.events.utils import prune_event from synapse.util import unwrapFirstError -from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred - -import logging - +from synapse.util.logcontext import preserve_context_over_deferred, preserve_fn +from twisted.internet import defer logger = logging.getLogger(__name__) @@ -117,12 +111,18 @@ class FederationBase(object): return self._check_sigs_and_hashes([pdu])[0] def _check_sigs_and_hashes(self, pdus): - """Throws a SynapseError if a PDU does not have the correct - signatures. + """Checks that each of the received events is correctly signed by the + sending server. + + Args: + pdus (list[FrozenEvent]): the events to be checked Returns: - FrozenEvent: Either the given event or it redacted if it failed the - content hash check. + list[Deferred]: for each input event, a deferred which: + * returns the original event if the checks pass + * returns a redacted version of the event (if the signature + matched but the hash did not) + * throws a SynapseError if the signature check failed. """ redacted_pdus = [ @@ -142,6 +142,14 @@ class FederationBase(object): pdu.event_id, pdu.get_pdu_json() ) return redacted + + if spamcheck.check_event_for_spam(pdu): + logger.warn( + "Event contains spam, redacting %s: %s", + pdu.event_id, pdu.get_pdu_json() + ) + return redacted + return pdu def errback(failure, pdu): -- cgit 1.5.1 From fcf2c0fd1aa4d85df0bdb43bc8411ad4ad988a6f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 20 Sep 2017 01:32:42 +0100 Subject: Remove redundant `preserve_fn` preserve_fn is a no-op unless the wrapped function returns a Deferred. verify_json_objects_for_server returns a list, so this is doing nothing. --- synapse/federation/federation_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 28eaab2cef..cabed33f74 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -19,7 +19,7 @@ from synapse.crypto.event_signing import check_event_content_hash from synapse.events import spamcheck from synapse.events.utils import prune_event from synapse.util import unwrapFirstError -from synapse.util.logcontext import preserve_context_over_deferred, preserve_fn +from synapse.util.logcontext import preserve_context_over_deferred from twisted.internet import defer logger = logging.getLogger(__name__) @@ -130,7 +130,7 @@ class FederationBase(object): for pdu in pdus ] - deferreds = preserve_fn(self.keyring.verify_json_objects_for_server)([ + deferreds = self.keyring.verify_json_objects_for_server([ (p.origin, p.get_pdu_json()) for p in redacted_pdus ]) -- cgit 1.5.1 From 6de74ea6d7394b63c9475e9dfff943188a9ed73b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 20 Sep 2017 01:32:42 +0100 Subject: Fix logcontexts in _check_sigs_and_hashes --- synapse/federation/federation_base.py | 114 ++++++++++++++++---------------- synapse/federation/federation_client.py | 8 +-- 2 files changed, 62 insertions(+), 60 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index cabed33f74..babd9ea078 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -18,8 +18,7 @@ from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash from synapse.events import spamcheck from synapse.events.utils import prune_event -from synapse.util import unwrapFirstError -from synapse.util.logcontext import preserve_context_over_deferred +from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer logger = logging.getLogger(__name__) @@ -51,56 +50,52 @@ class FederationBase(object): """ deferreds = self._check_sigs_and_hashes(pdus) - def callback(pdu): - return pdu + @defer.inlineCallbacks + def handle_check_result(pdu, deferred): + try: + res = yield logcontext.make_deferred_yieldable(deferred) + except SynapseError: + res = None - def errback(failure, pdu): - failure.trap(SynapseError) - return None - - def try_local_db(res, pdu): if not res: # Check local db. - return self.store.get_event( + res = yield self.store.get_event( pdu.event_id, allow_rejected=True, allow_none=True, ) - return res - def try_remote(res, pdu): if not res and pdu.origin != origin: - return self.get_pdu( - destinations=[pdu.origin], - event_id=pdu.event_id, - outlier=outlier, - timeout=10000, - ).addErrback(lambda e: None) - return res - - def warn(res, pdu): + try: + res = yield self.get_pdu( + destinations=[pdu.origin], + event_id=pdu.event_id, + outlier=outlier, + timeout=10000, + ) + except SynapseError: + pass + if not res: logger.warn( "Failed to find copy of %s with valid signature", pdu.event_id, ) - return res - for pdu, deferred in zip(pdus, deferreds): - deferred.addCallbacks( - callback, errback, errbackArgs=[pdu] - ).addCallback( - try_local_db, pdu - ).addCallback( - try_remote, pdu - ).addCallback( - warn, pdu - ) + defer.returnValue(res) - valid_pdus = yield preserve_context_over_deferred(defer.gatherResults( - deferreds, - consumeErrors=True - )).addErrback(unwrapFirstError) + handle = logcontext.preserve_fn(handle_check_result) + deferreds2 = [ + handle(pdu, deferred) + for pdu, deferred in zip(pdus, deferreds) + ] + + valid_pdus = yield logcontext.make_deferred_yieldable( + defer.gatherResults( + deferreds2, + consumeErrors=True, + ) + ).addErrback(unwrapFirstError) if include_none: defer.returnValue(valid_pdus) @@ -108,7 +103,9 @@ class FederationBase(object): defer.returnValue([p for p in valid_pdus if p]) def _check_sigs_and_hash(self, pdu): - return self._check_sigs_and_hashes([pdu])[0] + return logcontext.make_deferred_yieldable( + self._check_sigs_and_hashes([pdu])[0], + ) def _check_sigs_and_hashes(self, pdus): """Checks that each of the received events is correctly signed by the @@ -123,6 +120,7 @@ class FederationBase(object): * returns a redacted version of the event (if the signature matched but the hash did not) * throws a SynapseError if the signature check failed. + The deferreds run their callbacks in the sentinel logcontext. """ redacted_pdus = [ @@ -135,29 +133,33 @@ class FederationBase(object): for p in redacted_pdus ]) - def callback(_, pdu, redacted): - if not check_event_content_hash(pdu): - logger.warn( - "Event content has been tampered, redacting %s: %s", - pdu.event_id, pdu.get_pdu_json() - ) - return redacted - - if spamcheck.check_event_for_spam(pdu): - logger.warn( - "Event contains spam, redacting %s: %s", - pdu.event_id, pdu.get_pdu_json() - ) - return redacted + ctx = logcontext.LoggingContext.current_context() - return pdu + def callback(_, pdu, redacted): + with logcontext.PreserveLoggingContext(ctx): + if not check_event_content_hash(pdu): + logger.warn( + "Event content has been tampered, redacting %s: %s", + pdu.event_id, pdu.get_pdu_json() + ) + return redacted + + if spamcheck.check_event_for_spam(pdu): + logger.warn( + "Event contains spam, redacting %s: %s", + pdu.event_id, pdu.get_pdu_json() + ) + return redacted + + return pdu def errback(failure, pdu): failure.trap(SynapseError) - logger.warn( - "Signature check failed for %s", - pdu.event_id, - ) + with logcontext.PreserveLoggingContext(ctx): + logger.warn( + "Signature check failed for %s", + pdu.event_id, + ) return failure for deferred, pdu, redacted in zip(deferreds, pdus, redacted_pdus): diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 861441708b..7c5e5d957f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -22,7 +22,7 @@ from synapse.api.constants import Membership from synapse.api.errors import ( CodeMessageException, HttpResponseException, SynapseError, ) -from synapse.util import unwrapFirstError +from synapse.util import unwrapFirstError, logcontext from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logutils import log_function from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred @@ -189,10 +189,10 @@ class FederationClient(FederationBase): ] # FIXME: We should handle signature failures more gracefully. - pdus[:] = yield preserve_context_over_deferred(defer.gatherResults( + pdus[:] = yield logcontext.make_deferred_yieldable(defer.gatherResults( self._check_sigs_and_hashes(pdus), consumeErrors=True, - )).addErrback(unwrapFirstError) + ).addErrback(unwrapFirstError)) defer.returnValue(pdus) @@ -252,7 +252,7 @@ class FederationClient(FederationBase): pdu = pdu_list[0] # Check signatures are correct. - signed_pdu = yield self._check_sigs_and_hashes([pdu])[0] + signed_pdu = yield self._check_sigs_and_hash(pdu) break -- cgit 1.5.1 From 6cd5fcd5366cfef4959d107e818d0e20d78aa483 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 26 Sep 2017 19:20:23 +0100 Subject: Make the spam checker a module --- synapse/config/homeserver.py | 4 +++- synapse/events/spamcheck.py | 37 +++++++++++++++++++---------------- synapse/federation/federation_base.py | 5 ++--- synapse/handlers/message.py | 5 +++-- synapse/server.py | 5 +++++ 5 files changed, 33 insertions(+), 23 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index b22cacf8dc..3f9d9d5f8b 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -34,6 +34,7 @@ from .password_auth_providers import PasswordAuthProviderConfig from .emailconfig import EmailConfig from .workers import WorkerConfig from .push import PushConfig +from .spam_checker import SpamCheckerConfig class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, @@ -41,7 +42,8 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, VoipConfig, RegistrationConfig, MetricsConfig, ApiConfig, AppServiceConfig, KeyConfig, SAML2Config, CasConfig, JWTConfig, PasswordConfig, EmailConfig, - WorkerConfig, PasswordAuthProviderConfig, PushConfig,): + WorkerConfig, PasswordAuthProviderConfig, PushConfig, + SpamCheckerConfig,): pass diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 56fa9e556e..7b22b3413a 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -13,26 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +class SpamChecker(object): + def __init__(self, hs): + self.spam_checker = None -def check_event_for_spam(event): - """Checks if a given event is considered "spammy" by this server. + if hs.config.spam_checker is not None: + module, config = hs.config.spam_checker + print("cfg %r", config) + self.spam_checker = module(config=config) - If the server considers an event spammy, then it will be rejected if - sent by a local user. If it is sent by a user on another server, then - users receive a blank event. + def check_event_for_spam(self, event): + """Checks if a given event is considered "spammy" by this server. - Args: - event (synapse.events.EventBase): the event to be checked + If the server considers an event spammy, then it will be rejected if + sent by a local user. If it is sent by a user on another server, then + users receive a blank event. - Returns: - bool: True if the event is spammy. - """ - if not hasattr(event, "content") or "body" not in event.content: - return False + Args: + event (synapse.events.EventBase): the event to be checked - # for example: - # - # if "the third flower is green" in event.content["body"]: - # return True + Returns: + bool: True if the event is spammy. + """ + if self.spam_checker is None: + return False - return False + return self.spam_checker.check_event_for_spam(event) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index babd9ea078..a0f5d40eb3 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -16,7 +16,6 @@ import logging from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash -from synapse.events import spamcheck from synapse.events.utils import prune_event from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer @@ -26,7 +25,7 @@ logger = logging.getLogger(__name__) class FederationBase(object): def __init__(self, hs): - pass + self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, @@ -144,7 +143,7 @@ class FederationBase(object): ) return redacted - if spamcheck.check_event_for_spam(pdu): + if self.spam_checker.check_event_for_spam(pdu): logger.warn( "Event contains spam, redacting %s: %s", pdu.event_id, pdu.get_pdu_json() diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index da18bf23db..37f0a2772a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.events import spamcheck from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -58,6 +57,8 @@ class MessageHandler(BaseHandler): self.action_generator = hs.get_action_generator() + self.spam_checker = hs.get_spam_checker() + @defer.inlineCallbacks def purge_history(self, room_id, event_id): event = yield self.store.get_event(event_id) @@ -322,7 +323,7 @@ class MessageHandler(BaseHandler): txn_id=txn_id ) - if spamcheck.check_event_for_spam(event): + if self.spam_checker.check_event_for_spam(event): raise SynapseError( 403, "Spam is not permitted here", Codes.FORBIDDEN ) diff --git a/synapse/server.py b/synapse/server.py index a38e5179e0..4d44af745e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -31,6 +31,7 @@ from synapse.appservice.api import ApplicationServiceApi from synapse.appservice.scheduler import ApplicationServiceScheduler from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory +from synapse.events.spamcheck import SpamChecker from synapse.federation import initialize_http_replication from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.transport.client import TransportLayerClient @@ -139,6 +140,7 @@ class HomeServer(object): 'read_marker_handler', 'action_generator', 'user_directory_handler', + 'spam_checker', ] def __init__(self, hostname, **kwargs): @@ -309,6 +311,9 @@ class HomeServer(object): def build_user_directory_handler(self): return UserDirectoyHandler(self) + def build_spam_checker(self): + return SpamChecker(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) -- cgit 1.5.1