From 71b625d80806886794c5e72f7ff11432e99b736c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 16:54:35 +0000 Subject: Stop backpaginating when events not visible --- synapse/visibility.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c919..f6dcc96630 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,20 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): +def filter_events_for_server(store, server_name, events, redact=True): + """Filter a list of events based on whether given server is allowed to + see them. + + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + + Returns + Deferred[list[FrozenEvent]] + """ # Whatever else we do, we need to check for senders which have requested # erasure of their data. erased_senders = yield store.are_users_erased( @@ -231,7 +244,10 @@ def filter_events_for_server(store, server_name, events): "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) + if redact: + return prune_event(event) + else: + return None # state will be None if we decided we didn't need to filter by # room membership. @@ -265,7 +281,10 @@ def filter_events_for_server(store, server_name, events): return event else: # server has no users in the room: redact - return prune_event(event) + if redact: + return prune_event(event) + else: + return None return event @@ -361,7 +380,8 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ + to_return = ( redact_disallowed(e, event_to_state[e.event_id]) for e in events - ]) + ) + defer.returnValue([e for e in to_return if e is not None]) -- cgit 1.5.1 From d1523aed6bebf00a4643d72eea611b029db65f08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 14:34:34 +0000 Subject: Only check history visibility when filtering When filtering events to send to server we check more than just history visibility. However when deciding whether to backfill or not we only care about the history visibility. --- synapse/handlers/federation.py | 4 ++- synapse/visibility.py | 77 +++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 35 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 32d7ba6cf5..bf2989aefd 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -891,9 +891,11 @@ class FederationHandler(BaseHandler): get_prev_content=False, ) + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. filtered_extremities = yield filter_events_for_server( self.store, self.server_name, list(extremities_events.values()), - redact=False, + redact=False, check_history_visibility_only=True, ) if not filtered_extremities: diff --git a/synapse/visibility.py b/synapse/visibility.py index f6dcc96630..8b9c7180b6 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,8 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events, redact=True): +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): """Filter a list of events based on whether given server is allowed to see them. @@ -226,34 +227,25 @@ def filter_events_for_server(store, server_name, events, redact=True): events (iterable[FrozenEvent]) redact (bool): Whether to return a redacted version of the event, or to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. Returns Deferred[list[FrozenEvent]] """ - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. + def is_sender_erased(event, erased_senders): if erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - if redact: - return prune_event(event) - else: - return None - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -275,18 +267,15 @@ def filter_events_for_server(store, server_name, events, redact=True): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - if redact: - return prune_event(event) - else: - return None + return False - return event + return True # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't @@ -315,16 +304,31 @@ def filter_events_for_server(store, server_name, events, redact=True): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -380,8 +384,13 @@ def filter_events_for_server(store, server_name, events, redact=True): for e_id, key_to_eid in iteritems(event_to_state_ids) } - to_return = ( - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ) - defer.returnValue([e for e in to_return if e is not None]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) -- cgit 1.5.1 From 0d2d046709270797f46a65672a5702b194dabef9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 16:04:04 +0000 Subject: Fix missing null guard --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 8b9c7180b6..e9dc73c25e 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -237,7 +237,7 @@ def filter_events_for_server(store, server_name, events, redact=True, """ def is_sender_erased(event, erased_senders): - if erased_senders[event.sender]: + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, -- cgit 1.5.1 From aa06d26ae05585ddfb5e33a2dd521c9aa27b6cfa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Mar 2019 09:16:35 +0000 Subject: clarify comments --- changelog.d/4699.bugfix | 2 +- synapse/handlers/federation.py | 19 +++++++++++-------- synapse/visibility.py | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'synapse/visibility.py') diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 8cd8340cc1..1d7f3174e7 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events. +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bf2989aefd..72b63d64d0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -862,9 +862,9 @@ class FederationHandler(BaseHandler): # as otherwise we'll just spend a lot of resources to get redacted # events. # - # We do this by filtering all the extremities and seeing if any remain. - # Given we don't have the extremity events themselves, we need to - # actually check the events that reference them. + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. # # *Note*: the spec wants us to keep backfilling until we reach the start # of the room in case we are allowed to see some of the history. However @@ -873,13 +873,16 @@ class FederationHandler(BaseHandler): # there is its often sufficiently long ago that clients would stop # attempting to paginate before backfill reached the visible history. # - # TODO: If we do do a backfill the we should filter the extremities to - # only include those that point to visible portions of history. + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. # # TODO: Correctly handle the case where we are allowed to see the - # forward event but not the extremity, e.g. in the case of initial - # join of the server where we are allowed to see the join event but - # not anything before it. + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. forward_events = yield self.store.get_successor_events( list(extremities), diff --git a/synapse/visibility.py b/synapse/visibility.py index e9dc73c25e..efec21673b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -277,7 +277,7 @@ def filter_events_for_server(store, server_name, events, redact=True, return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( -- cgit 1.5.1 From a9de04be724be9e19af0a5a5839c65924f90886a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 10:31:21 +0000 Subject: Implement soft fail --- synapse/events/__init__.py | 14 ++++++++ synapse/handlers/federation.py | 77 +++++++++++++++++++++++++++++++++++++++++- synapse/storage/events.py | 1 + synapse/visibility.py | 4 +++ 4 files changed, 95 insertions(+), 1 deletion(-) (limited to 'synapse/visibility.py') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 20c1ab4203..bd130f8816 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -77,6 +77,20 @@ class _EventInternalMetadata(object): """ return getattr(self, "recheck_redaction", False) + def is_soft_failed(self): + """Whether the event has been soft failed. + + Soft failed events should be handled as usual, except: + 1. They should not go down sync or event streams, or generally + sent to clients. + 2. They should not be added to the forward extremities (and + therefore not to current state). + + Returns: + bool + """ + return getattr(self, "soft_failed", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 72b63d64d0..a75abe8e91 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.event_auth import auth_types_for_event from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -1628,6 +1629,7 @@ class FederationHandler(BaseHandler): origin, event, state=state, auth_events=auth_events, + backfilled=backfilled, ) # reraise does not allow inlineCallbacks to preserve the stacktrace, so we @@ -1672,6 +1674,7 @@ class FederationHandler(BaseHandler): event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), + backfilled=backfilled, ) defer.returnValue(res) @@ -1794,7 +1797,7 @@ class FederationHandler(BaseHandler): ) @defer.inlineCallbacks - def _prep_event(self, origin, event, state=None, auth_events=None): + def _prep_event(self, origin, event, state, auth_events, backfilled): """ Args: @@ -1802,6 +1805,7 @@ class FederationHandler(BaseHandler): event: state: auth_events: + backfilled (bool) Returns: Deferred, which resolves to synapse.events.snapshot.EventContext @@ -1843,6 +1847,77 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + # For new (non-backfilled and non-outlier) events we check if the event + # passes auth based on the current state. If it doesn't then we + # "soft-fail" the event. + do_soft_fail_check = not backfilled and not event.internal_metadata.is_outlier() + if do_soft_fail_check: + extrem_ids = yield self.store.get_latest_event_ids_in_room( + event.room_id, + ) + + extrem_ids = set(extrem_ids) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + do_soft_fail_check = False + + if do_soft_fail_check: + room_version = yield self.store.get_room_version(event.room_id) + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. + + state_sets = yield self.store.get_state_groups( + event.room_id, extrem_ids, + ) + state_sets = list(state_sets.values()) + state_sets.append(state) + current_state_ids = yield self.state_handler.resolve_events( + room_version, state_sets, event, + ) + current_state_ids = { + k: e.event_id for k, e in iteritems(current_state_ids) + } + else: + current_state_ids = yield self.state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(event) + current_state_ids = [ + e for k, e in iteritems(current_state_ids) + if k in auth_types + ] + + current_auth_events = yield self.store.get_events(current_state_ids) + current_auth_events = { + (e.type, e.state_key): e for e in current_auth_events.values() + } + + try: + self.auth.check(room_version, event, auth_events=current_auth_events) + except AuthError as e: + logger.warn( + "Failed current state auth resolution for %r because %s", + event, e, + ) + event.internal_metadata.soft_failed = True + if event.type == EventTypes.GuestAccess and not context.rejected: yield self.maybe_kick_guest_users(event) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 06db9e56e6..990a5eaaae 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -537,6 +537,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore new_events = [ event for event, ctx in event_contexts if not event.internal_metadata.is_outlier() and not ctx.rejected + and not event.internal_metadata.is_soft_failed() ] # start with the existing forward extremities diff --git a/synapse/visibility.py b/synapse/visibility.py index efec21673b..16c40cd74c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -67,6 +67,10 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, Returns: Deferred[list[synapse.events.EventBase]] """ + # Filter out events that have been soft failed so that we don't relay them + # to clients. + events = list(e for e in events if not e.internal_metadata.is_soft_failed()) + types = ( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), -- cgit 1.5.1