From 71b625d80806886794c5e72f7ff11432e99b736c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 16:54:35 +0000 Subject: Stop backpaginating when events not visible --- synapse/visibility.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c919..f6dcc96630 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,20 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): +def filter_events_for_server(store, server_name, events, redact=True): + """Filter a list of events based on whether given server is allowed to + see them. + + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + + Returns + Deferred[list[FrozenEvent]] + """ # Whatever else we do, we need to check for senders which have requested # erasure of their data. erased_senders = yield store.are_users_erased( @@ -231,7 +244,10 @@ def filter_events_for_server(store, server_name, events): "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) + if redact: + return prune_event(event) + else: + return None # state will be None if we decided we didn't need to filter by # room membership. @@ -265,7 +281,10 @@ def filter_events_for_server(store, server_name, events): return event else: # server has no users in the room: redact - return prune_event(event) + if redact: + return prune_event(event) + else: + return None return event @@ -361,7 +380,8 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ + to_return = ( redact_disallowed(e, event_to_state[e.event_id]) for e in events - ]) + ) + defer.returnValue([e for e in to_return if e is not None]) -- cgit 1.5.1 From d1523aed6bebf00a4643d72eea611b029db65f08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 14:34:34 +0000 Subject: Only check history visibility when filtering When filtering events to send to server we check more than just history visibility. However when deciding whether to backfill or not we only care about the history visibility. --- synapse/handlers/federation.py | 4 ++- synapse/visibility.py | 77 +++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 35 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 32d7ba6cf5..bf2989aefd 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -891,9 +891,11 @@ class FederationHandler(BaseHandler): get_prev_content=False, ) + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. filtered_extremities = yield filter_events_for_server( self.store, self.server_name, list(extremities_events.values()), - redact=False, + redact=False, check_history_visibility_only=True, ) if not filtered_extremities: diff --git a/synapse/visibility.py b/synapse/visibility.py index f6dcc96630..8b9c7180b6 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,8 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events, redact=True): +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): """Filter a list of events based on whether given server is allowed to see them. @@ -226,34 +227,25 @@ def filter_events_for_server(store, server_name, events, redact=True): events (iterable[FrozenEvent]) redact (bool): Whether to return a redacted version of the event, or to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. Returns Deferred[list[FrozenEvent]] """ - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. + def is_sender_erased(event, erased_senders): if erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - if redact: - return prune_event(event) - else: - return None - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -275,18 +267,15 @@ def filter_events_for_server(store, server_name, events, redact=True): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - if redact: - return prune_event(event) - else: - return None + return False - return event + return True # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't @@ -315,16 +304,31 @@ def filter_events_for_server(store, server_name, events, redact=True): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -380,8 +384,13 @@ def filter_events_for_server(store, server_name, events, redact=True): for e_id, key_to_eid in iteritems(event_to_state_ids) } - to_return = ( - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ) - defer.returnValue([e for e in to_return if e is not None]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) -- cgit 1.5.1 From 0d2d046709270797f46a65672a5702b194dabef9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 16:04:04 +0000 Subject: Fix missing null guard --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 8b9c7180b6..e9dc73c25e 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -237,7 +237,7 @@ def filter_events_for_server(store, server_name, events, redact=True, """ def is_sender_erased(event, erased_senders): - if erased_senders[event.sender]: + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, -- cgit 1.5.1 From aa06d26ae05585ddfb5e33a2dd521c9aa27b6cfa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Mar 2019 09:16:35 +0000 Subject: clarify comments --- changelog.d/4699.bugfix | 2 +- synapse/handlers/federation.py | 19 +++++++++++-------- synapse/visibility.py | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'synapse/visibility.py') diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 8cd8340cc1..1d7f3174e7 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events. +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bf2989aefd..72b63d64d0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -862,9 +862,9 @@ class FederationHandler(BaseHandler): # as otherwise we'll just spend a lot of resources to get redacted # events. # - # We do this by filtering all the extremities and seeing if any remain. - # Given we don't have the extremity events themselves, we need to - # actually check the events that reference them. + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. # # *Note*: the spec wants us to keep backfilling until we reach the start # of the room in case we are allowed to see some of the history. However @@ -873,13 +873,16 @@ class FederationHandler(BaseHandler): # there is its often sufficiently long ago that clients would stop # attempting to paginate before backfill reached the visible history. # - # TODO: If we do do a backfill the we should filter the extremities to - # only include those that point to visible portions of history. + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. # # TODO: Correctly handle the case where we are allowed to see the - # forward event but not the extremity, e.g. in the case of initial - # join of the server where we are allowed to see the join event but - # not anything before it. + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. forward_events = yield self.store.get_successor_events( list(extremities), diff --git a/synapse/visibility.py b/synapse/visibility.py index e9dc73c25e..efec21673b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -277,7 +277,7 @@ def filter_events_for_server(store, server_name, events, redact=True, return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( -- cgit 1.5.1