From ea69d3565110a20f5abe00f1a5c2357b483140fb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 11:38:45 +0100 Subject: Move filter_events_for_server out of FederationHandler for easier unit testing. --- synapse/visibility.py | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 015c2bab37..ddce41efaf 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -20,6 +20,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events.utils import prune_event +from synapse.types import get_domain_from_id from synapse.util.logcontext import make_deferred_yieldable, preserve_fn logger = logging.getLogger(__name__) @@ -225,3 +226,134 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, # we turn it into a list before returning it. defer.returnValue(list(filtered_events)) + + +@defer.inlineCallbacks +def filter_events_for_server(store, server_name, events): + """Filter the given events for the given server, redacting those the + server can't see. + + Assumes the server is currently in the room. + + Returns + list[FrozenEvent] + """ + # First lets check to see if all the events have a history visibility + # of "shared" or "world_readable". If thats the case then we don't + # need to check membership (as we know the server is in the room). + event_to_state_ids = yield store.get_state_ids_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + ) + ) + + visibility_ids = set() + for sids in event_to_state_ids.itervalues(): + hist = sids.get((EventTypes.RoomHistoryVisibility, "")) + if hist: + visibility_ids.add(hist) + + # If we failed to find any history visibility events then the default + # is "shared" visiblity. + if not visibility_ids: + defer.returnValue(events) + + event_map = yield store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) + + if all_open: + defer.returnValue(events) + + # Ok, so we're dealing with events that have non-trivial visibility + # rules, so we need to also get the memberships of the room. + + event_to_state_ids = yield store.get_state_ids_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, None), + ) + ) + + # We only want to pull out member events that correspond to the + # server's domain. + + def check_match(id): + try: + return server_name == get_domain_from_id(id) + except Exception: + return False + + # Parses mapping `event_id -> (type, state_key) -> state event_id` + # to get all state ids that we're interested in. + event_map = yield store.get_events([ + e_id + for key_to_eid in list(event_to_state_ids.values()) + for key, e_id in key_to_eid.items() + if key[0] != EventTypes.Member or check_match(key[1]) + ]) + + event_to_state = { + e_id: { + key: event_map[inner_e_id] + for key, inner_e_id in key_to_eid.iteritems() + if inner_e_id in event_map + } + for e_id, key_to_eid in event_to_state_ids.iteritems() + } + + erased_senders = yield store.are_users_erased( + e.sender for e in events, + ) + + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. + for ev in state.itervalues(): + if ev.type != EventTypes.Member: + continue + try: + domain = get_domain_from_id(ev.state_key) + except Exception: + continue + + if domain != server_name: + continue + + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + return prune_event(event) + + return event + + defer.returnValue([ + redact_disallowed(e, event_to_state[e.event_id]) + for e in events + ]) -- cgit 1.5.1 From 09e29fb58b144ee629b2879e2c9fa57d7b11e3ab Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Jul 2018 16:32:46 +0100 Subject: Attempt to make _filter_events_for_server more efficient --- synapse/visibility.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index ddce41efaf..27061e4ad9 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -16,6 +16,7 @@ import itertools import logging import operator +import six from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -230,14 +231,6 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks def filter_events_for_server(store, server_name, events): - """Filter the given events for the given server, redacting those the - server can't see. - - Assumes the server is currently in the room. - - Returns - list[FrozenEvent] - """ # First lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). @@ -271,6 +264,8 @@ def filter_events_for_server(store, server_name, events): # Ok, so we're dealing with events that have non-trivial visibility # rules, so we need to also get the memberships of the room. + # first, for each event we're wanting to return, get the event_ids + # of the history vis and membership state at those events. event_to_state_ids = yield store.get_state_ids_for_events( frozenset(e.event_id for e in events), types=( @@ -281,20 +276,30 @@ def filter_events_for_server(store, server_name, events): # We only want to pull out member events that correspond to the # server's domain. + # + # event_to_state_ids contains lots of duplicates, so it turns out to be + # cheaper to build a complete set of unique + # ((type, state_key), event_id) tuples, and then filter out the ones we + # don't want. + # + state_key_to_event_id_set = { + e + for key_to_eid in six.itervalues(event_to_state_ids) + for e in key_to_eid.items() + } - def check_match(id): - try: - return server_name == get_domain_from_id(id) - except Exception: + def include(typ, state_key): + if typ != EventTypes.Member: + return True + idx = state_key.find(":") + if idx == -1: return False + return state_key[idx + 1:] == server_name - # Parses mapping `event_id -> (type, state_key) -> state event_id` - # to get all state ids that we're interested in. event_map = yield store.get_events([ e_id - for key_to_eid in list(event_to_state_ids.values()) - for key, e_id in key_to_eid.items() - if key[0] != EventTypes.Member or check_match(key[1]) + for key, e_id in state_key_to_event_id_set + if include(key[0], key[1]) ]) event_to_state = { @@ -349,6 +354,7 @@ def filter_events_for_server(store, server_name, events): if visibility == "invited": return event else: + # server has no users in the room: redact return prune_event(event) return event -- cgit 1.5.1 From 2172a3d8cb18724b2e77d74afd0789f2abb246e5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 11:13:57 +0100 Subject: add a comment --- synapse/visibility.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 27061e4ad9..6209bc0cde 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -291,6 +291,8 @@ def filter_events_for_server(store, server_name, events): def include(typ, state_key): if typ != EventTypes.Member: return True + + # we avoid using get_domain_from_id here for efficiency. idx = state_key.find(":") if idx == -1: return False -- cgit 1.5.1 From 94440ae9946936339453925c249f81ea463da4d0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 11:51:26 +0100 Subject: fix imports --- synapse/visibility.py | 1 + tests/test_visibility.py | 1 + 2 files changed, 2 insertions(+) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 6209bc0cde..dc33f61d2b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -17,6 +17,7 @@ import logging import operator import six + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 1401d831d2..8436c29fe8 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -19,6 +19,7 @@ from twisted.internet.defer import succeed from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server + import tests.unittest from tests.utils import setup_test_homeserver -- cgit 1.5.1 From d897be6a98ff2073235dc1d356a517f085dbf388 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 15:22:27 +0100 Subject: Fix visibility of events from erased users over federation --- synapse/visibility.py | 123 ++++++++++++++++++++++++++--------------------- tests/test_visibility.py | 63 ++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 56 deletions(-) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index dc33f61d2b..202daa6800 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -232,7 +232,57 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks def filter_events_for_server(store, server_name, events): - # First lets check to see if all the events have a history visibility + # Whatever else we do, we need to check for senders which have requested + # erasure of their data. + erased_senders = yield store.are_users_erased( + e.sender for e in events, + ) + + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. + for ev in state.itervalues(): + if ev.type != EventTypes.Member: + continue + try: + domain = get_domain_from_id(ev.state_key) + except Exception: + continue + + if domain != server_name: + continue + + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + # server has no users in the room: redact + return prune_event(event) + + return event + + # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( @@ -251,15 +301,24 @@ def filter_events_for_server(store, server_name, events): # If we failed to find any history visibility events then the default # is "shared" visiblity. if not visibility_ids: - defer.returnValue(events) - - event_map = yield store.get_events(visibility_ids) - all_open = all( - e.content.get("history_visibility") in (None, "shared", "world_readable") - for e in event_map.itervalues() - ) + all_open = True + else: + event_map = yield store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) if all_open: + # all the history_visibility state affecting these events is open, so + # we don't need to filter by membership state. We *do* need to check + # for user erasure, though. + if erased_senders: + events = [ + redact_disallowed(e, None) + for e in events + ] + defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -314,54 +373,6 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in event_to_state_ids.iteritems() } - erased_senders = yield store.are_users_erased( - e.sender for e in events, - ) - - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: - logger.info( - "Sender of %s has been erased, redacting", - event.event_id, - ) - return prune_event(event) - - if not state: - return event - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - if visibility in ["invited", "joined"]: - # We now loop through all state events looking for - # membership states for the requesting server to determine - # if the server is either in the room or has been invited - # into the room. - for ev in state.itervalues(): - if ev.type != EventTypes.Member: - continue - try: - domain = get_domain_from_id(ev.state_key) - except Exception: - continue - - if domain != server_name: - continue - - memtype = ev.membership - if memtype == Membership.JOIN: - return event - elif memtype == Membership.INVITE: - if visibility == "invited": - return event - else: - # server has no users in the room: redact - return prune_event(event) - - return event - defer.returnValue([ redact_disallowed(e, event_to_state[e.event_id]) for e in events diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 8436c29fe8..0dc1a924d3 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -73,6 +73,51 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) self.assertEqual(filtered[i].content["a"], "b") + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_erased_user(self): + # 4 message events, from erased and unerased users, with a membership + # change in the middle of them. + events_to_filter = [] + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_room_member("@joiner:remote_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + # the erasey user gets erased + self.hs.get_datastore().mark_user_erased("@erased:local_hs") + + # ... and the filtering happens. + filtered = yield filter_events_for_server( + self.store, "test_server", events_to_filter, + ) + + for i in range(0, len(events_to_filter)): + self.assertEqual( + events_to_filter[i].event_id, filtered[i].event_id, + "Unexpected event at result position %i" % (i, ) + ) + + for i in (0, 3): + self.assertEqual( + events_to_filter[i].content["body"], filtered[i].content["body"], + "Unexpected event content at result position %i" % (i,) + ) + + for i in (1, 4): + self.assertNotIn("body", filtered[i].content) + @defer.inlineCallbacks def inject_visibility(self, user_id, visibility): content = {"history_visibility": visibility} @@ -109,6 +154,24 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): yield self.hs.get_datastore().persist_event(event, context) defer.returnValue(event) + @defer.inlineCallbacks + def inject_message(self, user_id, content=None): + if content is None: + content = {"body": "testytest"} + builder = self.event_builder_factory.new({ + "type": "m.room.message", + "sender": user_id, + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) + @defer.inlineCallbacks def test_large_room(self): # see what happens when we have a large room with hundreds of thousands -- cgit 1.5.1 From 79eb339c662f47b2a525d303de941c67f52f0e21 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 14:53:34 +0100 Subject: add a comment --- synapse/visibility.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 202daa6800..9b97ea2b83 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -248,6 +248,8 @@ def filter_events_for_server(store, server_name, events): ) return prune_event(event) + # state will be None if we decided we didn't need to filter by + # room membership. if not state: return event -- cgit 1.5.1