From ea69d3565110a20f5abe00f1a5c2357b483140fb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 11:38:45 +0100 Subject: Move filter_events_for_server out of FederationHandler for easier unit testing. --- tests/test_visibility.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tests/test_visibility.py (limited to 'tests/test_visibility.py') diff --git a/tests/test_visibility.py b/tests/test_visibility.py new file mode 100644 index 0000000000..86981958cb --- /dev/null +++ b/tests/test_visibility.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.visibility import filter_events_for_server +from tests import unittest +from tests.utils import setup_test_homeserver + +logger = logging.getLogger(__name__) + +TEST_ROOM_ID = "!TEST:ROOM" + + +class FilterEventsForServerTestCase(unittest.TestCase): + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver() + self.event_creation_handler = self.hs.get_event_creation_handler() + self.event_builder_factory = self.hs.get_event_builder_factory() + self.store = self.hs.get_datastore() + + @defer.inlineCallbacks + def test_filtering(self): + # + # The events to be filtered consist of 10 membership events (it doesn't + # really matter if they are joins or leaves, so let's make them joins). + # One of those membership events is going to be for a user on the + # server we are filtering for (so we can check the filtering is doing + # the right thing). + # + + # before we do that, we persist some other events to act as state. + self.inject_visibility("@admin:hs", "joined") + for i in range(0, 10): + yield self.inject_room_member("@resident%i:hs" % i) + + events_to_filter = [] + + for i in range(0, 10): + user = "@user%i:%s" % ( + i, "test_server" if i == 5 else "other_server" + ) + evt = yield self.inject_room_member(user, extra_content={"a": "b"}) + events_to_filter.append(evt) + + filtered = yield filter_events_for_server( + self.store, "test_server", events_to_filter, + ) + + # the result should be 5 redacted events, and 5 unredacted events. + for i in range(0, 5): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertNotIn("a", filtered[i].content) + + for i in range(5, 10): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertEqual(filtered[i].content["a"], "b") + + @defer.inlineCallbacks + def inject_visibility(self, user_id, visibility): + content = {"history_visibility": visibility} + builder = self.event_builder_factory.new({ + "type": "m.room.history_visibility", + "sender": user_id, + "state_key": "", + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) + + @defer.inlineCallbacks + def inject_room_member(self, user_id, membership="join", extra_content={}): + content = {"membership": membership} + content.update(extra_content) + builder = self.event_builder_factory.new({ + "type": "m.room.member", + "sender": user_id, + "state_key": user_id, + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) -- cgit 1.4.1 From 15b13b537f40ba0318b104e28231e95dfc0ccf93 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 14:05:31 +0100 Subject: Add a test which profiles filter_events_for_server in a large room --- tests/test_visibility.py | 157 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) (limited to 'tests/test_visibility.py') diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 86981958cb..1401d831d2 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -15,9 +15,11 @@ import logging from twisted.internet import defer +from twisted.internet.defer import succeed +from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server -from tests import unittest +import tests.unittest from tests.utils import setup_test_homeserver logger = logging.getLogger(__name__) @@ -25,7 +27,7 @@ logger = logging.getLogger(__name__) TEST_ROOM_ID = "!TEST:ROOM" -class FilterEventsForServerTestCase(unittest.TestCase): +class FilterEventsForServerTestCase(tests.unittest.TestCase): @defer.inlineCallbacks def setUp(self): self.hs = yield setup_test_homeserver() @@ -105,3 +107,154 @@ class FilterEventsForServerTestCase(unittest.TestCase): yield self.hs.get_datastore().persist_event(event, context) defer.returnValue(event) + + @defer.inlineCallbacks + def test_large_room(self): + # see what happens when we have a large room with hundreds of thousands + # of membership events + + # As above, the events to be filtered consist of 10 membership events, + # where one of them is for a user on the server we are filtering for. + + import cProfile + import pstats + import time + + # we stub out the store, because building up all that state the normal + # way is very slow. + test_store = _TestStore() + + # our initial state is 100000 membership events and one + # history_visibility event. + room_state = [] + + history_visibility_evt = FrozenEvent({ + "event_id": "$history_vis", + "type": "m.room.history_visibility", + "sender": "@resident_user_0:test.com", + "state_key": "", + "room_id": TEST_ROOM_ID, + "content": {"history_visibility": "joined"}, + }) + room_state.append(history_visibility_evt) + test_store.add_event(history_visibility_evt) + + for i in range(0, 100000): + user = "@resident_user_%i:test.com" % (i, ) + evt = FrozenEvent({ + "event_id": "$res_event_%i" % (i, ), + "type": "m.room.member", + "state_key": user, + "sender": user, + "room_id": TEST_ROOM_ID, + "content": { + "membership": "join", + "extra": "zzz," + }, + }) + room_state.append(evt) + test_store.add_event(evt) + + events_to_filter = [] + for i in range(0, 10): + user = "@user%i:%s" % ( + i, "test_server" if i == 5 else "other_server" + ) + evt = FrozenEvent({ + "event_id": "$evt%i" % (i, ), + "type": "m.room.member", + "state_key": user, + "sender": user, + "room_id": TEST_ROOM_ID, + "content": { + "membership": "join", + "extra": "zzz", + }, + }) + events_to_filter.append(evt) + room_state.append(evt) + + test_store.add_event(evt) + test_store.set_state_ids_for_event(evt, { + (e.type, e.state_key): e.event_id for e in room_state + }) + + pr = cProfile.Profile() + pr.enable() + + logger.info("Starting filtering") + start = time.time() + filtered = yield filter_events_for_server( + test_store, "test_server", events_to_filter, + ) + logger.info("Filtering took %f seconds", time.time() - start) + + pr.disable() + with open("filter_events_for_server.profile", "w+") as f: + ps = pstats.Stats(pr, stream=f).sort_stats('cumulative') + ps.print_stats() + + # the result should be 5 redacted events, and 5 unredacted events. + for i in range(0, 5): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertNotIn("extra", filtered[i].content) + + for i in range(5, 10): + self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) + self.assertEqual(filtered[i].content["extra"], "zzz") + + test_large_room.skip = "Disabled by default because it's slow" + + +class _TestStore(object): + """Implements a few methods of the DataStore, so that we can test + filter_events_for_server + + """ + def __init__(self): + # data for get_events: a map from event_id to event + self.events = {} + + # data for get_state_ids_for_events mock: a map from event_id to + # a map from (type_state_key) -> event_id for the state at that + # event + self.state_ids_for_events = {} + + def add_event(self, event): + self.events[event.event_id] = event + + def set_state_ids_for_event(self, event, state): + self.state_ids_for_events[event.event_id] = state + + def get_state_ids_for_events(self, events, types): + res = {} + include_memberships = False + for (type, state_key) in types: + if type == "m.room.history_visibility": + continue + if type != "m.room.member" or state_key is not None: + raise RuntimeError( + "Unimplemented: get_state_ids with type (%s, %s)" % + (type, state_key), + ) + include_memberships = True + + if include_memberships: + for event_id in events: + res[event_id] = self.state_ids_for_events[event_id] + + else: + k = ("m.room.history_visibility", "") + for event_id in events: + hve = self.state_ids_for_events[event_id][k] + res[event_id] = {k: hve} + + return succeed(res) + + def get_events(self, events): + return succeed({ + event_id: self.events[event_id] for event_id in events + }) + + def are_users_erased(self, users): + return succeed({u: False for u in users}) -- cgit 1.4.1 From 94440ae9946936339453925c249f81ea463da4d0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Jul 2018 11:51:26 +0100 Subject: fix imports --- synapse/visibility.py | 1 + tests/test_visibility.py | 1 + 2 files changed, 2 insertions(+) (limited to 'tests/test_visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index 6209bc0cde..dc33f61d2b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -17,6 +17,7 @@ import logging import operator import six + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 1401d831d2..8436c29fe8 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -19,6 +19,7 @@ from twisted.internet.defer import succeed from synapse.events import FrozenEvent from synapse.visibility import filter_events_for_server + import tests.unittest from tests.utils import setup_test_homeserver -- cgit 1.4.1 From d897be6a98ff2073235dc1d356a517f085dbf388 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 16 Jul 2018 15:22:27 +0100 Subject: Fix visibility of events from erased users over federation --- synapse/visibility.py | 123 ++++++++++++++++++++++++++--------------------- tests/test_visibility.py | 63 ++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 56 deletions(-) (limited to 'tests/test_visibility.py') diff --git a/synapse/visibility.py b/synapse/visibility.py index dc33f61d2b..202daa6800 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -232,7 +232,57 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks def filter_events_for_server(store, server_name, events): - # First lets check to see if all the events have a history visibility + # Whatever else we do, we need to check for senders which have requested + # erasure of their data. + erased_senders = yield store.are_users_erased( + e.sender for e in events, + ) + + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + + if not state: + return event + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + if visibility in ["invited", "joined"]: + # We now loop through all state events looking for + # membership states for the requesting server to determine + # if the server is either in the room or has been invited + # into the room. + for ev in state.itervalues(): + if ev.type != EventTypes.Member: + continue + try: + domain = get_domain_from_id(ev.state_key) + except Exception: + continue + + if domain != server_name: + continue + + memtype = ev.membership + if memtype == Membership.JOIN: + return event + elif memtype == Membership.INVITE: + if visibility == "invited": + return event + else: + # server has no users in the room: redact + return prune_event(event) + + return event + + # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( @@ -251,15 +301,24 @@ def filter_events_for_server(store, server_name, events): # If we failed to find any history visibility events then the default # is "shared" visiblity. if not visibility_ids: - defer.returnValue(events) - - event_map = yield store.get_events(visibility_ids) - all_open = all( - e.content.get("history_visibility") in (None, "shared", "world_readable") - for e in event_map.itervalues() - ) + all_open = True + else: + event_map = yield store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) if all_open: + # all the history_visibility state affecting these events is open, so + # we don't need to filter by membership state. We *do* need to check + # for user erasure, though. + if erased_senders: + events = [ + redact_disallowed(e, None) + for e in events + ] + defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -314,54 +373,6 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in event_to_state_ids.iteritems() } - erased_senders = yield store.are_users_erased( - e.sender for e in events, - ) - - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. - if erased_senders[event.sender]: - logger.info( - "Sender of %s has been erased, redacting", - event.event_id, - ) - return prune_event(event) - - if not state: - return event - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - if visibility in ["invited", "joined"]: - # We now loop through all state events looking for - # membership states for the requesting server to determine - # if the server is either in the room or has been invited - # into the room. - for ev in state.itervalues(): - if ev.type != EventTypes.Member: - continue - try: - domain = get_domain_from_id(ev.state_key) - except Exception: - continue - - if domain != server_name: - continue - - memtype = ev.membership - if memtype == Membership.JOIN: - return event - elif memtype == Membership.INVITE: - if visibility == "invited": - return event - else: - # server has no users in the room: redact - return prune_event(event) - - return event - defer.returnValue([ redact_disallowed(e, event_to_state[e.event_id]) for e in events diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 8436c29fe8..0dc1a924d3 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -73,6 +73,51 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id) self.assertEqual(filtered[i].content["a"], "b") + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_erased_user(self): + # 4 message events, from erased and unerased users, with a membership + # change in the middle of them. + events_to_filter = [] + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_room_member("@joiner:remote_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@unerased:local_hs") + events_to_filter.append(evt) + + evt = yield self.inject_message("@erased:local_hs") + events_to_filter.append(evt) + + # the erasey user gets erased + self.hs.get_datastore().mark_user_erased("@erased:local_hs") + + # ... and the filtering happens. + filtered = yield filter_events_for_server( + self.store, "test_server", events_to_filter, + ) + + for i in range(0, len(events_to_filter)): + self.assertEqual( + events_to_filter[i].event_id, filtered[i].event_id, + "Unexpected event at result position %i" % (i, ) + ) + + for i in (0, 3): + self.assertEqual( + events_to_filter[i].content["body"], filtered[i].content["body"], + "Unexpected event content at result position %i" % (i,) + ) + + for i in (1, 4): + self.assertNotIn("body", filtered[i].content) + @defer.inlineCallbacks def inject_visibility(self, user_id, visibility): content = {"history_visibility": visibility} @@ -109,6 +154,24 @@ class FilterEventsForServerTestCase(tests.unittest.TestCase): yield self.hs.get_datastore().persist_event(event, context) defer.returnValue(event) + @defer.inlineCallbacks + def inject_message(self, user_id, content=None): + if content is None: + content = {"body": "testytest"} + builder = self.event_builder_factory.new({ + "type": "m.room.message", + "sender": user_id, + "room_id": TEST_ROOM_ID, + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.hs.get_datastore().persist_event(event, context) + defer.returnValue(event) + @defer.inlineCallbacks def test_large_room(self): # see what happens when we have a large room with hundreds of thousands -- cgit 1.4.1