summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--synapse/federation/federation_server.py13
-rw-r--r--synapse/handlers/deactivate_account.py7
-rw-r--r--synapse/handlers/federation.py57
-rw-r--r--synapse/rest/client/v1/admin.py4
-rw-r--r--synapse/rest/client/v2_alpha/account.py13
-rw-r--r--synapse/storage/__init__.py2
-rw-r--r--synapse/storage/account_data.py21
-rw-r--r--synapse/storage/schema/delta/50/erasure_store.sql21
-rw-r--r--synapse/storage/user_erasure_store.py103
-rw-r--r--synapse/visibility.py163
-rw-r--r--tests/replication/slave/storage/test_account_data.py8
11 files changed, 270 insertions, 142 deletions
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 2d420a58a2..d4dd967c60 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -277,7 +277,7 @@ class FederationServer(FederationBase):
     @defer.inlineCallbacks
     @log_function
     def on_pdu_request(self, origin, event_id):
-        pdu = yield self._get_persisted_pdu(origin, event_id)
+        pdu = yield self.handler.get_persisted_pdu(origin, event_id)
 
         if pdu:
             defer.returnValue(
@@ -470,17 +470,6 @@ class FederationServer(FederationBase):
         ts_now_ms = self._clock.time_msec()
         return self.store.get_user_id_for_open_id_token(token, ts_now_ms)
 
-    @log_function
-    def _get_persisted_pdu(self, origin, event_id, do_auth=True):
-        """ Get a PDU from the database with given origin and id.
-
-        Returns:
-            Deferred: Results in a `Pdu`.
-        """
-        return self.handler.get_persisted_pdu(
-            origin, event_id, do_auth=do_auth
-        )
-
     def _transaction_from_pdus(self, pdu_list):
         """Returns a new Transaction containing the given PDUs suitable for
         transmission.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 8ec5ba2012..404b662469 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -42,7 +42,7 @@ class DeactivateAccountHandler(BaseHandler):
         reactor.callWhenRunning(self._start_user_parting)
 
     @defer.inlineCallbacks
-    def deactivate_account(self, user_id):
+    def deactivate_account(self, user_id, erase_data):
         """Deactivate a user's account
 
         Args:
@@ -92,6 +92,11 @@ class DeactivateAccountHandler(BaseHandler):
         # delete from user directory
         yield self.user_directory_handler.handle_user_deactivated(user_id)
 
+        # Mark the user as erased, if they asked for that
+        if erase_data:
+            logger.info("Marking %s as erased", user_id)
+            yield self.store.mark_user_erased(user_id)
+
         # Now start the process that goes through that list and
         # parts users from rooms (if it isn't already running)
         self._start_user_parting()
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index af94bf33bc..1ca56c2c97 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -103,8 +103,10 @@ class FederationHandler(BaseHandler):
         """
 
         # We reprocess pdus when we have seen them only as outliers
-        existing = yield self.get_persisted_pdu(
-            origin, pdu.event_id, do_auth=False
+        existing = yield self.store.get_event(
+            pdu.event_id,
+            allow_none=True,
+            allow_rejected=True,
         )
 
         # FIXME: Currently we fetch an event again when we already have it
@@ -493,7 +495,20 @@ class FederationHandler(BaseHandler):
             for e_id, key_to_eid in event_to_state_ids.iteritems()
         }
 
+        erased_senders = yield self.store.are_users_erased(
+            e.sender for e in events,
+        )
+
         def redact_disallowed(event, state):
+            # if the sender has been gdpr17ed, always return a redacted
+            # copy of the event.
+            if erased_senders[event.sender]:
+                logger.info(
+                    "Sender of %s has been erased, redacting",
+                    event.event_id,
+                )
+                return prune_event(event)
+
             if not state:
                 return event
 
@@ -1464,11 +1479,20 @@ class FederationHandler(BaseHandler):
 
     @defer.inlineCallbacks
     @log_function
-    def get_persisted_pdu(self, origin, event_id, do_auth=True):
-        """ Get a PDU from the database with given origin and id.
+    def get_persisted_pdu(self, origin, event_id):
+        """Get an event from the database for the given server.
+
+        Args:
+            origin [str]: hostname of server which is requesting the event; we
+               will check that the server is allowed to see it.
+            event_id [str]: id of the event being requested
 
         Returns:
-            Deferred: Results in a `Pdu`.
+            Deferred[EventBase|None]: None if we know nothing about the event;
+                otherwise the (possibly-redacted) event.
+
+        Raises:
+            AuthError if the server is not currently in the room
         """
         event = yield self.store.get_event(
             event_id,
@@ -1489,20 +1513,17 @@ class FederationHandler(BaseHandler):
                     )
                 )
 
-            if do_auth:
-                in_room = yield self.auth.check_host_in_room(
-                    event.room_id,
-                    origin
-                )
-                if not in_room:
-                    raise AuthError(403, "Host not in room.")
-
-                events = yield self._filter_events_for_server(
-                    origin, event.room_id, [event]
-                )
-
-                event = events[0]
+            in_room = yield self.auth.check_host_in_room(
+                event.room_id,
+                origin
+            )
+            if not in_room:
+                raise AuthError(403, "Host not in room.")
 
+            events = yield self._filter_events_for_server(
+                origin, event.room_id, [event]
+            )
+            event = events[0]
             defer.returnValue(event)
         else:
             defer.returnValue(None)
diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py
index b8665a45eb..ddaedb2a8c 100644
--- a/synapse/rest/client/v1/admin.py
+++ b/synapse/rest/client/v1/admin.py
@@ -254,7 +254,9 @@ class DeactivateAccountRestServlet(ClientV1RestServlet):
         if not is_admin:
             raise AuthError(403, "You are not a server admin")
 
-        yield self._deactivate_account_handler.deactivate_account(target_user_id)
+        yield self._deactivate_account_handler.deactivate_account(
+            target_user_id, False,
+        )
         defer.returnValue((200, {}))
 
 
diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py
index e1281cfbb6..80dbc3c92e 100644
--- a/synapse/rest/client/v2_alpha/account.py
+++ b/synapse/rest/client/v2_alpha/account.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
 # Copyright 2017 Vector Creations Ltd
+# Copyright 2018 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,6 +16,7 @@
 # limitations under the License.
 import logging
 
+from six.moves import http_client
 from twisted.internet import defer
 
 from synapse.api.auth import has_access_token
@@ -186,13 +188,20 @@ class DeactivateAccountRestServlet(RestServlet):
     @defer.inlineCallbacks
     def on_POST(self, request):
         body = parse_json_object_from_request(request)
+        erase = body.get("erase", False)
+        if not isinstance(erase, bool):
+            raise SynapseError(
+                http_client.BAD_REQUEST,
+                "Param 'erase' must be a boolean, if given",
+                Codes.BAD_JSON,
+            )
 
         requester = yield self.auth.get_user_by_req(request)
 
         # allow ASes to dectivate their own users
         if requester.app_service:
             yield self._deactivate_account_handler.deactivate_account(
-                requester.user.to_string()
+                requester.user.to_string(), erase,
             )
             defer.returnValue((200, {}))
 
@@ -200,7 +209,7 @@ class DeactivateAccountRestServlet(RestServlet):
             requester, body, self.hs.get_ip_from_request(request),
         )
         yield self._deactivate_account_handler.deactivate_account(
-            requester.user.to_string(),
+            requester.user.to_string(), erase,
         )
         defer.returnValue((200, {}))
 
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 979fa22438..e843b702b9 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -20,6 +20,7 @@ import time
 import logging
 
 from synapse.storage.devices import DeviceStore
+from synapse.storage.user_erasure_store import UserErasureStore
 from .appservice import (
     ApplicationServiceStore, ApplicationServiceTransactionStore
 )
@@ -88,6 +89,7 @@ class DataStore(RoomMemberStore, RoomStore,
                 DeviceInboxStore,
                 UserDirectoryStore,
                 GroupServerStore,
+                UserErasureStore,
                 ):
 
     def __init__(self, db_conn, hs):
diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py
index f83ff0454a..284ec3c970 100644
--- a/synapse/storage/account_data.py
+++ b/synapse/storage/account_data.py
@@ -20,7 +20,7 @@ from synapse.storage._base import SQLBaseStore
 from synapse.storage.util.id_generators import StreamIdGenerator
 
 from synapse.util.caches.stream_change_cache import StreamChangeCache
-from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
 
 import abc
 import simplejson as json
@@ -114,25 +114,6 @@ class AccountDataWorkerStore(SQLBaseStore):
         else:
             defer.returnValue(None)
 
-    @cachedList(cached_method_name="get_global_account_data_by_type_for_user",
-                num_args=2, list_name="user_ids", inlineCallbacks=True)
-    def get_global_account_data_by_type_for_users(self, data_type, user_ids):
-        rows = yield self._simple_select_many_batch(
-            table="account_data",
-            column="user_id",
-            iterable=user_ids,
-            keyvalues={
-                "account_data_type": data_type,
-            },
-            retcols=("user_id", "content",),
-            desc="get_global_account_data_by_type_for_users",
-        )
-
-        defer.returnValue({
-            row["user_id"]: json.loads(row["content"]) if row["content"] else None
-            for row in rows
-        })
-
     @cached(num_args=2)
     def get_account_data_for_room(self, user_id, room_id):
         """Get all the client account_data for a user for a room.
diff --git a/synapse/storage/schema/delta/50/erasure_store.sql b/synapse/storage/schema/delta/50/erasure_store.sql
new file mode 100644
index 0000000000..5d8641a9ab
--- /dev/null
+++ b/synapse/storage/schema/delta/50/erasure_store.sql
@@ -0,0 +1,21 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- a table of users who have requested that their details be erased
+CREATE TABLE erased_users (
+    user_id TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id);
diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/user_erasure_store.py
new file mode 100644
index 0000000000..47bfc01e84
--- /dev/null
+++ b/synapse/storage/user_erasure_store.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import operator
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore
+from synapse.util.caches.descriptors import cachedList, cached
+
+
+class UserErasureWorkerStore(SQLBaseStore):
+    @cached()
+    def is_user_erased(self, user_id):
+        """
+        Check if the given user id has requested erasure
+
+        Args:
+            user_id (str): full user id to check
+
+        Returns:
+            Deferred[bool]: True if the user has requested erasure
+        """
+        return self._simple_select_onecol(
+            table="erased_users",
+            keyvalues={"user_id": user_id},
+            retcol="1",
+            desc="is_user_erased",
+        ).addCallback(operator.truth)
+
+    @cachedList(
+        cached_method_name="is_user_erased",
+        list_name="user_ids",
+        inlineCallbacks=True,
+    )
+    def are_users_erased(self, user_ids):
+        """
+        Checks which users in a list have requested erasure
+
+        Args:
+            user_ids (iterable[str]): full user id to check
+
+        Returns:
+            Deferred[dict[str, bool]]:
+                for each user, whether the user has requested erasure.
+        """
+        # this serves the dual purpose of (a) making sure we can do len and
+        # iterate it multiple times, and (b) avoiding duplicates.
+        user_ids = tuple(set(user_ids))
+
+        def _get_erased_users(txn):
+            txn.execute(
+                "SELECT user_id FROM erased_users WHERE user_id IN (%s)" % (
+                    ",".join("?" * len(user_ids))
+                ),
+                user_ids,
+            )
+            return set(r[0] for r in txn)
+
+        erased_users = yield self.runInteraction(
+            "are_users_erased", _get_erased_users,
+        )
+        res = dict((u, u in erased_users) for u in user_ids)
+        defer.returnValue(res)
+
+
+class UserErasureStore(UserErasureWorkerStore):
+    def mark_user_erased(self, user_id):
+        """Indicate that user_id wishes their message history to be erased.
+
+        Args:
+            user_id (str): full user_id to be erased
+        """
+        def f(txn):
+            # first check if they are already in the list
+            txn.execute(
+                "SELECT 1 FROM erased_users WHERE user_id = ?",
+                (user_id, )
+            )
+            if txn.fetchone():
+                return
+
+            # they are not already there: do the insert.
+            txn.execute(
+                "INSERT INTO erased_users (user_id) VALUES (?)",
+                (user_id, )
+            )
+
+            self._invalidate_cache_and_stream(
+                txn, self.is_user_erased, (user_id,)
+            )
+        return self.runInteraction("mark_user_erased", f)
diff --git a/synapse/visibility.py b/synapse/visibility.py
index aaca2c584c..65d79cf0d0 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -12,15 +12,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import itertools
+import logging
+import operator
 
 from twisted.internet import defer
 
-from synapse.api.constants import Membership, EventTypes
-
-from synapse.util.logcontext import make_deferred_yieldable, preserve_fn
-
-import logging
-
+from synapse.api.constants import EventTypes, Membership
+from synapse.events.utils import prune_event
+from synapse.util.logcontext import (
+    make_deferred_yieldable, preserve_fn,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -43,21 +45,35 @@ MEMBERSHIP_PRIORITY = (
 
 
 @defer.inlineCallbacks
-def filter_events_for_clients(store, user_tuples, events, event_id_to_state,
-                              always_include_ids=frozenset()):
-    """ Returns dict of user_id -> list of events that user is allowed to
-    see.
+def filter_events_for_client(store, user_id, events, is_peeking=False,
+                             always_include_ids=frozenset()):
+    """
+    Check which events a user is allowed to see
 
     Args:
-        user_tuples (str, bool): (user id, is_peeking) for each user to be
-            checked. is_peeking should be true if:
-            * the user is not currently a member of the room, and:
-            * the user has not been a member of the room since the
-            given events
-        events ([synapse.events.EventBase]): list of events to filter
+        store (synapse.storage.DataStore): our datastore (can also be a worker
+            store)
+        user_id(str): user id to be checked
+        events(list[synapse.events.EventBase]): sequence of events to be checked
+        is_peeking(bool): should be True if:
+          * the user is not currently a member of the room, and:
+          * the user has not been a member of the room since the given
+            events
         always_include_ids (set(event_id)): set of event ids to specifically
             include (unless sender is ignored)
+
+    Returns:
+        Deferred[list[synapse.events.EventBase]]
     """
+    types = (
+        (EventTypes.RoomHistoryVisibility, ""),
+        (EventTypes.Member, user_id),
+    )
+    event_id_to_state = yield store.get_state_for_events(
+        frozenset(e.event_id for e in events),
+        types=types,
+    )
+
     forgotten = yield make_deferred_yieldable(defer.gatherResults([
         defer.maybeDeferred(
             preserve_fn(store.who_forgot_in_room),
@@ -71,31 +87,37 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state,
         row["event_id"] for rows in forgotten for row in rows
     )
 
-    ignore_dict_content = yield store.get_global_account_data_by_type_for_users(
-        "m.ignored_user_list", user_ids=[user_id for user_id, _ in user_tuples]
+    ignore_dict_content = yield store.get_global_account_data_by_type_for_user(
+        "m.ignored_user_list", user_id,
     )
 
     # FIXME: This will explode if people upload something incorrect.
-    ignore_dict = {
-        user_id: frozenset(
-            content.get("ignored_users", {}).keys() if content else []
-        )
-        for user_id, content in ignore_dict_content.items()
-    }
+    ignore_list = frozenset(
+        ignore_dict_content.get("ignored_users", {}).keys()
+        if ignore_dict_content else []
+    )
+
+    erased_senders = yield store.are_users_erased((e.sender for e in events))
 
-    def allowed(event, user_id, is_peeking, ignore_list):
+    def allowed(event):
         """
         Args:
             event (synapse.events.EventBase): event to check
-            user_id (str)
-            is_peeking (bool)
-            ignore_list (list): list of users to ignore
+
+        Returns:
+            None|EventBase:
+               None if the user cannot see this event at all
+
+               a redacted copy of the event if they can only see a redacted
+               version
+
+               the original event if they can see it as normal.
         """
         if not event.is_state() and event.sender in ignore_list:
-            return False
+            return None
 
         if event.event_id in always_include_ids:
-            return True
+            return event
 
         state = event_id_to_state[event.event_id]
 
@@ -109,10 +131,6 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state,
         if visibility not in VISIBILITY_PRIORITY:
             visibility = "shared"
 
-        # if it was world_readable, it's easy: everyone can read it
-        if visibility == "world_readable":
-            return True
-
         # Always allow history visibility events on boundaries. This is done
         # by setting the effective visibility to the least restrictive
         # of the old vs new.
@@ -146,7 +164,7 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state,
             if membership == "leave" and (
                 prev_membership == "join" or prev_membership == "invite"
             ):
-                return True
+                return event
 
             new_priority = MEMBERSHIP_PRIORITY.index(membership)
             old_priority = MEMBERSHIP_PRIORITY.index(prev_membership)
@@ -157,70 +175,55 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state,
         if membership is None:
             membership_event = state.get((EventTypes.Member, user_id), None)
             if membership_event:
+                # XXX why do we do this?
+                # https://github.com/matrix-org/synapse/issues/3350
                 if membership_event.event_id not in event_id_forgotten:
                     membership = membership_event.membership
 
         # if the user was a member of the room at the time of the event,
         # they can see it.
         if membership == Membership.JOIN:
-            return True
+            return event
+
+        # otherwise, it depends on the room visibility.
 
         if visibility == "joined":
             # we weren't a member at the time of the event, so we can't
             # see this event.
-            return False
+            return None
 
         elif visibility == "invited":
             # user can also see the event if they were *invited* at the time
             # of the event.
-            return membership == Membership.INVITE
-
-        else:
-            # visibility is shared: user can also see the event if they have
-            # become a member since the event
+            return (
+                event if membership == Membership.INVITE else None
+            )
+
+        elif visibility == "shared" and is_peeking:
+            # if the visibility is shared, users cannot see the event unless
+            # they have *subequently* joined the room (or were members at the
+            # time, of course)
             #
             # XXX: if the user has subsequently joined and then left again,
             # ideally we would share history up to the point they left. But
-            # we don't know when they left.
-            return not is_peeking
+            # we don't know when they left. We just treat it as though they
+            # never joined, and restrict access.
+            return None
 
-    defer.returnValue({
-        user_id: [
-            event
-            for event in events
-            if allowed(event, user_id, is_peeking, ignore_dict.get(user_id, []))
-        ]
-        for user_id, is_peeking in user_tuples
-    })
+        # the visibility is either shared or world_readable, and the user was
+        # not a member at the time. We allow it, provided the original sender
+        # has not requested their data to be erased, in which case, we return
+        # a redacted version.
+        if erased_senders[event.sender]:
+            return prune_event(event)
 
+        return event
 
-@defer.inlineCallbacks
-def filter_events_for_client(store, user_id, events, is_peeking=False,
-                             always_include_ids=frozenset()):
-    """
-    Check which events a user is allowed to see
+    # check each event: gives an iterable[None|EventBase]
+    filtered_events = itertools.imap(allowed, events)
 
-    Args:
-        user_id(str): user id to be checked
-        events([synapse.events.EventBase]): list of events to be checked
-        is_peeking(bool): should be True if:
-          * the user is not currently a member of the room, and:
-          * the user has not been a member of the room since the given
-            events
+    # remove the None entries
+    filtered_events = filter(operator.truth, filtered_events)
 
-    Returns:
-        [synapse.events.EventBase]
-    """
-    types = (
-        (EventTypes.RoomHistoryVisibility, ""),
-        (EventTypes.Member, user_id),
-    )
-    event_id_to_state = yield store.get_state_for_events(
-        frozenset(e.event_id for e in events),
-        types=types
-    )
-    res = yield filter_events_for_clients(
-        store, [(user_id, is_peeking)], events, event_id_to_state,
-        always_include_ids=always_include_ids,
-    )
-    defer.returnValue(res.get(user_id, []))
+    # we turn it into a list before returning it.
+    defer.returnValue(list(filtered_events))
diff --git a/tests/replication/slave/storage/test_account_data.py b/tests/replication/slave/storage/test_account_data.py
index da54d478ce..f47a42e45d 100644
--- a/tests/replication/slave/storage/test_account_data.py
+++ b/tests/replication/slave/storage/test_account_data.py
@@ -37,10 +37,6 @@ class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase):
             "get_global_account_data_by_type_for_user",
             [TYPE, USER_ID], {"a": 1}
         )
-        yield self.check(
-            "get_global_account_data_by_type_for_users",
-            [TYPE, [USER_ID]], {USER_ID: {"a": 1}}
-        )
 
         yield self.master_store.add_account_data_for_user(
             USER_ID, TYPE, {"a": 2}
@@ -50,7 +46,3 @@ class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase):
             "get_global_account_data_by_type_for_user",
             [TYPE, USER_ID], {"a": 2}
         )
-        yield self.check(
-            "get_global_account_data_by_type_for_users",
-            [TYPE, [USER_ID]], {USER_ID: {"a": 2}}
-        )