From 617bf40924bc8d627734eb2fb25a9f3980ede5d5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 25 Apr 2018 17:37:29 +0100 Subject: Generate user daily stats --- synapse/storage/__init__.py | 60 ++++++++++++++++++++-- synapse/storage/client_ips.py | 7 +++ synapse/storage/prepare_database.py | 2 +- .../schema/delta/49/add_user_daily_visits.sql | 25 +++++++++ .../delta/49/add_user_ips_last_seen_only_index.sql | 17 ++++++ 5 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/schema/delta/49/add_user_daily_visits.sql create mode 100644 synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 8cdfd50f90..6f2f947433 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import time +import logging + from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore @@ -55,10 +59,6 @@ from .engines import PostgresEngine from synapse.api.constants import PresenceState from synapse.util.caches.stream_change_cache import StreamChangeCache - -import logging - - logger = logging.getLogger(__name__) @@ -347,6 +347,58 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) + + def generate_user_daily_visits(self): + """ + Generates daily visit data for use in cohort/ retention analysis + """ + def _generate_user_daily_visits(txn): + logger.info("Calling _generate_user_daily_visits") + # determine timestamp of previous days + yesterday = datetime.datetime.now() - datetime.timedelta(days=1) + yesterday_start = datetime.datetime(yesterday.year, + yesterday.month, + yesterday.day, 0, 0, 0, 0) + yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000 + + # Check that this job has not already been completed + sql = """ + SELECT timestamp + FROM user_daily_visits + ORDER by timestamp desc limit 1 + """ + txn.execute(sql) + row = txn.fetchone() + + # Bail if the most recent time is yesterday + if row and row[0] == yesterday_start_time: + logger.info("Bailing from _generate_user_daily_visits, already completed") + return + logger.info("inserting into user_daily_visits") + # Not specificying an upper bound means that if the update is run at + # 10 mins past midnight and the user is active during a 30 min session + # that the user is still included in the previous days stats + # This does mean that if the update is run hours late, then it is possible + # to overstate the cohort, but this seems a reasonable trade off + # The alternative is to insert on every request - but prefer to avoid + # for performance reasons + sql = """ + SELECT user_id, user_agent, device_id + FROM user_ips + WHERE last_seen > ? + """ + txn.execute(sql, (yesterday_start_time,)) + + sql = """ + INSERT INTO user_daily_visits (user_id, user_agent, device_id, timestamp) + VALUES (?, ?, ?, ?) + """ + + for row in txn: + txn.execute(sql, (row + (yesterday_start_time,))) + + return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) + def get_users(self): """Function to reterive a list of users in users table. diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 7b44dae0fc..ba46907737 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["user_id", "last_seen"], ) + self.register_background_index_update( + "user_ips_last_seen_only_index", + index_name="user_ips_last_seen_only", + table="user_ips", + columns=["last_seen"], + ) + # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) self._batch_row_update = {} diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 04411a665f..c08e9cd65a 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 48 +SCHEMA_VERSION = 49 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/schema/delta/49/add_user_daily_visits.sql new file mode 100644 index 0000000000..11356a97fc --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_daily_visits.sql @@ -0,0 +1,25 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, + device_id TEXT, + user_agent TEXT NOT NULL, + timestamp BIGINT NOT NULL ); + +/* What indexes should I include? + * Reads are offline so should optimise for writes + * Need to check if already an entry so user,day + */ diff --git a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql new file mode 100644 index 0000000000..3a4ed59b5b --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('user_ips_last_seen_only_index', '{}'); -- cgit 1.4.1 From fb6015d0a6828d57c18d601ea36b1a8a4cebc0e2 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 25 Apr 2018 17:56:11 +0100 Subject: pep8 --- synapse/storage/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 6f2f947433..c22d38800c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -347,7 +347,6 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) - def generate_user_daily_visits(self): """ Generates daily visit data for use in cohort/ retention analysis @@ -390,14 +389,16 @@ class DataStore(RoomMemberStore, RoomStore, txn.execute(sql, (yesterday_start_time,)) sql = """ - INSERT INTO user_daily_visits (user_id, user_agent, device_id, timestamp) + INSERT INTO user_daily_visits (user_id. user_agent, + device_id, timestamp) VALUES (?, ?, ?, ?) """ for row in txn: txn.execute(sql, (row + (yesterday_start_time,))) - return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) + return self.runInteraction("generate_user_daily_visits", + _generate_user_daily_visits) def get_users(self): """Function to reterive a list of users in users table. -- cgit 1.4.1 From d0857702e8e22b8bd3dae7fc3cd3718da1e94f19 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 1 May 2018 12:12:57 +0100 Subject: add inidexes based on usage --- synapse/storage/schema/delta/49/add_user_daily_visits.sql | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/schema/delta/49/add_user_daily_visits.sql index 11356a97fc..3dd478196f 100644 --- a/synapse/storage/schema/delta/49/add_user_daily_visits.sql +++ b/synapse/storage/schema/delta/49/add_user_daily_visits.sql @@ -16,10 +16,6 @@ CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, - user_agent TEXT NOT NULL, timestamp BIGINT NOT NULL ); - -/* What indexes should I include? - * Reads are offline so should optimise for writes - * Need to check if already an entry so user,day - */ +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); -- cgit 1.4.1 From dd1a8324195f37d508d518db10ba37d9028afc86 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 1 May 2018 12:13:49 +0100 Subject: remove user agent from data model, will just join on user_ips --- synapse/storage/__init__.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index c22d38800c..b51cf70336 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. import datetime +from dateutil import tz import time import logging @@ -354,10 +355,9 @@ class DataStore(RoomMemberStore, RoomStore, def _generate_user_daily_visits(txn): logger.info("Calling _generate_user_daily_visits") # determine timestamp of previous days - yesterday = datetime.datetime.now() - datetime.timedelta(days=1) - yesterday_start = datetime.datetime(yesterday.year, - yesterday.month, - yesterday.day, 0, 0, 0, 0) + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + yesterday_start = datetime.datetime(yesterday.year, yesterday.month, + yesterday.day, tzinfo=tz.tzutc()) yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000 # Check that this job has not already been completed @@ -371,9 +371,8 @@ class DataStore(RoomMemberStore, RoomStore, # Bail if the most recent time is yesterday if row and row[0] == yesterday_start_time: - logger.info("Bailing from _generate_user_daily_visits, already completed") return - logger.info("inserting into user_daily_visits") + # Not specificying an upper bound means that if the update is run at # 10 mins past midnight and the user is active during a 30 min session # that the user is still included in the previous days stats @@ -382,20 +381,20 @@ class DataStore(RoomMemberStore, RoomStore, # The alternative is to insert on every request - but prefer to avoid # for performance reasons sql = """ - SELECT user_id, user_agent, device_id + SELECT user_id, device_id FROM user_ips WHERE last_seen > ? """ txn.execute(sql, (yesterday_start_time,)) + user_visits = txn.fetchall() sql = """ - INSERT INTO user_daily_visits (user_id. user_agent, - device_id, timestamp) - VALUES (?, ?, ?, ?) + INSERT INTO user_daily_visits (user_id, device_id, timestamp) + VALUES (?, ?, ?) """ - for row in txn: - txn.execute(sql, (row + (yesterday_start_time,))) + for visit in user_visits: + txn.execute(sql, (visit + (yesterday_start_time,))) return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From 6406b70aebd61ecddf61e45196d7cb13428b509e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 May 2018 15:30:11 +0100 Subject: Use stream rather depth ordering for push actions This simplifies things as it is, but will also allow us to change the way we traverse topologically without having to update the way push actions work. --- synapse/storage/event_push_actions.py | 53 ++++++++++---------------------- synapse/storage/receipts.py | 1 - tests/storage/test_event_push_actions.py | 4 +-- 3 files changed, 18 insertions(+), 40 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index c22762eb5c..f084a5f54b 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -18,8 +18,6 @@ from synapse.storage._base import SQLBaseStore, LoggingTransaction from twisted.internet import defer from synapse.util.async import sleep from synapse.util.caches.descriptors import cachedInlineCallbacks -from synapse.types import RoomStreamToken -from .stream import lower_bound import logging import simplejson as json @@ -99,7 +97,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): def _get_unread_counts_by_receipt_txn(self, txn, room_id, user_id, last_read_event_id): sql = ( - "SELECT stream_ordering, topological_ordering" + "SELECT stream_ordering" " FROM events" " WHERE room_id = ? AND event_id = ?" ) @@ -111,17 +109,12 @@ class EventPushActionsWorkerStore(SQLBaseStore): return {"notify_count": 0, "highlight_count": 0} stream_ordering = results[0][0] - topological_ordering = results[0][1] return self._get_unread_counts_by_pos_txn( - txn, room_id, user_id, topological_ordering, stream_ordering + txn, room_id, user_id, stream_ordering ) - def _get_unread_counts_by_pos_txn(self, txn, room_id, user_id, topological_ordering, - stream_ordering): - token = RoomStreamToken( - topological_ordering, stream_ordering - ) + def _get_unread_counts_by_pos_txn(self, txn, room_id, user_id, stream_ordering): # First get number of notifications. # We don't need to put a notif=1 clause as all rows always have @@ -132,10 +125,10 @@ class EventPushActionsWorkerStore(SQLBaseStore): " WHERE" " user_id = ?" " AND room_id = ?" - " AND %s" - ) % (lower_bound(token, self.database_engine, inclusive=False),) + " AND stream_ordering > ?" + ) - txn.execute(sql, (user_id, room_id)) + txn.execute(sql, (user_id, room_id, stream_ordering)) row = txn.fetchone() notify_count = row[0] if row else 0 @@ -155,10 +148,10 @@ class EventPushActionsWorkerStore(SQLBaseStore): " highlight = 1" " AND user_id = ?" " AND room_id = ?" - " AND %s" - ) % (lower_bound(token, self.database_engine, inclusive=False),) + " AND stream_ordering > ?" + ) - txn.execute(sql, (user_id, room_id)) + txn.execute(sql, (user_id, room_id, stream_ordering)) row = txn.fetchone() highlight_count = row[0] if row else 0 @@ -209,7 +202,6 @@ class EventPushActionsWorkerStore(SQLBaseStore): " ep.highlight " " FROM (" " SELECT room_id," - " MAX(topological_ordering) as topological_ordering," " MAX(stream_ordering) as stream_ordering" " FROM events" " INNER JOIN receipts_linearized USING (room_id, event_id)" @@ -219,13 +211,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): " event_push_actions AS ep" " WHERE" " ep.room_id = rl.room_id" - " AND (" - " ep.topological_ordering > rl.topological_ordering" - " OR (" - " ep.topological_ordering = rl.topological_ordering" - " AND ep.stream_ordering > rl.stream_ordering" - " )" - " )" + " AND ep.stream_ordering > rl.stream_ordering" " AND ep.user_id = ?" " AND ep.stream_ordering > ?" " AND ep.stream_ordering <= ?" @@ -318,7 +304,6 @@ class EventPushActionsWorkerStore(SQLBaseStore): " ep.highlight, e.received_ts" " FROM (" " SELECT room_id," - " MAX(topological_ordering) as topological_ordering," " MAX(stream_ordering) as stream_ordering" " FROM events" " INNER JOIN receipts_linearized USING (room_id, event_id)" @@ -329,13 +314,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): " INNER JOIN events AS e USING (room_id, event_id)" " WHERE" " ep.room_id = rl.room_id" - " AND (" - " ep.topological_ordering > rl.topological_ordering" - " OR (" - " ep.topological_ordering = rl.topological_ordering" - " AND ep.stream_ordering > rl.stream_ordering" - " )" - " )" + " AND ep.stream_ordering > rl.stream_ordering" " AND ep.user_id = ?" " AND ep.stream_ordering > ?" " AND ep.stream_ordering <= ?" @@ -762,10 +741,10 @@ class EventPushActionsStore(EventPushActionsWorkerStore): ) def _remove_old_push_actions_before_txn(self, txn, room_id, user_id, - topological_ordering, stream_ordering): + stream_ordering): """ Purges old push actions for a user and room before a given - topological_ordering. + stream_ordering. We however keep a months worth of highlighted notifications, so that users can still get a list of recent highlights. @@ -774,7 +753,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): txn: The transcation room_id: Room ID to delete from user_id: user ID to delete for - topological_ordering: The lowest topological ordering which will + stream_ordering: The lowest stream ordering which will not be deleted. """ txn.call_after( @@ -793,9 +772,9 @@ class EventPushActionsStore(EventPushActionsWorkerStore): txn.execute( "DELETE FROM event_push_actions " " WHERE user_id = ? AND room_id = ? AND " - " topological_ordering <= ?" + " stream_ordering <= ?" " AND ((stream_ordering < ? AND highlight = 1) or highlight = 0)", - (user_id, room_id, topological_ordering, self.stream_ordering_month_ago) + (user_id, room_id, stream_ordering, self.stream_ordering_month_ago) ) txn.execute(""" diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 63997ed449..2f95e7e82a 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -407,7 +407,6 @@ class ReceiptsStore(ReceiptsWorkerStore): txn, room_id=room_id, user_id=user_id, - topological_ordering=topological_ordering, stream_ordering=stream_ordering, ) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 9962ce8a5d..3cbf9a78b1 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -55,7 +55,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): def _assert_counts(noitf_count, highlight_count): counts = yield self.store.runInteraction( "", self.store._get_unread_counts_by_pos_txn, - room_id, user_id, 0, 0 + room_id, user_id, 0 ) self.assertEquals( counts, @@ -86,7 +86,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): def _mark_read(stream, depth): return self.store.runInteraction( "", self.store._remove_old_push_actions_before_txn, - room_id, user_id, depth, stream + room_id, user_id, stream ) yield _assert_counts(0, 0) -- cgit 1.4.1 From f077e97914c9b5c82c94786130d98af52516cde0 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 14 May 2018 13:50:58 +0100 Subject: instead of inserting user daily visit data at the end of the day, instead insert incrementally through the day --- synapse/app/homeserver.py | 19 +++++++++++++--- synapse/storage/__init__.py | 54 ++++++++++++--------------------------------- 2 files changed, 30 insertions(+), 43 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index f785a7a22b..bfc79a5e81 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -17,6 +17,7 @@ import gc import logging import os import sys +import datetime import synapse import synapse.config.logger @@ -475,9 +476,24 @@ def run(hs): " changes across releases." ) + # def recurring_user_daily_visit_stats(): + def generate_user_daily_visit_stats(): hs.get_datastore().generate_user_daily_visits() + # Since user daily stats are bucketed at midnight UTC, + # and user_ips.last_seen can be updated at any time, it is important to call + # generate_user_daily_visit_stats immediately prior to the day end. Assuming + # an hourly cadence, the simplist way is to allign all calls to the hour + # end + end_of_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) \ + + datetime.timedelta(hours=1) \ + - datetime.timedelta(seconds=10) # Ensure method fires before day transistion + + time_to_next_hour = end_of_hour - datetime.datetime.now() + clock.call_later(time_to_next_hour.seconds, + clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000)) + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) @@ -490,9 +506,6 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, phone_stats_home) - clock.looping_call(generate_user_daily_visit_stats, 10 * 60 * 1000) - clock.call_later(5 * 60, generate_user_daily_visit_stats) - if hs.config.daemonize and hs.config.print_pidfile: print (hs.config.pid_file) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index b51cf70336..6949876c13 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -353,48 +353,22 @@ class DataStore(RoomMemberStore, RoomStore, Generates daily visit data for use in cohort/ retention analysis """ def _generate_user_daily_visits(txn): - logger.info("Calling _generate_user_daily_visits") - # determine timestamp of previous days - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) - yesterday_start = datetime.datetime(yesterday.year, yesterday.month, - yesterday.day, tzinfo=tz.tzutc()) - yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000 - - # Check that this job has not already been completed - sql = """ - SELECT timestamp - FROM user_daily_visits - ORDER by timestamp desc limit 1 - """ - txn.execute(sql) - row = txn.fetchone() - - # Bail if the most recent time is yesterday - if row and row[0] == yesterday_start_time: - return - - # Not specificying an upper bound means that if the update is run at - # 10 mins past midnight and the user is active during a 30 min session - # that the user is still included in the previous days stats - # This does mean that if the update is run hours late, then it is possible - # to overstate the cohort, but this seems a reasonable trade off - # The alternative is to insert on every request - but prefer to avoid - # for performance reasons - sql = """ - SELECT user_id, device_id - FROM user_ips - WHERE last_seen > ? - """ - txn.execute(sql, (yesterday_start_time,)) - user_visits = txn.fetchall() + # determine timestamp of the day start + now = datetime.datetime.utcnow() + today_start = datetime.datetime(now.year, now.month, + now.day, tzinfo=tz.tzutc()) + today_start_time = int(time.mktime(today_start.timetuple())) * 1000 + logger.info(today_start_time) sql = """ - INSERT INTO user_daily_visits (user_id, device_id, timestamp) - VALUES (?, ?, ?) - """ - - for visit in user_visits: - txn.execute(sql, (visit + (yesterday_start_time,))) + INSERT INTO user_daily_visits (user_id, device_id, timestamp) + SELECT user_id, device_id, ? + FROM user_ips AS u + LEFT JOIN user_daily_visits USING (user_id, device_id) + WHERE last_seen > ? AND timestamp IS NULL + GROUP BY user_id, device_id; + """ + txn.execute(sql, (today_start_time, today_start_time)) return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From 47815edcfae73c5b938f8354853a09c0b80ef27e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 11 May 2018 00:17:11 +0100 Subject: ConsentResource to gather policy consent from users Hopefully there are enough comments and docs in this that it makes sense on its own. --- docs/privacy_policy_templates/README.md | 23 +++ docs/privacy_policy_templates/en/1.0.html | 17 ++ docs/privacy_policy_templates/en/success.html | 11 ++ synapse/app/homeserver.py | 9 + synapse/config/__init__.py | 6 + synapse/config/consent_config.py | 42 +++++ synapse/config/homeserver.py | 8 +- synapse/config/key.py | 10 + synapse/http/server.py | 76 +++++++- synapse/rest/consent/__init__.py | 0 synapse/rest/consent/consent_resource.py | 210 +++++++++++++++++++++ synapse/server.py | 3 + synapse/storage/registration.py | 18 ++ .../storage/schema/delta/48/add_user_consent.sql | 18 ++ 14 files changed, 446 insertions(+), 5 deletions(-) create mode 100644 docs/privacy_policy_templates/README.md create mode 100644 docs/privacy_policy_templates/en/1.0.html create mode 100644 docs/privacy_policy_templates/en/success.html create mode 100644 synapse/config/consent_config.py create mode 100644 synapse/rest/consent/__init__.py create mode 100644 synapse/rest/consent/consent_resource.py create mode 100644 synapse/storage/schema/delta/48/add_user_consent.sql (limited to 'synapse/storage') diff --git a/docs/privacy_policy_templates/README.md b/docs/privacy_policy_templates/README.md new file mode 100644 index 0000000000..8e91c516b3 --- /dev/null +++ b/docs/privacy_policy_templates/README.md @@ -0,0 +1,23 @@ +If enabling the 'consent' resource in synapse, you will need some templates +for the HTML to be served to the user. This directory contains very simple +examples of the sort of thing that can be done. + +You'll need to add this sort of thing to your homeserver.yaml: + +``` +form_secret: + +user_consent: + template_dir: docs/privacy_policy_templates + default_version: 1.0 +``` + +You should then be able to enable the `consent` resource under a `listener` +entry. For example: + +``` +listeners: + - port: 8008 + resources: + - names: [client, consent] +``` diff --git a/docs/privacy_policy_templates/en/1.0.html b/docs/privacy_policy_templates/en/1.0.html new file mode 100644 index 0000000000..ab8666f0c3 --- /dev/null +++ b/docs/privacy_policy_templates/en/1.0.html @@ -0,0 +1,17 @@ + + + + Matrix.org Privacy policy + + +

+ All your base are belong to us. +

+
+ + + + +
+ + diff --git a/docs/privacy_policy_templates/en/success.html b/docs/privacy_policy_templates/en/success.html new file mode 100644 index 0000000000..d55e90c94f --- /dev/null +++ b/docs/privacy_policy_templates/en/success.html @@ -0,0 +1,11 @@ + + + + Matrix.org Privacy policy + + +

+ Sweet. +

+ + diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index a0e465d644..730271628e 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -41,6 +41,7 @@ from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource +from synapse.rest.consent.consent_resource import ConsentResource from synapse.rest.key.v1.server_key_resource import LocalKey from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.media.v0.content_repository import ContentRepoResource @@ -182,6 +183,14 @@ class SynapseHomeServer(HomeServer): "/_matrix/client/versions": client_resource, }) + if name == "consent": + consent_resource = ConsentResource(self) + if compress: + consent_resource = gz_wrap(consent_resource) + resources.update({ + "/_matrix/consent": consent_resource, + }) + if name == "federation": resources.update({ FEDERATION_PREFIX: TransportLayerServer(self), diff --git a/synapse/config/__init__.py b/synapse/config/__init__.py index bfebb0f644..f2a5a41e92 100644 --- a/synapse/config/__init__.py +++ b/synapse/config/__init__.py @@ -12,3 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from ._base import ConfigError + +# export ConfigError if somebody does import * +# this is largely a fudge to stop PEP8 moaning about the import +__all__ = ["ConfigError"] diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py new file mode 100644 index 0000000000..675fce0911 --- /dev/null +++ b/synapse/config/consent_config.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import Config + +DEFAULT_CONFIG = """\ +# User Consent configuration +# +# uncomment and configure if enabling the 'consent' resource under 'listeners'. +# +# 'template_dir' gives the location of the templates for the HTML forms. +# This directory should contain one subdirectory per language (eg, 'en', 'fr'), +# and each language directory should contain the policy document (named as +# '.html') and a success page (success.html). +# +# 'default_version' gives the version of the policy document to serve up if +# there is no 'v' parameter. +# +# user_consent: +# template_dir: res/templates/privacy +# default_version: 1.0 +""" + + +class ConsentConfig(Config): + def read_config(self, config): + self.consent_config = config.get("user_consent") + + def default_config(self, **kwargs): + return DEFAULT_CONFIG diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index bf19cfee29..fb6bd3b421 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from .tls import TlsConfig from .server import ServerConfig from .logger import LoggingConfig @@ -37,6 +37,7 @@ from .push import PushConfig from .spam_checker import SpamCheckerConfig from .groups import GroupsConfig from .user_directory import UserDirectoryConfig +from .consent_config import ConsentConfig class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, @@ -45,12 +46,13 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig, AppServiceConfig, KeyConfig, SAML2Config, CasConfig, JWTConfig, PasswordConfig, EmailConfig, WorkerConfig, PasswordAuthProviderConfig, PushConfig, - SpamCheckerConfig, GroupsConfig, UserDirectoryConfig,): + SpamCheckerConfig, GroupsConfig, UserDirectoryConfig, + ConsentConfig): pass if __name__ == '__main__': import sys sys.stdout.write( - HomeServerConfig().generate_config(sys.argv[1], sys.argv[2])[0] + HomeServerConfig().generate_config(sys.argv[1], sys.argv[2], True)[0] ) diff --git a/synapse/config/key.py b/synapse/config/key.py index 4b8fc063d0..d1382ad9ac 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -59,14 +59,20 @@ class KeyConfig(Config): self.expire_access_token = config.get("expire_access_token", False) + # a secret which is used to calculate HMACs for form values, to stop + # falsification of values + self.form_secret = config.get("form_secret", None) + def default_config(self, config_dir_path, server_name, is_generating_file=False, **kwargs): base_key_name = os.path.join(config_dir_path, server_name) if is_generating_file: macaroon_secret_key = random_string_with_symbols(50) + form_secret = '"%s"' % random_string_with_symbols(50) else: macaroon_secret_key = None + form_secret = 'null' return """\ macaroon_secret_key: "%(macaroon_secret_key)s" @@ -74,6 +80,10 @@ class KeyConfig(Config): # Used to enable access token expiration. expire_access_token: False + # a secret which is used to calculate HMACs for form values, to stop + # falsification of values + form_secret: %(form_secret)s + ## Signing Keys ## # Path to the signing key to sign messages with diff --git a/synapse/http/server.py b/synapse/http/server.py index f29e36f490..a38209770d 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import cgi +from six.moves import http_client from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes @@ -44,6 +45,18 @@ import simplejson logger = logging.getLogger(__name__) +HTML_ERROR_TEMPLATE = """ + + + + Error {code} + + +

{msg}

+ + +""" + def wrap_json_request_handler(h): """Wraps a request handler method with exception handling. @@ -104,6 +117,65 @@ def wrap_json_request_handler(h): return wrap_request_handler_with_logging(wrapped_request_handler) +def wrap_html_request_handler(h): + """Wraps a request handler method with exception handling. + + Also adds logging as per wrap_request_handler_with_logging. + + The handler method must have a signature of "handle_foo(self, request)", + where "self" must have a "clock" attribute (and "request" must be a + SynapseRequest). + """ + def wrapped_request_handler(self, request): + d = defer.maybeDeferred(h, self, request) + d.addErrback(_return_html_error, request) + return d + + return wrap_request_handler_with_logging(wrapped_request_handler) + + +def _return_html_error(f, request): + """Sends an HTML error page corresponding to the given failure + + Args: + f (twisted.python.failure.Failure): + request (twisted.web.iweb.IRequest): + """ + if f.check(CodeMessageException): + cme = f.value + code = cme.code + msg = cme.msg + + if isinstance(cme, SynapseError): + logger.info( + "%s SynapseError: %s - %s", request, code, msg + ) + else: + logger.error( + "Failed handle request %r: %s", + request, + f.getTraceback().rstrip(), + ) + else: + code = http_client.INTERNAL_SERVER_ERROR + msg = "Internal server error" + + logger.error( + "Failed handle request %r: %s", + request, + f.getTraceback().rstrip(), + ) + + body = HTML_ERROR_TEMPLATE.format( + code=code, msg=cgi.escape(msg), + ).encode("utf-8") + request.setResponseCode(code) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%i" % (len(body),)) + request.write(body) + finish_request(request) + + def wrap_request_handler_with_logging(h): """Wraps a request handler to provide logging and metrics @@ -134,7 +206,7 @@ def wrap_request_handler_with_logging(h): servlet_name = self.__class__.__name__ with request.processing(servlet_name): with PreserveLoggingContext(request_context): - d = h(self, request) + d = defer.maybeDeferred(h, self, request) # record the arrival of the request *after* # dispatching to the handler, so that the handler diff --git a/synapse/rest/consent/__init__.py b/synapse/rest/consent/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py new file mode 100644 index 0000000000..d791302278 --- /dev/null +++ b/synapse/rest/consent/consent_resource.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from hashlib import sha256 +import hmac +import logging +from os import path +from six.moves import http_client + +import jinja2 +from jinja2 import TemplateNotFound +from twisted.internet import defer +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + +from synapse.api.errors import NotFoundError, SynapseError, StoreError +from synapse.config import ConfigError +from synapse.http.server import ( + finish_request, + wrap_html_request_handler, +) +from synapse.http.servlet import parse_string +from synapse.types import UserID + + +# language to use for the templates. TODO: figure this out from Accept-Language +TEMPLATE_LANGUAGE = "en" + +logger = logging.getLogger(__name__) + +# use hmac.compare_digest if we have it (python 2.7.7), else just use equality +if hasattr(hmac, "compare_digest"): + compare_digest = hmac.compare_digest +else: + def compare_digest(a, b): + return a == b + + +class ConsentResource(Resource): + """A twisted Resource to display a privacy policy and gather consent to it + + When accessed via GET, returns the privacy policy via a template. + + When accessed via POST, records the user's consent in the database and + displays a success page. + + The config should include a template_dir setting which contains templates + for the HTML. The directory should contain one subdirectory per language + (eg, 'en', 'fr'), and each language directory should contain the policy + document (named as '.html') and a success page (success.html). + + Both forms take a set of parameters from the browser. For the POST form, + these are normally sent as form parameters (but may be query-params); for + GET requests they must be query params. These are: + + u: the complete mxid, or the localpart of the user giving their + consent. Required for both GET (where it is used as an input to the + template) and for POST (where it is used to find the row in the db + to update). + + h: hmac_sha256(secret, u), where 'secret' is the privacy_secret in the + config file. If it doesn't match, the request is 403ed. + + v: the version of the privacy policy being agreed to. + + For GET: optional, and defaults to whatever was set in the config + file. Used to choose the version of the policy to pick from the + templates directory. + + For POST: required; gives the value to be recorded in the database + against the user. + """ + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): homeserver + """ + Resource.__init__(self) + + self.hs = hs + self.store = hs.get_datastore() + + # this is required by the request_handler wrapper + self.clock = hs.get_clock() + + consent_config = hs.config.consent_config + if consent_config is None: + raise ConfigError( + "Consent resource is enabled but user_consent section is " + "missing in config file.", + ) + + # daemonize changes the cwd to /, so make the path absolute now. + consent_template_directory = path.abspath( + consent_config["template_dir"], + ) + if not path.isdir(consent_template_directory): + raise ConfigError( + "Could not find template directory '%s'" % ( + consent_template_directory, + ), + ) + + loader = jinja2.FileSystemLoader(consent_template_directory) + self._jinja_env = jinja2.Environment(loader=loader) + + self._default_consent_verison = consent_config["default_version"] + + if hs.config.form_secret is None: + raise ConfigError( + "Consent resource is enabled but form_secret is not set in " + "config file. It should be set to an arbitrary secret string.", + ) + + self._hmac_secret = hs.config.form_secret.encode("utf-8") + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @wrap_html_request_handler + def _async_render_GET(self, request): + """ + Args: + request (twisted.web.http.Request): + """ + + version = parse_string(request, "v", + default=self._default_consent_verison) + username = parse_string(request, "u", required=True) + userhmac = parse_string(request, "h", required=True) + + self._check_hash(username, userhmac) + + try: + self._render_template( + request, "%s.html" % (version,), + user=username, userhmac=userhmac, version=version, + ) + except TemplateNotFound: + raise NotFoundError("Unknown policy version") + + def render_POST(self, request): + self._async_render_POST(request) + return NOT_DONE_YET + + @wrap_html_request_handler + @defer.inlineCallbacks + def _async_render_POST(self, request): + """ + Args: + request (twisted.web.http.Request): + """ + version = parse_string(request, "v", required=True) + username = parse_string(request, "u", required=True) + userhmac = parse_string(request, "h", required=True) + + self._check_hash(username, userhmac) + + if username.startswith('@'): + qualified_user_id = username + else: + qualified_user_id = UserID(username, self.hs.hostname).to_string() + + try: + yield self.store.user_set_consent_version(qualified_user_id, version) + except StoreError as e: + if e.code != 404: + raise + raise NotFoundError("Unknown user") + + try: + self._render_template(request, "success.html") + except TemplateNotFound: + raise NotFoundError("success.html not found") + + def _render_template(self, request, template_name, **template_args): + # get_template checks for ".." so we don't need to worry too much + # about path traversal here. + template_html = self._jinja_env.get_template( + path.join(TEMPLATE_LANGUAGE, template_name) + ) + html_bytes = template_html.render(**template_args).encode("utf8") + + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%i" % len(html_bytes)) + request.write(html_bytes) + finish_request(request) + + def _check_hash(self, userid, userhmac): + want_mac = hmac.new( + key=self._hmac_secret, + msg=userid, + digestmod=sha256, + ).hexdigest() + + if not compare_digest(want_mac, userhmac): + raise SynapseError(http_client.FORBIDDEN, "HMAC incorrect") diff --git a/synapse/server.py b/synapse/server.py index ebdea6b0c4..21cde5b6fc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -97,6 +97,9 @@ class HomeServer(object): which must be implemented by the subclass. This code may call any of the required "get" methods on the instance to obtain the sub-dependencies that one requires. + + Attributes: + config (synapse.config.homeserver.HomeserverConfig): """ DEPENDENCIES = [ diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index a50717db2d..6ffc397861 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -286,6 +286,24 @@ class RegistrationStore(RegistrationWorkerStore, "user_set_password_hash", user_set_password_hash_txn ) + def user_set_consent_version(self, user_id, consent_version): + """Updates the user table to record privacy policy consent + + Args: + user_id (str): full mxid of the user to update + consent_version (str): version of the policy the user has consented + to + + Raises: + StoreError(404) if user not found + """ + return self._simple_update_one( + table='users', + keyvalues={'name': user_id, }, + updatevalues={'consent_version': consent_version, }, + desc="user_set_consent_version" + ) + def user_delete_access_tokens(self, user_id, except_token_id=None, device_id=None): """ diff --git a/synapse/storage/schema/delta/48/add_user_consent.sql b/synapse/storage/schema/delta/48/add_user_consent.sql new file mode 100644 index 0000000000..5237491506 --- /dev/null +++ b/synapse/storage/schema/delta/48/add_user_consent.sql @@ -0,0 +1,18 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* record the version of the privacy policy the user has consented to + */ +ALTER TABLE users ADD COLUMN consent_version TEXT; -- cgit 1.4.1 From 37dbee6490dbb9cd66d887ed4d902ae8482fae2f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 15 May 2018 15:48:40 +0100 Subject: Use events_to_purge table rather than token --- synapse/storage/events.py | 49 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 05cde96afc..b4ae6664f0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1871,6 +1871,10 @@ class EventsStore(EventsWorkerStore): "CREATE INDEX events_to_purge_should_delete" " ON events_to_purge(should_delete)", ) + txn.execute( + "CREATE INDEX events_to_purge_id" + " ON events_to_purge(event_id)", + ) # First ensure that we're not about to delete all the forward extremeties txn.execute( @@ -1927,9 +1931,8 @@ class EventsStore(EventsWorkerStore): txn.execute( "SELECT DISTINCT e.event_id FROM events_to_purge AS e" " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id" - " INNER JOIN events AS e2 ON e2.event_id = ed.event_id" - " WHERE e2.topological_ordering >= ?", - (topological_ordering, ) + " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id" + " WHERE ep2.event_id IS NULL", ) new_backwards_extrems = txn.fetchall() @@ -1953,16 +1956,22 @@ class EventsStore(EventsWorkerStore): # Get all state groups that are only referenced by events that are # to be deleted. - txn.execute( - "SELECT state_group FROM event_to_state_groups" - " INNER JOIN events USING (event_id)" - " WHERE state_group IN (" - " SELECT DISTINCT state_group FROM events_to_purge" - " INNER JOIN event_to_state_groups USING (event_id)" - " )" - " GROUP BY state_group HAVING MAX(topological_ordering) < ?", - (topological_ordering, ) - ) + # This works by first getting state groups that we may want to delete, + # joining against event_to_state_groups to get events that use that + # state group, then left joining against events_to_purge again. Any + # state group where the left join produce *no nulls* are referenced + # only by events that are going to be purged. + txn.execute(""" + SELECT state_group FROM + ( + SELECT DISTINCT state_group FROM events_to_purge + INNER JOIN event_to_state_groups USING (event_id) + ) AS sp + INNER JOIN event_to_state_groups USING (state_group) + LEFT JOIN events_to_purge AS ep USING (event_id) + GROUP BY state_group + HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0 + """) state_rows = txn.fetchall() logger.info("[purge] found %i redundant state groups", len(state_rows)) @@ -2109,10 +2118,20 @@ class EventsStore(EventsWorkerStore): # # So, let's stick it at the end so that we don't block event # persistence. - logger.info("[purge] updating room_depth") + txn.execute(""" + SELECT COALESCE(MIN(depth), 0) + FROM event_backward_extremities AS eb + INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id + INNER JOIN events AS e ON e.event_id = eg.event_id + WHERE eb.room_id = ? + """, (room_id,)) + min_depth, = txn.fetchone() + + logger.info("[purge] updating room_depth to %d", min_depth) + txn.execute( "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", - (topological_ordering, room_id,) + (min_depth, room_id,) ) # finally, drop the temp table. this will commit the txn in sqlite, -- cgit 1.4.1 From 5f27ed75ad804ab9b5287f0deb8fd24b9a3a6232 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 15 May 2018 16:06:30 +0100 Subject: Make purge_history operate on tokens As we're soon going to change how topological_ordering works --- synapse/handlers/message.py | 12 ++++++------ synapse/rest/client/v1/admin.py | 17 ++++++++++------- synapse/storage/events.py | 17 +++++++++-------- 3 files changed, 25 insertions(+), 21 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b793fc4df7..8343b5839d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -86,14 +86,14 @@ class MessageHandler(BaseHandler): # map from purge id to PurgeStatus self._purges_by_id = {} - def start_purge_history(self, room_id, topological_ordering, + def start_purge_history(self, room_id, token, delete_local_events=False): """Start off a history purge on a room. Args: room_id (str): The room to purge from - topological_ordering (int): minimum topo ordering to preserve + token (str): topological token to delete events before delete_local_events (bool): True to delete local events as well as remote ones @@ -115,19 +115,19 @@ class MessageHandler(BaseHandler): self._purges_by_id[purge_id] = PurgeStatus() run_in_background( self._purge_history, - purge_id, room_id, topological_ordering, delete_local_events, + purge_id, room_id, token, delete_local_events, ) return purge_id @defer.inlineCallbacks - def _purge_history(self, purge_id, room_id, topological_ordering, + def _purge_history(self, purge_id, room_id, token, delete_local_events): """Carry out a history purge on a room. Args: purge_id (str): The id for this purge room_id (str): The room to purge from - topological_ordering (int): minimum topo ordering to preserve + token (str): topological token to delete events before delete_local_events (bool): True to delete local events as well as remote ones @@ -138,7 +138,7 @@ class MessageHandler(BaseHandler): try: with (yield self.pagination_lock.write(room_id)): yield self.store.purge_history( - room_id, topological_ordering, delete_local_events, + room_id, token, delete_local_events, ) logger.info("[purge] complete") self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index efd5c9873d..282ce6be42 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -151,10 +151,11 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if event.room_id != room_id: raise SynapseError(400, "Event is for wrong room.") - depth = event.depth + token = yield self.store.get_topological_token_for_event(event_id) + logger.info( - "[purge] purging up to depth %i (event_id %s)", - depth, event_id, + "[purge] purging up to token %s (event_id %s)", + token, event_id, ) elif 'purge_up_to_ts' in body: ts = body['purge_up_to_ts'] @@ -174,7 +175,9 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): ) ) if room_event_after_stream_ordering: - (_, depth, _) = room_event_after_stream_ordering + token = yield self.store.get_topological_token_for_event( + room_event_after_stream_ordering, + ) else: logger.warn( "[purge] purging events not possible: No event found " @@ -187,9 +190,9 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.NOT_FOUND, ) logger.info( - "[purge] purging up to depth %i (received_ts %i => " + "[purge] purging up to token %d (received_ts %i => " "stream_ordering %i)", - depth, ts, stream_ordering, + token, ts, stream_ordering, ) else: raise SynapseError( @@ -199,7 +202,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): ) purge_id = yield self.handlers.message_handler.start_purge_history( - room_id, depth, + room_id, token, delete_local_events=delete_local_events, ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b4ae6664f0..f65e18c1ee 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -33,7 +33,7 @@ from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from synapse.types import get_domain_from_id +from synapse.types import get_domain_from_id, RoomStreamToken import synapse.metrics # these are only included to make the type annotations work @@ -1803,15 +1803,14 @@ class EventsStore(EventsWorkerStore): return self.runInteraction("get_all_new_events", get_all_new_events_txn) def purge_history( - self, room_id, topological_ordering, delete_local_events, + self, room_id, token, delete_local_events, ): """Deletes room history before a certain point Args: room_id (str): - topological_ordering (int): - minimum topo ordering to preserve + token (str): A topological token to delete events before delete_local_events (bool): if True, we will delete local events as well as remote ones @@ -1821,12 +1820,12 @@ class EventsStore(EventsWorkerStore): return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering, + self._purge_history_txn, room_id, token, delete_local_events, ) def _purge_history_txn( - self, txn, room_id, topological_ordering, delete_local_events, + self, txn, room_id, token, delete_local_events, ): # Tables that should be pruned: # event_auth @@ -1856,6 +1855,8 @@ class EventsStore(EventsWorkerStore): # furthermore, we might already have the table from a previous (failed) # purge attempt, so let's drop the table first. + token = RoomStreamToken.parse(token) + txn.execute("DROP TABLE IF EXISTS events_to_purge") txn.execute( @@ -1888,7 +1889,7 @@ class EventsStore(EventsWorkerStore): rows = txn.fetchall() max_depth = max(row[0] for row in rows) - if max_depth <= topological_ordering: + if max_depth <= token.topological: # We need to ensure we don't delete all the events from the datanase # otherwise we wouldn't be able to send any events (due to not # having any backwards extremeties) @@ -1904,7 +1905,7 @@ class EventsStore(EventsWorkerStore): should_delete_expr += " AND event_id NOT LIKE ?" should_delete_params += ("%:" + self.hs.hostname, ) - should_delete_params += (room_id, topological_ordering) + should_delete_params += (room_id, token.topological) txn.execute( "INSERT INTO events_to_purge" -- cgit 1.4.1 From 05ac15ae824cc538b869e3cc8db7af2ac22e6754 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 15 May 2018 17:01:33 +0100 Subject: Limit query load of generate_user_daily_visits The aim is to keep track of when it was last called and only query from that point in time --- synapse/app/homeserver.py | 21 ++++++---------- synapse/storage/__init__.py | 60 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 27 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index bfc79a5e81..f25eaf9ffc 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -476,23 +476,16 @@ def run(hs): " changes across releases." ) - # def recurring_user_daily_visit_stats(): - def generate_user_daily_visit_stats(): hs.get_datastore().generate_user_daily_visits() - # Since user daily stats are bucketed at midnight UTC, - # and user_ips.last_seen can be updated at any time, it is important to call - # generate_user_daily_visit_stats immediately prior to the day end. Assuming - # an hourly cadence, the simplist way is to allign all calls to the hour - # end - end_of_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) \ - + datetime.timedelta(hours=1) \ - - datetime.timedelta(seconds=10) # Ensure method fires before day transistion - - time_to_next_hour = end_of_hour - datetime.datetime.now() - clock.call_later(time_to_next_hour.seconds, - clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000)) + def recurring_user_daily_visit_stats(): + clock.looping_call(generate_user_daily_visit_stats, 60 * 60 * 1000) + + # Rather than update on per session basis, batch up the requests. + # If you increase the loop period, the accuracy of user_daily_visits + # table will decrease + clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 6949876c13..52f176a03c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -214,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore, self._stream_order_on_start = self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() + # Used in _generate_user_daily_visits to keep track of progress + self._last_user_visit_update = self._get_start_of_day() + super(DataStore, self).__init__(db_conn, hs) def take_presence_startup_info(self): @@ -348,27 +351,58 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) + def _get_start_of_day(self): + """ + Returns millisecond unixtime for start of UTC day. + """ + now = datetime.datetime.utcnow() + today_start = datetime.datetime(now.year, now.month, + now.day, tzinfo=tz.tzutc()) + return int(time.mktime(today_start.timetuple())) * 1000 + def generate_user_daily_visits(self): """ Generates daily visit data for use in cohort/ retention analysis """ def _generate_user_daily_visits(txn): + logger.info("Calling _generate_user_daily_visits") + today_start = self._get_start_of_day() + a_day_in_milliseconds = 24 * 60 * 60 * 1000 - # determine timestamp of the day start - now = datetime.datetime.utcnow() - today_start = datetime.datetime(now.year, now.month, - now.day, tzinfo=tz.tzutc()) - today_start_time = int(time.mktime(today_start.timetuple())) * 1000 - logger.info(today_start_time) sql = """ INSERT INTO user_daily_visits (user_id, device_id, timestamp) - SELECT user_id, device_id, ? - FROM user_ips AS u - LEFT JOIN user_daily_visits USING (user_id, device_id) - WHERE last_seen > ? AND timestamp IS NULL - GROUP BY user_id, device_id; - """ - txn.execute(sql, (today_start_time, today_start_time)) + SELECT u.user_id, u.device_id, ? + FROM user_ips AS u + LEFT JOIN ( + SELECT user_id, device_id, timestamp FROM user_daily_visits + WHERE timestamp IS ? + ) udv + ON u.user_id = udv.user_id AND u.device_id=udv.device_id + WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL + """ + + # This means that the day has rolled over but there could still + # be entries from the previous day. There is an edge case + # where if the user logs in at 23:59 and overwrites their + # last_seen at 00:01 then they will not be counted in the + # previous day's stats - it is important that the query is run + # to minimise this case. + if today_start > self._last_user_visit_update: + yesterday_start = today_start - a_day_in_milliseconds + txn.execute(sql, (yesterday_start, yesterday_start, + self._last_user_visit_update, today_start)) + self._last_user_visit_update = today_start + + txn.execute(sql, (today_start, today_start, + self._last_user_visit_update, + today_start + a_day_in_milliseconds)) + # Update _last_user_visit_update to now. The reason to do this + # rather just clamping to the beginning of the day is to limit + # the size of the join - meaning that the query can be run more + # frequently + + now = datetime.datetime.utcnow() + self._last_user_visit_update = int(time.mktime(now.timetuple())) * 1000 return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From 31c2502ca8d74797f1eae553690830b8c132aed9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 16 May 2018 09:46:43 +0100 Subject: style and further contraining query --- synapse/storage/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 52f176a03c..6f324e075f 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -368,6 +368,7 @@ class DataStore(RoomMemberStore, RoomStore, logger.info("Calling _generate_user_daily_visits") today_start = self._get_start_of_day() a_day_in_milliseconds = 24 * 60 * 60 * 1000 + now = self.clock.time_msec() sql = """ INSERT INTO user_daily_visits (user_id, device_id, timestamp) @@ -386,23 +387,26 @@ class DataStore(RoomMemberStore, RoomStore, # where if the user logs in at 23:59 and overwrites their # last_seen at 00:01 then they will not be counted in the # previous day's stats - it is important that the query is run - # to minimise this case. + # often to minimise this case. if today_start > self._last_user_visit_update: yesterday_start = today_start - a_day_in_milliseconds - txn.execute(sql, (yesterday_start, yesterday_start, - self._last_user_visit_update, today_start)) + txn.execute(sql, ( + yesterday_start, yesterday_start, + self._last_user_visit_update, today_start + )) self._last_user_visit_update = today_start - txn.execute(sql, (today_start, today_start, - self._last_user_visit_update, - today_start + a_day_in_milliseconds)) + txn.execute(sql, ( + today_start, today_start, + self._last_user_visit_update, + now + )) # Update _last_user_visit_update to now. The reason to do this # rather just clamping to the beginning of the day is to limit # the size of the join - meaning that the query can be run more # frequently - now = datetime.datetime.utcnow() - self._last_user_visit_update = int(time.mktime(now.timetuple())) * 1000 + self._last_user_visit_update = now return self.runInteraction("generate_user_daily_visits", _generate_user_daily_visits) -- cgit 1.4.1 From be11a02c4f854452687f0f54492bc53b4827637d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 16 May 2018 10:45:40 +0100 Subject: remove empty line --- synapse/storage/__init__.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 6f324e075f..4551cf8774 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -405,7 +405,6 @@ class DataStore(RoomMemberStore, RoomStore, # rather just clamping to the beginning of the day is to limit # the size of the join - meaning that the query can be run more # frequently - self._last_user_visit_update = now return self.runInteraction("generate_user_daily_visits", -- cgit 1.4.1 From c945af8799671db8fdfc47acf6b286520fb08715 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 16 May 2018 10:52:06 +0100 Subject: Move and rename variable --- synapse/storage/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f65e18c1ee..22699cb689 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1825,8 +1825,10 @@ class EventsStore(EventsWorkerStore): ) def _purge_history_txn( - self, txn, room_id, token, delete_local_events, + self, txn, room_id, token_str, delete_local_events, ): + token = RoomStreamToken.parse(token_str) + # Tables that should be pruned: # event_auth # event_backward_extremities @@ -1855,8 +1857,6 @@ class EventsStore(EventsWorkerStore): # furthermore, we might already have the table from a previous (failed) # purge attempt, so let's drop the table first. - token = RoomStreamToken.parse(token) - txn.execute("DROP TABLE IF EXISTS events_to_purge") txn.execute( -- cgit 1.4.1 From 43e6e82c4d368d981f8a0015246d19c333511ace Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 16 May 2018 11:13:31 +0100 Subject: Comments --- synapse/storage/events.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 22699cb689..93ac80ee48 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1872,6 +1872,9 @@ class EventsStore(EventsWorkerStore): "CREATE INDEX events_to_purge_should_delete" " ON events_to_purge(should_delete)", ) + + # We do joins against events_to_purge for e.g. calculating state + # groups to purge, etc., so lets make an index. txn.execute( "CREATE INDEX events_to_purge_id" " ON events_to_purge(event_id)", @@ -2119,6 +2122,11 @@ class EventsStore(EventsWorkerStore): # # So, let's stick it at the end so that we don't block event # persistence. + # + # We do this by calculating the minimum depth of the backwards + # extremities. However, the events in event_backward_extremities + # are ones we don't have yet so we need to look at the events that + # point to it via event_edges table. txn.execute(""" SELECT COALESCE(MIN(depth), 0) FROM event_backward_extremities AS eb -- cgit 1.4.1 From 680530cc7f80533b9d63ce9bed04dc28ca87ca13 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 16 May 2018 11:47:26 +0100 Subject: Clarify comment --- synapse/storage/events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 93ac80ee48..5ebef98c4f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1931,7 +1931,8 @@ class EventsStore(EventsWorkerStore): logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding - # all events that point to events that are to be purged + # events to be purged that are pointed to by events we're not going to + # purge. txn.execute( "SELECT DISTINCT e.event_id FROM events_to_purge AS e" " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id" -- cgit 1.4.1 From 8b1c856d815c8ab939cd5a272665d80a22ee5d33 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 18 May 2018 09:15:35 +0100 Subject: Fix error in handling receipts Fixes an error which has been happening ever since #2158 (v0.21.0-rc1): > TypeError: argument of type 'ObservableDeferred' is not iterable fixes #3234 --- synapse/storage/receipts.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 2f95e7e82a..709c69a926 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -297,18 +297,22 @@ class ReceiptsWorkerStore(SQLBaseStore): if receipt_type != "m.read": return - # Returns an ObservableDeferred + # Returns either an ObservableDeferred or the raw result res = self.get_users_with_read_receipts_in_room.cache.get( room_id, None, update_metrics=False, ) - if res: - if isinstance(res, defer.Deferred) and res.called: + # first handle the Deferred case + if isinstance(res, defer.Deferred): + if res.called: res = res.result - if user_id in res: - # We'd only be adding to the set, so no point invalidating if the - # user is already there - return + else: + res = None + + if res and user_id in res: + # We'd only be adding to the set, so no point invalidating if the + # user is already there + return self.get_users_with_read_receipts_in_room.invalidate((room_id,)) -- cgit 1.4.1 From ef466b3a131c0a6c93027a8f8194538678f920c1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 18 May 2018 15:51:21 +0100 Subject: fix psql compatability bug --- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 4551cf8774..ac264b5d25 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -376,7 +376,7 @@ class DataStore(RoomMemberStore, RoomStore, FROM user_ips AS u LEFT JOIN ( SELECT user_id, device_id, timestamp FROM user_daily_visits - WHERE timestamp IS ? + WHERE timestamp = ? ) udv ON u.user_id = udv.user_id AND u.device_id=udv.device_id WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL -- cgit 1.4.1 From 644aac5f73dbacfe4cb47af09e804b9d1d2788a4 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 18 May 2018 17:10:35 +0100 Subject: Tighter filtering for user_daily_visits --- synapse/storage/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index ac264b5d25..979fa22438 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -379,7 +379,11 @@ class DataStore(RoomMemberStore, RoomStore, WHERE timestamp = ? ) udv ON u.user_id = udv.user_id AND u.device_id=udv.device_id - WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL + INNER JOIN users ON users.name=u.user_id + WHERE last_seen > ? AND last_seen <= ? + AND udv.timestamp IS NULL AND users.is_guest=0 + AND users.appservice_id IS NULL + GROUP BY u.user_id, u.device_id """ # This means that the day has rolled over but there could still -- cgit 1.4.1 From 9ea219c5143de009c8e411fbb01da1da5ce50829 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 17 May 2018 17:35:31 +0100 Subject: Send users a server notice about consent When a user first syncs, we will send them a server notice asking them to consent to the privacy policy if they have not already done so. --- synapse/config/consent_config.py | 8 ++ synapse/handlers/presence.py | 10 +- synapse/replication/tcp/resource.py | 2 + synapse/server.py | 5 + synapse/server.pyi | 4 + synapse/server_notices/consent_server_notices.py | 101 +++++++++++++++++++++ synapse/server_notices/server_notices_sender.py | 58 ++++++++++++ synapse/storage/registration.py | 46 ++++++++-- .../49/add_user_consent_server_notice_sent.sql | 20 ++++ tests/storage/test_registration.py | 11 ++- tests/utils.py | 1 + 11 files changed, 255 insertions(+), 11 deletions(-) create mode 100644 synapse/server_notices/consent_server_notices.py create mode 100644 synapse/server_notices/server_notices_sender.py create mode 100644 synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql (limited to 'synapse/storage') diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 45856b9e8a..a6fbc5a058 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -30,9 +30,17 @@ DEFAULT_CONFIG = """\ # the version to be served by the consent resource if there is no 'v' # parameter. # +# 'server_notice_content', if enabled, will send a user a "Server Notice" +# asking them to consent to the privacy policy. The 'server_notices' section +# must also be configured for this to work. +# # user_consent: # template_dir: res/templates/privacy # version: 1.0 +# server_notice_content: +# msgtype: m.text +# body: | +# Pls do consent kthx """ diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 91218e40e6..adc816f747 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -87,6 +87,11 @@ assert LAST_ACTIVE_GRANULARITY < IDLE_TIMER class PresenceHandler(object): def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer): + """ self.is_mine = hs.is_mine self.is_mine_id = hs.is_mine_id self.clock = hs.get_clock() @@ -94,8 +99,8 @@ class PresenceHandler(object): self.wheel_timer = WheelTimer() self.notifier = hs.get_notifier() self.federation = hs.get_federation_sender() - self.state = hs.get_state_handler() + self._server_notices_sender = hs.get_server_notices_sender() federation_registry = hs.get_federation_registry() @@ -428,6 +433,9 @@ class PresenceHandler(object): last_user_sync_ts=self.clock.time_msec(), )]) + # send any outstanding server notices to the user. + yield self._server_notices_sender.on_user_syncing(user_id) + @defer.inlineCallbacks def _end(): try: diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index a41af4fd6c..a603c520ea 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -69,6 +69,7 @@ class ReplicationStreamer(object): self.presence_handler = hs.get_presence_handler() self.clock = hs.get_clock() self.notifier = hs.get_notifier() + self._server_notices_sender = hs.get_server_notices_sender() # Current connections. self.connections = [] @@ -253,6 +254,7 @@ class ReplicationStreamer(object): yield self.store.insert_client_ip( user_id, access_token, ip, user_agent, device_id, last_seen, ) + yield self._server_notices_sender.on_user_ip(user_id) def send_sync_to_all_connections(self, data): """Sends a SYNC command to all clients. diff --git a/synapse/server.py b/synapse/server.py index 85f54cd047..e7c733f2d4 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -73,6 +73,7 @@ from synapse.rest.media.v1.media_repository import ( MediaRepositoryResource, ) from synapse.server_notices.server_notices_manager import ServerNoticesManager +from synapse.server_notices.server_notices_sender import ServerNoticesSender from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStore from synapse.streams.events import EventSources @@ -158,6 +159,7 @@ class HomeServer(object): 'room_member_handler', 'federation_registry', 'server_notices_manager', + 'server_notices_sender', ] def __init__(self, hostname, **kwargs): @@ -403,6 +405,9 @@ class HomeServer(object): def build_server_notices_manager(self): return ServerNoticesManager(self) + def build_server_notices_sender(self): + return ServerNoticesSender(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/server.pyi b/synapse/server.pyi index 6fbe15168d..ce28486233 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -10,6 +10,7 @@ import synapse.handlers.e2e_keys import synapse.handlers.set_password import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager +import synapse.server_notices.server_notices_sender import synapse.state import synapse.storage @@ -69,3 +70,6 @@ class HomeServer(object): def get_server_notices_manager(self) -> synapse.server_notices.server_notices_manager.ServerNoticesManager: pass + + def get_server_notices_sender(self) -> synapse.server_notices.server_notices_sender.ServerNoticesSender: + pass diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py new file mode 100644 index 0000000000..e9098aef27 --- /dev/null +++ b/synapse/server_notices/consent_server_notices.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.config import ConfigError + +logger = logging.getLogger(__name__) + + +class ConsentServerNotices(object): + """Keeps track of whether we need to send users server_notices about + privacy policy consent, and sends one if we do. + """ + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer): + """ + self._server_notices_manager = hs.get_server_notices_manager() + self._store = hs.get_datastore() + + self._current_consent_version = None + self._server_notice_content = None + self._users_in_progress = set() + + consent_config = hs.config.consent_config + if consent_config is not None: + self._current_consent_version = str(consent_config["version"]) + self._server_notice_content = consent_config.get( + "server_notice_content" + ) + + if self._server_notice_content is not None: + if not self._server_notices_manager.is_enabled(): + raise ConfigError( + "user_consent configuration requires server notices, but " + "server notices are not enabled.", + ) + if 'body' not in self._server_notice_content: + raise ConfigError( + "user_consent server_notice_consent must contain a 'body' " + "key.", + ) + + @defer.inlineCallbacks + def maybe_send_server_notice_to_user(self, user_id): + """Check if we need to send a notice to this user, and does so if so + + Args: + user_id (str): user to check + + Returns: + Deferred + """ + if self._server_notice_content is None: + # not enabled + return + + # make sure we don't send two messages to the same user at once + if user_id in self._users_in_progress: + return + self._users_in_progress.add(user_id) + try: + u = yield self._store.get_user_by_id(user_id) + + if u["consent_version"] == self._current_consent_version: + # user has already consented + return + + if u["consent_server_notice_sent"] == self._current_consent_version: + # we've already sent a notice to the user + return + + # need to send a message + try: + yield self._server_notices_manager.send_notice( + user_id, self._server_notice_content, + ) + yield self._store.user_set_consent_server_notice_sent( + user_id, self._current_consent_version, + ) + except SynapseError as e: + logger.error("Error sending server notice about user consent: %s", e) + finally: + self._users_in_progress.remove(user_id) diff --git a/synapse/server_notices/server_notices_sender.py b/synapse/server_notices/server_notices_sender.py new file mode 100644 index 0000000000..9eade85851 --- /dev/null +++ b/synapse/server_notices/server_notices_sender.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.server_notices.consent_server_notices import ConsentServerNotices + + +class ServerNoticesSender(object): + """A centralised place which sends server notices automatically when + Certain Events take place + """ + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer): + """ + # todo: it would be nice to make this more dynamic + self._consent_server_notices = ConsentServerNotices(hs) + + def on_user_syncing(self, user_id): + """Called when the user performs a sync operation. + + This is only called when /sync (or /events) is called on the synapse + master. In a deployment with synchrotrons, on_user_ip is called + + Args: + user_id (str): mxid of user who synced + + Returns: + Deferred + """ + return self._consent_server_notices.maybe_send_server_notice_to_user( + user_id, + ) + + def on_user_ip(self, user_id): + """Called when a worker process saw a client request. + + Args: + user_id (str): mxid + + Returns: + Deferred + """ + return self._consent_server_notices.maybe_send_server_notice_to_user( + user_id, + ) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 8d1a01f1ee..a530e29f43 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -33,7 +33,10 @@ class RegistrationWorkerStore(SQLBaseStore): keyvalues={ "name": user_id, }, - retcols=["name", "password_hash", "is_guest"], + retcols=[ + "name", "password_hash", "is_guest", + "consent_version", "consent_server_notice_sent", + ], allow_none=True, desc="get_user_by_id", ) @@ -297,12 +300,41 @@ class RegistrationStore(RegistrationWorkerStore, Raises: StoreError(404) if user not found """ - return self._simple_update_one( - table='users', - keyvalues={'name': user_id, }, - updatevalues={'consent_version': consent_version, }, - desc="user_set_consent_version" - ) + def f(txn): + self._simple_update_one_txn( + txn, + table='users', + keyvalues={'name': user_id, }, + updatevalues={'consent_version': consent_version, }, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_by_id, (user_id,) + ) + return self.runInteraction("user_set_consent_version", f) + + def user_set_consent_server_notice_sent(self, user_id, consent_version): + """Updates the user table to record that we have sent the user a server + notice about privacy policy consent + + Args: + user_id (str): full mxid of the user to update + consent_version (str): version of the policy we have notified the + user about + + Raises: + StoreError(404) if user not found + """ + def f(txn): + self._simple_update_one_txn( + txn, + table='users', + keyvalues={'name': user_id, }, + updatevalues={'consent_server_notice_sent': consent_version, }, + ) + self._invalidate_cache_and_stream( + txn, self.get_user_by_id, (user_id,) + ) + return self.runInteraction("user_set_consent_server_notice_sent", f) def user_delete_access_tokens(self, user_id, except_token_id=None, device_id=None): diff --git a/synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql b/synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql new file mode 100644 index 0000000000..14dcf18d73 --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_consent_server_notice_sent.sql @@ -0,0 +1,20 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* record whether we have sent a server notice about consenting to the + * privacy policy. Specifically records the version of the policy we sent + * a message about. + */ +ALTER TABLE users ADD COLUMN consent_server_notice_sent TEXT; diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 7c7b164ee6..cc637dda1c 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -42,9 +42,14 @@ class RegistrationStoreTestCase(unittest.TestCase): yield self.store.register(self.user_id, self.tokens[0], self.pwhash) self.assertEquals( - # TODO(paul): Surely this field should be 'user_id', not 'name' - # Additionally surely it shouldn't come in a 1-element list - {"name": self.user_id, "password_hash": self.pwhash, "is_guest": 0}, + { + # TODO(paul): Surely this field should be 'user_id', not 'name' + "name": self.user_id, + "password_hash": self.pwhash, + "is_guest": 0, + "consent_version": None, + "consent_server_notice_sent": None, + }, (yield self.store.get_user_by_id(self.user_id)) ) diff --git a/tests/utils.py b/tests/utils.py index c2beb5d9f7..63d8e9c640 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -63,6 +63,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.federation_rc_concurrent = 10 config.filter_timeline_limit = 5000 config.user_directory_search_all_users = False + config.consent_config = None # disable user directory updates, because they get done in the # background, which upsets the test runner. -- cgit 1.4.1