diff options
author | Neil Johnson <neil@matrix.org> | 2018-05-16 10:09:24 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-16 10:09:24 +0000 |
commit | dc8930ea9ec0b34ada0e275b31c7e61eb12137e1 (patch) | |
tree | 4372d63f3b40c27f3700e552b01f69cfa95364d6 | |
parent | Merge pull request #3213 from matrix-org/rav/consent_handler (diff) | |
parent | remove empty line (diff) | |
download | synapse-dc8930ea9ec0b34ada0e275b31c7e61eb12137e1.tar.xz |
Merge pull request #3163 from matrix-org/cohort_analytics
user visit data
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | synapse/app/homeserver.py | 8 | ||||
-rw-r--r-- | synapse/storage/__init__.py | 71 | ||||
-rw-r--r-- | synapse/storage/client_ips.py | 7 | ||||
-rw-r--r-- | synapse/storage/prepare_database.py | 2 | ||||
-rw-r--r-- | synapse/storage/schema/delta/49/add_user_daily_visits.sql | 21 | ||||
-rw-r--r-- | synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql | 17 |
7 files changed, 122 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore index 6b45e62157..7acfe56d26 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ env/ *.config .vscode/ +.ropeproject/ diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 48594724c1..bceb21a8d5 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -484,6 +484,14 @@ def run(hs): " changes across releases." ) + def generate_user_daily_visit_stats(): + hs.get_datastore().generate_user_daily_visits() + + # Rather than update on per session basis, batch up the requests. + # If you increase the loop period, the accuracy of user_daily_visits + # table will decrease + clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000) + if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 8cdfd50f90..4551cf8774 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -14,6 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +from dateutil import tz +import time +import logging + from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore @@ -55,10 +60,6 @@ from .engines import PostgresEngine from synapse.api.constants import PresenceState from synapse.util.caches.stream_change_cache import StreamChangeCache - -import logging - - logger = logging.getLogger(__name__) @@ -213,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore, self._stream_order_on_start = self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() + # Used in _generate_user_daily_visits to keep track of progress + self._last_user_visit_update = self._get_start_of_day() + super(DataStore, self).__init__(db_conn, hs) def take_presence_startup_info(self): @@ -347,6 +351,65 @@ class DataStore(RoomMemberStore, RoomStore, return self.runInteraction("count_r30_users", _count_r30_users) + def _get_start_of_day(self): + """ + Returns millisecond unixtime for start of UTC day. + """ + now = datetime.datetime.utcnow() + today_start = datetime.datetime(now.year, now.month, + now.day, tzinfo=tz.tzutc()) + return int(time.mktime(today_start.timetuple())) * 1000 + + def generate_user_daily_visits(self): + """ + Generates daily visit data for use in cohort/ retention analysis + """ + def _generate_user_daily_visits(txn): + logger.info("Calling _generate_user_daily_visits") + today_start = self._get_start_of_day() + a_day_in_milliseconds = 24 * 60 * 60 * 1000 + now = self.clock.time_msec() + + sql = """ + INSERT INTO user_daily_visits (user_id, device_id, timestamp) + SELECT u.user_id, u.device_id, ? + FROM user_ips AS u + LEFT JOIN ( + SELECT user_id, device_id, timestamp FROM user_daily_visits + WHERE timestamp IS ? + ) udv + ON u.user_id = udv.user_id AND u.device_id=udv.device_id + WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL + """ + + # This means that the day has rolled over but there could still + # be entries from the previous day. There is an edge case + # where if the user logs in at 23:59 and overwrites their + # last_seen at 00:01 then they will not be counted in the + # previous day's stats - it is important that the query is run + # often to minimise this case. + if today_start > self._last_user_visit_update: + yesterday_start = today_start - a_day_in_milliseconds + txn.execute(sql, ( + yesterday_start, yesterday_start, + self._last_user_visit_update, today_start + )) + self._last_user_visit_update = today_start + + txn.execute(sql, ( + today_start, today_start, + self._last_user_visit_update, + now + )) + # Update _last_user_visit_update to now. The reason to do this + # rather just clamping to the beginning of the day is to limit + # the size of the join - meaning that the query can be run more + # frequently + self._last_user_visit_update = now + + return self.runInteraction("generate_user_daily_visits", + _generate_user_daily_visits) + def get_users(self): """Function to reterive a list of users in users table. diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 7b44dae0fc..ba46907737 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): columns=["user_id", "last_seen"], ) + self.register_background_index_update( + "user_ips_last_seen_only_index", + index_name="user_ips_last_seen_only", + table="user_ips", + columns=["last_seen"], + ) + # (user_id, access_token, ip) -> (user_agent, device_id, last_seen) self._batch_row_update = {} diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 04411a665f..c08e9cd65a 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 48 +SCHEMA_VERSION = 49 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/schema/delta/49/add_user_daily_visits.sql new file mode 100644 index 0000000000..3dd478196f --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_daily_visits.sql @@ -0,0 +1,21 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, + device_id TEXT, + timestamp BIGINT NOT NULL ); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); diff --git a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql new file mode 100644 index 0000000000..3a4ed59b5b --- /dev/null +++ b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('user_ips_last_seen_only_index', '{}'); |