diff --git a/.gitignore b/.gitignore
index 6b45e62157..7acfe56d26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,4 @@ env/
*.config
.vscode/
+.ropeproject/
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 75f40fd5a4..f785a7a22b 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -475,6 +475,9 @@ def run(hs):
" changes across releases."
)
+ def generate_user_daily_visit_stats():
+ hs.get_datastore().generate_user_daily_visits()
+
if hs.config.report_stats:
logger.info("Scheduling stats reporting for 3 hour intervals")
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
@@ -487,6 +490,9 @@ def run(hs):
# be quite busy the first few minutes
clock.call_later(5 * 60, phone_stats_home)
+ clock.looping_call(generate_user_daily_visit_stats, 10 * 60 * 1000)
+ clock.call_later(5 * 60, generate_user_daily_visit_stats)
+
if hs.config.daemonize and hs.config.print_pidfile:
print (hs.config.pid_file)
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 8cdfd50f90..b51cf70336 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -14,6 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import datetime
+from dateutil import tz
+import time
+import logging
+
from synapse.storage.devices import DeviceStore
from .appservice import (
ApplicationServiceStore, ApplicationServiceTransactionStore
@@ -55,10 +60,6 @@ from .engines import PostgresEngine
from synapse.api.constants import PresenceState
from synapse.util.caches.stream_change_cache import StreamChangeCache
-
-import logging
-
-
logger = logging.getLogger(__name__)
@@ -347,6 +348,57 @@ class DataStore(RoomMemberStore, RoomStore,
return self.runInteraction("count_r30_users", _count_r30_users)
+ def generate_user_daily_visits(self):
+ """
+ Generates daily visit data for use in cohort/ retention analysis
+ """
+ def _generate_user_daily_visits(txn):
+ logger.info("Calling _generate_user_daily_visits")
+ # determine timestamp of previous days
+ yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)
+ yesterday_start = datetime.datetime(yesterday.year, yesterday.month,
+ yesterday.day, tzinfo=tz.tzutc())
+ yesterday_start_time = int(time.mktime(yesterday_start.timetuple())) * 1000
+
+ # Check that this job has not already been completed
+ sql = """
+ SELECT timestamp
+ FROM user_daily_visits
+ ORDER by timestamp desc limit 1
+ """
+ txn.execute(sql)
+ row = txn.fetchone()
+
+ # Bail if the most recent time is yesterday
+ if row and row[0] == yesterday_start_time:
+ return
+
+ # Not specificying an upper bound means that if the update is run at
+ # 10 mins past midnight and the user is active during a 30 min session
+ # that the user is still included in the previous days stats
+ # This does mean that if the update is run hours late, then it is possible
+ # to overstate the cohort, but this seems a reasonable trade off
+ # The alternative is to insert on every request - but prefer to avoid
+ # for performance reasons
+ sql = """
+ SELECT user_id, device_id
+ FROM user_ips
+ WHERE last_seen > ?
+ """
+ txn.execute(sql, (yesterday_start_time,))
+ user_visits = txn.fetchall()
+
+ sql = """
+ INSERT INTO user_daily_visits (user_id, device_id, timestamp)
+ VALUES (?, ?, ?)
+ """
+
+ for visit in user_visits:
+ txn.execute(sql, (visit + (yesterday_start_time,)))
+
+ return self.runInteraction("generate_user_daily_visits",
+ _generate_user_daily_visits)
+
def get_users(self):
"""Function to reterive a list of users in users table.
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 7b44dae0fc..ba46907737 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
columns=["user_id", "last_seen"],
)
+ # Register a background index update named
+ # "user_ips_last_seen_only_index" for an index on user_ips(last_seen)
+ # alone. DataStore.generate_user_daily_visits selects from user_ips
+ # filtering only on last_seen, which the (user_id, last_seen) index
+ # above does not lead with.
+ self.register_background_index_update(
+ "user_ips_last_seen_only_index",
+ index_name="user_ips_last_seen_only",
+ table="user_ips",
+ columns=["last_seen"],
+ )
+
# (user_id, access_token, ip) -> (user_agent, device_id, last_seen)
self._batch_row_update = {}
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 04411a665f..c08e9cd65a 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts.
-SCHEMA_VERSION = 48
+SCHEMA_VERSION = 49
dir_path = os.path.abspath(os.path.dirname(__file__))
diff --git a/synapse/storage/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/schema/delta/49/add_user_daily_visits.sql
new file mode 100644
index 0000000000..3dd478196f
--- /dev/null
+++ b/synapse/storage/schema/delta/49/add_user_daily_visits.sql
@@ -0,0 +1,21 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* Rows recording that a user/device was seen on a given day. They are
+ * written by DataStore.generate_user_daily_visits, which stores the start
+ * of the day as milliseconds since the epoch in "timestamp". device_id is
+ * nullable. Indexed by (user_id, timestamp) and by timestamp alone.
+ */
+CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL,
+ device_id TEXT,
+ timestamp BIGINT NOT NULL );
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp);
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp);
diff --git a/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql
new file mode 100644
index 0000000000..3a4ed59b5b
--- /dev/null
+++ b/synapse/storage/schema/delta/49/add_user_ips_last_seen_only_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Add a background_updates row, with empty progress, for the update named
+ * 'user_ips_last_seen_only_index'; an index update is registered under the
+ * same name in synapse/storage/client_ips.py.
+ */
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('user_ips_last_seen_only_index', '{}');
|