summary refs log tree commit diff
path: root/synapse/storage/__init__.py
diff options
context:
space:
mode:
authorNeil Johnson <neil@matrix.org>2018-05-16 10:09:24 +0000
committerGitHub <noreply@github.com>2018-05-16 10:09:24 +0000
commitdc8930ea9ec0b34ada0e275b31c7e61eb12137e1 (patch)
tree4372d63f3b40c27f3700e552b01f69cfa95364d6 /synapse/storage/__init__.py
parentMerge pull request #3213 from matrix-org/rav/consent_handler (diff)
parentremove empty line (diff)
downloadsynapse-dc8930ea9ec0b34ada0e275b31c7e61eb12137e1.tar.xz
Merge pull request #3163 from matrix-org/cohort_analytics
user visit data
Diffstat (limited to 'synapse/storage/__init__.py')
-rw-r--r--synapse/storage/__init__.py71
1 files changed, 67 insertions, 4 deletions
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 8cdfd50f90..4551cf8774 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -14,6 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+from dateutil import tz
+import time
+import logging
+
 from synapse.storage.devices import DeviceStore
 from .appservice import (
     ApplicationServiceStore, ApplicationServiceTransactionStore
@@ -55,10 +60,6 @@ from .engines import PostgresEngine
 from synapse.api.constants import PresenceState
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-
-import logging
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -213,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore,
         self._stream_order_on_start = self.get_room_max_stream_ordering()
         self._min_stream_order_on_start = self.get_room_min_stream_ordering()
 
+        # Used in _generate_user_daily_visits to keep track of progress
+        self._last_user_visit_update = self._get_start_of_day()
+
         super(DataStore, self).__init__(db_conn, hs)
 
     def take_presence_startup_info(self):
@@ -347,6 +351,65 @@ class DataStore(RoomMemberStore, RoomStore,
 
         return self.runInteraction("count_r30_users", _count_r30_users)
 
+    def _get_start_of_day(self):
+        """
+        Returns millisecond unixtime for start of UTC day.
+        """
+        now = datetime.datetime.utcnow()
+        today_start = datetime.datetime(now.year, now.month,
+                                        now.day, tzinfo=tz.tzutc())
+        return int(time.mktime(today_start.timetuple())) * 1000
+
+    def generate_user_daily_visits(self):
+        """
+        Generates daily visit data for use in cohort/ retention analysis
+        """
+        def _generate_user_daily_visits(txn):
+            logger.info("Calling _generate_user_daily_visits")
+            today_start = self._get_start_of_day()
+            a_day_in_milliseconds = 24 * 60 * 60 * 1000
+            now = self.clock.time_msec()
+
+            sql = """
+                INSERT INTO user_daily_visits (user_id, device_id, timestamp)
+                    SELECT u.user_id, u.device_id, ?
+                    FROM user_ips AS u
+                    LEFT JOIN (
+                      SELECT user_id, device_id, timestamp FROM user_daily_visits
+                      WHERE timestamp IS ?
+                    ) udv
+                    ON u.user_id = udv.user_id AND u.device_id=udv.device_id
+                    WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL
+            """
+
+            # This means that the day has rolled over but there could still
+            # be entries from the previous day. There is an edge case
+            # where if the user logs in at 23:59 and overwrites their
+            # last_seen at 00:01 then they will not be counted in the
+            # previous day's stats - it is important that the query is run
+            # often to minimise this case.
+            if today_start > self._last_user_visit_update:
+                yesterday_start = today_start - a_day_in_milliseconds
+                txn.execute(sql, (
+                    yesterday_start, yesterday_start,
+                    self._last_user_visit_update, today_start
+                ))
+                self._last_user_visit_update = today_start
+
+            txn.execute(sql, (
+                today_start, today_start,
+                self._last_user_visit_update,
+                now
+            ))
+            # Update _last_user_visit_update to now. The reason to do this
+            # rather just clamping to the beginning of the day is to limit
+            # the size of the join - meaning that the query can be run more
+            # frequently
+            self._last_user_visit_update = now
+
+        return self.runInteraction("generate_user_daily_visits",
+                                   _generate_user_daily_visits)
+
     def get_users(self):
         """Function to reterive a list of users in users table.