summary refs log tree commit diff
diff options
context:
space:
mode:
authorreivilibre <38398653+reivilibre@users.noreply.github.com>2021-07-19 16:11:34 +0100
committerGitHub <noreply@github.com>2021-07-19 16:11:34 +0100
commit4e340412c020f685cb402a735b983f6e332e206b (patch)
treee23f86f1e79d9af7293adb9fcf16a6429904f6c5
parent[pyupgrade] `synapse/` (#10348) (diff)
downloadsynapse-4e340412c020f685cb402a735b983f6e332e206b.tar.xz
Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric (#10332)
Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
-rw-r--r--changelog.d/10332.feature1
-rw-r--r--synapse/app/phone_stats_home.py4
-rw-r--r--synapse/storage/databases/main/metrics.py129
-rw-r--r--tests/app/test_phone_stats_home.py242
-rw-r--r--tests/rest/client/v1/utils.py30
-rw-r--r--tests/unittest.py15
6 files changed, 416 insertions, 5 deletions
diff --git a/changelog.d/10332.feature b/changelog.d/10332.feature
new file mode 100644
index 0000000000..091947ff22
--- /dev/null
+++ b/changelog.d/10332.feature
@@ -0,0 +1 @@
+Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric.
diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py
index 8f86cecb76..7904c246df 100644
--- a/synapse/app/phone_stats_home.py
+++ b/synapse/app/phone_stats_home.py
@@ -107,6 +107,10 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
     for name, count in r30_results.items():
         stats["r30_users_" + name] = count
 
+    r30v2_results = await hs.get_datastore().count_r30_users()
+    for name, count in r30v2_results.items():
+        stats["r30v2_users_" + name] = count
+
     stats["cache_factor"] = hs.config.caches.global_factor
     stats["event_cache_size"] = hs.config.caches.event_cache_size
 
diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py
index e3a544d9b2..dc0bbc56ac 100644
--- a/synapse/storage/databases/main/metrics.py
+++ b/synapse/storage/databases/main/metrics.py
@@ -316,6 +316,135 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
 
         return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
 
+    async def count_r30v2_users(self) -> Dict[str, int]:
+        """
+        Counts the number of 30 day retained users, defined as users that:
+         - Appear more than once in the past 60 days
+         - Have more than 30 days between the most and least recent appearances that
+           occurred in the past 60 days.
+
+        (This is the second version of this metric, hence R30'v2')
+
+        Returns:
+             A mapping from client type to the number of 30-day retained users for that client.
+
+             The dict keys are:
+              - "all" (a combined number of users across any and all clients)
+              - "android" (Element Android)
+              - "ios" (Element iOS)
+              - "electron" (Element Desktop)
+              - "web" (any web application -- it's not possible to distinguish Element Web here)
+        """
+
+        def _count_r30v2_users(txn):
+            thirty_days_in_secs = 86400 * 30
+            now = int(self._clock.time())
+            sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs
+            one_day_from_now_in_secs = now + 86400
+
+            # This is the 'per-platform' count.
+            sql = """
+                SELECT
+                    client_type,
+                    count(client_type)
+                FROM
+                    (
+                        SELECT
+                            user_id,
+                            CASE
+                                WHEN
+                                    LOWER(user_agent) LIKE '%%riot%%' OR
+                                    LOWER(user_agent) LIKE '%%element%%'
+                                    THEN CASE
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%electron%%'
+                                            THEN 'electron'
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%android%%'
+                                            THEN 'android'
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%ios%%'
+                                            THEN 'ios'
+                                        ELSE 'unknown'
+                                    END
+                                WHEN
+                                    LOWER(user_agent) LIKE '%%mozilla%%' OR
+                                    LOWER(user_agent) LIKE '%%gecko%%'
+                                    THEN 'web'
+                                ELSE 'unknown'
+                            END as client_type
+                        FROM
+                            user_daily_visits
+                        WHERE
+                            timestamp > ?
+                            AND
+                            timestamp < ?
+                        GROUP BY
+                            user_id,
+                            client_type
+                        HAVING
+                            max(timestamp) - min(timestamp) > ?
+                    ) AS temp
+                GROUP BY
+                    client_type
+                ;
+            """
+
+            # We initialise all the client types to zero, so we get an explicit
+            # zero if they don't appear in the query results
+            results = {"ios": 0, "android": 0, "web": 0, "electron": 0}
+            txn.execute(
+                sql,
+                (
+                    sixty_days_ago_in_secs * 1000,
+                    one_day_from_now_in_secs * 1000,
+                    thirty_days_in_secs * 1000,
+                ),
+            )
+
+            for row in txn:
+                if row[0] == "unknown":
+                    continue
+                results[row[0]] = row[1]
+
+            # This is the 'all users' count.
+            sql = """
+                SELECT COUNT(*) FROM (
+                    SELECT
+                        1
+                    FROM
+                        user_daily_visits
+                    WHERE
+                        timestamp > ?
+                        AND
+                        timestamp < ?
+                    GROUP BY
+                        user_id
+                    HAVING
+                        max(timestamp) - min(timestamp) > ?
+                ) AS r30_users
+            """
+
+            txn.execute(
+                sql,
+                (
+                    sixty_days_ago_in_secs * 1000,
+                    one_day_from_now_in_secs * 1000,
+                    thirty_days_in_secs * 1000,
+                ),
+            )
+            row = txn.fetchone()
+            if row is None:
+                results["all"] = 0
+            else:
+                results["all"] = row[0]
+
+            return results
+
+        return await self.db_pool.runInteraction(
+            "count_r30v2_users", _count_r30v2_users
+        )
+
     def _get_start_of_day(self):
         """
         Returns millisecond unixtime for start of UTC day.
diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py
index 2da6ba4dde..5527e278db 100644
--- a/tests/app/test_phone_stats_home.py
+++ b/tests/app/test_phone_stats_home.py
@@ -1,9 +1,11 @@
 import synapse
+from synapse.app.phone_stats_home import start_phone_stats_home
 from synapse.rest.client.v1 import login, room
 
 from tests import unittest
 from tests.unittest import HomeserverTestCase
 
+FIVE_MINUTES_IN_SECONDS = 300
 ONE_DAY_IN_SECONDS = 86400
 
 
@@ -151,3 +153,243 @@ class PhoneHomeTestCase(HomeserverTestCase):
         # *Now* the user appears in R30.
         r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
         self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+
+class PhoneHomeR30V2TestCase(HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def _advance_to(self, desired_time_secs: float):
+        now = self.hs.get_clock().time()
+        assert now < desired_time_secs
+        self.reactor.advance(desired_time_secs - now)
+
+    def make_homeserver(self, reactor, clock):
+        hs = super(PhoneHomeR30V2TestCase, self).make_homeserver(reactor, clock)
+
+        # We don't want our tests to actually report statistics, so check
+        # that it's not enabled
+        assert not hs.config.report_stats
+
+        # This starts the needed data collection that we rely on to calculate
+        # R30v2 metrics.
+        start_phone_stats_home(hs)
+        return hs
+
+    def test_r30v2_minimum_usage(self):
+        """
+        Tests the minimum amount of interaction necessary for the R30v2 metric
+        to consider a user 'retained'.
+        """
+
+        # Register a user, log it in, create a room and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
+        self.helper.send(room_id, "message", tok=access_token)
+        first_post_at = self.hs.get_clock().time()
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check the R30 results do not count that user.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance 31 days.
+        # (R30v2 includes users with **more** than 30 days between the two visits,
+        #  and user_daily_visits records the timestamp as the start of the day.)
+        self.reactor.advance(31 * ONE_DAY_IN_SECONDS)
+        # Also advance 5 minutes to let another user_daily_visits update occur
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # (Make sure the user isn't somehow counted by this point.)
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Send a message (this counts as activity)
+        self.helper.send(room_id, "message2", tok=access_token)
+
+        # We have to wait a few minutes for the user_daily_visits table to
+        # be updated by a background process.
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # *Now* the user is counted.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance to JUST under 60 days after the user's first post
+        self._advance_to(first_post_at + 60 * ONE_DAY_IN_SECONDS - 5)
+
+        # Check the user is still counted.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance into the next day. The user's first activity is now more than 60 days old.
+        self._advance_to(first_post_at + 60 * ONE_DAY_IN_SECONDS + 5)
+
+        # Check the user is now no longer counted in R30.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+    def test_r30v2_user_must_be_retained_for_at_least_a_month(self):
+        """
+        Tests that a newly-registered user must be retained for a whole month
+        before appearing in the R30v2 statistic, even if they post every day
+        during that time!
+        """
+
+        # set a custom user-agent to impersonate Element/Android.
+        headers = (
+            (
+                "User-Agent",
+                "Element/1.1 (Linux; U; Android 9; MatrixAndroidSDK_X 0.0.1)",
+            ),
+        )
+
+        # Register a user and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!", custom_headers=headers)
+        room_id = self.helper.create_room_as(
+            room_creator=user_id, tok=access_token, custom_headers=headers
+        )
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check the user does not contribute to R30 yet.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        for _ in range(30):
+            # This loop posts a message every day for 30 days
+            self.reactor.advance(ONE_DAY_IN_SECONDS - FIVE_MINUTES_IN_SECONDS)
+            self.helper.send(
+                room_id, "I'm still here", tok=access_token, custom_headers=headers
+            )
+
+            # give time for user_daily_visits to update
+            self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+            # Notice that the user *still* does not contribute to R30!
+            r30_results = self.get_success(store.count_r30v2_users())
+            self.assertEqual(
+                r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+            )
+
+        # advance yet another day with more activity
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+        self.helper.send(
+            room_id, "Still here!", tok=access_token, custom_headers=headers
+        )
+
+        # give time for user_daily_visits to update
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # *Now* the user appears in R30.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 1, "electron": 0, "ios": 0, "web": 0}
+        )
+
+    def test_r30v2_returning_dormant_users_not_counted(self):
+        """
+        Tests that dormant users (users inactive for a long time) do not
+        contribute to R30v2 when they return for just a single day.
+        This is a key difference between R30 and R30v2.
+        """
+
+        # set a custom user-agent to impersonate Element/iOS.
+        headers = (
+            (
+                "User-Agent",
+                "Riot/1.4 (iPhone; iOS 13; Scale/4.00)",
+            ),
+        )
+
+        # Register a user and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!", custom_headers=headers)
+        room_id = self.helper.create_room_as(
+            room_creator=user_id, tok=access_token, custom_headers=headers
+        )
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # the user goes inactive for 2 months
+        self.reactor.advance(60 * ONE_DAY_IN_SECONDS)
+
+        # the user returns for one day, perhaps just to check out a new feature
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check that the user does not contribute to R30v2, even though it's been
+        # more than 30 days since registration.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Check that this is a situation where old R30 differs:
+        # old R30 DOES count this as 'retained'.
+        r30_results = self.get_success(store.count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "ios": 1})
+
+        # Now we want to check that the user will still be able to appear in
+        # R30v2 as long as the user performs some other activity between
+        # 30 and 60 days later.
+        self.reactor.advance(32 * ONE_DAY_IN_SECONDS)
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # (give time for tables to update)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # Check the user now satisfies the requirements to appear in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "ios": 1, "android": 0, "electron": 0, "web": 0}
+        )
+
+        # Advance to 59.5 days after the user's first R30v2-eligible activity.
+        self.reactor.advance(27.5 * ONE_DAY_IN_SECONDS)
+
+        # Check the user still appears in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "ios": 1, "android": 0, "electron": 0, "web": 0}
+        )
+
+        # Advance to 60.5 days after the user's first R30v2-eligible activity.
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+
+        # Check the user no longer appears in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py
index 69798e95c3..fc2d35596e 100644
--- a/tests/rest/client/v1/utils.py
+++ b/tests/rest/client/v1/utils.py
@@ -19,7 +19,7 @@ import json
 import re
 import time
 import urllib.parse
-from typing import Any, Dict, Mapping, MutableMapping, Optional
+from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional, Tuple, Union
 from unittest.mock import patch
 
 import attr
@@ -53,6 +53,9 @@ class RestHelper:
         tok: str = None,
         expect_code: int = 200,
         extra_content: Optional[Dict] = None,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
     ) -> str:
         """
         Create a room.
@@ -87,6 +90,7 @@ class RestHelper:
             "POST",
             path,
             json.dumps(content).encode("utf8"),
+            custom_headers=custom_headers,
         )
 
         assert channel.result["code"] == b"%d" % expect_code, channel.result
@@ -175,14 +179,30 @@ class RestHelper:
 
         self.auth_user_id = temp_id
 
-    def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200):
+    def send(
+        self,
+        room_id,
+        body=None,
+        txn_id=None,
+        tok=None,
+        expect_code=200,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
+    ):
         if body is None:
             body = "body_text_here"
 
         content = {"msgtype": "m.text", "body": body}
 
         return self.send_event(
-            room_id, "m.room.message", content, txn_id, tok, expect_code
+            room_id,
+            "m.room.message",
+            content,
+            txn_id,
+            tok,
+            expect_code,
+            custom_headers=custom_headers,
         )
 
     def send_event(
@@ -193,6 +213,9 @@ class RestHelper:
         txn_id=None,
         tok=None,
         expect_code=200,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
     ):
         if txn_id is None:
             txn_id = "m%s" % (str(time.time()))
@@ -207,6 +230,7 @@ class RestHelper:
             "PUT",
             path,
             json.dumps(content or {}).encode("utf8"),
+            custom_headers=custom_headers,
         )
 
         assert (
diff --git a/tests/unittest.py b/tests/unittest.py
index c6d9064423..3eec9c4d5b 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -594,7 +594,15 @@ class HomeserverTestCase(TestCase):
         user_id = channel.json_body["user_id"]
         return user_id
 
-    def login(self, username, password, device_id=None):
+    def login(
+        self,
+        username,
+        password,
+        device_id=None,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
+    ):
         """
         Log in a user, and get an access token. Requires the Login API be
         registered.
@@ -605,7 +613,10 @@ class HomeserverTestCase(TestCase):
             body["device_id"] = device_id
 
         channel = self.make_request(
-            "POST", "/_matrix/client/r0/login", json.dumps(body).encode("utf8")
+            "POST",
+            "/_matrix/client/r0/login",
+            json.dumps(body).encode("utf8"),
+            custom_headers=custom_headers,
         )
         self.assertEqual(channel.code, 200, channel.result)