summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorAndrew Morgan <1342360+anoadragon453@users.noreply.github.com>2020-08-03 13:54:24 -0700
committerGitHub <noreply@github.com>2020-08-03 21:54:24 +0100
commit5d92a1428ceb4077801afc1785a5472e89fd9df3 (patch)
tree318ff4720ba136d23267d8fa533fd2e9322fd37f /synapse
parentImplement handling of HTTP HEAD requests. (#7999) (diff)
downloadsynapse-5d92a1428ceb4077801afc1785a5472e89fd9df3.tar.xz
Prevent join->join membership transitions changing member count (#7977)
`StatsHandler` handles updates to the `current_state_delta_stream`, and updates room stats such as the amount of state events, joined users, etc.

However, it counts every new join membership as a new user entering a room (and that user being in another room), whereas it's possible for a user's membership status to go from join -> join, for instance when they change their per-room profile information.

This PR adds a check for join->join membership transitions, and bails out early, as none of the further checks are necessary at that point.

Due to this bug, membership stats in many rooms have ended up being wildly larger than their true values. I am not sure if we also want to include a migration step which recalculates these statistics (possibly using the `_populate_stats_process_rooms` bg update).

Bug introduced in the initial implementation https://github.com/matrix-org/synapse/pull/4338.
Diffstat (limited to 'synapse')
-rw-r--r--synapse/handlers/stats.py2
-rw-r--r--synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql32
-rw-r--r--synapse/storage/data_stores/main/stats.py34
3 files changed, 62 insertions, 6 deletions
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 149f861239..249ffe2a55 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -232,7 +232,7 @@ class StatsHandler:
 
                 if membership == prev_membership:
                     pass  # noop
-                if membership == Membership.JOIN:
+                elif membership == Membership.JOIN:
                     room_stats_delta["joined_members"] += 1
                 elif membership == Membership.INVITE:
                     room_stats_delta["invited_members"] += 1
diff --git a/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql
new file mode 100644
index 0000000000..cade5dcca8
--- /dev/null
+++ b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql
@@ -0,0 +1,32 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Recalculate the stats for all rooms after the fix to joined_members erroneously
+-- incrementing on per-room profile changes.
+
+-- Note that the populate_stats_process_rooms background update is already set to
+-- run if you're upgrading from Synapse <1.0.0.
+
+-- Additionally, if you've upgraded to v1.18.0 (which doesn't include this fix),
+-- this bg job runs, and then update to v1.19.0, you'd end up with only half of
+-- your rooms having room stats recalculated after this fix was in place.
+
+-- So we've switched the old `populate_stats_process_rooms` background job to a
+-- no-op, and then kick off a bg job with a new name, but with the same
+-- functionality as the old one. This effectively restarts the background job
+-- from the beginning, without running it twice in a row, supporting both
+-- upgrade usecases.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+    ('populate_stats_process_rooms_2', '{}');
diff --git a/synapse/storage/data_stores/main/stats.py b/synapse/storage/data_stores/main/stats.py
index 922400a7c3..40db8f594e 100644
--- a/synapse/storage/data_stores/main/stats.py
+++ b/synapse/storage/data_stores/main/stats.py
@@ -73,6 +73,9 @@ class StatsStore(StateDeltasStore):
             "populate_stats_process_rooms", self._populate_stats_process_rooms
         )
         self.db.updates.register_background_update_handler(
+            "populate_stats_process_rooms_2", self._populate_stats_process_rooms_2
+        )
+        self.db.updates.register_background_update_handler(
             "populate_stats_process_users", self._populate_stats_process_users
         )
         # we no longer need to perform clean-up, but we will give ourselves
@@ -141,10 +144,29 @@ class StatsStore(StateDeltasStore):
 
     async def _populate_stats_process_rooms(self, progress, batch_size):
         """
+        This was a background update which regenerated statistics for rooms.
+
+        It has been replaced by StatsStore._populate_stats_process_rooms_2. This background
+        job has been scheduled to run as part of Synapse v1.0.0, and again now. To ensure
+        someone upgrading from <v1.0.0, this background task has been turned into a no-op
+        so that the potentially expensive task is not run twice.
+
+        Further context: https://github.com/matrix-org/synapse/pull/7977
+        """
+        await self.db.updates._end_background_update("populate_stats_process_rooms")
+        return 1
+
+    async def _populate_stats_process_rooms_2(self, progress, batch_size):
+        """
         This is a background update which regenerates statistics for rooms.
+
+        It replaces StatsStore._populate_stats_process_rooms. See its docstring for the
+        reasoning.
         """
         if not self.stats_enabled:
-            await self.db.updates._end_background_update("populate_stats_process_rooms")
+            await self.db.updates._end_background_update(
+                "populate_stats_process_rooms_2"
+            )
             return 1
 
         last_room_id = progress.get("last_room_id", "")
@@ -160,12 +182,14 @@ class StatsStore(StateDeltasStore):
             return [r for r, in txn]
 
         rooms_to_work_on = await self.db.runInteraction(
-            "populate_stats_rooms_get_batch", _get_next_batch
+            "populate_stats_rooms_2_get_batch", _get_next_batch
         )
 
         # No more rooms -- complete the transaction.
         if not rooms_to_work_on:
-            await self.db.updates._end_background_update("populate_stats_process_rooms")
+            await self.db.updates._end_background_update(
+                "populate_stats_process_rooms_2"
+            )
             return 1
 
         for room_id in rooms_to_work_on:
@@ -173,9 +197,9 @@ class StatsStore(StateDeltasStore):
             progress["last_room_id"] = room_id
 
         await self.db.runInteraction(
-            "_populate_stats_process_rooms",
+            "_populate_stats_process_rooms_2",
             self.db.updates._background_update_progress_txn,
-            "populate_stats_process_rooms",
+            "populate_stats_process_rooms_2",
             progress,
         )