From 5d92a1428ceb4077801afc1785a5472e89fd9df3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 3 Aug 2020 13:54:24 -0700 Subject: Prevent join->join membership transitions changing member count (#7977) `StatsHandler` handles updates to the `current_state_delta_stream`, and updates room stats such as the amount of state events, joined users, etc. However, it counts every new join membership as a new user entering a room (and that user being in another room), whereas it's possible for a user's membership status to go from join -> join, for instance when they change their per-room profile information. This PR adds a check for join->join membership transitions, and bails out early, as none of the further checks are necessary at that point. Due to this bug, membership stats in many rooms have ended up being wildly larger than their true values. I am not sure if we also want to include a migration step which recalculates these statistics (possibly using the `_populate_stats_process_rooms` bg update). Bug introduced in the initial implementation https://github.com/matrix-org/synapse/pull/4338. --- .../main/schema/delta/58/12room_stats.sql | 32 ++++++++++++++++++++ synapse/storage/data_stores/main/stats.py | 34 ++++++++++++++++++---- 2 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql (limited to 'synapse/storage') diff --git a/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql new file mode 100644 index 0000000000..cade5dcca8 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/58/12room_stats.sql @@ -0,0 +1,32 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Recalculate the stats for all rooms after the fix to joined_members erroneously +-- incrementing on per-room profile changes. + +-- Note that the populate_stats_process_rooms background update is already set to +-- run if you're upgrading from Synapse <1.0.0. + +-- Additionally, if you've upgraded to v1.18.0 (which doesn't include this fix), +-- this bg job runs, and then update to v1.19.0, you'd end up with only half of +-- your rooms having room stats recalculated after this fix was in place. + +-- So we've switched the old `populate_stats_process_rooms` background job to a +-- no-op, and then kick off a bg job with a new name, but with the same +-- functionality as the old one. This effectively restarts the background job +-- from the beginning, without running it twice in a row, supporting both +-- upgrade usecases. +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_stats_process_rooms_2', '{}'); diff --git a/synapse/storage/data_stores/main/stats.py b/synapse/storage/data_stores/main/stats.py index 922400a7c3..40db8f594e 100644 --- a/synapse/storage/data_stores/main/stats.py +++ b/synapse/storage/data_stores/main/stats.py @@ -72,6 +72,9 @@ class StatsStore(StateDeltasStore): self.db.updates.register_background_update_handler( "populate_stats_process_rooms", self._populate_stats_process_rooms ) + self.db.updates.register_background_update_handler( + "populate_stats_process_rooms_2", self._populate_stats_process_rooms_2 + ) self.db.updates.register_background_update_handler( "populate_stats_process_users", self._populate_stats_process_users ) @@ -140,11 +143,30 @@ class StatsStore(StateDeltasStore): return len(users_to_work_on) async def _populate_stats_process_rooms(self, progress, batch_size): + """ + This was a background update which regenerated statistics for rooms. + + It has been replaced by StatsStore._populate_stats_process_rooms_2. This background + job has been scheduled to run as part of Synapse v1.0.0, and again now. To ensure + someone upgrading from