From 1efb826b54f4a8fdfa56c27a32da9c332fa87cc3 Mon Sep 17 00:00:00 2001 From: Devon Hudson Date: Fri, 21 Mar 2025 17:09:49 +0000 Subject: Delete unreferenced state groups in background (#18254) This PR fixes #18154 to avoid de-deltaing state groups which resulted in DB size temporarily increasing until the DB was `VACUUM`'ed. As a result, less state groups will get deleted now. It also attempts to improve performance by not duplicating work when processing state groups it has already processed in previous iterations. ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Erik Johnston --- synapse/storage/databases/state/deletion.py | 42 ++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'synapse/storage/databases/state/deletion.py') diff --git a/synapse/storage/databases/state/deletion.py b/synapse/storage/databases/state/deletion.py index d4b1c20a45..f77c46f6ae 100644 --- a/synapse/storage/databases/state/deletion.py +++ b/synapse/storage/databases/state/deletion.py @@ -321,19 +321,43 @@ class StateDeletionDataStore: async def mark_state_groups_as_pending_deletion( self, state_groups: Collection[int] ) -> None: - """Mark the given state groups as pending deletion""" + """Mark the given state groups as pending deletion. - now = self._clock.time_msec() + If any of the state groups are already pending deletion, then those records are + left as is. + """ - await self.db_pool.simple_upsert_many( - table="state_groups_pending_deletion", - key_names=("state_group",), - key_values=[(state_group,) for state_group in state_groups], - value_names=("insertion_ts",), - value_values=[(now,) for _ in state_groups], - desc="mark_state_groups_as_pending_deletion", + await self.db_pool.runInteraction( + "mark_state_groups_as_pending_deletion", + self._mark_state_groups_as_pending_deletion_txn, + state_groups, ) + def _mark_state_groups_as_pending_deletion_txn( + self, + txn: LoggingTransaction, + state_groups: Collection[int], + ) -> None: + sql = """ + INSERT INTO state_groups_pending_deletion (state_group, insertion_ts) + VALUES %s + ON CONFLICT (state_group) + DO NOTHING + """ + + now = self._clock.time_msec() + rows = [ + ( + state_group, + now, + ) + for state_group in state_groups + ] + if isinstance(txn.database_engine, PostgresEngine): + txn.execute_values(sql % ("?",), rows, fetch=False) + else: + txn.execute_batch(sql % ("(?, ?)",), rows) + async def mark_state_groups_as_used(self, state_groups: Collection[int]) -> None: """Mark the given state groups as now being referenced""" -- cgit 1.5.1