From 1efb826b54f4a8fdfa56c27a32da9c332fa87cc3 Mon Sep 17 00:00:00 2001 From: Devon Hudson Date: Fri, 21 Mar 2025 17:09:49 +0000 Subject: Delete unreferenced state groups in background (#18254) This PR fixes #18154 to avoid de-deltaing state groups which resulted in DB size temporarily increasing until the DB was `VACUUM`'ed. As a result, less state groups will get deleted now. It also attempts to improve performance by not duplicating work when processing state groups it has already processed in previous iterations. ### Pull Request Checklist * [X] Pull request is based on the develop branch * [X] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Erik Johnston --- synapse/_scripts/synapse_port_db.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'synapse/_scripts/synapse_port_db.py') diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 1bb9940180..438b2ff8a0 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -192,6 +192,11 @@ APPEND_ONLY_TABLES = [ IGNORED_TABLES = { + # Porting the auto generated sequence in this table is non-trivial. + # None of the entries in this list are mandatory for Synapse to keep working. + # If state group disk space is an issue after the port, the + # `mark_unreferenced_state_groups_for_deletion_bg_update` background task can be run again. + "state_groups_pending_deletion", # We don't port these tables, as they're a faff and we can regenerate # them anyway. "user_directory", @@ -217,6 +222,15 @@ IGNORED_TABLES = { } +# These background updates will not be applied upon creation of the postgres database. +IGNORED_BACKGROUND_UPDATES = { + # Reapplying this background update to the postgres database is unnecessary after + # already having waited for the SQLite database to complete all running background + # updates. + "mark_unreferenced_state_groups_for_deletion_bg_update", +} + + # Error returned by the run function. Used at the top-level part of the script to # handle errors and return codes. end_error: Optional[str] = None @@ -688,6 +702,20 @@ class Porter: # 0 means off. 1 means full. 2 means incremental. return autovacuum_setting != 0 + async def remove_ignored_background_updates_from_database(self) -> None: + def _remove_delete_unreferenced_state_groups_bg_updates( + txn: LoggingTransaction, + ) -> None: + txn.execute( + "DELETE FROM background_updates WHERE update_name = ANY(?)", + (list(IGNORED_BACKGROUND_UPDATES),), + ) + + await self.postgres_store.db_pool.runInteraction( + "remove_delete_unreferenced_state_groups_bg_updates", + _remove_delete_unreferenced_state_groups_bg_updates, + ) + async def run(self) -> None: """Ports the SQLite database to a PostgreSQL database. @@ -733,6 +761,8 @@ class Porter: self.hs_config.database.get_single_database() ) + await self.remove_ignored_background_updates_from_database() + await self.run_background_updates_on_postgres() self.progress.set_state("Creating port tables") -- cgit 1.5.1