summary refs log tree commit diff
path: root/synapse/storage/databases/state/deletion.py
diff options
context:
space:
mode:
authorDevon Hudson <devon.dmytro@gmail.com>2025-03-21 17:09:49 +0000
committerGitHub <noreply@github.com>2025-03-21 17:09:49 +0000
commit1efb826b54f4a8fdfa56c27a32da9c332fa87cc3 (patch)
tree2c458e7e5d9dd483689359b30fc0cedc34d83083 /synapse/storage/databases/state/deletion.py
parentUpdate Poetry to 2.1.1, including updating the lock file version. (#18251) (diff)
downloadsynapse-1efb826b54f4a8fdfa56c27a32da9c332fa87cc3.tar.xz
Delete unreferenced state groups in background (#18254)
This PR fixes #18154 to avoid de-deltaing state groups which resulted in
DB size temporarily increasing until the DB was `VACUUM`'ed. As a
result, less state groups will get deleted now.
It also attempts to improve performance by not duplicating work when
processing state groups it has already processed in previous iterations.

### Pull Request Checklist

<!-- Please read
https://element-hq.github.io/synapse/latest/development/contributing_guide.html
before submitting your pull request -->

* [X] Pull request is based on the develop branch
* [X] Pull request includes a [changelog
file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog).
The entry should:
- Be a short description of your change which makes sense to users.
"Fixed a bug that prevented receiving messages from other servers."
instead of "Moved X method from `EventStore` to `EventWorkerStore`.".
  - Use markdown where necessary, mostly for `code blocks`.
  - End with either a period (.) or an exclamation mark (!).
  - Start with a capital letter.
- Feel free to credit yourself, by adding a sentence "Contributed by
@github_username." or "Contributed by [Your Name]." to the end of the
entry.
* [X] [Code
style](https://element-hq.github.io/synapse/latest/code_style.html) is
correct
(run the
[linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters))

---------

Co-authored-by: Erik Johnston <erikj@element.io>
Diffstat (limited to 'synapse/storage/databases/state/deletion.py')
-rw-r--r--synapse/storage/databases/state/deletion.py42
1 files changed, 33 insertions, 9 deletions
diff --git a/synapse/storage/databases/state/deletion.py b/synapse/storage/databases/state/deletion.py

index d4b1c20a45..f77c46f6ae 100644 --- a/synapse/storage/databases/state/deletion.py +++ b/synapse/storage/databases/state/deletion.py
@@ -321,19 +321,43 @@ class StateDeletionDataStore: async def mark_state_groups_as_pending_deletion( self, state_groups: Collection[int] ) -> None: - """Mark the given state groups as pending deletion""" + """Mark the given state groups as pending deletion. - now = self._clock.time_msec() + If any of the state groups are already pending deletion, then those records are + left as is. + """ - await self.db_pool.simple_upsert_many( - table="state_groups_pending_deletion", - key_names=("state_group",), - key_values=[(state_group,) for state_group in state_groups], - value_names=("insertion_ts",), - value_values=[(now,) for _ in state_groups], - desc="mark_state_groups_as_pending_deletion", + await self.db_pool.runInteraction( + "mark_state_groups_as_pending_deletion", + self._mark_state_groups_as_pending_deletion_txn, + state_groups, ) + def _mark_state_groups_as_pending_deletion_txn( + self, + txn: LoggingTransaction, + state_groups: Collection[int], + ) -> None: + sql = """ + INSERT INTO state_groups_pending_deletion (state_group, insertion_ts) + VALUES %s + ON CONFLICT (state_group) + DO NOTHING + """ + + now = self._clock.time_msec() + rows = [ + ( + state_group, + now, + ) + for state_group in state_groups + ] + if isinstance(txn.database_engine, PostgresEngine): + txn.execute_values(sql % ("?",), rows, fetch=False) + else: + txn.execute_batch(sql % ("(?, ?)",), rows) + async def mark_state_groups_as_used(self, state_groups: Collection[int]) -> None: """Mark the given state groups as now being referenced"""