diff options
author | Erik Johnston <erikj@jki.re> | 2019-02-12 13:05:09 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-12 13:05:09 +0000 |
commit | cf823389307bf8e78dec0b018c44abc8eec91ddf (patch) | |
tree | ece1464c64888a165d956ea69111dc45840adaa9 /synapse/storage/client_ips.py | |
parent | Merge pull request #4626 from matrix-org/erikj/fixup_user_ips_dedupe (diff) | |
parent | Fix pep8 (diff) | |
download | synapse-cf823389307bf8e78dec0b018c44abc8eec91ddf.tar.xz |
Merge pull request #4627 from matrix-org/erikj/user_ips_analyze
Analyze user_ips before running deduplication
Diffstat (limited to 'synapse/storage/client_ips.py')
-rw-r--r-- | synapse/storage/client_ips.py | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a20cc8231f..9c21362226 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + + self.register_background_update_handler( "user_ips_remove_dupes", self._remove_user_ip_dupes, ) @@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): defer.returnValue(1) @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. + # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + + @defer.inlineCallbacks def _remove_user_ip_dupes(self, progress, batch_size): # This function works by scanning the user_ips table in batches # based on `last_seen`. For each row in a batch it searches the rest of |