summary refs log tree commit diff
path: root/synapse/storage/client_ips.py
diff options
context:
space:
mode:
authorErik Johnston <erikj@jki.re>2019-02-12 13:05:09 +0000
committerGitHub <noreply@github.com>2019-02-12 13:05:09 +0000
commitcf823389307bf8e78dec0b018c44abc8eec91ddf (patch)
treeece1464c64888a165d956ea69111dc45840adaa9 /synapse/storage/client_ips.py
parentMerge pull request #4626 from matrix-org/erikj/fixup_user_ips_dedupe (diff)
parentFix pep8 (diff)
downloadsynapse-cf823389307bf8e78dec0b018c44abc8eec91ddf.tar.xz
Merge pull request #4627 from matrix-org/erikj/user_ips_analyze
Analyze user_ips before running deduplication
Diffstat (limited to 'synapse/storage/client_ips.py')
-rw-r--r--synapse/storage/client_ips.py24
1 files changed, 24 insertions, 0 deletions
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py

index a20cc8231f..9c21362226 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py
@@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): ) self.register_background_update_handler( + "user_ips_analyze", + self._analyze_user_ip, + ) + + self.register_background_update_handler( "user_ips_remove_dupes", self._remove_user_ip_dupes, ) @@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): defer.returnValue(1) @defer.inlineCallbacks + def _analyze_user_ip(self, progress, batch_size): + # Background update to analyze user_ips table before we run the + # deduplication background update. The table may not have been analyzed + # for ages due to the table locks. + # + # This will lock out the naive upserts to user_ips while it happens, but + # the analyze should be quick (28GB table takes ~10s) + def user_ips_analyze(txn): + txn.execute("ANALYZE user_ips") + + yield self.runInteraction( + "user_ips_analyze", user_ips_analyze + ) + + yield self._end_background_update("user_ips_analyze") + + defer.returnValue(1) + + @defer.inlineCallbacks def _remove_user_ip_dupes(self, progress, batch_size): # This works function works by scanning the user_ips table in batches # based on `last_seen`. For each row in a batch it searches the rest of