diff options
author | Erik Johnston <erikj@jki.re> | 2019-02-12 13:05:09 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-12 13:05:09 +0000 |
commit | cf823389307bf8e78dec0b018c44abc8eec91ddf (patch) | |
tree | ece1464c64888a165d956ea69111dc45840adaa9 | |
parent | Merge pull request #4626 from matrix-org/erikj/fixup_user_ips_dedupe (diff) | |
parent | Fix pep8 (diff) | |
download | synapse-cf823389307bf8e78dec0b018c44abc8eec91ddf.tar.xz |
Merge pull request #4627 from matrix-org/erikj/user_ips_analyze
Analyze user_ips before running deduplication

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | changelog.d/4627.misc | 1 |
| -rw-r--r-- | synapse/storage/client_ips.py | 24 |
| -rw-r--r-- | synapse/storage/schema/delta/53/user_ips_index.sql | 10 |

3 files changed, 32 insertions, 3 deletions

```diff
diff --git a/changelog.d/4627.misc b/changelog.d/4627.misc
new file mode 100644
index 0000000000..f1a57dcf9a
--- /dev/null
+++ b/changelog.d/4627.misc
@@ -0,0 +1 @@
+Improve 'user_ips' table deduplication background update
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index a20cc8231f..9c21362226 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         )
 
         self.register_background_update_handler(
+            "user_ips_analyze",
+            self._analyze_user_ip,
+        )
+
+        self.register_background_update_handler(
             "user_ips_remove_dupes",
             self._remove_user_ip_dupes,
         )
@@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         defer.returnValue(1)
 
     @defer.inlineCallbacks
+    def _analyze_user_ip(self, progress, batch_size):
+        # Background update to analyze user_ips table before we run the
+        # deduplication background update. The table may not have been analyzed
+        # for ages due to the table locks.
+        #
+        # This will lock out the naive upserts to user_ips while it happens, but
+        # the analyze should be quick (28GB table takes ~10s)
+        def user_ips_analyze(txn):
+            txn.execute("ANALYZE user_ips")
+
+        yield self.runInteraction(
+            "user_ips_analyze", user_ips_analyze
+        )
+
+        yield self._end_background_update("user_ips_analyze")
+
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
     def _remove_user_ip_dupes(self, progress, batch_size):
         # This works function works by scanning the user_ips table in batches
         # based on `last_seen`. For each row in a batch it searches the rest of
diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql
index 4ca346c111..b812c5794f 100644
--- a/synapse/storage/schema/delta/53/user_ips_index.sql
+++ b/synapse/storage/schema/delta/53/user_ips_index.sql
@@ -13,9 +13,13 @@
  * limitations under the License.
  */
 
--- delete duplicates
+ -- analyze user_ips, to help ensure the correct indices are used
 INSERT INTO background_updates (update_name, progress_json) VALUES
-  ('user_ips_remove_dupes', '{}');
+  ('user_ips_analyze', '{}');
+
+-- delete duplicates
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+  ('user_ips_remove_dupes', '{}', 'user_ips_analyze');
 
 -- add a new unique index to user_ips table
 INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
@@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
 
 -- drop the old original index
 INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
-  ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
\ No newline at end of file
+  ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
```