diff --git a/changelog.d/4627.misc b/changelog.d/4627.misc
new file mode 100644
index 0000000000..f1a57dcf9a
--- /dev/null
+++ b/changelog.d/4627.misc
@@ -0,0 +1 @@
+Improve 'user_ips' table deduplication background update
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index a20cc8231f..9c21362226 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
)
self.register_background_update_handler(
+ "user_ips_analyze",
+ self._analyze_user_ip,
+ )
+
+ self.register_background_update_handler(
"user_ips_remove_dupes",
self._remove_user_ip_dupes,
)
@@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
defer.returnValue(1)
@defer.inlineCallbacks
+ def _analyze_user_ip(self, progress, batch_size):
+ # Background update to analyze user_ips table before we run the
+ # deduplication background update. The table may not have been analyzed
+ # for ages due to the table locks.
+ #
+ # This will lock out the naive upserts to user_ips while it happens, but
+ # the analyze should be quick (28GB table takes ~10s)
+ def user_ips_analyze(txn):
+ txn.execute("ANALYZE user_ips")
+
+ yield self.runInteraction(
+ "user_ips_analyze", user_ips_analyze
+ )
+
+ yield self._end_background_update("user_ips_analyze")
+
+ defer.returnValue(1)
+
+ @defer.inlineCallbacks
def _remove_user_ip_dupes(self, progress, batch_size):
# This works function works by scanning the user_ips table in batches
# based on `last_seen`. For each row in a batch it searches the rest of
diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql
index 4ca346c111..b812c5794f 100644
--- a/synapse/storage/schema/delta/53/user_ips_index.sql
+++ b/synapse/storage/schema/delta/53/user_ips_index.sql
@@ -13,9 +13,13 @@
* limitations under the License.
*/
--- delete duplicates
+ -- analyze user_ips, to help ensure the correct indices are used
INSERT INTO background_updates (update_name, progress_json) VALUES
- ('user_ips_remove_dupes', '{}');
+ ('user_ips_analyze', '{}');
+
+-- delete duplicates
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_remove_dupes', '{}', 'user_ips_analyze');
-- add a new unique index to user_ips table
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
@@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
-- drop the old original index
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
- ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
\ No newline at end of file
+ ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
|