summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2019-02-12 11:55:27 +0000
committerErik Johnston <erik@matrix.org>2019-02-12 11:55:27 +0000
commit483ba85c7a1a8ee9b7eebcc5c07d522c71229c9f (patch)
tree0f7dfe27671a2184027a636be8c92208c5535a9c /synapse
parentMerge pull request #4619 from matrix-org/rav/remove_docker_no_tls_hacks (diff)
downloadsynapse-483ba85c7a1a8ee9b7eebcc5c07d522c71229c9f.tar.xz
Analyze user_ips before running deduplication
Due to the table locks taken out by the naive upsert, the table
statistics may be out of date. During deduplication it is important that
the correct index is used as otherwise a full table scan may be
incorrectly used, which can end up thrashing the database badly.
Diffstat (limited to 'synapse')
-rw-r--r--synapse/storage/client_ips.py24
-rw-r--r--synapse/storage/schema/delta/53/user_ips_index.sql10
2 files changed, 31 insertions, 3 deletions
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 091d7116c5..6f81406269 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -66,6 +66,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         )
 
         self.register_background_update_handler(
+            "user_ips_analyze",
+            self._analyze_user_ip,
+        )
+
+        self.register_background_update_handler(
             "user_ips_remove_dupes",
             self._remove_user_ip_dupes,
         )
@@ -109,6 +114,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         defer.returnValue(1)
 
     @defer.inlineCallbacks
+    def _analyze_user_ip(self, progress, batch_size):
+        # Background update to analyze user_ips table before we run the
+        # deduplication background update. The table may not have been analyzed
+        # for ages due to the table locks.
+        #
+        # This will lock out the naive upserts to user_ips while it happens, but
+        # the analyze should be quick (28GB table takes ~10s)
+        def user_ips_analyze(txn):
+            txn.execute("ANALYZE user_ips")
+
+        end_last_seen = yield self.runInteraction(
+            "user_ips_analyze", user_ips_analyze
+        )
+
+        yield self._end_background_update("user_ips_analyze")
+
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
     def _remove_user_ip_dupes(self, progress, batch_size):
         # This works function works by scanning the user_ips table in batches
         # based on `last_seen`. For each row in a batch it searches the rest of
diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql
index 4ca346c111..b812c5794f 100644
--- a/synapse/storage/schema/delta/53/user_ips_index.sql
+++ b/synapse/storage/schema/delta/53/user_ips_index.sql
@@ -13,9 +13,13 @@
  * limitations under the License.
  */
 
--- delete duplicates
+ -- analyze user_ips, to help ensure the correct indices are used
 INSERT INTO background_updates (update_name, progress_json) VALUES
-  ('user_ips_remove_dupes', '{}');
+  ('user_ips_analyze', '{}');
+
+-- delete duplicates
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+  ('user_ips_remove_dupes', '{}', 'user_ips_analyze');
 
 -- add a new unique index to user_ips table
 INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
@@ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
 
 -- drop the old original index
 INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
-  ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
\ No newline at end of file
+  ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');