summary refs log tree commit diff
path: root/synapse/storage/client_ips.py
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2019-01-22 11:55:53 +0000
committerErik Johnston <erik@matrix.org>2019-01-22 13:33:46 +0000
commit2557531f0f60ee1891a74babed54800cb1bcfd06 (patch)
treefb7f0cc1c7de55884d6710d24a8f9fde0486ac15 /synapse/storage/client_ips.py
parentMerge pull request #4402 from matrix-org/erikj/fed_v2_invite_server (diff)
downloadsynapse-2557531f0f60ee1891a74babed54800cb1bcfd06.tar.xz
Fix bug when removing duplicate rows from user_ips
This was caused by accidentally overwritting a `last_seen` variable
in a for loop, causing the wrong value to be written to the progress
table. The result of which was that we didn't scan sections of the table
when searching for duplicates, and so some duplicates did not get
deleted.
Diffstat (limited to 'synapse/storage/client_ips.py')
-rw-r--r--synapse/storage/client_ips.py33
1 files changed, 20 insertions, 13 deletions
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 5d548f250a..9548cfd3f8 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -110,8 +110,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
 
     @defer.inlineCallbacks
     def _remove_user_ip_dupes(self, progress, batch_size):
+        # This works function works by scanning the user_ips table in batches
+        # based on `last_seen`. For each row in a batch it searches the rest of
+        # the table to see if there are any duplicates, if there are then they
+        # are removed and replaced with a suitable row.
 
-        last_seen_progress = progress.get("last_seen", 0)
+        # Fetch the start of the batch
+        begin_last_seen = progress.get("last_seen", 0)
 
         def get_last_seen(txn):
             txn.execute(
@@ -122,17 +127,20 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
                 LIMIT 1
                 OFFSET ?
                 """,
-                (last_seen_progress, batch_size)
+                (begin_last_seen, batch_size)
             )
-            results = txn.fetchone()
-            return results
-
-        # Get a last seen that's sufficiently far away enough from the last one
-        last_seen = yield self.runInteraction(
+            row = txn.fetchone()
+            if row:
+                return row[0]
+            else:
+                return None
+
+        # Get a last seen that has roughly `batch_size` since `begin_last_seen`
+        end_last_seen = yield self.runInteraction(
             "user_ips_dups_get_last_seen", get_last_seen
         )
 
-        if not last_seen:
+        if end_last_seen is None:
             # If we get a None then we're reaching the end and just need to
             # delete the last batch.
             last = True
@@ -142,9 +150,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
             last_seen = int(self.clock.time_msec()) * 2
         else:
             last = False
-            last_seen = last_seen[0]
 
-        def remove(txn, last_seen_progress, last_seen):
+        def remove(txn, begin_last_seen, end_last_seen):
             # This works by looking at all entries in the given time span, and
             # then for each (user_id, access_token, ip) tuple in that range
             # checking for any duplicates in the rest of the table (via a join).
@@ -166,7 +173,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
                 INNER JOIN user_ips USING (user_id, access_token, ip)
                 GROUP BY user_id, access_token, ip
                 HAVING count(*) > 1""",
-                (last_seen_progress, last_seen)
+                (begin_last_seen, end_last_seen)
             )
             res = txn.fetchall()
 
@@ -194,11 +201,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
                 )
 
             self._background_update_progress_txn(
-                txn, "user_ips_remove_dupes", {"last_seen": last_seen}
+                txn, "user_ips_remove_dupes", {"last_seen": end_last_seen}
             )
 
         yield self.runInteraction(
-            "user_ips_dups_remove", remove, last_seen_progress, last_seen
+            "user_ips_dups_remove", remove, begin_last_seen, end_last_seen
         )
         if last:
             yield self._end_background_update("user_ips_remove_dupes")