diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 091d7116c5..a20cc8231f 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -167,12 +167,16 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
clause = "? <= last_seen AND last_seen < ?"
args = (begin_last_seen, end_last_seen)
+ # (Note: The DISTINCT in the inner query is important to ensure that
+ # the COUNT(*) is accurate, otherwise double counting may happen due
+ # to the join effectively being a cross product)
txn.execute(
"""
SELECT user_id, access_token, ip,
- MAX(device_id), MAX(user_agent), MAX(last_seen)
+ MAX(device_id), MAX(user_agent), MAX(last_seen),
+ COUNT(*)
FROM (
- SELECT user_id, access_token, ip
+ SELECT DISTINCT user_id, access_token, ip
FROM user_ips
WHERE {}
) c
@@ -186,7 +190,60 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
# We've got some duplicates
for i in res:
- user_id, access_token, ip, device_id, user_agent, last_seen = i
+ user_id, access_token, ip, device_id, user_agent, last_seen, count = i
+
+ # We want to delete the duplicates so we end up with only a
+ # single row.
+ #
+ # The naive way of doing this would be just to delete all rows
+ # and reinsert a constructed row. However, if there are a lot of
+ # duplicate rows this can cause the table to grow a lot, which
+ # can be problematic in two ways:
+ # 1. If user_ips is already large then this can cause the
+ # table to rapidly grow, potentially filling the disk.
+ # 2. Reinserting a lot of rows can confuse the table
+ # statistics for postgres, causing it to not use the
+ # correct indices for the query above, resulting in a full
+ # table scan. This is incredibly slow for large tables and
+ # can kill database performance. (This seems to mainly
+ # happen for the last query where the clause is simply `? <
+ # last_seen`)
+ #
+ # So instead we want to delete all but *one* of the duplicate
+ # rows. That is hard to do reliably, so we cheat and do a two
+ # step process:
+ # 1. Delete all rows with a last_seen strictly less than the
+ # max last_seen. This hopefully results in deleting all but
+ # one row the majority of the time, but there may be
+ # duplicate last_seen
+ # 2. If multiple rows remain, we fall back to the naive method
+ # and simply delete all rows and reinsert.
+ #
+ # Note that this relies on no new duplicate rows being inserted,
+ # but if that is happening then this entire process is futile
+ # anyway.
+
+ # Do step 1:
+
+ txn.execute(
+ """
+ DELETE FROM user_ips
+ WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ?
+ """,
+ (user_id, access_token, ip, last_seen)
+ )
+ if txn.rowcount == count - 1:
+ # We deleted all but one of the duplicate rows, i.e. there
+ # is exactly one remaining and so there is nothing left to
+ # do.
+ continue
+ elif txn.rowcount >= count:
+ raise Exception(
+ "We deleted more duplicate rows from 'user_ips' than expected",
+ )
+
+ # The previous step didn't delete enough rows, so we fallback to
+ # step 2:
# Drop all the duplicates
txn.execute(
|