summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2016-10-27 14:14:44 +0100
committerErik Johnston <erik@matrix.org>2016-10-27 14:14:44 +0100
commit1fc1bc2a51b025e46e3d44056ea1836dfc48cc6e (patch)
treecfe3173336cbc2d0e2440119bdc9e0eb4bbf5594 /synapse
parentUpdate changelog (diff)
downloadsynapse-1fc1bc2a51b025e46e3d44056ea1836dfc48cc6e.tar.xz
Fix user_threepids schema delta
The delta `37/user_threepids.sql` aimed to update all the email
addresses to be lower case, however duplicate emails may exist in the
table already.

This commit adds a step where the delta moves the duplicate emails to a
new `medium` `email_old`. Only the most recently used account keeps the
binding intact. We move rather than delete so that we retain some record
of which emails were associated with which account.
Diffstat (limited to 'synapse')
-rw-r--r--synapse/storage/schema/delta/37/user_threepids.sql39
1 files changed, 38 insertions, 1 deletions
diff --git a/synapse/storage/schema/delta/37/user_threepids.sql b/synapse/storage/schema/delta/37/user_threepids.sql
index ef8813e72a..5f483a2ed7 100644
--- a/synapse/storage/schema/delta/37/user_threepids.sql
+++ b/synapse/storage/schema/delta/37/user_threepids.sql
@@ -17,7 +17,44 @@
  * Update any email addresses that were stored with mixed case into all
  * lowercase
  */
-UPDATE user_threepids SET address = LOWER(address) where medium = 'email';
+
+ -- There may be "duplicate" emails (with different case) already in the table,
+ -- so we find them and move all but the most recently used account.
+ UPDATE user_threepids
+ SET medium = 'email_old'
+ WHERE medium = 'email'
+    AND address IN (
+         -- `user_last_seen` maps user_ids to the last time we saw them
+         WITH user_last_seen AS (
+             SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id
+         ),
+         -- `duplicate_addresses` is a table of all the email addresses that
+         -- appear multiple times and the most recently we saw any of their users
+         duplicate_addresses AS (
+             SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts
+             FROM user_threepids AS u1
+             INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address
+             INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id
+             WHERE u1.medium = 'email' AND u2.medium = 'email'
+             GROUP BY lower(u1.address)
+         )
+         -- We select all the addresses that are linked to the user_id that is NOT
+         -- the most recently seen.
+         SELECT u.address
+         FROM
+             user_threepids AS u,
+             duplicate_addresses,
+             user_last_seen AS ts
+         WHERE
+             lower(u.address) = duplicate_addresses.address
+             AND u.user_id = ts.user_id
+             AND ts.ts != max_ts  -- NOT the most recently used
+     );
+
+
+-- This update is now safe since we've removed the duplicate addresses.
+UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email';
+
 
 /* Add an index for the select we do on passwored reset */
 CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);