From 1fc1bc2a51b025e46e3d44056ea1836dfc48cc6e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Oct 2016 14:14:44 +0100 Subject: Fix user_threepids schema delta The delta `37/user_threepids.sql` aimed to update all the email addresses to be lower case, however duplicate emails may exist in the table already. This commit adds a step where the delta moves the duplicate emails to a new `medium` `email_old`. Only the most recently used account keeps the binding intact. We move rather than delete so that we retain some record of which emails were associated with which account. --- synapse/storage/schema/delta/37/user_threepids.sql | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/37/user_threepids.sql b/synapse/storage/schema/delta/37/user_threepids.sql index ef8813e72a..5f483a2ed7 100644 --- a/synapse/storage/schema/delta/37/user_threepids.sql +++ b/synapse/storage/schema/delta/37/user_threepids.sql @@ -17,7 +17,44 @@ * Update any email addresses that were stored with mixed case into all * lowercase */ -UPDATE user_threepids SET address = LOWER(address) where medium = 'email'; + + -- There may be "duplicate" emails (with different case) already in the table, + -- so we find them and move all but the most recently used account. + UPDATE user_threepids + SET medium = 'email_old' + WHERE medium = 'email' + AND address IN ( + -- `user_last_seen` maps user_ids to the last time we saw them + WITH user_last_seen AS ( + SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id + ), + -- `duplicate_addresses` is a table of all the email addresses that + -- appear multiple times and the most recently we saw any of their users + duplicate_addresses AS ( + SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts + FROM user_threepids AS u1 + INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address + INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id + WHERE u1.medium = 'email' AND u2.medium = 'email' + GROUP BY lower(u1.address) + ) + -- We select all the addresses that are linked to the user_id that is NOT + -- the most recently seen. + SELECT u.address + FROM + user_threepids AS u, + duplicate_addresses, + user_last_seen AS ts + WHERE + lower(u.address) = duplicate_addresses.address + AND u.user_id = ts.user_id + AND ts.ts != max_ts -- NOT the most recently used + ); + + +-- This update is now safe since we've removed the duplicate addresses. +UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email'; + /* Add an index for the select we do on passwored reset */ CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); -- cgit 1.4.1 From a9111786f9fb848a8012976ba3c310492b20d0e2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Oct 2016 14:32:45 +0100 Subject: Use most recently added binding, not most recently seen user. --- synapse/storage/schema/delta/37/user_threepids.sql | 30 ++++++++-------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'synapse/storage/schema') diff --git a/synapse/storage/schema/delta/37/user_threepids.sql b/synapse/storage/schema/delta/37/user_threepids.sql index 5f483a2ed7..cf7a90dd10 100644 --- a/synapse/storage/schema/delta/37/user_threepids.sql +++ b/synapse/storage/schema/delta/37/user_threepids.sql @@ -24,31 +24,23 @@ SET medium = 'email_old' WHERE medium = 'email' AND address IN ( - -- `user_last_seen` maps user_ids to the last time we saw them - WITH user_last_seen AS ( - SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id - ), - -- `duplicate_addresses` is a table of all the email addresses that - -- appear multiple times and the most recently we saw any of their users - duplicate_addresses AS ( - SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts - FROM user_threepids AS u1 - INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address - INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id - WHERE u1.medium = 'email' AND u2.medium = 'email' - GROUP BY lower(u1.address) - ) -- We select all the addresses that are linked to the user_id that is NOT - -- the most recently seen. + -- the most recently created. SELECT u.address FROM user_threepids AS u, - duplicate_addresses, - user_last_seen AS ts + -- `duplicate_addresses` is a table of all the email addresses that + -- appear multiple times and when the binding was created + ( + SELECT lower(u1.address) AS address, max(u1.added_at) AS max_ts + FROM user_threepids AS u1 + INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address + WHERE u1.medium = 'email' AND u2.medium = 'email' + GROUP BY lower(u1.address) + ) AS duplicate_addresses WHERE lower(u.address) = duplicate_addresses.address - AND u.user_id = ts.user_id - AND ts.ts != max_ts -- NOT the most recently used + AND u.added_at != max_ts -- NOT the most recently created ); -- cgit 1.4.1