diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 1d3069b143..254fdc04c6 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -192,6 +192,41 @@ class SQLBaseStore(object):
self.database_engine = hs.database_engine
+ # A set of tables that are not safe to use native upserts in.
+ self._unsafe_to_upsert_tables = {"user_ips"}
+
+ if self.database_engine.can_native_upsert:
+ # Check ASAP (and then later, every 1s) to see if we have finished
+ # background updates of tables that aren't safe to update.
+ self._clock.call_later(0.0, self._check_safe_to_upsert)
+
+ @defer.inlineCallbacks
+ def _check_safe_to_upsert(self):
+ """
+ Is it safe to use native UPSERT?
+
+ If there are background updates, we will need to wait, as they may be
+ the addition of indexes that set the UNIQUE constraint that we require.
+
+ If the background updates have not completed, wait a second and check again.
+ """
+ updates = yield self._simple_select_list(
+ "background_updates",
+ keyvalues=None,
+ retcols=["update_name"],
+ desc="check_background_updates",
+ )
+ updates = [x["update_name"] for x in updates]
+
+ # The User IPs table in schema #53 was missing a unique index, which we
+ # run as a background update.
+ if "user_ips_device_unique_index" not in updates:
+ self._unsafe_to_upsert_tables.discard("user_id")
+
+ # If there's any tables left to check, reschedule to run.
+ if self._unsafe_to_upsert_tables:
+ self._clock.call_later(1.0, self._check_safe_to_upsert)
+
def start_profiling(self):
self._previous_loop_ts = self._clock.time_msec()
@@ -494,8 +529,15 @@ class SQLBaseStore(object):
txn.executemany(sql, vals)
@defer.inlineCallbacks
- def _simple_upsert(self, table, keyvalues, values,
- insertion_values={}, desc="_simple_upsert", lock=True):
+ def _simple_upsert(
+ self,
+ table,
+ keyvalues,
+ values,
+ insertion_values={},
+ desc="_simple_upsert",
+ lock=True
+ ):
"""
`lock` should generally be set to True (the default), but can be set
@@ -516,16 +558,21 @@ class SQLBaseStore(object):
inserting
lock (bool): True to lock the table when doing the upsert.
Returns:
- Deferred(bool): True if a new entry was created, False if an
- existing one was updated.
+ Deferred(None or bool): Native upserts always return None. Emulated
+ upserts return True if a new entry was created, False if an existing
+ one was updated.
"""
attempts = 0
while True:
try:
result = yield self.runInteraction(
desc,
- self._simple_upsert_txn, table, keyvalues, values, insertion_values,
- lock=lock
+ self._simple_upsert_txn,
+ table,
+ keyvalues,
+ values,
+ insertion_values,
+ lock=lock,
)
defer.returnValue(result)
except self.database_engine.module.IntegrityError as e:
@@ -537,21 +584,76 @@ class SQLBaseStore(object):
# presumably we raced with another transaction: let's retry.
logger.warn(
- "IntegrityError when upserting into %s; retrying: %s",
- table, e
+ "%s when upserting into %s; retrying: %s", e.__name__, table, e
)
- def _simple_upsert_txn(self, txn, table, keyvalues, values, insertion_values={},
- lock=True):
+ def _simple_upsert_txn(
+ self,
+ txn,
+ table,
+ keyvalues,
+ values,
+ insertion_values={},
+ lock=True,
+ ):
+ """
+ Pick the UPSERT method which works best on the platform. Either the
+ native one (Pg9.5+, recent SQLites), or fall back to an emulated method.
+
+ Args:
+ txn: The transaction to use.
+ table (str): The table to upsert into
+ keyvalues (dict): The unique key tables and their new values
+ values (dict): The nonunique columns and their new values
+ insertion_values (dict): additional key/values to use only when
+ inserting
+ lock (bool): True to lock the table when doing the upsert.
+ Returns:
+ Deferred(None or bool): Native upserts always return None. Emulated
+ upserts return True if a new entry was created, False if an existing
+ one was updated.
+ """
+ if (
+ self.database_engine.can_native_upsert
+ and table not in self._unsafe_to_upsert_tables
+ ):
+ return self._simple_upsert_txn_native_upsert(
+ txn,
+ table,
+ keyvalues,
+ values,
+ insertion_values=insertion_values,
+ )
+ else:
+ return self._simple_upsert_txn_emulated(
+ txn,
+ table,
+ keyvalues,
+ values,
+ insertion_values=insertion_values,
+ lock=lock,
+ )
+
+ def _simple_upsert_txn_emulated(
+ self, txn, table, keyvalues, values, insertion_values={}, lock=True
+ ):
# We need to lock the table :(, unless we're *really* careful
if lock:
self.database_engine.lock_table(txn, table)
+ def _getwhere(key):
+ # If the value we're passing in is None (aka NULL), we need to use
+ # IS, not =, as NULL = NULL equals NULL (False).
+ if keyvalues[key] is None:
+ return "%s IS ?" % (key,)
+ else:
+ return "%s = ?" % (key,)
+
# First try to update.
sql = "UPDATE %s SET %s WHERE %s" % (
table,
", ".join("%s = ?" % (k,) for k in values),
- " AND ".join("%s = ?" % (k,) for k in keyvalues)
+ " AND ".join(_getwhere(k) for k in keyvalues)
)
sqlargs = list(values.values()) + list(keyvalues.values())
@@ -569,12 +671,44 @@ class SQLBaseStore(object):
sql = "INSERT INTO %s (%s) VALUES (%s)" % (
table,
", ".join(k for k in allvalues),
- ", ".join("?" for _ in allvalues)
+ ", ".join("?" for _ in allvalues),
)
txn.execute(sql, list(allvalues.values()))
# successfully inserted
return True
+ def _simple_upsert_txn_native_upsert(
+ self, txn, table, keyvalues, values, insertion_values={}
+ ):
+ """
+ Use the native UPSERT functionality in recent PostgreSQL versions.
+
+ Args:
+ table (str): The table to upsert into
+ keyvalues (dict): The unique key tables and their new values
+ values (dict): The nonunique columns and their new values
+ insertion_values (dict): additional key/values to use only when
+ inserting
+ Returns:
+ None
+ """
+ allvalues = {}
+ allvalues.update(keyvalues)
+ allvalues.update(values)
+ allvalues.update(insertion_values)
+
+ sql = (
+ "INSERT INTO %s (%s) VALUES (%s) "
+ "ON CONFLICT (%s) DO UPDATE SET %s"
+ ) % (
+ table,
+ ", ".join(k for k in allvalues),
+ ", ".join("?" for _ in allvalues),
+ ", ".join(k for k in keyvalues),
+ ", ".join(k + "=EXCLUDED." + k for k in values),
+ )
+ txn.execute(sql, list(allvalues.values()))
+
def _simple_select_one(self, table, keyvalues, retcols,
allow_none=False, desc="_simple_select_one"):
"""Executes a SELECT query on the named table, which is expected to
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 11513af2ec..4a221d90ec 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -65,7 +65,27 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
columns=["last_seen"],
)
- # (user_id, access_token, ip) -> (user_agent, device_id, last_seen)
+ self.register_background_update_handler(
+ "user_ips_remove_dupes",
+ self._remove_user_ip_dupes,
+ )
+
+ # Register a unique index
+ self.register_background_index_update(
+ "user_ips_device_unique_index",
+ index_name="user_ips_user_token_ip_unique_index",
+ table="user_ips",
+ columns=["user_id", "access_token", "ip"],
+ unique=True,
+ )
+
+ # Drop the old non-unique index
+ self.register_background_update_handler(
+ "user_ips_drop_nonunique_index",
+ self._remove_user_ip_nonunique,
+ )
+
+ # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
self._batch_row_update = {}
self._client_ip_looper = self._clock.looping_call(
@@ -76,6 +96,129 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
)
@defer.inlineCallbacks
+ def _remove_user_ip_nonunique(self, progress, batch_size):
+ def f(conn):
+ txn = conn.cursor()
+ txn.execute(
+ "DROP INDEX IF EXISTS user_ips_user_ip"
+ )
+ txn.close()
+
+ yield self.runWithConnection(f)
+ yield self._end_background_update("user_ips_drop_nonunique_index")
+ defer.returnValue(1)
+
+ @defer.inlineCallbacks
+ def _remove_user_ip_dupes(self, progress, batch_size):
+ # This works function works by scanning the user_ips table in batches
+ # based on `last_seen`. For each row in a batch it searches the rest of
+ # the table to see if there are any duplicates, if there are then they
+ # are removed and replaced with a suitable row.
+
+ # Fetch the start of the batch
+ begin_last_seen = progress.get("last_seen", 0)
+
+ def get_last_seen(txn):
+ txn.execute(
+ """
+ SELECT last_seen FROM user_ips
+ WHERE last_seen > ?
+ ORDER BY last_seen
+ LIMIT 1
+ OFFSET ?
+ """,
+ (begin_last_seen, batch_size)
+ )
+ row = txn.fetchone()
+ if row:
+ return row[0]
+ else:
+ return None
+
+ # Get a last seen that has roughly `batch_size` since `begin_last_seen`
+ end_last_seen = yield self.runInteraction(
+ "user_ips_dups_get_last_seen", get_last_seen
+ )
+
+ # If it returns None, then we're processing the last batch
+ last = end_last_seen is None
+
+ logger.info(
+ "Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s",
+ begin_last_seen, end_last_seen,
+ )
+
+ def remove(txn):
+ # This works by looking at all entries in the given time span, and
+ # then for each (user_id, access_token, ip) tuple in that range
+ # checking for any duplicates in the rest of the table (via a join).
+ # It then only returns entries which have duplicates, and the max
+ # last_seen across all duplicates, which can the be used to delete
+ # all other duplicates.
+ # It is efficient due to the existence of (user_id, access_token,
+ # ip) and (last_seen) indices.
+
+ # Define the search space, which requires handling the last batch in
+ # a different way
+ if last:
+ clause = "? <= last_seen"
+ args = (begin_last_seen,)
+ else:
+ clause = "? <= last_seen AND last_seen < ?"
+ args = (begin_last_seen, end_last_seen)
+
+ txn.execute(
+ """
+ SELECT user_id, access_token, ip,
+ MAX(device_id), MAX(user_agent), MAX(last_seen)
+ FROM (
+ SELECT user_id, access_token, ip
+ FROM user_ips
+ WHERE {}
+ ) c
+ INNER JOIN user_ips USING (user_id, access_token, ip)
+ GROUP BY user_id, access_token, ip
+ HAVING count(*) > 1
+ """.format(clause),
+ args
+ )
+ res = txn.fetchall()
+
+ # We've got some duplicates
+ for i in res:
+ user_id, access_token, ip, device_id, user_agent, last_seen = i
+
+ # Drop all the duplicates
+ txn.execute(
+ """
+ DELETE FROM user_ips
+ WHERE user_id = ? AND access_token = ? AND ip = ?
+ """,
+ (user_id, access_token, ip)
+ )
+
+ # Add in one to be the last_seen
+ txn.execute(
+ """
+ INSERT INTO user_ips
+ (user_id, access_token, ip, device_id, user_agent, last_seen)
+ VALUES (?, ?, ?, ?, ?, ?)
+ """,
+ (user_id, access_token, ip, device_id, user_agent, last_seen)
+ )
+
+ self._background_update_progress_txn(
+ txn, "user_ips_remove_dupes", {"last_seen": end_last_seen}
+ )
+
+ yield self.runInteraction("user_ips_dups_remove", remove)
+
+ if last:
+ yield self._end_background_update("user_ips_remove_dupes")
+
+ defer.returnValue(batch_size)
+
+ @defer.inlineCallbacks
def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id,
now=None):
if not now:
@@ -114,7 +257,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
)
def _update_client_ips_batch_txn(self, txn, to_update):
- self.database_engine.lock_table(txn, "user_ips")
+ if "user_ips" in self._unsafe_to_upsert_tables or (
+ not self.database_engine.can_native_upsert
+ ):
+ self.database_engine.lock_table(txn, "user_ips")
for entry in iteritems(to_update):
(user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
@@ -127,10 +273,10 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
"user_id": user_id,
"access_token": access_token,
"ip": ip,
- "user_agent": user_agent,
- "device_id": device_id,
},
values={
+ "user_agent": user_agent,
+ "device_id": device_id,
"last_seen": last_seen,
},
lock=False,
@@ -227,7 +373,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
results = {}
for key in self._batch_row_update:
- uid, access_token, ip = key
+ uid, access_token, ip, = key
if uid == user_id:
user_agent, _, last_seen = self._batch_row_update[key]
results[(access_token, ip)] = (user_agent, last_seen)
diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py
index e2f9de8451..ff5ef97ca8 100644
--- a/synapse/storage/engines/__init__.py
+++ b/synapse/storage/engines/__init__.py
@@ -18,7 +18,7 @@ import platform
from ._base import IncorrectDatabaseSetup
from .postgres import PostgresEngine
-from .sqlite3 import Sqlite3Engine
+from .sqlite import Sqlite3Engine
SUPPORTED_MODULE = {
"sqlite3": Sqlite3Engine,
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 42225f8a2a..4004427c7b 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -38,6 +38,13 @@ class PostgresEngine(object):
return sql.replace("?", "%s")
def on_new_connection(self, db_conn):
+
+ # Get the version of PostgreSQL that we're using. As per the psycopg2
+ # docs: The number is formed by converting the major, minor, and
+ # revision numbers into two-decimal-digit numbers and appending them
+ # together. For example, version 8.1.5 will be returned as 80105
+ self._version = db_conn.server_version
+
db_conn.set_isolation_level(
self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ
)
@@ -54,6 +61,13 @@ class PostgresEngine(object):
cursor.close()
+ @property
+ def can_native_upsert(self):
+ """
+ Can we use native UPSERTs? This requires PostgreSQL 9.5+.
+ """
+ return self._version >= 90500
+
def is_deadlock(self, error):
if isinstance(error, self.module.DatabaseError):
# https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite.py
index 19949fc474..c64d73ff21 100644
--- a/synapse/storage/engines/sqlite3.py
+++ b/synapse/storage/engines/sqlite.py
@@ -15,6 +15,7 @@
import struct
import threading
+from sqlite3 import sqlite_version_info
from synapse.storage.prepare_database import prepare_database
@@ -30,6 +31,14 @@ class Sqlite3Engine(object):
self._current_state_group_id = None
self._current_state_group_id_lock = threading.Lock()
+ @property
+ def can_native_upsert(self):
+ """
+ Do we support native UPSERTs? This requires SQLite3 3.24+, plus some
+ more work we haven't done yet to tell what was inserted vs updated.
+ """
+ return sqlite_version_info >= (3, 24, 0)
+
def check_database(self, txn):
pass
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 2047110b1d..79e0276de6 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -739,7 +739,18 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore
}
events_map = {ev.event_id: ev for ev, _ in events_context}
- room_version = yield self.get_room_version(room_id)
+
+ # We need to get the room version, which is in the create event.
+ # Normally that'd be in the database, but its also possible that we're
+ # currently trying to persist it.
+ room_version = None
+ for ev, _ in events_context:
+ if ev.type == EventTypes.Create and ev.state_key == "":
+ room_version = ev.content.get("room_version", "1")
+ break
+
+ if not room_version:
+ room_version = yield self.get_room_version(room_id)
logger.debug("calling resolve_state_groups from preserve_events")
res = yield self._state_resolution_handler.resolve_state_groups(
diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py
index 2743b52bad..134297e284 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/pusher.py
@@ -215,7 +215,7 @@ class PusherStore(PusherWorkerStore):
with self._pushers_id_gen.get_next() as stream_id:
# no need to lock because `pushers` has a unique key on
# (app_id, pushkey, user_name) so _simple_upsert will retry
- newly_inserted = yield self._simple_upsert(
+ yield self._simple_upsert(
table="pushers",
keyvalues={
"app_id": app_id,
@@ -238,7 +238,12 @@ class PusherStore(PusherWorkerStore):
lock=False,
)
- if newly_inserted:
+ user_has_pusher = self.get_if_user_has_pusher.cache.get(
+ (user_id,), None, update_metrics=False
+ )
+
+ if user_has_pusher is not True:
+ # invalidate, since we the user might not have had a pusher before
yield self.runInteraction(
"add_pusher",
self._invalidate_cache_and_stream,
diff --git a/synapse/storage/schema/delta/53/user_ips_index.sql b/synapse/storage/schema/delta/53/user_ips_index.sql
new file mode 100644
index 0000000000..4ca346c111
--- /dev/null
+++ b/synapse/storage/schema/delta/53/user_ips_index.sql
@@ -0,0 +1,26 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- delete duplicates
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('user_ips_remove_dupes', '{}');
+
+-- add a new unique index to user_ips table
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_device_unique_index', '{}', 'user_ips_remove_dupes');
+
+-- drop the old original index
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
\ No newline at end of file
diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py
index a8781b0e5d..ce48212265 100644
--- a/synapse/storage/user_directory.py
+++ b/synapse/storage/user_directory.py
@@ -168,14 +168,14 @@ class UserDirectoryStore(SQLBaseStore):
if isinstance(self.database_engine, PostgresEngine):
# We weight the localpart most highly, then display name and finally
# server name
- if new_entry:
+ if self.database_engine.can_native_upsert:
sql = """
INSERT INTO user_directory_search(user_id, vector)
VALUES (?,
setweight(to_tsvector('english', ?), 'A')
|| setweight(to_tsvector('english', ?), 'D')
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
- )
+ ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
"""
txn.execute(
sql,
@@ -185,20 +185,45 @@ class UserDirectoryStore(SQLBaseStore):
)
)
else:
- sql = """
- UPDATE user_directory_search
- SET vector = setweight(to_tsvector('english', ?), 'A')
- || setweight(to_tsvector('english', ?), 'D')
- || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
- WHERE user_id = ?
- """
- txn.execute(
- sql,
- (
- get_localpart_from_id(user_id), get_domain_from_id(user_id),
- display_name, user_id,
+ # TODO: Remove this code after we've bumped the minimum version
+ # of postgres to always support upserts, so we can get rid of
+ # `new_entry` usage
+ if new_entry is True:
+ sql = """
+ INSERT INTO user_directory_search(user_id, vector)
+ VALUES (?,
+ setweight(to_tsvector('english', ?), 'A')
+ || setweight(to_tsvector('english', ?), 'D')
+ || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+ )
+ """
+ txn.execute(
+ sql,
+ (
+ user_id, get_localpart_from_id(user_id),
+ get_domain_from_id(user_id), display_name,
+ )
+ )
+ elif new_entry is False:
+ sql = """
+ UPDATE user_directory_search
+ SET vector = setweight(to_tsvector('english', ?), 'A')
+ || setweight(to_tsvector('english', ?), 'D')
+ || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+ WHERE user_id = ?
+ """
+ txn.execute(
+ sql,
+ (
+ get_localpart_from_id(user_id),
+ get_domain_from_id(user_id),
+ display_name, user_id,
+ )
+ )
+ else:
+ raise RuntimeError(
+ "upsert returned None when 'can_native_upsert' is False"
)
- )
elif isinstance(self.database_engine, Sqlite3Engine):
value = "%s %s" % (user_id, display_name,) if display_name else user_id
self._simple_upsert_txn(
|