diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 11a1b942f1..c88759bf2c 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -242,6 +242,25 @@ class BackgroundUpdateStore(SQLBaseStore):
"""
self._background_update_handlers[update_name] = update_handler
+ def register_noop_background_update(self, update_name):
+ """Register a noop handler for a background update.
+
+ This is useful when we previously did a background update, but no
+ longer wish to do the update. In this case the background update should
+ be removed from the schema delta files, but there may still be some
+ users who have the background update queued, so this method should
+ also be called to clear the update.
+
+ Args:
+ update_name (str): Name of update
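+
+ Example::
+
+ self.register_noop_background_update("refresh_tokens_device_index")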
+ """
+ @defer.inlineCallbacks
+ def noop_update(progress, batch_size):
+ yield self._end_background_update(update_name)
+ defer.returnValue(1)
+
+ self.register_background_update_handler(update_name, noop_update)
+
def register_background_index_update(self, update_name, index_name,
table, columns, where_clause=None,
unique=False,
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 3aa810981f..95f75d6df1 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -39,12 +39,7 @@ class RegistrationStore(background_updates.BackgroundUpdateStore):
# we no longer use refresh tokens, but it's possible that some people
# might have a background update queued to build this index. Just
# clear the background update.
- @defer.inlineCallbacks
- def noop_update(progress, batch_size):
- yield self._end_background_update("refresh_tokens_device_index")
- defer.returnValue(1)
- self.register_background_update_handler(
- "refresh_tokens_device_index", noop_update)
+ self.register_noop_background_update("refresh_tokens_device_index")
@defer.inlineCallbacks
def add_access_token_to_user(self, user_id, token, device_id=None):
diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/schema/delta/38/postgres_fts_gist.sql
index f090a7b75a..515e6b8e84 100644
--- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql
+++ b/synapse/storage/schema/delta/38/postgres_fts_gist.sql
@@ -13,5 +13,7 @@
* limitations under the License.
*/
- INSERT into background_updates (update_name, progress_json)
- VALUES ('event_search_postgres_gist', '{}');
+-- We no longer do this, since it is backed out again in schema delta 47.
+
+-- INSERT into background_updates (update_name, progress_json)
+-- VALUES ('event_search_postgres_gist', '{}');
diff --git a/synapse/storage/schema/delta/47/postgres_fts_gin.sql b/synapse/storage/schema/delta/47/postgres_fts_gin.sql
new file mode 100644
index 0000000000..31d7a817eb
--- /dev/null
+++ b/synapse/storage/schema/delta/47/postgres_fts_gin.sql
@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
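+-- Queue up the background update which builds the GIN index; the
+-- handler for event_search_postgres_gin is registered in
+-- synapse/storage/search.py.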
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('event_search_postgres_gin', '{}');
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index f1ac9ba0fd..2755acff40 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -38,6 +38,7 @@ class SearchStore(BackgroundUpdateStore):
EVENT_SEARCH_UPDATE_NAME = "event_search"
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
+ EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
def __init__(self, db_conn, hs):
super(SearchStore, self).__init__(db_conn, hs)
@@ -48,9 +49,19 @@ class SearchStore(BackgroundUpdateStore):
self.EVENT_SEARCH_ORDER_UPDATE_NAME,
self._background_reindex_search_order
)
- self.register_background_update_handler(
+
+ # we used to have a background update to turn the GIN index into a
+ # GIST one; we no longer do that (obviously) because we actually want
+ # a GIN index. However, it's possible that some people might still have
+ # the background update queued, so we register a handler to clear the
+ # background update.
+ self.register_noop_background_update(
self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
- self._background_reindex_gist_search
+ )
+
+ self.register_background_update_handler(
+ self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME,
+ self._background_reindex_gin_search
)
@defer.inlineCallbacks
@@ -151,25 +162,48 @@ class SearchStore(BackgroundUpdateStore):
defer.returnValue(result)
@defer.inlineCallbacks
- def _background_reindex_gist_search(self, progress, batch_size):
+ def _background_reindex_gin_search(self, progress, batch_size):
+ """This handles old synapses which used GIST indexes, if any;
+ converting them back to be GIN as per the actual schema.
+ """
+
def create_index(conn):
conn.rollback()
- conn.set_session(autocommit=True)
- c = conn.cursor()
- c.execute(
- "CREATE INDEX CONCURRENTLY event_search_fts_idx_gist"
- " ON event_search USING GIST (vector)"
- )
+ # we have to set autocommit, because postgres refuses to
+ # CREATE INDEX CONCURRENTLY inside a transaction.
+ conn.set_session(autocommit=True)
- c.execute("DROP INDEX event_search_fts_idx")
+ try:
+ c = conn.cursor()
- conn.set_session(autocommit=False)
+ # if we skipped the conversion to GIST, we may already/still
+ # have an event_search_fts_idx; unfortunately postgres 9.4
+ # doesn't support CREATE INDEX IF NOT EXISTS so we just catch
+ # the exception and ignore it.
+ import psycopg2
+ try:
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_fts_idx"
+ " ON event_search USING GIN (vector)"
+ )
+ except psycopg2.ProgrammingError as e:
+ logger.warn(
+ "Ignoring error %r when trying to switch from GIST to GIN",
+ e
+ )
+
+ # we should now be able to delete the GIST index.
+ c.execute(
+ "DROP INDEX IF EXISTS event_search_fts_idx_gist"
+ )
+ finally:
+ conn.set_session(autocommit=False)
if isinstance(self.database_engine, PostgresEngine):
yield self.runWithConnection(create_index)
- yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME)
+ yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME)
defer.returnValue(1)
@defer.inlineCallbacks
@@ -289,7 +323,30 @@ class SearchStore(BackgroundUpdateStore):
entry.stream_ordering, entry.origin_server_ts,
) for entry in entries)
+ # inserts to a GIN index are normally batched up into a pending
+ # list, which is then flushed into the main index once the list
+ # reaches a certain size. The trouble with that is that postgres
+ # (pre-9.5) uses work_mem to determine the maximum size of the
+ # list, and work_mem is typically very large.
+ #
+ # We therefore reduce work_mem while we do the insert.
+ #
+ # (postgres 9.5 uses the separate gin_pending_list_limit setting,
+ # so doesn't suffer the same problem, but changing work_mem will
+ # be harmless)
+ #
+ # Note that we don't need to worry about restoring it on
+ # exception, because exceptions will cause the transaction to be
+ # rolled back, including the effects of the SET command.
+ #
+ # Also: we use SET rather than SET LOCAL because there's lots of
+ # other stuff going on in this transaction, which we want to run
+ # with the normal work_mem setting.
+
+ txn.execute("SET work_mem='256kB'")
txn.executemany(sql, args)
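+ # RESET restores work_mem to its session default for the rest
+ # of the transaction.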
+ txn.execute("RESET work_mem")
+
elif isinstance(self.database_engine, Sqlite3Engine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, value)"