diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 49904046cf..66a995157d 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -173,11 +173,12 @@ class BackgroundUpdateStore(SQLBaseStore):
logger.info(
"Updating %r. Updated %r items in %rms."
- " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)",
+ " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r, batch_size=%r)",
update_name, items_updated, duration_ms,
performance.total_items_per_ms(),
performance.average_items_per_ms(),
performance.total_item_count,
+ batch_size,
)
performance.update(items_updated, duration_ms)
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index dd58e001dc..438eef6ba3 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1145,6 +1145,12 @@ class EventsStore(SQLBaseStore):
current_backfill_id, current_forward_id, limit):
"""Get all the new events that have arrived at the server either as
new events or as backfilled events"""
+ have_backfill_events = last_backfill_id != current_backfill_id
+ have_forward_events = last_forward_id != current_forward_id
+
+ if not have_backfill_events and not have_forward_events:
+ return defer.succeed(AllNewEventsResult([], [], [], [], []))
+
def get_all_new_events_txn(txn):
sql = (
"SELECT e.stream_ordering, ej.internal_metadata, ej.json, eg.state_group"
@@ -1157,7 +1163,7 @@ class EventsStore(SQLBaseStore):
" ORDER BY e.stream_ordering ASC"
" LIMIT ?"
)
- if last_forward_id != current_forward_id:
+ if have_forward_events:
txn.execute(sql, (last_forward_id, current_forward_id, limit))
new_forward_events = txn.fetchall()
@@ -1201,7 +1207,7 @@ class EventsStore(SQLBaseStore):
" ORDER BY e.stream_ordering DESC"
" LIMIT ?"
)
- if last_backfill_id != current_backfill_id:
+ if have_backfill_events:
txn.execute(sql, (-last_backfill_id, -current_backfill_id, limit))
new_backfill_events = txn.fetchall()
diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py
index 5fb47d418a..d9afd7ec87 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/pusher.py
@@ -106,6 +106,9 @@ class PusherStore(SQLBaseStore):
return self._pushers_id_gen.get_current_token()
def get_all_updated_pushers(self, last_id, current_id, limit):
+ if last_id == current_id:
+ return defer.succeed(([], []))
+
def get_all_updated_pushers_txn(txn):
sql = (
"SELECT id, user_name, access_token, profile_tag, kind,"
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 3b8805593e..935fc503d9 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -391,6 +391,9 @@ class ReceiptsStore(SQLBaseStore):
)
def get_all_updated_receipts(self, last_id, current_id, limit=None):
+ if last_id == current_id:
+ return defer.succeed([])
+
def get_all_updated_receipts_txn(txn):
sql = (
"SELECT stream_id, room_id, receipt_type, user_id, event_id, data"
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 9be977f387..70aa64fb31 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -169,20 +169,28 @@ class RoomStore(SQLBaseStore):
def _store_event_search_txn(self, txn, event, key, value):
if isinstance(self.database_engine, PostgresEngine):
sql = (
- "INSERT INTO event_search (event_id, room_id, key, vector)"
- " VALUES (?,?,?,to_tsvector('english', ?))"
+ "INSERT INTO event_search"
+ " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
+ " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
+ )
+ txn.execute(
+ sql,
+ (
+ event.event_id, event.room_id, key, value,
+ event.internal_metadata.stream_ordering,
+ event.origin_server_ts,
+ )
)
elif isinstance(self.database_engine, Sqlite3Engine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, value)"
" VALUES (?,?,?,?)"
)
+ txn.execute(sql, (event.event_id, event.room_id, key, value,))
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
- txn.execute(sql, (event.event_id, event.room_id, key, value,))
-
@cachedInlineCallbacks()
def get_room_name_and_aliases(self, room_id):
def f(txn):
diff --git a/synapse/storage/schema/delta/31/search_update.py b/synapse/storage/schema/delta/31/search_update.py
new file mode 100644
index 0000000000..470ae0c005
--- /dev/null
+++ b/synapse/storage/schema/delta/31/search_update.py
@@ -0,0 +1,65 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.prepare_database import get_statements
+
+import logging
+import ujson
+
+logger = logging.getLogger(__name__)
+
+
+ALTER_TABLE = """
+ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT;
+ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT;
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if not isinstance(database_engine, PostgresEngine):
+ return
+
+ for statement in get_statements(ALTER_TABLE.splitlines()):
+ cur.execute(statement)
+
+ cur.execute("SELECT MIN(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ min_stream_id = rows[0][0]
+
+ cur.execute("SELECT MAX(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ max_stream_id = rows[0][0]
+
+ if min_stream_id is not None and max_stream_id is not None:
+ progress = {
+ "target_min_stream_id_inclusive": min_stream_id,
+ "max_stream_id_exclusive": max_stream_id + 1,
+ "rows_inserted": 0,
+ "have_added_indexes": False,
+ }
+ progress_json = ujson.dumps(progress)
+
+ sql = (
+ "INSERT into background_updates (update_name, progress_json)"
+ " VALUES (?, ?)"
+ )
+
+ sql = database_engine.convert_param_style(sql)
+
+ cur.execute(sql, ("event_search_order", progress_json))
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index 59ac7f424c..0224299625 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -29,12 +29,17 @@ logger = logging.getLogger(__name__)
class SearchStore(BackgroundUpdateStore):
EVENT_SEARCH_UPDATE_NAME = "event_search"
+ EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
def __init__(self, hs):
super(SearchStore, self).__init__(hs)
self.register_background_update_handler(
self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search
)
+ self.register_background_update_handler(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME,
+ self._background_reindex_search_order
+ )
@defer.inlineCallbacks
def _background_reindex_search(self, progress, batch_size):
@@ -132,6 +137,82 @@ class SearchStore(BackgroundUpdateStore):
defer.returnValue(result)
@defer.inlineCallbacks
+ def _background_reindex_search_order(self, progress, batch_size):
+ target_min_stream_id = progress["target_min_stream_id_inclusive"]
+ max_stream_id = progress["max_stream_id_exclusive"]
+ rows_inserted = progress.get("rows_inserted", 0)
+ have_added_index = progress['have_added_indexes']
+
+ if not have_added_index:
+ def create_index(conn):
+ conn.rollback()
+ conn.set_session(autocommit=True)
+ c = conn.cursor()
+
+ # We create with NULLS FIRST so that when we search *backwards*
+ # we get the ones with non null origin_server_ts *first*
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
+ "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+ )
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
+ "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+ )
+ conn.set_session(autocommit=False)
+
+ yield self.runWithConnection(create_index)
+
+ pg = dict(progress)
+ pg["have_added_indexes"] = True
+
+ yield self.runInteraction(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME,
+ self._background_update_progress_txn,
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME, pg,
+ )
+
+ def reindex_search_txn(txn):
+ sql = (
+ "UPDATE event_search AS es SET stream_ordering = e.stream_ordering,"
+ " origin_server_ts = e.origin_server_ts"
+ " FROM events AS e"
+ " WHERE e.event_id = es.event_id"
+ " AND ? <= e.stream_ordering AND e.stream_ordering < ?"
+ " RETURNING es.stream_ordering"
+ )
+
+ min_stream_id = max_stream_id - batch_size
+ txn.execute(sql, (min_stream_id, max_stream_id))
+ rows = txn.fetchall()
+
+ if min_stream_id < target_min_stream_id:
+ # We've recached the end.
+ return len(rows), False
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ "rows_inserted": rows_inserted + len(rows),
+ "have_added_indexes": True,
+ }
+
+ self._background_update_progress_txn(
+ txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress
+ )
+
+ return len(rows), True
+
+ num_rows, finished = yield self.runInteraction(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn
+ )
+
+ if not finished:
+ yield self._end_background_update(self.EVENT_SEARCH_ORDER_UPDATE_NAME)
+
+ defer.returnValue(num_rows)
+
+ @defer.inlineCallbacks
def search_msgs(self, room_ids, search_term, keys):
"""Performs a full text search over events with given keys.
@@ -310,7 +391,6 @@ class SearchStore(BackgroundUpdateStore):
"SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank,"
" origin_server_ts, stream_ordering, room_id, event_id"
" FROM event_search"
- " NATURAL JOIN events"
" WHERE vector @@ to_tsquery('english', ?) AND "
)
args = [search_query, search_query] + args
@@ -355,7 +435,15 @@ class SearchStore(BackgroundUpdateStore):
# We add an arbitrary limit here to ensure we don't try to pull the
# entire table from the database.
- sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
+ if isinstance(self.database_engine, PostgresEngine):
+ sql += (
+ " ORDER BY origin_server_ts DESC NULLS LAST,"
+ " stream_ordering DESC NULLS LAST LIMIT ?"
+ )
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
+ else:
+ raise Exception("Unrecognized database engine")
args.append(limit)
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index c5d2a3a6df..5b743db67a 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -174,6 +174,12 @@ class StateStore(SQLBaseStore):
return [r[0] for r in results]
return self.runInteraction("get_current_state_for_key", f)
+ @cached(num_args=2, lru=True, max_entries=1000)
+ def _get_state_group_from_group(self, group, types):
+ raise NotImplementedError()
+
+ @cachedList(cached_method_name="_get_state_group_from_group",
+ list_name="groups", num_args=2, inlineCallbacks=True)
def _get_state_groups_from_groups(self, groups, types):
"""Returns dictionary state_group -> (dict of (type, state_key) -> event id)
"""
@@ -201,18 +207,23 @@ class StateStore(SQLBaseStore):
txn.execute(sql, args)
rows = self.cursor_to_dict(txn)
- results = {}
+ results = {group: {} for group in groups}
for row in rows:
key = (row["type"], row["state_key"])
- results.setdefault(row["state_group"], {})[key] = row["event_id"]
+ results[row["state_group"]][key] = row["event_id"]
return results
+ results = {}
+
chunks = [groups[i:i + 100] for i in xrange(0, len(groups), 100)]
for chunk in chunks:
- return self.runInteraction(
+ res = yield self.runInteraction(
"_get_state_groups_from_groups",
f, chunk
)
+ results.update(res)
+
+ defer.returnValue(results)
@defer.inlineCallbacks
def get_state_for_events(self, event_ids, types):
@@ -359,6 +370,8 @@ class StateStore(SQLBaseStore):
a `state_key` of None matches all state_keys. If `types` is None then
all events are returned.
"""
+ if types:
+ types = frozenset(types)
results = {}
missing_groups = []
if types is not None:
|