commit 4fbe6ca4010f804d784e16255fe4c63b7f607c52
tree   6a483ad84dae70964241b918b98d2525038f2fb2 /synapse/storage
Author:    Paul "LeoNerd" Evans <paul@matrix.org>  2015-11-13 17:26:59 +0000
Committer: Paul "LeoNerd" Evans <paul@matrix.org>  2015-11-13 17:26:59 +0000
Parent:    Merge branch 'develop' into paul/tiny-fixes
Parent:    Merge pull request #374 from matrix-org/daniel/guestleave
Merge branch 'develop' into paul/tiny-fixes
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/__init__.py                               |   2
-rw-r--r-- | synapse/storage/background_updates.py                     | 256
-rw-r--r-- | synapse/storage/events.py                                 |  10
-rw-r--r-- | synapse/storage/registration.py                           |  15
-rw-r--r-- | synapse/storage/room.py                                   |  74
-rw-r--r-- | synapse/storage/schema/delta/25/00background_updates.sql  |  21
-rw-r--r-- | synapse/storage/schema/delta/25/fts.py                    | 101
-rw-r--r-- | synapse/storage/schema/delta/25/guest_access.sql          |  25
-rw-r--r-- | synapse/storage/schema/delta/25/history_visibility.sql    |  25
-rw-r--r-- | synapse/storage/schema/delta/25/tags.sql                  |  38
-rw-r--r-- | synapse/storage/search.py                                 | 234
-rw-r--r-- | synapse/storage/state.py                                  |  14
-rw-r--r-- | synapse/storage/stream.py                                 |  47
-rw-r--r-- | synapse/storage/tags.py                                   | 216
-rw-r--r-- | synapse/storage/transactions.py                           |  50
15 files changed, 959 insertions(+), 169 deletions(-)
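The headline addition in this diffstat is synapse/storage/background_updates.py, which introduces a framework for batched, self-tuning background data migrations. Before the patch body, here is a minimal sketch of the handler contract that framework expects (the full implementation is in the diff below). ExampleStore, _example_backfill and example_table are hypothetical names; the register/progress/end calls are the ones this commit adds.

    # Sketch only: assumes the BackgroundUpdateStore API added in
    # background_updates.py below. The store, handler and table names
    # are hypothetical.
    from twisted.internet import defer

    from synapse.storage.background_updates import BackgroundUpdateStore


    class ExampleStore(BackgroundUpdateStore):
        EXAMPLE_UPDATE_NAME = "example_backfill"  # hypothetical update name

        def __init__(self, hs):
            super(ExampleStore, self).__init__(hs)
            # Tell the framework which function processes each batch.
            self.register_background_update_handler(
                self.EXAMPLE_UPDATE_NAME, self._example_backfill
            )

        @defer.inlineCallbacks
        def _example_backfill(self, progress, batch_size):
            # Process up to batch_size rows, persist progress, and return
            # how many rows were handled; a zero result lets us finish.
            last_id = progress.get("last_id", 0)

            def backfill_txn(txn):
                txn.execute(
                    "SELECT id FROM example_table"
                    " WHERE id > ? ORDER BY id LIMIT ?",
                    (last_id, batch_size),
                )
                rows = txn.fetchall()
                if rows:
                    self._background_update_progress_txn(
                        txn, self.EXAMPLE_UPDATE_NAME, {"last_id": rows[-1][0]}
                    )
                return len(rows)

            handled = yield self.runInteraction("example_backfill", backfill_txn)
            if not handled:
                yield self._end_background_update(self.EXAMPLE_UPDATE_NAME)
            defer.returnValue(handled)

An update is queued either via start_background_update() or by inserting a row into the new background_updates table directly, which is what the rewritten schema/delta/25/fts.py below does for the search reindex.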
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index a1bd9c4ce9..e7443f2838 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -41,6 +41,7 @@ from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore from .search import SearchStore +from .tags import TagsStore import logging @@ -71,6 +72,7 @@ class DataStore(RoomMemberStore, RoomStore, ReceiptsStore, EndToEndKeyStore, SearchStore, + TagsStore, ): def __init__(self, hs): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py new file mode 100644 index 0000000000..45fccc2e5e --- /dev/null +++ b/synapse/storage/background_updates.py @@ -0,0 +1,256 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._base import SQLBaseStore + +from twisted.internet import defer + +import ujson as json +import logging + +logger = logging.getLogger(__name__) + + +class BackgroundUpdatePerformance(object): + """Tracks the how long a background update is taking to update its items""" + + def __init__(self, name): + self.name = name + self.total_item_count = 0 + self.total_duration_ms = 0 + self.avg_item_count = 0 + self.avg_duration_ms = 0 + + def update(self, item_count, duration_ms): + """Update the stats after doing an update""" + self.total_item_count += item_count + self.total_duration_ms += duration_ms + + # Exponential moving averages for the number of items updated and + # the duration. + self.avg_item_count += 0.1 * (item_count - self.avg_item_count) + self.avg_duration_ms += 0.1 * (duration_ms - self.avg_duration_ms) + + def average_items_per_ms(self): + """An estimate of how long it takes to do a single update. + Returns: + A duration in ms as a float + """ + if self.total_item_count == 0: + return None + else: + # Use the exponential moving average so that we can adapt to + # changes in how long the update process takes. + return float(self.avg_item_count) / float(self.avg_duration_ms) + + def total_items_per_ms(self): + """An estimate of how long it takes to do a single update. + Returns: + A duration in ms as a float + """ + if self.total_item_count == 0: + return None + else: + return float(self.total_item_count) / float(self.total_duration_ms) + + +class BackgroundUpdateStore(SQLBaseStore): + """ Background updates are updates to the database that run in the + background. Each update processes a batch of data at once. We attempt to + limit the impact of each update by monitoring how long each batch takes to + process and autotuning the batch size. 
+ """ + + MINIMUM_BACKGROUND_BATCH_SIZE = 100 + DEFAULT_BACKGROUND_BATCH_SIZE = 100 + BACKGROUND_UPDATE_INTERVAL_MS = 1000 + BACKGROUND_UPDATE_DURATION_MS = 100 + + def __init__(self, hs): + super(BackgroundUpdateStore, self).__init__(hs) + self._background_update_performance = {} + self._background_update_queue = [] + self._background_update_handlers = {} + self._background_update_timer = None + + @defer.inlineCallbacks + def start_doing_background_updates(self): + while True: + if self._background_update_timer is not None: + return + + sleep = defer.Deferred() + self._background_update_timer = self._clock.call_later( + self.BACKGROUND_UPDATE_INTERVAL_MS / 1000., sleep.callback, None + ) + try: + yield sleep + finally: + self._background_update_timer = None + + try: + result = yield self.do_background_update( + self.BACKGROUND_UPDATE_DURATION_MS + ) + except: + logger.exception("Error doing update") + + if result is None: + logger.info( + "No more background updates to do." + " Unscheduling background update task." + ) + return + + @defer.inlineCallbacks + def do_background_update(self, desired_duration_ms): + """Does some amount of work on a background update + Args: + desired_duration_ms(float): How long we want to spend + updating. + Returns: + A deferred that completes once some amount of work is done. + The deferred will have a value of None if there is currently + no more work to do. + """ + if not self._background_update_queue: + updates = yield self._simple_select_list( + "background_updates", + keyvalues=None, + retcols=("update_name",), + ) + for update in updates: + self._background_update_queue.append(update['update_name']) + + if not self._background_update_queue: + defer.returnValue(None) + + update_name = self._background_update_queue.pop(0) + self._background_update_queue.append(update_name) + + update_handler = self._background_update_handlers[update_name] + + performance = self._background_update_performance.get(update_name) + + if performance is None: + performance = BackgroundUpdatePerformance(update_name) + self._background_update_performance[update_name] = performance + + items_per_ms = performance.average_items_per_ms() + + if items_per_ms is not None: + batch_size = int(desired_duration_ms * items_per_ms) + # Clamp the batch size so that we always make progress + batch_size = max(batch_size, self.MINIMUM_BACKGROUND_BATCH_SIZE) + else: + batch_size = self.DEFAULT_BACKGROUND_BATCH_SIZE + + progress_json = yield self._simple_select_one_onecol( + "background_updates", + keyvalues={"update_name": update_name}, + retcol="progress_json" + ) + + progress = json.loads(progress_json) + + time_start = self._clock.time_msec() + items_updated = yield update_handler(progress, batch_size) + time_stop = self._clock.time_msec() + + duration_ms = time_stop - time_start + + logger.info( + "Updating %r. Updated %r items in %rms." + " (total_rate=%r/ms, current_rate=%r/ms, total_updated=%r)", + update_name, items_updated, duration_ms, + performance.total_items_per_ms(), + performance.average_items_per_ms(), + performance.total_item_count, + ) + + performance.update(items_updated, duration_ms) + + defer.returnValue(len(self._background_update_performance)) + + def register_background_update_handler(self, update_name, update_handler): + """Register a handler for doing a background update. + + The handler should take two arguments: + + * A dict of the current progress + * An integer count of the number of items to update in this batch. 
+ + The handler should return a deferred integer count of items updated. + The hander is responsible for updating the progress of the update. + + Args: + update_name(str): The name of the update that this code handles. + update_handler(function): The function that does the update. + """ + self._background_update_handlers[update_name] = update_handler + + def start_background_update(self, update_name, progress): + """Starts a background update running. + + Args: + update_name: The update to set running. + progress: The initial state of the progress of the update. + + Returns: + A deferred that completes once the task has been added to the + queue. + """ + # Clear the background update queue so that we will pick up the new + # task on the next iteration of do_background_update. + self._background_update_queue = [] + progress_json = json.dumps(progress) + + return self._simple_insert( + "background_updates", + {"update_name": update_name, "progress_json": progress_json} + ) + + def _end_background_update(self, update_name): + """Removes a completed background update task from the queue. + + Args: + update_name(str): The name of the completed task to remove + Returns: + A deferred that completes once the task is removed. + """ + self._background_update_queue = [ + name for name in self._background_update_queue if name != update_name + ] + return self._simple_delete_one( + "background_updates", keyvalues={"update_name": update_name} + ) + + def _background_update_progress_txn(self, txn, update_name, progress): + """Update the progress of a background update + + Args: + txn(cursor): The transaction. + update_name(str): The name of the background update task + progress(dict): The progress of the update. + """ + + progress_json = json.dumps(progress) + + self._simple_update_one_txn( + txn, + "background_updates", + keyvalues={"update_name": update_name}, + updatevalues={"progress_json": progress_json}, + ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index e6c1abfc27..5d35ca90b9 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -311,6 +311,10 @@ class EventsStore(SQLBaseStore): self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) + elif event.type == EventTypes.RoomHistoryVisibility: + self._store_history_visibility_txn(txn, event) + elif event.type == EventTypes.GuestAccess: + self._store_guest_access_txn(txn, event) self._store_room_members_txn( txn, @@ -827,7 +831,8 @@ class EventsStore(SQLBaseStore): allow_none=True, ) if prev: - ev.unsigned["prev_content"] = prev.get_dict()["content"] + ev.unsigned["prev_content"] = prev.content + ev.unsigned["prev_sender"] = prev.sender self._get_event_cache.prefill( (ev.event_id, check_redacted, get_prev_content), ev @@ -884,7 +889,8 @@ class EventsStore(SQLBaseStore): get_prev_content=False, ) if prev: - ev.unsigned["prev_content"] = prev.get_dict()["content"] + ev.unsigned["prev_content"] = prev.content + ev.unsigned["prev_sender"] = prev.sender self._get_event_cache.prefill( (ev.event_id, check_redacted, get_prev_content), ev diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index b454dd5b3a..2e5eddd259 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -102,13 +102,14 @@ class RegistrationStore(SQLBaseStore): 400, "User ID already taken.", errcode=Codes.USER_IN_USE ) - # it's possible for this to get a conflict, but only for a single user - # since tokens are namespaced based on 
their user ID - txn.execute( - "INSERT INTO access_tokens(id, user_id, token)" - " VALUES (?,?,?)", - (next_id, user_id, token,) - ) + if token: + # it's possible for this to get a conflict, but only for a single user + # since tokens are namespaced based on their user ID + txn.execute( + "INSERT INTO access_tokens(id, user_id, token)" + " VALUES (?,?,?)", + (next_id, user_id, token,) + ) def get_user_by_id(self, user_id): return self._simple_select_one( diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 13441fcdce..4f08df478c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -99,34 +99,39 @@ class RoomStore(SQLBaseStore): """ def f(txn): - topic_subquery = ( - "SELECT topics.event_id as event_id, " - "topics.room_id as room_id, topic " - "FROM topics " - "INNER JOIN current_state_events as c " - "ON c.event_id = topics.event_id " - ) - - name_subquery = ( - "SELECT room_names.event_id as event_id, " - "room_names.room_id as room_id, name " - "FROM room_names " - "INNER JOIN current_state_events as c " - "ON c.event_id = room_names.event_id " - ) + def subquery(table_name, column_name=None): + column_name = column_name or table_name + return ( + "SELECT %(table_name)s.event_id as event_id, " + "%(table_name)s.room_id as room_id, %(column_name)s " + "FROM %(table_name)s " + "INNER JOIN current_state_events as c " + "ON c.event_id = %(table_name)s.event_id " % { + "column_name": column_name, + "table_name": table_name, + } + ) - # We use non printing ascii character US (\x1F) as a separator sql = ( - "SELECT r.room_id, max(n.name), max(t.topic)" + "SELECT" + " r.room_id," + " max(n.name)," + " max(t.topic)," + " max(v.history_visibility)," + " max(g.guest_access)" " FROM rooms AS r" " LEFT JOIN (%(topic)s) AS t ON t.room_id = r.room_id" " LEFT JOIN (%(name)s) AS n ON n.room_id = r.room_id" + " LEFT JOIN (%(history_visibility)s) AS v ON v.room_id = r.room_id" + " LEFT JOIN (%(guest_access)s) AS g ON g.room_id = r.room_id" " WHERE r.is_public = ?" - " GROUP BY r.room_id" - ) % { - "topic": topic_subquery, - "name": name_subquery, - } + " GROUP BY r.room_id" % { + "topic": subquery("topics", "topic"), + "name": subquery("room_names", "name"), + "history_visibility": subquery("history_visibility"), + "guest_access": subquery("guest_access"), + } + ) txn.execute(sql, (is_public,)) @@ -156,10 +161,12 @@ class RoomStore(SQLBaseStore): "room_id": r[0], "name": r[1], "topic": r[2], - "aliases": r[3], + "world_readable": r[3] == "world_readable", + "guest_can_join": r[4] == "can_join", + "aliases": r[5], } for r in rows - if r[3] # We only return rooms that have at least one alias. + if r[5] # We only return rooms that have at least one alias. 
] defer.returnValue(ret) @@ -202,6 +209,25 @@ class RoomStore(SQLBaseStore): txn, event, "content.body", event.content["body"] ) + def _store_history_visibility_txn(self, txn, event): + self._store_content_index_txn(txn, event, "history_visibility") + + def _store_guest_access_txn(self, txn, event): + self._store_content_index_txn(txn, event, "guest_access") + + def _store_content_index_txn(self, txn, event, key): + if hasattr(event, "content") and key in event.content: + sql = ( + "INSERT INTO %(key)s" + " (event_id, room_id, %(key)s)" + " VALUES (?, ?, ?)" % {"key": key} + ) + txn.execute(sql, ( + event.event_id, + event.room_id, + event.content[key] + )) + def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): sql = ( diff --git a/synapse/storage/schema/delta/25/00background_updates.sql b/synapse/storage/schema/delta/25/00background_updates.sql new file mode 100644 index 0000000000..41a9b59b1b --- /dev/null +++ b/synapse/storage/schema/delta/25/00background_updates.sql @@ -0,0 +1,21 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS background_updates( + update_name TEXT NOT NULL, -- The name of the background update. + progress_json TEXT NOT NULL, -- The current progress of the update as JSON. 
+ CONSTRAINT background_updates_uniqueness UNIQUE (update_name) +); diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index b7cd0ce3b8..5239d69073 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -22,7 +22,7 @@ import ujson logger = logging.getLogger(__name__) -POSTGRES_SQL = """ +POSTGRES_TABLE = """ CREATE TABLE IF NOT EXISTS event_search ( event_id TEXT, room_id TEXT, @@ -31,22 +31,6 @@ CREATE TABLE IF NOT EXISTS event_search ( vector tsvector ); -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, json::json->>'sender', 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); CREATE INDEX event_search_ev_ridx ON event_search(room_id); @@ -61,67 +45,34 @@ SQLITE_TABLE = ( def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) - return + for statement in get_statements(POSTGRES_TABLE.splitlines()): + cur.execute(statement) + elif isinstance(database_engine, Sqlite3Engine): + cur.execute(SQLITE_TABLE) + else: + raise Exception("Unrecognized database engine") - if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return + cur.execute("SELECT MIN(stream_ordering) FROM events") + rows = cur.fetchall() + min_stream_id = rows[0][0] + cur.execute("SELECT MAX(stream_ordering) FROM events") + rows = cur.fetchall() + max_stream_id = rows[0][0] -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + if min_stream_id is not None and max_stream_id is not None: + progress = { + "target_min_stream_id_inclusive": min_stream_id, + "max_stream_id_exclusive": max_stream_id + 1, + "rows_inserted": 0, + } + progress_json = ujson.dumps(progress) + sql = ( + "INSERT into background_updates (update_name, progress_json)" + " VALUES (?, ?)" + ) -def run_sqlite_upgrade(cur): - cur.execute(SQLITE_TABLE) + sql = database_engine.convert_param_style(sql) - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE rowid > ?" 
- " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - content = ev.get("content", {}) - body = content.get("body", None) - name = content.get("name", None) - topic = content.get("topic", None) - sender = ev.get("sender", None) - if ev["type"] == "m.room.message" and body: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.body", body - )) - if ev["type"] == "m.room.name" and name: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.name", name - )) - if ev["type"] == "m.room.topic" and topic: - rows.append(( - ev["event_id"], ev["room_id"], sender, "content.topic", topic - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, sender, key, value)" - " VALUES (?,?,?,?,?)", - rows - ) + cur.execute(sql, ("event_search", progress_json)) diff --git a/synapse/storage/schema/delta/25/guest_access.sql b/synapse/storage/schema/delta/25/guest_access.sql new file mode 100644 index 0000000000..bdb90e7118 --- /dev/null +++ b/synapse/storage/schema/delta/25/guest_access.sql @@ -0,0 +1,25 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This is a manual index of guest_access content of state events, + * so that we can join on them in SELECT statements. + */ +CREATE TABLE IF NOT EXISTS guest_access( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + guest_access TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/synapse/storage/schema/delta/25/history_visibility.sql b/synapse/storage/schema/delta/25/history_visibility.sql new file mode 100644 index 0000000000..532cb05151 --- /dev/null +++ b/synapse/storage/schema/delta/25/history_visibility.sql @@ -0,0 +1,25 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This is a manual index of history_visibility content of state events, + * so that we can join on them in SELECT statements. 
+ */ +CREATE TABLE IF NOT EXISTS history_visibility( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + history_visibility TEXT NOT NULL, + UNIQUE (event_id) +); diff --git a/synapse/storage/schema/delta/25/tags.sql b/synapse/storage/schema/delta/25/tags.sql new file mode 100644 index 0000000000..527424c998 --- /dev/null +++ b/synapse/storage/schema/delta/25/tags.sql @@ -0,0 +1,38 @@ +/* Copyright 2015 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +CREATE TABLE IF NOT EXISTS room_tags( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + tag TEXT NOT NULL, -- The name of the tag. + content TEXT NOT NULL, -- The JSON content of the tag. + CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) +); + +CREATE TABLE IF NOT EXISTS room_tags_revisions ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + stream_id BIGINT NOT NULL, -- The current version of the room tags. + CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) +); + +CREATE TABLE IF NOT EXISTS private_user_data_max_stream_id( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); + +INSERT INTO private_user_data_max_stream_id (stream_id) VALUES (0); diff --git a/synapse/storage/search.py b/synapse/storage/search.py index cdf003502f..380270b009 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -15,22 +15,116 @@ from twisted.internet import defer -from _base import SQLBaseStore +from .background_updates import BackgroundUpdateStore +from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine -from collections import namedtuple +import logging -"""The result of a search. -Fields: - rank_map (dict): Mapping event_id -> rank - event_map (dict): Mapping event_id -> event - pagination_token (str): Pagination token -""" -SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) +logger = logging.getLogger(__name__) -class SearchStore(SQLBaseStore): +class SearchStore(BackgroundUpdateStore): + + EVENT_SEARCH_UPDATE_NAME = "event_search" + + def __init__(self, hs): + super(SearchStore, self).__init__(hs) + self.register_background_update_handler( + self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search + ) + + @defer.inlineCallbacks + def _background_reindex_search(self, progress, batch_size): + target_min_stream_id = progress["target_min_stream_id_inclusive"] + max_stream_id = progress["max_stream_id_exclusive"] + rows_inserted = progress.get("rows_inserted", 0) + + INSERT_CLUMP_SIZE = 1000 + TYPES = ["m.room.name", "m.room.message", "m.room.topic"] + + def reindex_search_txn(txn): + sql = ( + "SELECT stream_ordering, event_id FROM events" + " WHERE ? <= stream_ordering AND stream_ordering < ?" + " AND (%s)" + " ORDER BY stream_ordering DESC" + " LIMIT ?" 
+ ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),) + + txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + min_stream_id = rows[-1][0] + event_ids = [row[1] for row in rows] + + events = self._get_events_txn(txn, event_ids) + + event_search_rows = [] + for event in events: + try: + event_id = event.event_id + room_id = event.room_id + content = event.content + if event.type == "m.room.message": + key = "content.body" + value = content["body"] + elif event.type == "m.room.topic": + key = "content.topic" + value = content["topic"] + elif event.type == "m.room.name": + key = "content.name" + value = content["name"] + except (KeyError, AttributeError): + # If the event is missing a necessary field then + # skip over it. + continue + + event_search_rows.append((event_id, room_id, key, value)) + + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + + for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): + clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] + txn.executemany(sql, clump) + + progress = { + "target_min_stream_id_inclusive": target_min_stream_id, + "max_stream_id_exclusive": min_stream_id, + "rows_inserted": rows_inserted + len(event_search_rows) + } + + self._background_update_progress_txn( + txn, self.EVENT_SEARCH_UPDATE_NAME, progress + ) + + return len(event_search_rows) + + result = yield self.runInteraction( + self.EVENT_SEARCH_UPDATE_NAME, reindex_search_txn + ) + + if not result: + yield self._end_background_update(self.EVENT_SEARCH_UPDATE_NAME) + + defer.returnValue(result) + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. @@ -42,7 +136,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - SearchResult + list of dicts """ clauses = [] args = [] @@ -100,12 +194,114 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(SearchResult( + defer.returnValue([ + { + "event": event_map[r["event_id"]], + "rank": r["rank"], + } + for r in results + if r["event_id"] in event_map + ]) + + @defer.inlineCallbacks + def search_room(self, room_id, search_term, keys, limit, pagination_token=None): + """Performs a full text search over events with given keys. + + Args: + room_id (str): The room_id to search in + search_term (str): Search term to search for + keys (list): List of keys to search in, currently supports + "content.body", "content.name", "content.topic" + pagination_token (str): A pagination token previously returned + + Returns: + list of dicts + """ + clauses = [] + args = [search_term, room_id] + + local_clauses = [] + for key in keys: + local_clauses.append("key = ?") + args.append(key) + + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) + + if pagination_token: + try: + topo, stream = pagination_token.split(",") + topo = int(topo) + stream = int(stream) + except: + raise SynapseError(400, "Invalid pagination token") + + clauses.append( + "(topological_ordering < ?" + " OR (topological_ordering = ? 
AND stream_ordering < ?))" + ) + args.extend([topo, topo, stream]) + + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "SELECT ts_rank_cd(vector, query) as rank," + " topological_ordering, stream_ordering, room_id, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " NATURAL JOIN events" + " WHERE vector @@ query AND room_id = ?" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + # We use CROSS JOIN here to ensure we use the right indexes. + # https://sqlite.org/optoverview.html#crossjoin + # + # We want to use the full text search index on event_search to + # extract all possible matches first, then lookup those matches + # in the events table to get the topological ordering. We need + # to use the indexes in this order because sqlite refuses to + # MATCH unless it uses the full text search index + sql = ( + "SELECT rank(matchinfo) as rank, room_id, event_id," + " topological_ordering, stream_ordering" + " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo" + " FROM event_search" + " WHERE value MATCH ?" + " )" + " CROSS JOIN events USING (event_id)" + " WHERE room_id = ?" + ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + + for clause in clauses: + sql += " AND " + clause + + # We add an arbitrary limit here to ensure we don't try to pull the + # entire table from the database. + sql += " ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ?" + + args.append(limit) + + results = yield self._execute( + "search_rooms", self.cursor_to_dict, sql, *args + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + defer.returnValue([ { - r["event_id"]: r["rank"] - for r in results - if r["event_id"] in event_map - }, - event_map, - None - )) + "event": event_map[r["event_id"]], + "rank": r["rank"], + "pagination_token": "%s,%s" % ( + r["topological_ordering"], r["stream_ordering"] + ), + } + for r in results + if r["event_id"] in event_map + ]) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index acfb322a53..80e9b63f50 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -237,6 +237,20 @@ class StateStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) + @defer.inlineCallbacks + def get_state_for_event(self, event_id, types=None): + """ + Get the state dict corresponding to a particular event + + :param str event_id: event whose state should be returned + :param list[(str, str)]|None types: List of (type, state_key) tuples + which are used to filter the state fetched. 
May be None, which + matches any key + :return: a deferred dict from (type, state_key) -> state_event + """ + state_map = yield self.get_state_for_events([event_id], types) + defer.returnValue(state_map[event_id]) + @cached(num_args=2, lru=True, max_entries=10000) def _get_state_group_for_event(self, room_id, event_id): return self._simple_select_one_onecol( diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 15d4c2bf68..be8ba76aae 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -158,14 +158,40 @@ class StreamStore(SQLBaseStore): defer.returnValue(results) @log_function - def get_room_events_stream(self, user_id, from_key, to_key, room_id, - limit=0): - current_room_membership_sql = ( - "SELECT m.room_id FROM room_memberships as m " - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id AND c.state_key = m.user_id" - " WHERE m.user_id = ? AND m.membership = 'join'" - ) + def get_room_events_stream( + self, + user_id, + from_key, + to_key, + limit=0, + is_guest=False, + room_ids=None + ): + room_ids = room_ids or [] + room_ids = [r for r in room_ids] + if is_guest: + current_room_membership_sql = ( + "SELECT c.room_id FROM history_visibility AS h" + " INNER JOIN current_state_events AS c" + " ON h.event_id = c.event_id" + " WHERE c.room_id IN (%s) AND h.history_visibility = 'world_readable'" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + ) + current_room_membership_args = room_ids + else: + current_room_membership_sql = ( + "SELECT m.room_id FROM room_memberships as m " + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id AND c.state_key = m.user_id" + " WHERE m.user_id = ? AND m.membership = 'join'" + ) + current_room_membership_args = [user_id] + if room_ids: + current_room_membership_sql += " AND m.room_id in (%s)" % ( + ",".join(map(lambda _: "?", room_ids)) + ) + current_room_membership_args = [user_id] + room_ids # We also want to get any membership events about that user, e.g. # invites or leave notifications. @@ -174,6 +200,7 @@ class StreamStore(SQLBaseStore): "INNER JOIN current_state_events as c ON m.event_id = c.event_id " "WHERE m.user_id = ? " ) + membership_args = [user_id] if limit: limit = max(limit, MAX_STREAM_SIZE) @@ -200,7 +227,9 @@ class StreamStore(SQLBaseStore): } def f(txn): - txn.execute(sql, (False, user_id, user_id, from_id.stream, to_id.stream,)) + args = ([False] + current_room_membership_args + membership_args + + [from_id.stream, to_id.stream]) + txn.execute(sql, args) rows = self.cursor_to_dict(txn) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py new file mode 100644 index 0000000000..bf695b7800 --- /dev/null +++ b/synapse/storage/tags.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# Copyright 2014, 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ._base import SQLBaseStore +from synapse.util.caches.descriptors import cached +from twisted.internet import defer +from .util.id_generators import StreamIdGenerator + +import ujson as json +import logging + +logger = logging.getLogger(__name__) + + +class TagsStore(SQLBaseStore): + def __init__(self, hs): + super(TagsStore, self).__init__(hs) + + self._private_user_data_id_gen = StreamIdGenerator( + "private_user_data_max_stream_id", "stream_id" + ) + + def get_max_private_user_data_stream_id(self): + """Get the current max stream id for the private user data stream + + Returns: + A deferred int. + """ + return self._private_user_data_id_gen.get_max_token(self) + + @cached() + def get_tags_for_user(self, user_id): + """Get all the tags for a user. + + + Args: + user_id(str): The user to get the tags for. + Returns: + A deferred dict mapping from room_id strings to lists of tag + strings. + """ + + deferred = self._simple_select_list( + "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"] + ) + + @deferred.addCallback + def tags_by_room(rows): + tags_by_room = {} + for row in rows: + room_tags = tags_by_room.setdefault(row["room_id"], {}) + room_tags[row["tag"]] = json.loads(row["content"]) + return tags_by_room + + return deferred + + @defer.inlineCallbacks + def get_updated_tags(self, user_id, stream_id): + """Get all the tags for the rooms where the tags have changed since the + given version + + Args: + user_id(str): The user to get the tags for. + stream_id(int): The earliest update to get for the user. + Returns: + A deferred dict mapping from room_id strings to lists of tag + strings for all the rooms that changed since the stream_id token. + """ + def get_updated_tags_txn(txn): + sql = ( + "SELECT room_id from room_tags_revisions" + " WHERE user_id = ? AND stream_id > ?" + ) + txn.execute(sql, (user_id, stream_id)) + room_ids = [row[0] for row in txn.fetchall()] + return room_ids + + room_ids = yield self.runInteraction( + "get_updated_tags", get_updated_tags_txn + ) + + results = {} + if room_ids: + tags_by_room = yield self.get_tags_for_user(user_id) + for room_id in room_ids: + results[room_id] = tags_by_room.get(room_id, {}) + + defer.returnValue(results) + + def get_tags_for_room(self, user_id, room_id): + """Get all the tags for the given room + Args: + user_id(str): The user to get tags for + room_id(str): The room to get tags for + Returns: + A deferred list of string tags. + """ + return self._simple_select_list( + table="room_tags", + keyvalues={"user_id": user_id, "room_id": room_id}, + retcols=("tag", "content"), + desc="get_tags_for_room", + ).addCallback(lambda rows: { + row["tag"]: json.loads(row["content"]) for row in rows + }) + + @defer.inlineCallbacks + def add_tag_to_room(self, user_id, room_id, tag, content): + """Add a tag to a room for a user. + Args: + user_id(str): The user to add a tag for. + room_id(str): The room to add a tag for. + tag(str): The tag name to add. + content(dict): A json object to associate with the tag. + Returns: + A deferred that completes once the tag has been added. 
+ """ + content_json = json.dumps(content) + + def add_tag_txn(txn, next_id): + self._simple_upsert_txn( + txn, + table="room_tags", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "tag": tag, + }, + values={ + "content": content_json, + } + ) + self._update_revision_txn(txn, user_id, room_id, next_id) + + with (yield self._private_user_data_id_gen.get_next(self)) as next_id: + yield self.runInteraction("add_tag", add_tag_txn, next_id) + + self.get_tags_for_user.invalidate((user_id,)) + + result = yield self._private_user_data_id_gen.get_max_token(self) + defer.returnValue(result) + + @defer.inlineCallbacks + def remove_tag_from_room(self, user_id, room_id, tag): + """Remove a tag from a room for a user. + Returns: + A deferred that completes once the tag has been removed + """ + def remove_tag_txn(txn, next_id): + sql = ( + "DELETE FROM room_tags " + " WHERE user_id = ? AND room_id = ? AND tag = ?" + ) + txn.execute(sql, (user_id, room_id, tag)) + self._update_revision_txn(txn, user_id, room_id, next_id) + + with (yield self._private_user_data_id_gen.get_next(self)) as next_id: + yield self.runInteraction("remove_tag", remove_tag_txn, next_id) + + self.get_tags_for_user.invalidate((user_id,)) + + result = yield self._private_user_data_id_gen.get_max_token(self) + defer.returnValue(result) + + def _update_revision_txn(self, txn, user_id, room_id, next_id): + """Update the latest revision of the tags for the given user and room. + + Args: + txn: The database cursor + user_id(str): The ID of the user. + room_id(str): The ID of the room. + next_id(int): The the revision to advance to. + """ + + update_max_id_sql = ( + "UPDATE private_user_data_max_stream_id" + " SET stream_id = ?" + " WHERE stream_id < ?" + ) + txn.execute(update_max_id_sql, (next_id, next_id)) + + update_sql = ( + "UPDATE room_tags_revisions" + " SET stream_id = ?" + " WHERE user_id = ?" + " AND room_id = ?" + ) + txn.execute(update_sql, (next_id, user_id, room_id)) + + if txn.rowcount == 0: + insert_sql = ( + "INSERT INTO room_tags_revisions (user_id, room_id, stream_id)" + " VALUES (?, ?, ?)" + ) + try: + txn.execute(insert_sql, (user_id, room_id, next_id)) + except self.database_engine.module.IntegrityError: + # Ignore insertion errors. It doesn't matter if the row wasn't + # inserted because if two updates happend concurrently the one + # with the higher stream_id will not be reported to a client + # unless the previous update has completed. It doesn't matter + # which stream_id ends up in the table, as long as it is higher + # than the id that the client has. 
+ pass diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 15695e9831..ad099775eb 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -59,7 +59,7 @@ class TransactionStore(SQLBaseStore): allow_none=True, ) - if result and result.response_code: + if result and result["response_code"]: return result["response_code"], result["response_json"] else: return None @@ -253,16 +253,6 @@ class TransactionStore(SQLBaseStore): retry_interval (int) - how long until next retry in ms """ - # As this is the new value, we might as well prefill the cache - self.get_destination_retry_timings.prefill( - destination, - { - "destination": destination, - "retry_last_ts": retry_last_ts, - "retry_interval": retry_interval - }, - ) - # XXX: we could chose to not bother persisting this if our cache thinks # this is a NOOP return self.runInteraction( @@ -275,31 +265,25 @@ class TransactionStore(SQLBaseStore): def _set_destination_retry_timings(self, txn, destination, retry_last_ts, retry_interval): - query = ( - "UPDATE destinations" - " SET retry_last_ts = ?, retry_interval = ?" - " WHERE destination = ?" - ) + txn.call_after(self.get_destination_retry_timings.invalidate, (destination,)) - txn.execute( - query, - ( - retry_last_ts, retry_interval, destination, - ) + self._simple_upsert_txn( + txn, + "destinations", + keyvalues={ + "destination": destination, + }, + values={ + "retry_last_ts": retry_last_ts, + "retry_interval": retry_interval, + }, + insertion_values={ + "destination": destination, + "retry_last_ts": retry_last_ts, + "retry_interval": retry_interval, + } ) - if txn.rowcount == 0: - # destination wasn't already in table. Insert it. - self._simple_insert_txn( - txn, - table="destinations", - values={ - "destination": destination, - "retry_last_ts": retry_last_ts, - "retry_interval": retry_interval, - } - ) - def get_destinations_needing_retry(self): """Get all destinations which are due a retry for sending a transaction. |
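For reference, a worked example (with made-up batch numbers) of the batch-size autotuning in BackgroundUpdateStore.do_background_update above: the exponential moving averages in BackgroundUpdatePerformance estimate items per millisecond, and the next batch is sized to fill BACKGROUND_UPDATE_DURATION_MS of work, clamped to MINIMUM_BACKGROUND_BATCH_SIZE so that progress is always made.

    # Worked example of the autotuning arithmetic; the item counts and
    # durations are assumed, and synapse is assumed to be importable.
    from synapse.storage.background_updates import BackgroundUpdatePerformance

    perf = BackgroundUpdatePerformance("event_search")
    perf.update(item_count=200, duration_ms=80)  # EMA: 20 items, 8.0 ms
    perf.update(item_count=100, duration_ms=60)  # EMA: 28 items, 13.2 ms

    items_per_ms = perf.average_items_per_ms()   # 28 / 13.2, about 2.12
    desired_duration_ms = 100                    # BACKGROUND_UPDATE_DURATION_MS
    batch_size = max(int(desired_duration_ms * items_per_ms), 100)
    print(batch_size)                            # 212: next batch targets ~100ms

Because both averages decay with the same 0.1 factor, the bias from starting the averages at zero cancels in the ratio, so even the first batch yields a usable rate estimate, and the batch size keeps adapting as row sizes change.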