author    | Erik Johnston <erik@matrix.org> | 2016-07-13 14:35:45 +0100
committer | Erik Johnston <erik@matrix.org> | 2016-07-13 15:16:02 +0100
commit    | 518b3a3f89f374474e5291810cbe0fa0103e802e (patch)
tree      | a2a5db5022277ae60d8c9c6e8a09f355d8e3e5fb /synapse
parent    | Merge pull request #914 from matrix-org/markjh/upgrade (diff)
download  | synapse-518b3a3f89f374474e5291810cbe0fa0103e802e.tar.xz
Track file message events in the DB
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/storage/room.py                    | 118
-rw-r--r-- | synapse/storage/schema/delta/33/msgtype.py |  72
2 files changed, 190 insertions, 0 deletions
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 8251f58670..1ad5c67bcf 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -34,6 +34,108 @@ OpsLevel = collections.namedtuple(
 
 
 class RoomStore(SQLBaseStore):
+    EVENT_FILES_UPDATE_NAME = "event_files"
+
+    FILE_MSGTYPES = (
+        "m.image",
+        "m.video",
+        "m.file",
+        "m.audio",
+    )
+
+    def __init__(self, hs):
+        super(RoomStore, self).__init__(hs)
+        self.register_background_update_handler(
+            self.EVENT_FILES_UPDATE_NAME, self._background_reindex_files
+        )
+
+    @defer.inlineCallbacks
+    def _background_reindex_files(self, progress, batch_size):
+        target_min_stream_id = progress["target_min_stream_id_inclusive"]
+        max_stream_id = progress["max_stream_id_exclusive"]
+        rows_inserted = progress.get("rows_inserted", 0)
+
+        def reindex_txn(txn):
+            sql = (
+                "SELECT topological_ordering, stream_ordering, event_id, room_id,"
+                " type, content FROM events"
+                " WHERE ? <= stream_ordering AND stream_ordering < ?"
+                " AND type = 'm.room.message'"
+                " AND content LIKE ?"
+                " ORDER BY stream_ordering DESC"
+                " LIMIT ?"
+            )
+
+            txn.execute(sql, (target_min_stream_id, max_stream_id, '%url%', batch_size))
+
+            rows = self.cursor_to_dict(txn)
+            if not rows:
+                return 0
+
+            min_stream_id = rows[-1]["stream_ordering"]
+
+            event_files_rows = []
+            for row in rows:
+                try:
+                    so = row["stream_ordering"]
+                    to = row["topological_ordering"]
+                    event_id = row["event_id"]
+                    room_id = row["room_id"]
+                    try:
+                        content = json.loads(row["content"])
+                    except:
+                        continue
+
+                    msgtype = content["msgtype"]
+                    if msgtype not in self.FILE_MSGTYPES:
+                        continue
+
+                    url = content["url"]
+
+                    if not isinstance(url, basestring):
+                        continue
+                    if not isinstance(msgtype, basestring):
+                        continue
+                except (KeyError, AttributeError):
+                    # If the event is missing a necessary field then
+                    # skip over it.
+                    continue
+
+                event_files_rows.append({
+                    "topological_ordering": to,
+                    "stream_ordering": so,
+                    "event_id": event_id,
+                    "room_id": room_id,
+                    "msgtype": msgtype,
+                    "url": url,
+                })
+
+            self._simple_insert_many_txn(
+                txn,
+                table="event_files",
+                values=event_files_rows,
+            )
+
+            progress = {
+                "target_min_stream_id_inclusive": target_min_stream_id,
+                "max_stream_id_exclusive": min_stream_id,
+                "rows_inserted": rows_inserted + len(event_files_rows)
+            }
+
+            self._background_update_progress_txn(
+                txn, self.EVENT_FILES_UPDATE_NAME, progress
+            )
+
+            return len(rows)
+
+        result = yield self.runInteraction(
+            self.EVENT_FILES_UPDATE_NAME, reindex_txn
+        )
+
+        if not result:
+            yield self._end_background_update(self.EVENT_FILES_UPDATE_NAME)
+
+        defer.returnValue(result)
 
     @defer.inlineCallbacks
     def store_room(self, room_id, room_creator_user_id, is_public):
@@ -142,6 +244,22 @@ class RoomStore(SQLBaseStore):
         )
 
     def _store_room_message_txn(self, txn, event):
+        msgtype = event.content.get("msgtype")
+        url = event.content.get("url")
+        if msgtype in self.FILE_MSGTYPES and url:
+            self._simple_insert_txn(
+                txn,
+                table="event_files",
+                values={
+                    "topological_ordering": event.depth,
+                    "stream_ordering": event.internal_metadata.stream_ordering,
+                    "room_id": event.room_id,
+                    "event_id": event.event_id,
+                    "msgtype": msgtype,
+                    "url": url,
+                }
+            )
+
         if hasattr(event, "content") and "body" in event.content:
             self._store_event_search_txn(
                 txn, event, "content.body", event.content["body"]
diff --git a/synapse/storage/schema/delta/33/msgtype.py b/synapse/storage/schema/delta/33/msgtype.py
new file mode 100644
index 0000000000..f600ff6f64
--- /dev/null
+++ b/synapse/storage/schema/delta/33/msgtype.py
@@ -0,0 +1,72 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.storage.prepare_database import get_statements
+
+import logging
+import ujson
+
+logger = logging.getLogger(__name__)
+
+
+CREATE_TABLE = """
+CREATE TABLE event_files(
+    topological_ordering BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    msgtype TEXT NOT NULL,
+    url TEXT NOT NULL
+);
+
+CREATE INDEX event_files_rm_id ON event_files(room_id, event_id);
+CREATE INDEX event_files_order ON event_files(
+    room_id, topological_ordering, stream_ordering
+);
+CREATE INDEX event_files_order_stream ON event_files(room_id, stream_ordering);
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    for statement in get_statements(CREATE_TABLE.splitlines()):
+        cur.execute(statement)
+
+    cur.execute("SELECT MIN(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    min_stream_id = rows[0][0]
+
+    cur.execute("SELECT MAX(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    max_stream_id = rows[0][0]
+
+    if min_stream_id is not None and max_stream_id is not None:
+        progress = {
+            "target_min_stream_id_inclusive": min_stream_id,
+            "max_stream_id_exclusive": max_stream_id + 1,
+            "rows_inserted": 0,
+        }
+        progress_json = ujson.dumps(progress)
+
+        sql = (
+            "INSERT into background_updates (update_name, progress_json)"
+            " VALUES (?, ?)"
+        )
+
+        sql = database_engine.convert_param_style(sql)
+
+        cur.execute(sql, ("event_files", progress_json))
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    pass
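Editor's note (not part of the commit): the new event_files table is write-only in this diff; nothing reads it yet. The sketch below is a hypothetical, self-contained illustration of the access pattern the event_files_order index appears designed for. It builds the table from the delta's schema in an in-memory SQLite database, inserts one row shaped like the values dict in _store_room_message_txn, and pages a room's file events newest-first. The fetch_file_events helper and the example identifiers are invented for illustration and are not Synapse API.

import sqlite3

# Table definition copied from synapse/storage/schema/delta/33/msgtype.py above.
CREATE_TABLE = """
CREATE TABLE event_files(
    topological_ordering BIGINT NOT NULL,
    stream_ordering BIGINT NOT NULL,
    room_id TEXT NOT NULL,
    event_id TEXT NOT NULL,
    msgtype TEXT NOT NULL,
    url TEXT NOT NULL
);
"""


def fetch_file_events(conn, room_id, limit=10):
    # Hypothetical reader: newest file events first, matching the column
    # order of the event_files_order index
    # (room_id, topological_ordering, stream_ordering) created by the delta.
    return conn.execute(
        "SELECT event_id, msgtype, url FROM event_files"
        " WHERE room_id = ?"
        " ORDER BY topological_ordering DESC, stream_ordering DESC"
        " LIMIT ?",
        (room_id, limit),
    ).fetchall()


conn = sqlite3.connect(":memory:")
conn.execute(CREATE_TABLE)

# One row, shaped like the values dict _store_room_message_txn inserts.
conn.execute(
    "INSERT INTO event_files VALUES (?, ?, ?, ?, ?, ?)",
    (1, 1, "!room:example.com", "$event0:example.com",
     "m.image", "mxc://example.com/abc123"),
)

print(fetch_file_events(conn, "!room:example.com"))
# [('$event0:example.com', 'm.image', 'mxc://example.com/abc123')]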