summary refs log tree commit diff
path: root/synapse/storage/schema/delta/25/fts.py
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2015-10-22 11:12:28 +0100
committerErik Johnston <erik@matrix.org>2015-10-22 11:12:28 +0100
commit4d25bc6c92c3d219786892c5d510e0c4c4eb8b96 (patch)
tree879210050337441223b6cd103964230ba13cb1e7 /synapse/storage/schema/delta/25/fts.py
parentFilter room ids before hitting the database (diff)
downloadsynapse-4d25bc6c92c3d219786892c5d510e0c4c4eb8b96.tar.xz
Move FTS to delta 25
Diffstat (limited to 'synapse/storage/schema/delta/25/fts.py')
-rw-r--r--synapse/storage/schema/delta/25/fts.py123
1 files changed, 123 insertions, 0 deletions
diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py
new file mode 100644
index 0000000000..fd181fc630
--- /dev/null
+++ b/synapse/storage/schema/delta/25/fts.py
@@ -0,0 +1,123 @@
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.prepare_database import get_statements
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+
+import ujson
+
+logger = logging.getLogger(__name__)
+
+
+POSTGRES_SQL = """
+CREATE TABLE IF NOT EXISTS event_search (
+    event_id TEXT,
+    room_id TEXT,
+    key TEXT,
+    vector tsvector
+);
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.body',
+    to_tsvector('english', json::json->'content'->>'body')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.name',
+    to_tsvector('english', json::json->'content'->>'name')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.topic',
+    to_tsvector('english', json::json->'content'->>'topic')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
+
+
+CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
+CREATE INDEX event_search_ev_idx ON event_search(event_id);
+CREATE INDEX event_search_ev_ridx ON event_search(room_id);
+"""
+
+
+SQLITE_TABLE = (
+    "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)"
+)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    if isinstance(database_engine, PostgresEngine):
+        run_postgres_upgrade(cur)
+        return
+
+    if isinstance(database_engine, Sqlite3Engine):
+        run_sqlite_upgrade(cur)
+        return
+
+
+def run_postgres_upgrade(cur):
+    for statement in get_statements(POSTGRES_SQL.splitlines()):
+        cur.execute(statement)
+
+
+def run_sqlite_upgrade(cur):
+        cur.execute(SQLITE_TABLE)
+
+        rowid = -1
+        while True:
+            cur.execute(
+                "SELECT rowid, json FROM event_json"
+                " WHERE rowid > ?"
+                " ORDER BY rowid ASC LIMIT 100",
+                (rowid,)
+            )
+
+            res = cur.fetchall()
+
+            if not res:
+                break
+
+            events = [
+                ujson.loads(js)
+                for _, js in res
+            ]
+
+            rowid = max(rid for rid, _ in res)
+
+            rows = []
+            for ev in events:
+                if ev["type"] == "m.room.message":
+                    rows.append((
+                        ev["event_id"], ev["room_id"], "content.body",
+                        ev["content"]["body"]
+                    ))
+                if ev["type"] == "m.room.name":
+                    rows.append((
+                        ev["event_id"], ev["room_id"], "content.name",
+                        ev["content"]["name"]
+                    ))
+                if ev["type"] == "m.room.topic":
+                    rows.append((
+                        ev["event_id"], ev["room_id"], "content.topic",
+                        ev["content"]["topic"]
+                    ))
+
+            if rows:
+                logger.info(rows)
+                cur.executemany(
+                    "INSERT INTO event_search (event_id, room_id, key, value)"
+                    " VALUES (?,?,?,?)",
+                    rows
+                )