summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
author: Erik Johnston <erik@matrix.org> 2015-10-09 15:48:31 +0100
committer: Erik Johnston <erik@matrix.org> 2015-10-09 15:48:31 +0100
commit: c85c9125627a62c73711786723be12be30d7a81e (patch)
tree: e5d32fb9a3f90a764ce3bbe89b5f8e0f619205bc /synapse/storage
parent: Merge pull request #292 from matrix-org/daniel/useragent (diff)
download: synapse-c85c9125627a62c73711786723be12be30d7a81e.tar.xz
Add basic full text search impl.
Diffstat (limited to 'synapse/storage')
-rw-r--r-- synapse/storage/__init__.py 2
-rw-r--r-- synapse/storage/_base.py 2
-rw-r--r-- synapse/storage/schema/delta/24/fts.py 57
-rw-r--r-- synapse/storage/search.py 75
4 files changed, 135 insertions, 1 deletions
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 340e59afcb..5f91ef77c0 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -40,6 +40,7 @@ from .filtering import FilteringStore
 from .end_to_end_keys import EndToEndKeyStore
 
 from .receipts import ReceiptsStore
+from .search import SearchStore
 
 
 import fnmatch
@@ -79,6 +80,7 @@ class DataStore(RoomMemberStore, RoomStore,
                 EventsStore,
                 ReceiptsStore,
                 EndToEndKeyStore,
+                SearchStore,
                 ):
 
     def __init__(self, hs):
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 693784ad38..218e708054 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -519,7 +519,7 @@ class SQLBaseStore(object):
                                   allow_none=False,
                                   desc="_simple_select_one_onecol"):
         """Executes a SELECT query on the named table, which is expected to
-        return a single row, returning a single column from it."
+        return a single row, returning a single column from it.
 
         Args:
             table : string giving the table name
diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py
new file mode 100644
index 0000000000..5680332758
--- /dev/null
+++ b/synapse/storage/schema/delta/24/fts.py
@@ -0,0 +1,57 @@
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage import get_statements
+from synapse.storage.engines import PostgresEngine
+
+logger = logging.getLogger(__name__)
+
+
+# SQL executed by run_upgrade on Postgres databases only.
+#
+# It creates the `event_search` table, back-fills it from existing events
+# (`events NATURAL JOIN event_json`) and finally builds a GIN index over the
+# `vector` column. One row is inserted per matching event; the `key` column
+# records which JSON field the tsvector was built from:
+#   * 'content.body'  for m.room.message events
+#   * 'content.name'  for m.room.name events
+#   * 'content.topic' for m.room.topic events
+# All vectors are built with the 'english' text search configuration.
+POSTGRES_SQL = """
+CREATE TABLE event_search (
+    event_id TEXT,
+    room_id TEXT,
+    key TEXT,
+    vector tsvector
+);
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.body',
+    to_tsvector('english', json::json->'content'->>'body')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.name',
+    to_tsvector('english', json::json->'content'->>'name')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';
+
+INSERT INTO event_search SELECT
+    event_id, room_id, 'content.topic',
+    to_tsvector('english', json::json->'content'->>'topic')
+    FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
+
+
+CREATE INDEX event_search_idx ON event_search USING gin(vector);
+"""
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    """Apply the full text search schema delta.
+
+    Executes every statement that get_statements yields for POSTGRES_SQL on
+    the given cursor. Nothing is done unless the database engine is a
+    PostgresEngine, as FTS is only supported on Postgres at the moment.
+
+    Args:
+        cur: database cursor used to execute the statements.
+        database_engine: engine object for the database being upgraded.
+        *args, **kwargs: accepted and ignored.
+    """
+    if isinstance(database_engine, PostgresEngine):
+        sql_lines = POSTGRES_SQL.splitlines()
+        for stmt in get_statements(sql_lines):
+            cur.execute(stmt)
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
new file mode 100644
index 0000000000..eea4477765
--- /dev/null
+++ b/synapse/storage/search.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+
+from _base import SQLBaseStore
+from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes
+
+
+class SearchStore(SQLBaseStore):
+    """Storage methods for full text search over the event_search table."""
+
+    @defer.inlineCallbacks
+    def search_msgs(self, constraints):
+        """Run a full text search and return the matching events by rank.
+
+        The query uses the Postgres functions plainto_tsquery and
+        ts_rank_cd against the `vector` column of `event_search`.
+
+        Args:
+            constraints: iterable of constraint objects; `search_type`,
+                `keys` and `value` are read from each one.
+                  * SearchConstraintTypes.FTS: `value` is the search text
+                    and each entry of `keys` is an acceptable value for the
+                    `key` column (OR-ed together). If several FTS
+                    constraints are given, the text of the last one is used.
+                  * SearchConstraintTypes.EXACT: only the key
+                    KnownRoomEventKeys.ROOM_ID is handled; `value` is
+                    iterated as the acceptable room IDs (OR-ed together).
+                The conditions of different constraints are AND-ed.
+
+        Returns:
+            Deferred firing with a list of dicts, ordered by descending
+            rank, each with keys "rank" (the ts_rank_cd score) and "result"
+            (the event). Rows whose event is not returned by _get_events
+            are left out. The list is empty if any constraint could not be
+            turned into at least one condition.
+        """
+        clauses = []
+        args = []
+        fts = None
+
+        for c in constraints:
+            local_clauses = []
+            if c.search_type == SearchConstraintTypes.FTS:
+                fts = c.value
+                for key in c.keys:
+                    local_clauses.append("key = ?")
+                    args.append(key)
+            elif c.search_type == SearchConstraintTypes.EXACT:
+                for key in c.keys:
+                    if key == KnownRoomEventKeys.ROOM_ID:
+                        for value in c.value:
+                            local_clauses.append("room_id = ?")
+                            args.append(value)
+
+            if not local_clauses:
+                # An empty OR-group cannot match any row. Rendering it as
+                # "()" would be a SQL syntax error, and dropping it would
+                # silently widen the search (e.g. to every room when the
+                # room list is empty), so report no matches instead.
+                defer.returnValue([])
+
+            clauses.append(
+                "(%s)" % (" OR ".join(local_clauses),)
+            )
+
+        sql = (
+            "SELECT ts_rank_cd(vector, query) AS rank, event_id"
+            " FROM plainto_tsquery('english', ?) as query, event_search"
+            " WHERE vector @@ query"
+        )
+        sql += "".join(" AND " + clause for clause in clauses)
+        sql += " ORDER BY rank DESC"
+
+        # The search text binds to the first placeholder (plainto_tsquery);
+        # the remaining args follow in the same order as their clauses.
+        results = yield self._execute(
+            "search_msgs", self.cursor_to_dict, sql, *([fts] + args)
+        )
+
+        events = yield self._get_events([r["event_id"] for r in results])
+
+        event_map = {
+            ev.event_id: ev
+            for ev in events
+        }
+
+        # Iterate over `results` (not `events`) to keep the rank ordering.
+        defer.returnValue([
+            {
+                "rank": r["rank"],
+                "result": event_map[r["event_id"]]
+            }
+            for r in results
+            if r["event_id"] in event_map
+        ])