summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2015-10-12 10:49:53 +0100
committer Erik Johnston <erik@matrix.org> 2015-10-12 10:49:53 +0100
commit 61561b9df791ec90e287e535cc75831c2016bf36 (patch)
tree 903d6e26bc4bc3892c59c0a38bc9e2d3eccad96e
parent Add basic full text search impl. (diff)
download synapse-61561b9df791ec90e287e535cc75831c2016bf36.tar.xz
Keep FTS indexes up to date. Only search through rooms currently joined
Diffstat (limited to '')
-rw-r--r-- synapse/handlers/search.py | 31
-rw-r--r-- synapse/rest/client/v1/room.py | 2
-rw-r--r-- synapse/storage/events.py | 2
-rw-r--r-- synapse/storage/room.py | 22
-rw-r--r-- synapse/storage/schema/delta/24/fts.py | 3
-rw-r--r-- synapse/storage/search.py | 7
6 files changed, 55 insertions, 12 deletions
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 8b997fc394..b6bdb752e9 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -65,7 +65,7 @@ class SearchHandler(BaseHandler):
         super(SearchHandler, self).__init__(hs)
 
     @defer.inlineCallbacks
-    def search(self, content):
+    def search(self, user, content):
         constraint_dicts = content["search_categories"]["room_events"]["constraints"]
         constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts]
 
@@ -76,20 +76,33 @@ class SearchHandler(BaseHandler):
                     raise SynapseError(400, "Only one constraint can be FTS")
                 fts = True
 
-        res = yield self.hs.get_datastore().search_msgs(constraints)
+        rooms = yield self.store.get_rooms_for_user(
+            user.to_string(),
+        )
 
-        time_now = self.hs.get_clock().time_msec()
+        # For some reason the list of events contains duplicates
+        # TODO(paul): work out why because I really don't think it should
+        room_ids = set(r.room_id for r in rooms)
 
-        results = [
-            {
+        res = yield self.store.search_msgs(room_ids, constraints)
+
+        time_now = self.clock.time_msec()
+
+        results = {
+            r["result"].event_id: {
                 "rank": r["rank"],
                 "result": serialize_event(r["result"], time_now)
             }
             for r in res
-        ]
+        }
 
         logger.info("returning: %r", results)
 
-        results.sort(key=lambda r: -r["rank"])
-
-        defer.returnValue(results)
+        defer.returnValue({
+            "search_categories": {
+                "room_events": {
+                    "results": results,
+                    "count": len(results)
+                }
+            }
+        })
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 35bd702a43..94adabca62 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -540,7 +540,7 @@ class SearchRestServlet(ClientV1RestServlet):
 
         content = _parse_json(request)
 
-        results = yield self.handlers.search_handler.search(content)
+        results = yield self.handlers.search_handler.search(auth_user, content)
 
         defer.returnValue((200, results))
 
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 416ef6af93..e6c1abfc27 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -307,6 +307,8 @@ class EventsStore(SQLBaseStore):
                 self._store_room_name_txn(txn, event)
             elif event.type == EventTypes.Topic:
                 self._store_room_topic_txn(txn, event)
+            elif event.type == EventTypes.Message:
+                self._store_room_message_txn(txn, event)
             elif event.type == EventTypes.Redaction:
                 self._store_redaction(txn, event)
 
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 5e07b7e0e5..e4e830944a 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -175,6 +175,10 @@ class RoomStore(SQLBaseStore):
                 },
             )
 
+            self._store_event_search_txn(
+                txn, event, "content.topic", event.content["topic"]
+            )
+
     def _store_room_name_txn(self, txn, event):
         if hasattr(event, "content") and "name" in event.content:
             self._simple_insert_txn(
@@ -187,6 +191,24 @@ class RoomStore(SQLBaseStore):
                 }
             )
 
+            self._store_event_search_txn(
+                txn, event, "content.name", event.content["name"]
+            )
+
+    def _store_room_message_txn(self, txn, event):
+        if hasattr(event, "content") and "body" in event.content:
+            self._store_event_search_txn(
+                txn, event, "content.body", event.content["body"]
+            )
+
+    def _store_event_search_txn(self, txn, event, key, value):
+        sql = (
+            "INSERT INTO event_search (event_id, room_id, key, vector)"
+            " VALUES (?,?,?,to_tsvector('english', ?))"
+        )
+
+        txn.execute(sql, (event.event_id, event.room_id, key, value,))
+
     @cachedInlineCallbacks()
     def get_room_name_and_aliases(self, room_id):
         def f(txn):
diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py
index 5680332758..05f1605fdd 100644
--- a/synapse/storage/schema/delta/24/fts.py
+++ b/synapse/storage/schema/delta/24/fts.py
@@ -44,7 +44,8 @@ INSERT INTO event_search SELECT
     FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
 
 
-CREATE INDEX event_search_idx ON event_search USING gin(vector);
+CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
+CREATE INDEX event_search_ev_idx ON event_search(event_id);
 """
 
 
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index eea4477765..e66b5f9edc 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -21,11 +21,16 @@ from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes
 
 class SearchStore(SQLBaseStore):
     @defer.inlineCallbacks
-    def search_msgs(self, constraints):
+    def search_msgs(self, room_ids, constraints):
         clauses = []
         args = []
         fts = None
 
+        clauses.append(
+            "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)
+        )
+        args.extend(room_ids)
+
         for c in constraints:
             local_clauses = []
             if c.search_type == SearchConstraintTypes.FTS: