diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 50688e51a8..6d2197339e 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -139,11 +139,18 @@ class SearchHandler(BaseHandler):
# Holds the next_batch for the entire result set if one of those exists
global_next_batch = None
+ highlights = set()
+
if order_by == "rank":
- results = yield self.store.search_msgs(
+ search_result = yield self.store.search_msgs(
room_ids, search_term, keys
)
+ if search_result["highlights"]:
+ highlights.update(search_result["highlights"])
+
+ results = search_result["results"]
+
results_map = {r["event"].event_id: r for r in results}
rank_map.update({r["event"].event_id: r["rank"] for r in results})
@@ -187,11 +194,16 @@ class SearchHandler(BaseHandler):
# But only go around 5 times since otherwise synapse will be sad.
while len(room_events) < search_filter.limit() and i < 5:
i += 1
- results = yield self.store.search_room(
+ search_result = yield self.store.search_room(
room_id, search_term, keys, search_filter.limit() * 2,
pagination_token=pagination_token,
)
+ if search_result["highlights"]:
+ highlights.update(search_result["highlights"])
+
+ results = search_result["results"]
+
results_map = {r["event"].event_id: r for r in results}
rank_map.update({r["event"].event_id: r["rank"] for r in results})
@@ -347,7 +359,8 @@ class SearchHandler(BaseHandler):
rooms_cat_res = {
"results": results,
- "count": len(results)
+ "count": len(results),
+ "highlights": list(highlights),
}
if state_results:
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index 380270b009..c6386642df 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -20,6 +20,7 @@ from synapse.api.errors import SynapseError
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
import logging
+import re
logger = logging.getLogger(__name__)
@@ -194,14 +195,21 @@ class SearchStore(BackgroundUpdateStore):
for ev in events
}
- defer.returnValue([
- {
- "event": event_map[r["event_id"]],
- "rank": r["rank"],
- }
- for r in results
- if r["event_id"] in event_map
- ])
+ highlights = None
+ if isinstance(self.database_engine, PostgresEngine):
+ highlights = yield self._find_highlights_in_postgres(search_term, events)
+
+ defer.returnValue({
+ "results": [
+ {
+ "event": event_map[r["event_id"]],
+ "rank": r["rank"],
+ }
+ for r in results
+ if r["event_id"] in event_map
+ ],
+ "highlights": highlights,
+ })
@defer.inlineCallbacks
def search_room(self, room_id, search_term, keys, limit, pagination_token=None):
@@ -294,14 +302,91 @@ class SearchStore(BackgroundUpdateStore):
for ev in events
}
- defer.returnValue([
- {
- "event": event_map[r["event_id"]],
- "rank": r["rank"],
- "pagination_token": "%s,%s" % (
- r["topological_ordering"], r["stream_ordering"]
- ),
- }
- for r in results
- if r["event_id"] in event_map
- ])
+ highlights = None
+ if isinstance(self.database_engine, PostgresEngine):
+ highlights = yield self._find_highlights_in_postgres(search_term, events)
+
+ defer.returnValue({
+ "results": [
+ {
+ "event": event_map[r["event_id"]],
+ "rank": r["rank"],
+ "pagination_token": "%s,%s" % (
+ r["topological_ordering"], r["stream_ordering"]
+ ),
+ }
+ for r in results
+ if r["event_id"] in event_map
+ ],
+ "highlights": highlights,
+ })
+
+ def _find_highlights_in_postgres(self, search_term, events):
+ """Given a list of events and a search term, return the set of words
+ from the content of the events that match the search term.
+
+ This is used to give a list of words that clients can match against to
+ highlight the matching parts.
+
+ Args:
+ search_term (str)
+ events (list): A list of events
+
+ Returns:
+ deferred : A set of lower-cased strings.
+ """
+ def f(txn):
+ highlight_words = set()
+ for event in events:
+ # As a hack we simply join values of all possible keys. This is
+ # fine since we're only using them to find possible highlights.
+ values = []
+ for key in ("body", "name", "topic"):
+ v = event.content.get(key, None)
+ if v:
+ values.append(v)
+
+ if not values:
+ continue
+
+ value = " ".join(values)
+
+ # We need to find some values for StartSel and StopSel that
+ # aren't in the value so that we can pick results out.
+ start_sel = "<"
+ stop_sel = ">"
+
+ while start_sel in value:
+ start_sel += "<"
+ while stop_sel in value:
+ stop_sel += ">"
+
+ query = "SELECT ts_headline(?, plainto_tsquery('english', ?), %s)" % (
+ _to_postgres_options({
+ "StartSel": start_sel,
+ "StopSel": stop_sel,
+ "MaxFragments": "50",
+ })
+ )
+ txn.execute(query, (value, search_term,))
+ headline, = txn.fetchall()[0]
+
+ # Now we need to pick the possible highlights out of the headline
+ # result.
+ matcher_regex = "%s(.*?)%s" % (
+ re.escape(start_sel),
+ re.escape(stop_sel),
+ )
+
+ res = re.findall(matcher_regex, headline)
+ highlight_words.update([r.lower() for r in res])
+
+ return highlight_words
+
+ return self.runInteraction("_find_highlights", f)
+
+
+def _to_postgres_options(options_dict):
+ return "'%s'" % (
+ ",".join("%s=%s" % (k, v) for k, v in options_dict.items()),
+ )
|