summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
authorMelvyn Laïly <melvyn.laily@gmail.com>2024-04-26 10:43:52 +0200
committerGitHub <noreply@github.com>2024-04-26 09:43:52 +0100
commit59710437e4a885252de5e5555fbcf42d223b092c (patch)
tree4801c0791fa120c743cb8679e82f5e909464bc78 /synapse/storage
parentBump serde_json from 1.0.115 to 1.0.116 (#17112) (diff)
downloadsynapse-59710437e4a885252de5e5555fbcf42d223b092c.tar.xz
Return the search terms as search highlights for SQLite instead of nothing (#17000)
Fixes https://github.com/element-hq/synapse/issues/16999 and
https://github.com/element-hq/element-android/pull/8729 by returning the
search terms as search highlights.
Diffstat (limited to 'synapse/storage')
-rw-r--r--synapse/storage/databases/main/search.py31
1 files changed, 24 insertions, 7 deletions
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 4a0afb50ac..20fcfd3122 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore):
         count_args = args
         count_clauses = clauses
 
+        sqlite_highlights: List[str] = []
+
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             sql = """
@@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
-            search_query = _parse_query_for_sqlite(search_term)
+            search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
 
             sql = """
             SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
@@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         event_map = {ev.event_id: ev for ev in events}
 
-        highlights = None
+        highlights: Collection[str] = []
         if isinstance(self.database_engine, PostgresEngine):
             highlights = await self._find_highlights_in_postgres(search_query, events)
+        else:
+            highlights = sqlite_highlights
 
         count_sql += " GROUP BY room_id"
 
@@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore):
         count_args = list(args)
         count_clauses = list(clauses)
 
+        sqlite_highlights: List[str] = []
+
         if pagination_token:
             try:
                 origin_server_ts_str, stream_str = pagination_token.split(",")
@@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             CROSS JOIN events USING (event_id)
             WHERE
             """
-            search_query = _parse_query_for_sqlite(search_term)
+            search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
             count_sql = """
@@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         event_map = {ev.event_id: ev for ev in events}
 
-        highlights = None
+        highlights: Collection[str] = []
         if isinstance(self.database_engine, PostgresEngine):
             highlights = await self._find_highlights_in_postgres(search_query, events)
+        else:
+            highlights = sqlite_highlights
 
         count_sql += " GROUP BY room_id"
 
@@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList:
     return tokens
 
 
-def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
+def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]:
     """
     Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
     Assume sqlite was compiled with enhanced query syntax.
 
+    Returns the sqlite-formatted query string and the tokenized search terms
+    that can be used as highlights.
+
     Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
     """
     match_query = []
+    highlights = []
     for token in tokens:
         if isinstance(token, str):
             match_query.append(token)
+            highlights.append(token)
         elif isinstance(token, Phrase):
             match_query.append('"' + " ".join(token.phrase) + '"')
+            highlights.append(" ".join(token.phrase))
         elif token == SearchToken.Not:
             # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
             # term has already been added before this.
@@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
         else:
             raise ValueError(f"unknown token {token}")
 
-    return "".join(match_query)
+    return "".join(match_query), highlights
 
 
-def _parse_query_for_sqlite(search_term: str) -> str:
+def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]:
     """Takes a plain unicode string from the user and converts it into a form
     that can be passed to sqllite's matchinfo().
+
+    Returns the converted query string and the tokenized search terms
+    that can be used as highlights.
     """
     return _tokens_to_sqlite_match_query(_tokenize_query(search_term))