summary refs log tree commit diff
diff options
context:
space:
mode:
authorMelvyn Laïly <melvyn.laily@gmail.com>2024-04-26 10:43:52 +0200
committerGitHub <noreply@github.com>2024-04-26 09:43:52 +0100
commit59710437e4a885252de5e5555fbcf42d223b092c (patch)
tree4801c0791fa120c743cb8679e82f5e909464bc78
parentBump serde_json from 1.0.115 to 1.0.116 (#17112) (diff)
downloadsynapse-59710437e4a885252de5e5555fbcf42d223b092c.tar.xz
Return the search terms as search highlights for SQLite instead of nothing (#17000)
Fixes https://github.com/element-hq/synapse/issues/16999 and
https://github.com/element-hq/element-android/pull/8729 by returning the
search terms as search highlights.
-rw-r--r--changelog.d/17000.bugfix1
-rw-r--r--synapse/storage/databases/main/search.py31
-rw-r--r--tests/storage/test_room_search.py13
3 files changed, 31 insertions, 14 deletions
diff --git a/changelog.d/17000.bugfix b/changelog.d/17000.bugfix
new file mode 100644
index 0000000000..86b21c9615
--- /dev/null
+++ b/changelog.d/17000.bugfix
@@ -0,0 +1 @@
+Fixed search feature of Element Android on homesevers using SQLite by returning search terms as search highlights.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 4a0afb50ac..20fcfd3122 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore):
         count_args = args
         count_clauses = clauses
 
+        sqlite_highlights: List[str] = []
+
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             sql = """
@@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
-            search_query = _parse_query_for_sqlite(search_term)
+            search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
 
             sql = """
             SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
@@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         event_map = {ev.event_id: ev for ev in events}
 
-        highlights = None
+        highlights: Collection[str] = []
         if isinstance(self.database_engine, PostgresEngine):
             highlights = await self._find_highlights_in_postgres(search_query, events)
+        else:
+            highlights = sqlite_highlights
 
         count_sql += " GROUP BY room_id"
 
@@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore):
         count_args = list(args)
         count_clauses = list(clauses)
 
+        sqlite_highlights: List[str] = []
+
         if pagination_token:
             try:
                 origin_server_ts_str, stream_str = pagination_token.split(",")
@@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             CROSS JOIN events USING (event_id)
             WHERE
             """
-            search_query = _parse_query_for_sqlite(search_term)
+            search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
             count_sql = """
@@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         event_map = {ev.event_id: ev for ev in events}
 
-        highlights = None
+        highlights: Collection[str] = []
         if isinstance(self.database_engine, PostgresEngine):
             highlights = await self._find_highlights_in_postgres(search_query, events)
+        else:
+            highlights = sqlite_highlights
 
         count_sql += " GROUP BY room_id"
 
@@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList:
     return tokens
 
 
-def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
+def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]:
     """
     Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
     Assume sqlite was compiled with enhanced query syntax.
 
+    Returns the sqlite-formatted query string and the tokenized search terms
+    that can be used as highlights.
+
     Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
     """
     match_query = []
+    highlights = []
     for token in tokens:
         if isinstance(token, str):
             match_query.append(token)
+            highlights.append(token)
         elif isinstance(token, Phrase):
             match_query.append('"' + " ".join(token.phrase) + '"')
+            highlights.append(" ".join(token.phrase))
         elif token == SearchToken.Not:
             # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
             # term has already been added before this.
@@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
         else:
             raise ValueError(f"unknown token {token}")
 
-    return "".join(match_query)
+    return "".join(match_query), highlights
 
 
-def _parse_query_for_sqlite(search_term: str) -> str:
+def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]:
     """Takes a plain unicode string from the user and converts it into a form
     that can be passed to sqllite's matchinfo().
+
+    Returns the converted query string and the tokenized search terms
+    that can be used as highlights.
     """
     return _tokens_to_sqlite_match_query(_tokenize_query(search_term))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 1eab89f140..340642b7e7 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -71,17 +71,16 @@ class EventSearchInsertionTest(HomeserverTestCase):
             store.search_msgs([room_id], "hi bob", ["content.body"])
         )
         self.assertEqual(result.get("count"), 1)
-        if isinstance(store.database_engine, PostgresEngine):
-            self.assertIn("hi", result.get("highlights"))
-            self.assertIn("bob", result.get("highlights"))
+        self.assertIn("hi", result.get("highlights"))
+        self.assertIn("bob", result.get("highlights"))
 
         # Check that search works for an unrelated message
         result = self.get_success(
             store.search_msgs([room_id], "another", ["content.body"])
         )
         self.assertEqual(result.get("count"), 1)
-        if isinstance(store.database_engine, PostgresEngine):
-            self.assertIn("another", result.get("highlights"))
+
+        self.assertIn("another", result.get("highlights"))
 
         # Check that search works for a search term that overlaps with the message
         # containing a null byte and an unrelated message.
@@ -90,8 +89,8 @@ class EventSearchInsertionTest(HomeserverTestCase):
         result = self.get_success(
             store.search_msgs([room_id], "hi alice", ["content.body"])
         )
-        if isinstance(store.database_engine, PostgresEngine):
-            self.assertIn("alice", result.get("highlights"))
+
+        self.assertIn("alice", result.get("highlights"))
 
     def test_non_string(self) -> None:
         """Test that non-string `value`s are not inserted into `event_search`.