From 59710437e4a885252de5e5555fbcf42d223b092c Mon Sep 17 00:00:00 2001 From: Melvyn Laïly Date: Fri, 26 Apr 2024 10:43:52 +0200 Subject: Return the search terms as search highlights for SQLite instead of nothing (#17000) Fixes https://github.com/element-hq/synapse/issues/16999 and https://github.com/element-hq/element-android/pull/8729 by returning the search terms as search highlights. --- synapse/storage/databases/main/search.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 4a0afb50ac..20fcfd3122 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore): count_args = args count_clauses = clauses + sqlite_highlights: List[str] = [] + if isinstance(self.database_engine, PostgresEngine): search_query = search_term sql = """ @@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore): """ count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): - search_query = _parse_query_for_sqlite(search_term) + search_query, sqlite_highlights = _parse_query_for_sqlite(search_term) sql = """ SELECT rank(matchinfo(event_search)) as rank, room_id, event_id @@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore): event_map = {ev.event_id: ev for ev in events} - highlights = None + highlights: Collection[str] = [] if isinstance(self.database_engine, PostgresEngine): highlights = await self._find_highlights_in_postgres(search_query, events) + else: + highlights = sqlite_highlights count_sql += " GROUP BY room_id" @@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore): count_args = list(args) count_clauses = list(clauses) + sqlite_highlights: List[str] = [] + if pagination_token: try: origin_server_ts_str, stream_str = pagination_token.split(",") @@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore): CROSS JOIN events USING (event_id) WHERE """ - search_query = _parse_query_for_sqlite(search_term) + search_query, sqlite_highlights = _parse_query_for_sqlite(search_term) args = [search_query] + args count_sql = """ @@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore): event_map = {ev.event_id: ev for ev in events} - highlights = None + highlights: Collection[str] = [] if isinstance(self.database_engine, PostgresEngine): highlights = await self._find_highlights_in_postgres(search_query, events) + else: + highlights = sqlite_highlights count_sql += " GROUP BY room_id" @@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList: return tokens -def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: +def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]: """ Convert the list of tokens to a string suitable for passing to sqlite's MATCH. Assume sqlite was compiled with enhanced query syntax. + Returns the sqlite-formatted query string and the tokenized search terms + that can be used as highlights. + Ref: https://www.sqlite.org/fts3.html#full_text_index_queries """ match_query = [] + highlights = [] for token in tokens: if isinstance(token, str): match_query.append(token) + highlights.append(token) elif isinstance(token, Phrase): match_query.append('"' + " ".join(token.phrase) + '"') + highlights.append(" ".join(token.phrase)) elif token == SearchToken.Not: # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search # term has already been added before this. @@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: else: raise ValueError(f"unknown token {token}") - return "".join(match_query) + return "".join(match_query), highlights -def _parse_query_for_sqlite(search_term: str) -> str: +def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]: """Takes a plain unicode string from the user and converts it into a form that can be passed to sqllite's matchinfo(). + + Returns the converted query string and the tokenized search terms + that can be used as highlights. """ return _tokens_to_sqlite_match_query(_tokenize_query(search_term)) -- cgit 1.4.1