diff options
author | Erik Johnston <erik@matrix.org> | 2015-12-11 16:27:38 +0000 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2015-12-11 16:27:38 +0000 |
commit | cde49d3d2be95994aab26d0bc8650e1279f841e1 (patch) | |
tree | 089dc58b46130562c801b25e3aab48f31193edd7 /synapse/storage | |
parent | Merge pull request #433 from matrix-org/paul/tiny-fixes (diff) | |
parent | Include approximate count of search results (diff) | |
download | synapse-cde49d3d2be95994aab26d0bc8650e1279f841e1.tar.xz |
Merge pull request #435 from matrix-org/erikj/search
Include approximate count of search results
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/search.py | 73 |
1 files changed, 65 insertions, 8 deletions
diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 39f600f53c..efd87d99bb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -143,7 +143,7 @@ class SearchStore(BackgroundUpdateStore): search_query = search_query = _parse_query(self.database_engine, search_term) - args = [search_query] + args = [] # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. @@ -162,18 +162,36 @@ class SearchStore(BackgroundUpdateStore): "(%s)" % (" OR ".join(local_clauses),) ) + count_args = args + count_clauses = clauses + if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" - " FROM to_tsquery('english', ?) as query, event_search" - " WHERE vector @@ query" + "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank," + " room_id, event_id" + " FROM event_search" + " WHERE vector @@ to_tsquery('english', ?)" + ) + args = [search_query, search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE vector @@ to_tsquery('english', ?)" ) + count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" " FROM event_search" " WHERE value MATCH ?" ) + args = [search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE value MATCH ? AND " + ) + count_args = [search_term] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -181,6 +199,9 @@ class SearchStore(BackgroundUpdateStore): for clause in clauses: sql += " AND " + clause + for clause in count_clauses: + count_sql += " AND " + clause + # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. sql += " ORDER BY rank DESC LIMIT 500" @@ -202,6 +223,14 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): highlights = yield self._find_highlights_in_postgres(search_query, events) + count_sql += " GROUP BY room_id" + + count_results = yield self._execute( + "search_rooms_count", self.cursor_to_dict, count_sql, *count_args + ) + + count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + defer.returnValue({ "results": [ { @@ -212,6 +241,7 @@ class SearchStore(BackgroundUpdateStore): if r["event_id"] in event_map ], "highlights": highlights, + "count": count, }) @defer.inlineCallbacks @@ -232,7 +262,7 @@ class SearchStore(BackgroundUpdateStore): search_query = search_query = _parse_query(self.database_engine, search_term) - args = [search_query] + args = [] # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. @@ -251,6 +281,9 @@ class SearchStore(BackgroundUpdateStore): "(%s)" % (" OR ".join(local_clauses),) ) + count_args = args + count_clauses = clauses + if pagination_token: try: origin_server_ts, stream = pagination_token.split(",") @@ -267,12 +300,19 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) as rank," + "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank," " origin_server_ts, stream_ordering, room_id, event_id" - " FROM to_tsquery('english', ?) as query, event_search" + " FROM event_search" " NATURAL JOIN events" - " WHERE vector @@ query AND " + " WHERE vector @@ to_tsquery('english', ?) AND " ) + args = [search_query, search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE vector @@ to_tsquery('english', ?) AND " + ) + count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin @@ -292,11 +332,19 @@ class SearchStore(BackgroundUpdateStore): " CROSS JOIN events USING (event_id)" " WHERE " ) + args = [search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE value MATCH ? AND " + ) + count_args = [search_term] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") sql += " AND ".join(clauses) + count_sql += " AND ".join(count_clauses) # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. @@ -321,6 +369,14 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): highlights = yield self._find_highlights_in_postgres(search_query, events) + count_sql += " GROUP BY room_id" + + count_results = yield self._execute( + "search_rooms_count", self.cursor_to_dict, count_sql, *count_args + ) + + count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + defer.returnValue({ "results": [ { @@ -334,6 +390,7 @@ class SearchStore(BackgroundUpdateStore): if r["event_id"] in event_map ], "highlights": highlights, + "count": count, }) def _find_highlights_in_postgres(self, search_query, events): |