From 51fb590c0e787c385bea1d595fa8bceea23c26e5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Dec 2015 11:12:57 +0000 Subject: Use more efficient query form --- synapse/storage/search.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'synapse/storage/search.py') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 39f600f53c..c39d54a7ca 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -143,7 +143,7 @@ class SearchStore(BackgroundUpdateStore): search_query = search_query = _parse_query(self.database_engine, search_term) - args = [search_query] + args = [] # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. @@ -164,16 +164,19 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" - " FROM to_tsquery('english', ?) as query, event_search" - " WHERE vector @@ query" + "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank," + " room_id, event_id" + " FROM event_search" + " WHERE vector @@ to_tsquery('english', ?)" ) + args = [search_query, search_query] + args elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" " FROM event_search" " WHERE value MATCH ?" ) + args = [search_query] + args else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -232,7 +235,7 @@ class SearchStore(BackgroundUpdateStore): search_query = search_query = _parse_query(self.database_engine, search_term) - args = [search_query] + args = [] # Make sure we don't explode because the person is in too many rooms. # We filter the results below regardless. @@ -267,12 +270,13 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) as rank," + "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank," " origin_server_ts, stream_ordering, room_id, event_id" - " FROM to_tsquery('english', ?) as query, event_search" + " FROM event_search" " NATURAL JOIN events" - " WHERE vector @@ query AND " + " WHERE vector @@ to_tsquery('english', ?) AND " ) + args = [search_term, search_term] + args elif isinstance(self.database_engine, Sqlite3Engine): # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin @@ -292,6 +296,7 @@ class SearchStore(BackgroundUpdateStore): " CROSS JOIN events USING (event_id)" " WHERE " ) + args = [search_term] + args else: # This should be unreachable. raise Exception("Unrecognized database engine") -- cgit 1.4.1 From d9a5c56930c22b02268f5deca4df84eba345ec2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 11 Dec 2015 11:40:23 +0000 Subject: Include approximate count of search results --- synapse/handlers/search.py | 8 ++++++- synapse/storage/search.py | 56 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 3 deletions(-) (limited to 'synapse/storage/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bc79564287..99ef56871c 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -152,11 +152,15 @@ class SearchHandler(BaseHandler): highlights = set() + count = None + if order_by == "rank": search_result = yield self.store.search_msgs( room_ids, search_term, keys ) + count = search_result["count"] + if search_result["highlights"]: highlights.update(search_result["highlights"]) @@ -207,6 +211,8 @@ class SearchHandler(BaseHandler): if search_result["highlights"]: highlights.update(search_result["highlights"]) + count = search_result["count"] + results = search_result["results"] results_map = {r["event"].event_id: r for r in results} @@ -359,7 +365,7 @@ class SearchHandler(BaseHandler): rooms_cat_res = { "results": results, - "count": len(results), + "count": count, "highlights": list(highlights), } diff --git a/synapse/storage/search.py b/synapse/storage/search.py index c39d54a7ca..efd87d99bb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -162,6 +162,9 @@ class SearchStore(BackgroundUpdateStore): "(%s)" % (" OR ".join(local_clauses),) ) + count_args = args + count_clauses = clauses + if isinstance(self.database_engine, PostgresEngine): sql = ( "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank," @@ -170,6 +173,12 @@ class SearchStore(BackgroundUpdateStore): " WHERE vector @@ to_tsquery('english', ?)" ) args = [search_query, search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE vector @@ to_tsquery('english', ?)" + ) + count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" @@ -177,6 +186,12 @@ class SearchStore(BackgroundUpdateStore): " WHERE value MATCH ?" ) args = [search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE value MATCH ? AND " + ) + count_args = [search_term] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") @@ -184,6 +199,9 @@ class SearchStore(BackgroundUpdateStore): for clause in clauses: sql += " AND " + clause + for clause in count_clauses: + count_sql += " AND " + clause + # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. sql += " ORDER BY rank DESC LIMIT 500" @@ -205,6 +223,14 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): highlights = yield self._find_highlights_in_postgres(search_query, events) + count_sql += " GROUP BY room_id" + + count_results = yield self._execute( + "search_rooms_count", self.cursor_to_dict, count_sql, *count_args + ) + + count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + defer.returnValue({ "results": [ { @@ -215,6 +241,7 @@ class SearchStore(BackgroundUpdateStore): if r["event_id"] in event_map ], "highlights": highlights, + "count": count, }) @defer.inlineCallbacks @@ -254,6 +281,9 @@ class SearchStore(BackgroundUpdateStore): "(%s)" % (" OR ".join(local_clauses),) ) + count_args = args + count_clauses = clauses + if pagination_token: try: origin_server_ts, stream = pagination_token.split(",") @@ -276,7 +306,13 @@ class SearchStore(BackgroundUpdateStore): " NATURAL JOIN events" " WHERE vector @@ to_tsquery('english', ?) AND " ) - args = [search_term, search_term] + args + args = [search_query, search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE vector @@ to_tsquery('english', ?) AND " + ) + count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): # We use CROSS JOIN here to ensure we use the right indexes. # https://sqlite.org/optoverview.html#crossjoin @@ -296,12 +332,19 @@ class SearchStore(BackgroundUpdateStore): " CROSS JOIN events USING (event_id)" " WHERE " ) - args = [search_term] + args + args = [search_query] + args + + count_sql = ( + "SELECT room_id, count(*) as count FROM event_search" + " WHERE value MATCH ? AND " + ) + count_args = [search_term] + count_args else: # This should be unreachable. raise Exception("Unrecognized database engine") sql += " AND ".join(clauses) + count_sql += " AND ".join(count_clauses) # We add an arbitrary limit here to ensure we don't try to pull the # entire table from the database. @@ -326,6 +369,14 @@ class SearchStore(BackgroundUpdateStore): if isinstance(self.database_engine, PostgresEngine): highlights = yield self._find_highlights_in_postgres(search_query, events) + count_sql += " GROUP BY room_id" + + count_results = yield self._execute( + "search_rooms_count", self.cursor_to_dict, count_sql, *count_args + ) + + count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + defer.returnValue({ "results": [ { @@ -339,6 +390,7 @@ class SearchStore(BackgroundUpdateStore): if r["event_id"] in event_map ], "highlights": highlights, + "count": count, }) def _find_highlights_in_postgres(self, search_query, events): -- cgit 1.4.1 From bfc52a2342999a7887dcc5ba653b67454c0fc2c8 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 14 Dec 2015 11:38:11 +0000 Subject: Fix typo in sql for full text search on sqlite3 --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/search.py') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index efd87d99bb..00f89ff02f 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -189,7 +189,7 @@ class SearchStore(BackgroundUpdateStore): count_sql = ( "SELECT room_id, count(*) as count FROM event_search" - " WHERE value MATCH ? AND " + " WHERE value MATCH ?" ) count_args = [search_term] + count_args else: -- cgit 1.4.1 From 98dfa7d24f91ff083b36f1379ce2426c8e6cdb75 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Mon, 14 Dec 2015 13:55:46 +0000 Subject: Skip events that where the body, name or topic isn't a string when back populating the FTS index --- synapse/storage/search.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/storage/search.py') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 39f600f53c..04246101df 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -85,6 +85,11 @@ class SearchStore(BackgroundUpdateStore): # skip over it. continue + if not isinstance(value, basestring): + # If the event body, name or topic isn't a string + # then skip over it + continue + event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): -- cgit 1.4.1 From a64f9bbfe0fc592043a3da8979b7f2545187dbb6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 17 Dec 2015 12:47:26 +0000 Subject: Fix 500 error when back-paginating search results We were mistakenly adding pagination clauses to the count query, which then failed because the count query doesn't join to the events table. --- synapse/storage/search.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/storage/search.py') diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 57c9cc1c5f..6cb5e73b6e 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -286,8 +286,10 @@ class SearchStore(BackgroundUpdateStore): "(%s)" % (" OR ".join(local_clauses),) ) - count_args = args - count_clauses = clauses + # take copies of the current args and clauses lists, before adding + # pagination clauses to main query. + count_args = list(args) + count_clauses = list(clauses) if pagination_token: try: -- cgit 1.4.1