From c8baada94a6539cfcd1ec1316892302ae2271f4c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Oct 2015 17:09:53 +0100 Subject: Filter search results --- synapse/handlers/search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 22808b9c07..473aab53f0 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,6 +18,7 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.api.constants import Membership +from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -49,9 +50,12 @@ class SearchHandler(BaseHandler): keys = content["search_categories"]["room_events"].get("keys", [ "content.body", "content.name", "content.topic", ]) + filter_dict = content["search_categories"]["room_events"].get("filter", {}) except KeyError: raise SynapseError(400, "Invalid search query") + filtr = Filter(filter_dict) + # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( user.to_string(), @@ -64,11 +68,12 @@ class SearchHandler(BaseHandler): rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + filtered_events = filtr.filter(event_map.values()) + allowed_events = yield self._filter_events_for_client( - user.to_string(), event_map.values() + user.to_string(), filtered_events ) - # TODO: Filter allowed_events # TODO: Add a limit time_now = self.clock.time_msec() -- cgit 1.4.1 From 5c41224a89a9ceedeb5db10f972c10344382faf2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Oct 2015 10:09:26 +0100 Subject: Filter room ids before hitting the database --- synapse/api/filtering.py | 20 ++++++++++++++++++++ synapse/handlers/search.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 60b6648e0d..ab14b47281 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -202,6 +202,26 @@ class Filter(object): return True + def filter_rooms(self, room_ids): + """Apply the 'rooms' filter to a given list of rooms. + + Args: + room_ids (list): A list of room_ids. + + Returns: + list: A list of room_ids that match the filter + """ + room_ids = set(room_ids) + + disallowed_rooms = set(self.filter_json.get("not_rooms", [])) + room_ids -= disallowed_rooms + + allowed_rooms = self.filter_json.get("rooms", None) + if allowed_rooms is not None: + room_ids &= set(allowed_rooms) + + return room_ids + def filter(self, events): return filter(self.check, events) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 473aab53f0..f53e5d35ac 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -64,7 +64,7 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - # TODO: Apply room filter to rooms list + room_ids = filtr.filter_rooms(room_ids) rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) -- cgit 1.4.1 From 4d25bc6c92c3d219786892c5d510e0c4c4eb8b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:12:28 +0100 Subject: Move FTS to delta 25 --- synapse/storage/prepare_database.py | 2 +- synapse/storage/schema/delta/24/fts.py | 123 --------------------------------- synapse/storage/schema/delta/25/fts.py | 123 +++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 124 deletions(-) delete mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/schema/delta/25/fts.py diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1ddf55be4d..1a74d6e360 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 24 +SCHEMA_VERSION = 25 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py deleted file mode 100644 index 0c752d8426..0000000000 --- a/synapse/storage/schema/delta/24/fts.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging - -from synapse.storage.prepare_database import get_statements -from synapse.storage.engines import PostgresEngine, Sqlite3Engine - -import ujson - -logger = logging.getLogger(__name__) - - -POSTGRES_SQL = """ -CREATE TABLE event_search ( - event_id TEXT, - room_id TEXT, - key TEXT, - vector tsvector -); - -INSERT INTO event_search SELECT - event_id, room_id, 'content.body', - to_tsvector('english', json::json->'content'->>'body') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.name', - to_tsvector('english', json::json->'content'->>'name') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; - -INSERT INTO event_search SELECT - event_id, room_id, 'content.topic', - to_tsvector('english', json::json->'content'->>'topic') - FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; - - -CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); -CREATE INDEX event_search_ev_idx ON event_search(event_id); -CREATE INDEX event_search_ev_ridx ON event_search(room_id); -""" - - -SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" -) - - -def run_upgrade(cur, database_engine, *args, **kwargs): - if isinstance(database_engine, PostgresEngine): - run_postgres_upgrade(cur) - return - - if isinstance(database_engine, Sqlite3Engine): - run_sqlite_upgrade(cur) - return - - -def run_postgres_upgrade(cur): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) - - -def run_sqlite_upgrade(cur): - cur.execute(SQLITE_TABLE) - - rowid = -1 - while True: - cur.execute( - "SELECT rowid, json FROM event_json" - " WHERE rowid > ?" - " ORDER BY rowid ASC LIMIT 100", - (rowid,) - ) - - res = cur.fetchall() - - if not res: - break - - events = [ - ujson.loads(js) - for _, js in res - ] - - rowid = max(rid for rid, _ in res) - - rows = [] - for ev in events: - if ev["type"] == "m.room.message": - rows.append(( - ev["event_id"], ev["room_id"], "content.body", - ev["content"]["body"] - )) - if ev["type"] == "m.room.name": - rows.append(( - ev["event_id"], ev["room_id"], "content.name", - ev["content"]["name"] - )) - if ev["type"] == "m.room.topic": - rows.append(( - ev["event_id"], ev["room_id"], "content.topic", - ev["content"]["topic"] - )) - - if rows: - logger.info(rows) - cur.executemany( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)", - rows - ) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py new file mode 100644 index 0000000000..fd181fc630 --- /dev/null +++ b/synapse/storage/schema/delta/25/fts.py @@ -0,0 +1,123 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.storage.prepare_database import get_statements +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE IF NOT EXISTS event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); +""" + + +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" +) + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + run_postgres_upgrade(cur) + return + + if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" + " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) -- cgit 1.4.1 From f142898f529f89c5bd90710db2d0771894b0b33f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 11:18:01 +0100 Subject: PEP8 --- synapse/storage/schema/delta/25/fts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/delta/25/fts.py b/synapse/storage/schema/delta/25/fts.py index fd181fc630..ed3cc06557 100644 --- a/synapse/storage/schema/delta/25/fts.py +++ b/synapse/storage/schema/delta/25/fts.py @@ -53,7 +53,8 @@ CREATE INDEX event_search_ev_ridx ON event_search(room_id); SQLITE_TABLE = ( - "CREATE VIRTUAL TABLE IF NOT EXISTS event_search USING fts3 ( event_id, room_id, key, value)" + "CREATE VIRTUAL TABLE IF NOT EXISTS event_search" + " USING fts3 ( event_id, room_id, key, value)" ) -- cgit 1.4.1 From ba02bba88c6895bc9196d226a2bbc8047b93e28b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 13:25:22 +0100 Subject: Limit max number of SQL vars --- synapse/storage/search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index a3c69c5ab3..810b5406ad 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -36,10 +36,12 @@ class SearchStore(SQLBaseStore): clauses = [] args = [] - clauses.append( - "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) - ) - args.extend(room_ids) + # Make sure we don't explode because the person is in too many rooms. + if len(room_ids) > 500: + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) local_clauses = [] for key in keys: -- cgit 1.4.1 From 232beb3a3c28ccdc5388daa9396d5054b7768b12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 15:02:35 +0100 Subject: Use namedtuple as return value --- synapse/handlers/search.py | 4 +++- synapse/storage/search.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index f53e5d35ac..bdc79ffc55 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -66,7 +66,9 @@ class SearchHandler(BaseHandler): room_ids = filtr.filter_rooms(room_ids) - rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) filtered_events = filtr.filter(event_map.values()) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 810b5406ad..41451ade57 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,6 +18,17 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from collections import namedtuple + +"""The result of a search. + +Fields: + rank_map (dict): Mapping event_id -> rank + event_map (dict): Mapping event_id -> event + pagination_token (str): Pagination token +""" +SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) + class SearchStore(SQLBaseStore): @defer.inlineCallbacks @@ -31,7 +42,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - 2-tuple of (dict event_id -> rank, dict event_id -> event) + SearchResult """ clauses = [] args = [] @@ -85,11 +96,12 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(( + defer.returnValue(SearchResult( { r["event_id"]: r["rank"] for r in results if r["event_id"] in event_map }, - event_map + event_map, + None )) -- cgit 1.4.1 From fb0fecd0b9f430670ca6de1f4746601dd6cc24f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:18:35 +0100 Subject: LESS THAN --- synapse/storage/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 41451ade57..e658e07dc7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,8 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - if len(room_ids) > 500: + # We filter the results regardless. + if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) -- cgit 1.4.1 From 2980136d7535077f0513b8a12fd7f224700ca140 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:19:53 +0100 Subject: Rename --- synapse/handlers/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bdc79ffc55..bbe82b1425 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -54,7 +54,7 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") - filtr = Filter(filter_dict) + search_filter = Filter(filter_dict) # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( @@ -64,13 +64,13 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = filtr.filter_rooms(room_ids) + room_ids = search_filter.filter_rooms(room_ids) rank_map, event_map, _ = yield self.store.search_msgs( room_ids, search_term, keys ) - filtered_events = filtr.filter(event_map.values()) + filtered_events = search_filter.filter(event_map.values()) allowed_events = yield self._filter_events_for_client( user.to_string(), filtered_events -- cgit 1.4.1 From 671ac699f1d4672bed3817b3cafb7498df99c030 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:54:56 +0100 Subject: Actually filter results --- synapse/storage/search.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e658e07dc7..9608b5d6a7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -48,7 +48,7 @@ class SearchStore(SQLBaseStore): args = [] # Make sure we don't explode because the person is in too many rooms. - # We filter the results regardless. + # We filter the results below regardless. if len(room_ids) < 500: clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) @@ -66,13 +66,13 @@ class SearchStore(SQLBaseStore): if isinstance(self.database_engine, PostgresEngine): sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, event_id" + "SELECT ts_rank_cd(vector, query) AS rank, room_id, event_id" " FROM plainto_tsquery('english', ?) as query, event_search" " WHERE vector @@ query" ) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( - "SELECT 0 as rank, event_id FROM event_search" + "SELECT 0 as rank, room_id, event_id FROM event_search" " WHERE value MATCH ?" ) else: @@ -90,6 +90,8 @@ class SearchStore(SQLBaseStore): "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) + results = filter(lambda row: row["room_id"] in room_ids, results) + events = yield self._get_events([r["event_id"] for r in results]) event_map = { -- cgit 1.4.1