From c85c9125627a62c73711786723be12be30d7a81e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Oct 2015 15:48:31 +0100 Subject: Add basic full text search impl. --- synapse/handlers/search.py | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 synapse/handlers/search.py (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py new file mode 100644 index 0000000000..8b997fc394 --- /dev/null +++ b/synapse/handlers/search.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from ._base import BaseHandler + +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.errors import SynapseError +from synapse.events.utils import serialize_event + +import logging + + +logger = logging.getLogger(__name__) + + +KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { + KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.CONTENT_NAME: [SearchConstraintTypes.FTS, SearchConstraintTypes.EXACT, SearchConstraintTypes.SUBSTRING], + KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], + KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], +} + + +class RoomConstraint(object): + def __init__(self, search_type, keys, value): + self.search_type = search_type + self.keys = keys + self.value = value + + @classmethod + def from_dict(cls, d): + search_type = d["type"] + keys = d["keys"] + + for key in keys: + if key not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES: + raise SynapseError(400, "Unrecognized key %r", key) + + if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: + raise SynapseError(400, "Disallowed constraint type %r for key %r", search_type, key) + + return cls(search_type, keys, d["value"]) + + +class SearchHandler(BaseHandler): + + def __init__(self, hs): + super(SearchHandler, self).__init__(hs) + + @defer.inlineCallbacks + def search(self, content): + constraint_dicts = content["search_categories"]["room_events"]["constraints"] + constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] + + fts = False + for c in constraints: + if c.search_type == SearchConstraintTypes.FTS: + if fts: + raise SynapseError(400, "Only one constraint can be FTS") + fts = True + + res = yield self.hs.get_datastore().search_msgs(constraints) + + time_now = self.hs.get_clock().time_msec() + + results = [ + { + "rank": r["rank"], + "result": serialize_event(r["result"], time_now) + } + for r in res + ] + + logger.info("returning: %r", results) + + results.sort(key=lambda r: -r["rank"]) + + defer.returnValue(results) -- cgit 1.5.1 From 61561b9df791ec90e287e535cc75831c2016bf36 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:49:53 +0100 Subject: Keep FTS indexes up to date. Only search through rooms currently joined --- synapse/handlers/search.py | 31 ++++++++++++++++++++++--------- synapse/rest/client/v1/room.py | 2 +- synapse/storage/events.py | 2 ++ synapse/storage/room.py | 22 ++++++++++++++++++++++ synapse/storage/schema/delta/24/fts.py | 3 ++- synapse/storage/search.py | 7 ++++++- 6 files changed, 55 insertions(+), 12 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8b997fc394..b6bdb752e9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -65,7 +65,7 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, content): + def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] @@ -76,20 +76,33 @@ class SearchHandler(BaseHandler): raise SynapseError(400, "Only one constraint can be FTS") fts = True - res = yield self.hs.get_datastore().search_msgs(constraints) + rooms = yield self.store.get_rooms_for_user( + user.to_string(), + ) - time_now = self.hs.get_clock().time_msec() + # For some reason the list of events contains duplicates + # TODO(paul): work out why because I really don't think it should + room_ids = set(r.room_id for r in rooms) - results = [ - { + res = yield self.store.search_msgs(room_ids, constraints) + + time_now = self.clock.time_msec() + + results = { + r["result"].event_id: { "rank": r["rank"], "result": serialize_event(r["result"], time_now) } for r in res - ] + } logger.info("returning: %r", results) - results.sort(key=lambda r: -r["rank"]) - - defer.returnValue(results) + defer.returnValue({ + "search_categories": { + "room_events": { + "results": results, + "count": len(results) + } + } + }) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 35bd702a43..94adabca62 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -540,7 +540,7 @@ class SearchRestServlet(ClientV1RestServlet): content = _parse_json(request) - results = yield self.handlers.search_handler.search(content) + results = yield self.handlers.search_handler.search(auth_user, content) defer.returnValue((200, results)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 416ef6af93..e6c1abfc27 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -307,6 +307,8 @@ class EventsStore(SQLBaseStore): self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: self._store_room_topic_txn(txn, event) + elif event.type == EventTypes.Message: + self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5e07b7e0e5..e4e830944a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -175,6 +175,10 @@ class RoomStore(SQLBaseStore): }, ) + self._store_event_search_txn( + txn, event, "content.topic", event.content["topic"] + ) + def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: self._simple_insert_txn( @@ -187,6 +191,24 @@ class RoomStore(SQLBaseStore): } ) + self._store_event_search_txn( + txn, event, "content.name", event.content["name"] + ) + + def _store_room_message_txn(self, txn, event): + if hasattr(event, "content") and "body" in event.content: + self._store_event_search_txn( + txn, event, "content.body", event.content["body"] + ) + + def _store_event_search_txn(self, txn, event, key, value): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 5680332758..05f1605fdd 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -44,7 +44,8 @@ INSERT INTO event_search SELECT FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; -CREATE INDEX event_search_idx ON event_search USING gin(vector); +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index eea4477765..e66b5f9edc 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -21,11 +21,16 @@ from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, constraints): + def search_msgs(self, room_ids, constraints): clauses = [] args = [] fts = None + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) + for c in constraints: local_clauses = [] if c.search_type == SearchConstraintTypes.FTS: -- cgit 1.5.1 From ae72e247fa478a541c837aaa7663aa3ca01ba840 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:50:46 +0100 Subject: PEP8 --- synapse/handlers/search.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index b6bdb752e9..9dc474aa56 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -30,7 +30,11 @@ logger = logging.getLogger(__name__) KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.CONTENT_NAME: [SearchConstraintTypes.FTS, SearchConstraintTypes.EXACT, SearchConstraintTypes.SUBSTRING], + KnownRoomEventKeys.CONTENT_NAME: [ + SearchConstraintTypes.FTS, + SearchConstraintTypes.EXACT, + SearchConstraintTypes.SUBSTRING, + ], KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], @@ -54,7 +58,10 @@ class RoomConstraint(object): raise SynapseError(400, "Unrecognized key %r", key) if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: - raise SynapseError(400, "Disallowed constraint type %r for key %r", search_type, key) + raise SynapseError( + 400, + "Disallowed constraint type %r for key %r", search_type, key + ) return cls(search_type, keys, d["value"]) -- cgit 1.5.1 From 927004e34905d4ad6a69576ee1799fe8019d8985 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:06:14 +0100 Subject: Remove unused room_id parameter --- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 10 ++++----- synapse/handlers/search.py | 50 +++++++++++++++++++++++++++++++++++++++++- synapse/handlers/sync.py | 2 +- synapse/storage/state.py | 11 +++++----- 5 files changed, 61 insertions(+), 14 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3882ba79ed..a710bdcfdb 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -242,7 +242,7 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): event_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, None), diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 30949ff7a6..d2f0892f7a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -164,7 +164,7 @@ class MessageHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), @@ -290,7 +290,7 @@ class MessageHandler(BaseHandler): elif member_event.membership == Membership.LEAVE: key = (event_type, state_key) room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], [key] + [member_event.event_id], [key] ) data = room_state[member_event.event_id].get(key) @@ -314,7 +314,7 @@ class MessageHandler(BaseHandler): room_state = yield self.state_handler.get_current_state(room_id) elif member_event.membership == Membership.LEAVE: room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] @@ -403,7 +403,7 @@ class MessageHandler(BaseHandler): elif event.membership == Membership.LEAVE: room_end_token = "s%d" % (event.stream_ordering,) deferred_room_state = self.store.get_state_for_events( - event.room_id, [event.event_id], None + [event.event_id], None ) deferred_room_state.addCallback( lambda states: states[event.event_id] @@ -496,7 +496,7 @@ class MessageHandler(BaseHandler): def _room_initial_sync_parted(self, user_id, room_id, pagin_config, member_event): room_state = yield self.store.get_state_for_events( - member_event.room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 9dc474aa56..71182a8fe0 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,7 +17,9 @@ from twisted.internet import defer from ._base import BaseHandler -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.constants import ( + EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes +) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -71,6 +73,52 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def _filter_events_for_client(self, user_id, room_id, events): + event_id_to_state = yield self.store.get_state_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) + ) + + def allowed(event, state): + if event.type == EventTypes.RoomHistoryVisibility: + return True + + membership_ev = state.get((EventTypes.Member, user_id), None) + if membership_ev: + membership = membership_ev.membership + else: + membership = Membership.LEAVE + + if membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + else: + visibility = "shared" + + if visibility == "public": + return True + elif visibility == "shared": + return True + elif visibility == "joined": + return membership == Membership.JOIN + elif visibility == "invited": + return membership == Membership.INVITE + + return True + + defer.returnValue([ + event + for event in events + if allowed(event, event_id_to_state[event.event_id]) + ]) + @defer.inlineCallbacks def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9914ff6f9c..a8940de166 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -312,7 +312,7 @@ class SyncHandler(BaseHandler): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e935b9443b..acfb322a53 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -54,7 +54,7 @@ class StateStore(SQLBaseStore): defer.returnValue({}) event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -208,13 +208,12 @@ class StateStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_state_for_events(self, room_id, event_ids, types): + def get_state_for_events(self, event_ids, types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: - room_id (str) event_ids (list) types (list): List of (type, state_key) tuples which are used to filter the state fetched. `state_key` may be None, which matches @@ -225,7 +224,7 @@ class StateStore(SQLBaseStore): The dicts are mappings from (type, state_key) -> state_events """ event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -251,8 +250,8 @@ class StateStore(SQLBaseStore): ) @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", - num_args=2) - def _get_state_group_for_events(self, room_id, event_ids): + num_args=1) + def _get_state_group_for_events(self, event_ids): """Returns mapping event_id -> state_group """ def f(txn): -- cgit 1.5.1 From ca53ad74250d94b8c9b6581e6cedef0a29520fc2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:52:55 +0100 Subject: Filter events to only thsoe that the user is allowed to see --- synapse/handlers/search.py | 16 ++++++++++------ synapse/storage/search.py | 14 +++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 71182a8fe0..49b786dadb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -74,7 +74,7 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, room_id, events): + def _filter_events_for_client(self, user_id, events): event_id_to_state = yield self.store.get_state_for_events( frozenset(e.event_id for e in events), types=( @@ -139,16 +139,20 @@ class SearchHandler(BaseHandler): # TODO(paul): work out why because I really don't think it should room_ids = set(r.room_id for r in rooms) - res = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + + allowed_events = yield self._filter_events_for_client( + user.to_string(), event_map.values() + ) time_now = self.clock.time_msec() results = { - r["result"].event_id: { - "rank": r["rank"], - "result": serialize_event(r["result"], time_now) + e.event_id: { + "rank": rank_map[e.event_id], + "result": serialize_event(e, time_now) } - for r in res + for e in allowed_events } logger.info("returning: %r", results) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e66b5f9edc..238df38440 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -70,11 +70,11 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue([ + defer.returnValue(( { - "rank": r["rank"], - "result": event_map[r["event_id"]] - } - for r in results - if r["event_id"] in event_map - ]) + r["event_id"]: r["rank"] + for r in results + if r["event_id"] in event_map + }, + event_map + )) -- cgit 1.5.1 From 30c2783d2f2983764738383d73c378ec5dc61279 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 10:36:36 +0100 Subject: Search left rooms too --- synapse/handlers/search.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 49b786dadb..d5c395061c 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -131,12 +131,9 @@ class SearchHandler(BaseHandler): raise SynapseError(400, "Only one constraint can be FTS") fts = True - rooms = yield self.store.get_rooms_for_user( - user.to_string(), + rooms = yield self.store.get_rooms_for_user_where_membership_is( + user.to_string(), membership_list=[Membership.JOIN, Membership.LEAVE], ) - - # For some reason the list of events contains duplicates - # TODO(paul): work out why because I really don't think it should room_ids = set(r.room_id for r in rooms) rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) -- cgit 1.5.1 From 3e2a1297b513dc1fadb288c74684f6651a88016d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 15:22:14 +0100 Subject: Remove constraints in preperation of using filters --- synapse/handlers/search.py | 61 ++++++++-------------------------------------- synapse/storage/search.py | 30 ++++++++--------------- 2 files changed, 20 insertions(+), 71 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index d5c395061c..8864a921fc 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,7 +18,7 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.api.constants import ( - EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes + EventTypes, Membership, ) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -29,45 +29,6 @@ import logging logger = logging.getLogger(__name__) -KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { - KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.CONTENT_NAME: [ - SearchConstraintTypes.FTS, - SearchConstraintTypes.EXACT, - SearchConstraintTypes.SUBSTRING, - ], - KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], - KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], -} - - -class RoomConstraint(object): - def __init__(self, search_type, keys, value): - self.search_type = search_type - self.keys = keys - self.value = value - - @classmethod - def from_dict(cls, d): - search_type = d["type"] - keys = d["keys"] - - for key in keys: - if key not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES: - raise SynapseError(400, "Unrecognized key %r", key) - - if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: - raise SynapseError( - 400, - "Disallowed constraint type %r for key %r", search_type, key - ) - - return cls(search_type, keys, d["value"]) - - class SearchHandler(BaseHandler): def __init__(self, hs): @@ -121,22 +82,20 @@ class SearchHandler(BaseHandler): @defer.inlineCallbacks def search(self, user, content): - constraint_dicts = content["search_categories"]["room_events"]["constraints"] - constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] - - fts = False - for c in constraints: - if c.search_type == SearchConstraintTypes.FTS: - if fts: - raise SynapseError(400, "Only one constraint can be FTS") - fts = True + try: + search_term = content["search_categories"]["room_events"]["search_term"] + keys = content["search_categories"]["room_events"]["keys"] + except KeyError: + raise SynapseError(400, "Invalid search query") rooms = yield self.store.get_rooms_for_user_where_membership_is( - user.to_string(), membership_list=[Membership.JOIN, Membership.LEAVE], + user.to_string(), + membership_list=[Membership.JOIN], + # membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban], ) room_ids = set(r.room_id for r in rooms) - rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) allowed_events = yield self._filter_events_for_client( user.to_string(), event_map.values() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 5843f80876..7a30ce25eb 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,38 +16,28 @@ from twisted.internet import defer from _base import SQLBaseStore -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, room_ids, constraints): + def search_msgs(self, room_ids, search_term, keys): clauses = [] args = [] - fts = None clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) args.extend(room_ids) - for c in constraints: - local_clauses = [] - if c.search_type == SearchConstraintTypes.FTS: - fts = c.value - for key in c.keys: - local_clauses.append("key = ?") - args.append(key) - elif c.search_type == SearchConstraintTypes.EXACT: - for key in c.keys: - if key == KnownRoomEventKeys.ROOM_ID: - for value in c.value: - local_clauses.append("room_id = ?") - args.append(value) - clauses.append( - "(%s)" % (" OR ".join(local_clauses),) - ) + local_clauses = [] + for key in keys: + local_clauses.append("key = ?") + args.append(key) + + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) if isinstance(self.database_engine, PostgresEngine): sql = ( @@ -67,7 +57,7 @@ class SearchStore(SQLBaseStore): sql += " ORDER BY rank DESC" results = yield self._execute( - "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) events = yield self._get_events([r["event_id"] for r in results]) -- cgit 1.5.1 From d25b0f65ea9ab36dbf4285d86a1ca3e357f6ad1c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:46:31 +0100 Subject: Add TODO markers --- synapse/handlers/search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8864a921fc..79c1569868 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -88,6 +88,7 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") + # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( user.to_string(), membership_list=[Membership.JOIN], @@ -95,6 +96,8 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) + # TODO: Apply room filter to rooms list + rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) allowed_events = yield self._filter_events_for_client( @@ -111,7 +114,7 @@ class SearchHandler(BaseHandler): for e in allowed_events } - logger.info("returning: %r", results) + logger.info("Found %d results", len(results)) defer.returnValue({ "search_categories": { -- cgit 1.5.1 From 1d9e109820c1aec7193278b2b26042259329c144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:49:00 +0100 Subject: More TODO markers --- synapse/handlers/search.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 79c1569868..8140c0b9d4 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -104,6 +104,9 @@ class SearchHandler(BaseHandler): user.to_string(), event_map.values() ) + # TODO: Filter allowed_events + # TODO: Add a limit + time_now = self.clock.time_msec() results = { -- cgit 1.5.1 From 8c9df8774e781da838efc18953785cfa1a2af0a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 10:35:50 +0100 Subject: Make 'keys' optional --- synapse/handlers/search.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8140c0b9d4..7f1efe2b46 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -84,7 +84,9 @@ class SearchHandler(BaseHandler): def search(self, user, content): try: search_term = content["search_categories"]["room_events"]["search_term"] - keys = content["search_categories"]["room_events"]["keys"] + keys = content["search_categories"]["room_events"].get("keys", [ + "content.body", "content.name", "content.topic", + ]) except KeyError: raise SynapseError(400, "Invalid search query") -- cgit 1.5.1 From b62da463e18a05205725f75508d5053232f1a158 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:52:16 +0100 Subject: docstring --- synapse/handlers/search.py | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 7f1efe2b46..c01c12f8c1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -82,6 +82,16 @@ class SearchHandler(BaseHandler): @defer.inlineCallbacks def search(self, user, content): + """Performs a full text search for a user. + + Args: + user (UserID) + content (dict): Search parameters + + Returns: + dict to be returned to the client with results of search + """ + try: search_term = content["search_categories"]["room_events"]["search_term"] keys = content["search_categories"]["room_events"].get("keys", [ -- cgit 1.5.1 From d4b5621e0a5edeb66a80d8dd88055a0129def2a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 15:19:52 +0100 Subject: Remove duplicate _filter_events_for_client --- synapse/handlers/search.py | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index c01c12f8c1..1a5d7381db 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -34,52 +34,6 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) - @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, events): - event_id_to_state = yield self.store.get_state_for_events( - frozenset(e.event_id for e in events), - types=( - (EventTypes.RoomHistoryVisibility, ""), - (EventTypes.Member, user_id), - ) - ) - - def allowed(event, state): - if event.type == EventTypes.RoomHistoryVisibility: - return True - - membership_ev = state.get((EventTypes.Member, user_id), None) - if membership_ev: - membership = membership_ev.membership - else: - membership = Membership.LEAVE - - if membership == Membership.JOIN: - return True - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - else: - visibility = "shared" - - if visibility == "public": - return True - elif visibility == "shared": - return True - elif visibility == "joined": - return membership == Membership.JOIN - elif visibility == "invited": - return membership == Membership.INVITE - - return True - - defer.returnValue([ - event - for event in events - if allowed(event, event_id_to_state[event.event_id]) - ]) - @defer.inlineCallbacks def search(self, user, content): """Performs a full text search for a user. -- cgit 1.5.1 From 380f148db7d710ece7679e207334483bda407aa5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 15:32:51 +0100 Subject: Remove unused import --- synapse/handlers/search.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 1a5d7381db..22808b9c07 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,9 +17,7 @@ from twisted.internet import defer from ._base import BaseHandler -from synapse.api.constants import ( - EventTypes, Membership, -) +from synapse.api.constants import Membership from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event -- cgit 1.5.1 From c8baada94a6539cfcd1ec1316892302ae2271f4c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Oct 2015 17:09:53 +0100 Subject: Filter search results --- synapse/handlers/search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 22808b9c07..473aab53f0 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,6 +18,7 @@ from twisted.internet import defer from ._base import BaseHandler from synapse.api.constants import Membership +from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -49,9 +50,12 @@ class SearchHandler(BaseHandler): keys = content["search_categories"]["room_events"].get("keys", [ "content.body", "content.name", "content.topic", ]) + filter_dict = content["search_categories"]["room_events"].get("filter", {}) except KeyError: raise SynapseError(400, "Invalid search query") + filtr = Filter(filter_dict) + # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( user.to_string(), @@ -64,11 +68,12 @@ class SearchHandler(BaseHandler): rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + filtered_events = filtr.filter(event_map.values()) + allowed_events = yield self._filter_events_for_client( - user.to_string(), event_map.values() + user.to_string(), filtered_events ) - # TODO: Filter allowed_events # TODO: Add a limit time_now = self.clock.time_msec() -- cgit 1.5.1 From 5c41224a89a9ceedeb5db10f972c10344382faf2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Oct 2015 10:09:26 +0100 Subject: Filter room ids before hitting the database --- synapse/api/filtering.py | 20 ++++++++++++++++++++ synapse/handlers/search.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 60b6648e0d..ab14b47281 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -202,6 +202,26 @@ class Filter(object): return True + def filter_rooms(self, room_ids): + """Apply the 'rooms' filter to a given list of rooms. + + Args: + room_ids (list): A list of room_ids. + + Returns: + list: A list of room_ids that match the filter + """ + room_ids = set(room_ids) + + disallowed_rooms = set(self.filter_json.get("not_rooms", [])) + room_ids -= disallowed_rooms + + allowed_rooms = self.filter_json.get("rooms", None) + if allowed_rooms is not None: + room_ids &= set(allowed_rooms) + + return room_ids + def filter(self, events): return filter(self.check, events) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 473aab53f0..f53e5d35ac 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -64,7 +64,7 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - # TODO: Apply room filter to rooms list + room_ids = filtr.filter_rooms(room_ids) rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) -- cgit 1.5.1 From 232beb3a3c28ccdc5388daa9396d5054b7768b12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 15:02:35 +0100 Subject: Use namedtuple as return value --- synapse/handlers/search.py | 4 +++- synapse/storage/search.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index f53e5d35ac..bdc79ffc55 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -66,7 +66,9 @@ class SearchHandler(BaseHandler): room_ids = filtr.filter_rooms(room_ids) - rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) filtered_events = filtr.filter(event_map.values()) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 810b5406ad..41451ade57 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,6 +18,17 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from collections import namedtuple + +"""The result of a search. + +Fields: + rank_map (dict): Mapping event_id -> rank + event_map (dict): Mapping event_id -> event + pagination_token (str): Pagination token +""" +SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) + class SearchStore(SQLBaseStore): @defer.inlineCallbacks @@ -31,7 +42,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - 2-tuple of (dict event_id -> rank, dict event_id -> event) + SearchResult """ clauses = [] args = [] @@ -85,11 +96,12 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(( + defer.returnValue(SearchResult( { r["event_id"]: r["rank"] for r in results if r["event_id"] in event_map }, - event_map + event_map, + None )) -- cgit 1.5.1 From 2980136d7535077f0513b8a12fd7f224700ca140 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 22 Oct 2015 16:19:53 +0100 Subject: Rename --- synapse/handlers/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bdc79ffc55..bbe82b1425 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -54,7 +54,7 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") - filtr = Filter(filter_dict) + search_filter = Filter(filter_dict) # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( @@ -64,13 +64,13 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = filtr.filter_rooms(room_ids) + room_ids = search_filter.filter_rooms(room_ids) rank_map, event_map, _ = yield self.store.search_msgs( room_ids, search_term, keys ) - filtered_events = filtr.filter(event_map.values()) + filtered_events = search_filter.filter(event_map.values()) allowed_events = yield self._filter_events_for_client( user.to_string(), filtered_events -- cgit 1.5.1 From a2e5f7f3d825840d7c714e9859c100752efcbc68 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Oct 2015 18:25:11 +0000 Subject: Optionally return event contexts with search results --- synapse/handlers/search.py | 53 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bbe82b1425..b13fb71d81 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -51,6 +51,17 @@ class SearchHandler(BaseHandler): "content.body", "content.name", "content.topic", ]) filter_dict = content["search_categories"]["room_events"].get("filter", {}) + event_context = content["search_categories"]["room_events"].get( + "event_context", None + ) + + if event_context is not None: + before_limit = int(event_context.get( + "before_limit", 5 + )) + after_limit = int(event_context.get( + "after_limit", 5 + )) except KeyError: raise SynapseError(400, "Invalid search query") @@ -76,14 +87,54 @@ class SearchHandler(BaseHandler): user.to_string(), filtered_events ) + if event_context is not None: + now_token = yield self.hs.get_event_sources().get_current_token() + + contexts = {} + for event in allowed_events: + res = yield self.store.get_events_around( + event.room_id, event.event_id, before_limit, after_limit + ) + + res["events_before"] = yield self._filter_events_for_client( + user.to_string(), res["events_before"] + ) + + res["events_after"] = yield self._filter_events_for_client( + user.to_string(), res["events_after"] + ) + + res["start"] = now_token.copy_and_replace( + "room_key", res["start"] + ).to_string() + + res["end"] = now_token.copy_and_replace( + "room_key", res["end"] + ).to_string() + + contexts[event.event_id] = res + else: + contexts = {} + # TODO: Add a limit time_now = self.clock.time_msec() + for context in contexts.values(): + context["events_before"] = [ + serialize_event(e, time_now) + for e in context["events_before"] + ] + context["events_after"] = [ + serialize_event(e, time_now) + for e in context["events_after"] + ] + results = { e.event_id: { "rank": rank_map[e.event_id], - "result": serialize_event(e, time_now) + "result": serialize_event(e, time_now), + "context": contexts.get(e.event_id, {}), } for e in allowed_events } -- cgit 1.5.1 From f6e6f3d87a30932f706e39f4fb2d9f07d3270dce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 29 Oct 2015 16:17:47 +0000 Subject: Make search API honour limit set in filter --- synapse/handlers/search.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index b13fb71d81..2718e9482e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -87,6 +87,9 @@ class SearchHandler(BaseHandler): user.to_string(), filtered_events ) + allowed_events.sort(key=lambda e: -rank_map[e.event_id]) + allowed_events = allowed_events[:search_filter.limit()] + if event_context is not None: now_token = yield self.hs.get_event_sources().get_current_token() -- cgit 1.5.1 From 05c326d445fdd04ef06cb9a2a02cc0d6dde9935b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 4 Nov 2015 17:57:44 +0000 Subject: Implement order and group by --- synapse/handlers/search.py | 126 +++++++++++++++++++++++++++++++++++++++------ synapse/storage/search.py | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 17 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 2718e9482e..28f5300dc9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -46,15 +46,20 @@ class SearchHandler(BaseHandler): """ try: - search_term = content["search_categories"]["room_events"]["search_term"] - keys = content["search_categories"]["room_events"].get("keys", [ + room_cat = content["search_categories"]["room_events"] + search_term = room_cat["search_term"] + keys = room_cat.get("keys", [ "content.body", "content.name", "content.topic", ]) - filter_dict = content["search_categories"]["room_events"].get("filter", {}) - event_context = content["search_categories"]["room_events"].get( + filter_dict = room_cat.get("filter", {}) + order_by = room_cat.get("order_by", "rank") + event_context = room_cat.get( "event_context", None ) + group_by = room_cat.get("groupings", {}).get("group_by", {}) + group_keys = [g["key"] for g in group_by] + if event_context is not None: before_limit = int(event_context.get( "before_limit", 5 @@ -65,6 +70,15 @@ class SearchHandler(BaseHandler): except KeyError: raise SynapseError(400, "Invalid search query") + if order_by not in ("rank", "recent"): + raise SynapseError(400, "Invalid order by: %r" % (order_by,)) + + if set(group_keys) - {"room_id", "sender"}: + raise SynapseError( + 400, + "Invalid group by keys: %r" % (set(group_keys) - {"room_id", "sender"},) + ) + search_filter = Filter(filter_dict) # TODO: Search through left rooms too @@ -77,18 +91,88 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) - rank_map, event_map, _ = yield self.store.search_msgs( - room_ids, search_term, keys - ) + rank_map = {} + allowed_events = [] + room_groups = {} + sender_group = {} - filtered_events = search_filter.filter(event_map.values()) + if order_by == "rank": + rank_map, event_map, _ = yield self.store.search_msgs( + room_ids, search_term, keys + ) - allowed_events = yield self._filter_events_for_client( - user.to_string(), filtered_events - ) + filtered_events = search_filter.filter(event_map.values()) - allowed_events.sort(key=lambda e: -rank_map[e.event_id]) - allowed_events = allowed_events[:search_filter.limit()] + events = yield self._filter_events_for_client( + user.to_string(), filtered_events + ) + + events.sort(key=lambda e: -rank_map[e.event_id]) + allowed_events = events[:search_filter.limit()] + + for e in allowed_events: + rm = room_groups.setdefault(e.room_id, { + "results": [], + "order": rank_map[e.event_id], + }) + rm["results"].append(e.event_id) + + s = sender_group.setdefault(e.sender, { + "results": [], + "order": rank_map[e.event_id], + }) + s["results"].append(e.event_id) + + elif order_by == "recent": + for room_id in room_ids: + room_events = [] + pagination_token = None + i = 0 + + while len(room_events) < search_filter.limit() and i < 5: + i += 5 + r_map, event_map, pagination_token = yield self.store.search_room( + room_id, search_term, keys, search_filter.limit() * 2, + pagination_token=pagination_token, + ) + rank_map.update(r_map) + + filtered_events = search_filter.filter(event_map.values()) + + events = yield self._filter_events_for_client( + user.to_string(), filtered_events + ) + + room_events.extend(events) + room_events = room_events[:search_filter.limit()] + + if len(event_map) < search_filter.limit() * 2: + break + + if room_events: + group = room_groups.setdefault(room_id, {}) + if pagination_token: + group["next_batch"] = pagination_token + + group["results"] = [e.event_id for e in room_events] + group["order"] = max( + e.origin_server_ts/1000 for e in room_events + if hasattr(e, "origin_server_ts") + ) + + allowed_events.extend(room_events) + + # Normalize the group ranks + if room_groups: + mx = max(g["order"] for g in room_groups.values()) + mn = min(g["order"] for g in room_groups.values()) + + for g in room_groups.values(): + g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + + else: + # We should never get here due to the guard earlier. + raise NotImplementedError() if event_context is not None: now_token = yield self.hs.get_event_sources().get_current_token() @@ -144,11 +228,19 @@ class SearchHandler(BaseHandler): logger.info("Found %d results", len(results)) + rooms_cat_res = { + "results": results, + "count": len(results) + } + + if room_groups and "room_id" in group_keys: + rooms_cat_res.setdefault("groups", {})["room_id"] = room_groups + + if sender_group and "sender" in group_keys: + rooms_cat_res.setdefault("groups", {})["sender"] = sender_group + defer.returnValue({ "search_categories": { - "room_events": { - "results": results, - "count": len(results) - } + "room_events": rooms_cat_res } }) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index cdf003502f..e37e56c1f2 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -20,6 +20,12 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine from collections import namedtuple +import logging + + +logger = logging.getLogger(__name__) + + """The result of a search. Fields: @@ -109,3 +115,93 @@ class SearchStore(SQLBaseStore): event_map, None )) + + @defer.inlineCallbacks + def search_room(self, room_id, search_term, keys, limit, pagination_token=None): + """Performs a full text search over events with given keys. + + Args: + room_id (str): The room_id to search in + search_term (str): Search term to search for + keys (list): List of keys to search in, currently supports + "content.body", "content.name", "content.topic" + pagination_token (str): A pagination token previously returned + + Returns: + SearchResult + """ + clauses = [] + args = [search_term, room_id] + + local_clauses = [] + for key in keys: + local_clauses.append("key = ?") + args.append(key) + + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) + + if pagination_token: + topo, stream = pagination_token.split(",") + clauses.append( + "(topological_ordering < ?" + " OR (topological_ordering = ? AND stream_ordering < ?))" + ) + args.extend([topo, topo, stream]) + + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "SELECT ts_rank_cd(vector, query) as rank," + " topological_ordering, stream_ordering, room_id, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " NATURAL JOIN events" + " WHERE vector @@ query AND room_id = ?" + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id" + " topological_ordering, stream_ordering" + " FROM event_search" + " NATURAL JOIN events" + " WHERE value MATCH ? AND room_id = ?" + ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + + for clause in clauses: + sql += " AND " + clause + + # We add an arbitrary limit here to ensure we don't try to pull the + # entire table from the database. + sql += " ORDER BY topological_ordering DESC, stream_ordering DESC LIMIT ?" + + args.append(limit) + + results = yield self._execute( + "search_rooms", self.cursor_to_dict, sql, *args + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + pagination_token = None + if results: + topo = results[-1]["topological_ordering"] + stream = results[-1]["stream_ordering"] + pagination_token = "%s,%s" % (topo, stream) + + defer.returnValue(SearchResult( + { + r["event_id"]: r["rank"] + for r in results + if r["event_id"] in event_map + }, + event_map, + pagination_token + )) -- cgit 1.5.1 From 7301e05122e07f6513916e8a35bf05581de6521d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 14:34:37 +0000 Subject: Implement basic pagination for search results --- synapse/handlers/search.py | 78 +++++++++++++++++++++++++++++++++++------- synapse/rest/client/v1/room.py | 3 +- synapse/storage/search.py | 55 ++++++++++------------------- 3 files changed, 86 insertions(+), 50 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 28f5300dc9..696780f34e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -22,6 +22,8 @@ from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event +from unpaddedbase64 import decode_base64, encode_base64 + import logging @@ -34,17 +36,32 @@ class SearchHandler(BaseHandler): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, user, content): + def search(self, user, content, batch=None): """Performs a full text search for a user. Args: user (UserID) content (dict): Search parameters + batch (str): The next_batch parameter. Used for pagination. Returns: dict to be returned to the client with results of search """ + batch_group = None + batch_group_key = None + batch_token = None + if batch: + try: + b = decode_base64(batch) + batch_group, batch_group_key, batch_token = b.split("\n") + + assert batch_group is not None + assert batch_group_key is not None + assert batch_token is not None + except: + raise SynapseError(400, "Invalid batch") + try: room_cat = content["search_categories"]["room_events"] search_term = room_cat["search_term"] @@ -91,17 +108,25 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) + if batch_group == "room_id": + room_ids = room_ids & {batch_group_key} + rank_map = {} allowed_events = [] room_groups = {} sender_group = {} + global_next_batch = None if order_by == "rank": - rank_map, event_map, _ = yield self.store.search_msgs( + results = yield self.store.search_msgs( room_ids, search_term, keys ) - filtered_events = search_filter.filter(event_map.values()) + results_map = {r["event"].event_id: r for r in results} + + rank_map.update({r["event"].event_id: r["rank"] for r in results}) + + filtered_events = search_filter.filter([r["event"] for r in results]) events = yield self._filter_events_for_client( user.to_string(), filtered_events @@ -126,18 +151,26 @@ class SearchHandler(BaseHandler): elif order_by == "recent": for room_id in room_ids: room_events = [] - pagination_token = None + if batch_group == "room_id" and batch_group_key == room_id: + pagination_token = batch_token + else: + pagination_token = None i = 0 while len(room_events) < search_filter.limit() and i < 5: i += 5 - r_map, event_map, pagination_token = yield self.store.search_room( + results = yield self.store.search_room( room_id, search_term, keys, search_filter.limit() * 2, pagination_token=pagination_token, ) - rank_map.update(r_map) - filtered_events = search_filter.filter(event_map.values()) + results_map = {r["event"].event_id: r for r in results} + + rank_map.update({r["event"].event_id: r["rank"] for r in results}) + + filtered_events = search_filter.filter([ + r["event"] for r in results + ]) events = yield self._filter_events_for_client( user.to_string(), filtered_events @@ -146,13 +179,26 @@ class SearchHandler(BaseHandler): room_events.extend(events) room_events = room_events[:search_filter.limit()] - if len(event_map) < search_filter.limit() * 2: + if len(results) < search_filter.limit() * 2: + pagination_token = None break + else: + pagination_token = results[-1]["pagination_token"] + + if room_events: + res = results_map[room_events[-1].event_id] + pagination_token = res["pagination_token"] if room_events: group = room_groups.setdefault(room_id, {}) if pagination_token: - group["next_batch"] = pagination_token + next_batch = encode_base64("%s\n%s\n%s" % ( + "room_id", room_id, pagination_token + )) + group["next_batch"] = next_batch + + if batch_token: + global_next_batch = next_batch group["results"] = [e.event_id for e in room_events] group["order"] = max( @@ -164,11 +210,14 @@ class SearchHandler(BaseHandler): # Normalize the group ranks if room_groups: - mx = max(g["order"] for g in room_groups.values()) - mn = min(g["order"] for g in room_groups.values()) + if len(room_groups) > 1: + mx = max(g["order"] for g in room_groups.values()) + mn = min(g["order"] for g in room_groups.values()) - for g in room_groups.values(): - g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + for g in room_groups.values(): + g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + else: + room_groups.values()[0]["order"] = 1 else: # We should never get here due to the guard earlier. @@ -239,6 +288,9 @@ class SearchHandler(BaseHandler): if sender_group and "sender" in group_keys: rooms_cat_res.setdefault("groups", {})["sender"] = sender_group + if global_next_batch: + rooms_cat_res["next_batch"] = global_next_batch + defer.returnValue({ "search_categories": { "room_events": rooms_cat_res diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 2dcaee86cd..8e28f12d29 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -601,7 +601,8 @@ class SearchRestServlet(ClientV1RestServlet): content = _parse_json(request) - results = yield self.handlers.search_handler.search(auth_user, content) + batch = request.args.get("next_batch", [None])[0] + results = yield self.handlers.search_handler.search(auth_user, content, batch) defer.returnValue((200, results)) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e37e56c1f2..7342e7bae6 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -18,24 +18,12 @@ from twisted.internet import defer from _base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine -from collections import namedtuple - import logging logger = logging.getLogger(__name__) -"""The result of a search. - -Fields: - rank_map (dict): Mapping event_id -> rank - event_map (dict): Mapping event_id -> event - pagination_token (str): Pagination token -""" -SearchResult = namedtuple("SearchResult", ("rank_map", "event_map", "pagination_token")) - - class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): @@ -48,7 +36,7 @@ class SearchStore(SQLBaseStore): "content.body", "content.name", "content.topic" Returns: - SearchResult + list of dicts """ clauses = [] args = [] @@ -106,15 +94,14 @@ class SearchStore(SQLBaseStore): for ev in events } - defer.returnValue(SearchResult( + defer.returnValue([ { - r["event_id"]: r["rank"] - for r in results - if r["event_id"] in event_map - }, - event_map, - None - )) + "event": event_map[r["event_id"]], + "rank": r["rank"], + } + for r in results + if r["event_id"] in event_map + ]) @defer.inlineCallbacks def search_room(self, room_id, search_term, keys, limit, pagination_token=None): @@ -128,7 +115,7 @@ class SearchStore(SQLBaseStore): pagination_token (str): A pagination token previously returned Returns: - SearchResult + list of dicts """ clauses = [] args = [search_term, room_id] @@ -190,18 +177,14 @@ class SearchStore(SQLBaseStore): for ev in events } - pagination_token = None - if results: - topo = results[-1]["topological_ordering"] - stream = results[-1]["stream_ordering"] - pagination_token = "%s,%s" % (topo, stream) - - defer.returnValue(SearchResult( + defer.returnValue([ { - r["event_id"]: r["rank"] - for r in results - if r["event_id"] in event_map - }, - event_map, - pagination_token - )) + "event": event_map[r["event_id"]], + "rank": r["rank"], + "pagination_token": "%s,%s" % ( + r["topological_ordering"], r["stream_ordering"] + ), + } + for r in results + if r["event_id"] in event_map + ]) -- cgit 1.5.1 From 1ad6222ebfbd30a94b3ae085001e1a4242e637fd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 16:29:16 +0000 Subject: COMMENTS --- synapse/handlers/search.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 696780f34e..c39f4697e3 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -64,16 +64,28 @@ class SearchHandler(BaseHandler): try: room_cat = content["search_categories"]["room_events"] + + # The actual thing to query in FTS search_term = room_cat["search_term"] + + # Which "keys" to search over in FTS query keys = room_cat.get("keys", [ "content.body", "content.name", "content.topic", ]) + + # Filter to apply to results filter_dict = room_cat.get("filter", {}) + + # What to order results by (impacts whether pagination can be doen) order_by = room_cat.get("order_by", "rank") + + # Include context around each event? event_context = room_cat.get( "event_context", None ) + # Group results together? May allow clients to paginate within a + # group group_by = room_cat.get("groupings", {}).get("group_by", {}) group_keys = [g["key"] for g in group_by] @@ -111,10 +123,12 @@ class SearchHandler(BaseHandler): if batch_group == "room_id": room_ids = room_ids & {batch_group_key} - rank_map = {} + rank_map = {} # event_id -> rank of event allowed_events = [] - room_groups = {} - sender_group = {} + room_groups = {} # Holds result of grouping by room, if applicable + sender_group = {} # Holds result of grouping by sender, if applicable + + # Holds the next_batch for the entire result set if one of those exists global_next_batch = None if order_by == "rank": @@ -149,6 +163,9 @@ class SearchHandler(BaseHandler): s["results"].append(e.event_id) elif order_by == "recent": + # In this case we specifically loop through each room as the given + # limit applies to each room, rather than a global list. + # This is not necessarilly a good idea. for room_id in room_ids: room_events = [] if batch_group == "room_id" and batch_group_key == room_id: @@ -157,6 +174,9 @@ class SearchHandler(BaseHandler): pagination_token = None i = 0 + # We keep looping and we keep filtering until we reach the limit + # or we run out of things. + # But only go around 5 times since otherwise synapse will be sad. while len(room_events) < search_filter.limit() and i < 5: i += 5 results = yield self.store.search_room( @@ -208,7 +228,7 @@ class SearchHandler(BaseHandler): allowed_events.extend(room_events) - # Normalize the group ranks + # Normalize the group orders if room_groups: if len(room_groups) > 1: mx = max(g["order"] for g in room_groups.values()) @@ -223,6 +243,8 @@ class SearchHandler(BaseHandler): # We should never get here due to the guard earlier. raise NotImplementedError() + # If client has asked for "context" for each event (i.e. some surrounding + # events and state), fetch that if event_context is not None: now_token = yield self.hs.get_event_sources().get_current_token() -- cgit 1.5.1 From 5ee070d21f50e3a937b2003737fd8b6ed70888ce Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 17:25:33 +0000 Subject: Increment by one, not five --- synapse/handlers/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index c39f4697e3..65255804f6 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -178,7 +178,7 @@ class SearchHandler(BaseHandler): # or we run out of things. # But only go around 5 times since otherwise synapse will be sad. while len(room_events) < search_filter.limit() and i < 5: - i += 5 + i += 1 results = yield self.store.search_room( room_id, search_term, keys, search_filter.limit() * 2, pagination_token=pagination_token, -- cgit 1.5.1 From 2aa98ff3bcd0c5fcdacade3f0c83dc53b996fd4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 17:25:50 +0000 Subject: Remove redundant test --- synapse/handlers/search.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 65255804f6..e1fb2db0c9 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -209,7 +209,6 @@ class SearchHandler(BaseHandler): res = results_map[room_events[-1].event_id] pagination_token = res["pagination_token"] - if room_events: group = room_groups.setdefault(room_id, {}) if pagination_token: next_batch = encode_base64("%s\n%s\n%s" % ( -- cgit 1.5.1 From 66d36b8e413f7203172cc63290487fc9c6e9202c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Nov 2015 17:26:19 +0000 Subject: Be explicit about what we're doing --- synapse/handlers/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index e1fb2db0c9..b7545c111f 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -121,7 +121,7 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": - room_ids = room_ids & {batch_group_key} + room_ids.intersection_update({batch_group_key}) rank_map = {} # event_id -> rank of event allowed_events = [] -- cgit 1.5.1