From 3bade14ec0aa7e56c84d30241bd86a177f0699d6 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 31 Oct 2018 04:33:41 +1100 Subject: Fix search 500ing (#4122) --- synapse/handlers/search.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 0c1d52fd11..80e7b15de8 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -24,6 +24,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import SynapseError from synapse.api.filtering import Filter from synapse.events.utils import serialize_event +from synapse.storage.state import StateFilter from synapse.visibility import filter_events_for_client from ._base import BaseHandler @@ -324,9 +325,12 @@ class SearchHandler(BaseHandler): else: last_event_id = event.event_id + state_filter = StateFilter.from_types( + [(EventTypes.Member, sender) for sender in senders] + ) + state = yield self.store.get_state_for_event( - last_event_id, - types=[(EventTypes.Member, sender) for sender in senders] + last_event_id, state_filter ) res["profile_info"] = { -- cgit 1.5.1 From 158ffb92f1ba0e247fb0a71f0d400655643ae68e Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 4 Dec 2018 04:01:02 -0700 Subject: Add an option to disable search for homeservers which may not be interested in it (#4230) This is useful for homeservers not intended for users, such as bot-only homeservers or ones that only process IoT data. --- changelog.d/4230.feature | 1 + synapse/config/server.py | 12 +++++++++++- synapse/handlers/search.py | 3 +++ synapse/storage/search.py | 6 ++++++ 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4230.feature (limited to 'synapse/handlers/search.py') diff --git a/changelog.d/4230.feature b/changelog.d/4230.feature new file mode 100644 index 0000000000..0ecb1d5ec6 --- /dev/null +++ b/changelog.d/4230.feature @@ -0,0 +1 @@ +Add an option to disable search for homeservers that may not be interested in it. diff --git a/synapse/config/server.py b/synapse/config/server.py index 5ff9ac288d..4a5b902f8e 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -62,6 +62,11 @@ class ServerConfig(Config): # master, potentially causing inconsistency. self.enable_media_repo = config.get("enable_media_repo", True) + # whether to enable search. If disabled, new entries will not be inserted + # into the search tables and they will not be indexed. Users will receive + # errors when attempting to search for messages. + self.enable_search = config.get("enable_search", True) + self.filter_timeline_limit = config.get("filter_timeline_limit", -1) # Whether we should block invites sent to users on this server @@ -384,7 +389,12 @@ class ServerConfig(Config): # mau_limit_reserved_threepids: # - medium: 'email' # address: 'reserved_user@example.com' - + # + # Room searching + # + # If disabled, new messages will not be indexed for searching and users + # will receive errors when searching for messages. Defaults to enabled. + # enable_search: true """ % locals() def read_arguments(self, args): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 80e7b15de8..ec936bbb4e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -50,6 +50,9 @@ class SearchHandler(BaseHandler): dict to be returned to the client with results of search """ + if not self.hs.config.enable_search: + raise SynapseError(400, "Search is disabled on this homeserver") + batch_group = None batch_group_key = None batch_token = None diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d5b5df93e6..c6420b2374 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -45,6 +45,10 @@ class SearchStore(BackgroundUpdateStore): def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) + + if not hs.config.enable_search: + return + self.register_background_update_handler( self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search ) @@ -316,6 +320,8 @@ class SearchStore(BackgroundUpdateStore): entries (iterable[SearchEntry]): entries to be added to the table """ + if not self.hs.config.enable_search: + return if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" -- cgit 1.5.1 From df3a661e4adb7676682a5e3c298a2dfda18b08a1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 10:04:47 +0000 Subject: Search for messages across predecessor rooms Signed-off-by: Andrew Morgan --- synapse/api/filtering.py | 3 ++ synapse/handlers/search.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++ synapse/storage/state.py | 1 + 3 files changed, 73 insertions(+) (limited to 'synapse/handlers/search.py') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 16ad654864..84000e6422 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,6 +444,9 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def add_room_ids(self, room_ids): + self.rooms += room_ids + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ec936bbb4e..77e7e4e0fb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -37,6 +37,54 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms in then returned. + + Args: + room_id (str): The ID of the room to search through. + + Returns: + dict of past room IDs as strings + """ + + historical_room_ids = [] + + while True: + state_ids = yield self.store.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + break + + # Retrieve the room's create event + create_event = yield self.store.get_event(create_id) + + if not create_event: + break + + # Check if a predecessor room is present + predecessor = create_event.content.get("predecessor", None) + if not predecessor: + break + + # Add predecessor's room ID + historical_room_id = predecessor["room_id"] + historical_room_ids.append(historical_room_id) + + # Scan through the old room for further predecessors + room_id = historical_room_id + + defer.returnValue(historical_room_ids) + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user. @@ -139,6 +187,27 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) + # If doing a subset of all rooms seearch, check if any of the rooms + # are from an upgraded room, and search their contents as well + # XXX: There is the possibility that we don't have a create event for + # the room in question, in which case we can't return all the results + # we want to. + # Ideally we would just return the results we can get now, and + # try to get more results from other servers in the background. + if search_filter.rooms: + historical_room_ids = [] + for room_id in room_ids: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Add any found rooms to the list to search + for historical_room_id in historical_room_ids: + room_ids.add(historical_room_id) + + # Prevent any historical events from being filtered + search_filter.add_room_ids(historical_room_ids) + if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a134e9b3e8..49b3ff4a71 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,6 +448,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ + def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events -- cgit 1.5.1 From c9bfb058d85f6205fada062c78a4d1eca119417c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 11:12:48 +0000 Subject: Fix a bug with single-room search searching all rooms * Create a new method for getting predecessor rooms * Remove formatting change --- synapse/api/filtering.py | 15 +++++++++++++-- synapse/handlers/search.py | 42 ++++++++++-------------------------------- synapse/storage/state.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 35 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 84000e6422..0d8957175d 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,8 +444,19 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) - def add_room_ids(self, room_ids): - self.rooms += room_ids + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. + + Args: + room_ids (list): A list of room_ids. + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. + """ + newFilter = self + newFilter.rooms += room_ids + return newFilter def _matches_wildcard(actual_value, filter_value): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 77e7e4e0fb..75c26fe065 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -49,39 +49,26 @@ class SearchHandler(BaseHandler): The full list of all found rooms in then returned. Args: - room_id (str): The ID of the room to search through. + room_id (str): id of the room to search through. Returns: - dict of past room IDs as strings + Deferred[iterable[str]]: predecessor room ids """ historical_room_ids = [] while True: - state_ids = yield self.store.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) + predecessor = yield self.store.get_room_predecessor(room_id) - # If we can't find the create event, assume we've hit a dead end - if not create_id: - break - - # Retrieve the room's create event - create_event = yield self.store.get_event(create_id) - - if not create_event: - break - - # Check if a predecessor room is present - predecessor = create_event.content.get("predecessor", None) + # If no predecessor, assume we've hit a dead end if not predecessor: break # Add predecessor's room ID - historical_room_id = predecessor["room_id"] - historical_room_ids.append(historical_room_id) + historical_room_ids.append(predecessor["room_id"]) # Scan through the old room for further predecessors - room_id = historical_room_id + room_id = predecessor["room_id"] defer.returnValue(historical_room_ids) @@ -185,28 +172,19 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = search_filter.filter_rooms(room_ids) - # If doing a subset of all rooms seearch, check if any of the rooms # are from an upgraded room, and search their contents as well - # XXX: There is the possibility that we don't have a create event for - # the room in question, in which case we can't return all the results - # we want to. - # Ideally we would just return the results we can get now, and - # try to get more results from other servers in the background. if search_filter.rooms: historical_room_ids = [] - for room_id in room_ids: + for room_id in search_filter.rooms: # Add any previous rooms to the search if they exist ids = yield self.get_old_rooms_from_upgraded_room(room_id) historical_room_ids += ids - # Add any found rooms to the list to search - for historical_room_id in historical_room_ids: - room_ids.add(historical_room_id) - # Prevent any historical events from being filtered - search_filter.add_room_ids(historical_room_ids) + search_filter = search_filter.with_room_ids(historical_room_ids) + + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 49b3ff4a71..b064671851 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -437,6 +437,34 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. + + Args: + room_id (str) + + Returns: + Deferred[str]: predecessor room id + """ + + state_ids = yield self.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + return None + + # Retrieve the room's create event + create_event = yield self.get_event(create_id) + + if not create_event: + return None + + # Return predecessor if present + return create_event.content.get("predecessor", None) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): """Get the current state event ids for a room based on the @@ -448,7 +476,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ - def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events -- cgit 1.5.1 From 03c85335d1d386c0523af3b6bf992f83bfb905d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:22:09 +0000 Subject: Apply suggestions from code review Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/search.py | 2 +- synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/handlers/search.py') diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 75c26fe065..49c439313e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -52,7 +52,7 @@ class SearchHandler(BaseHandler): room_id (str): id of the room to search through. Returns: - Deferred[iterable[str]]: predecessor room ids + Deferred[iterable[unicode]]: predecessor room ids """ historical_room_ids = [] diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa..0a0691cd00 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -446,7 +446,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): room_id (str) Returns: - Deferred[str]: predecessor room id + Deferred[unicode|None]: predecessor room id """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) -- cgit 1.5.1