diff options
author | Paul "LeoNerd" Evans <paul@matrix.org> | 2015-12-10 16:21:00 +0000 |
---|---|---|
committer | Paul "LeoNerd" Evans <paul@matrix.org> | 2015-12-10 16:21:00 +0000 |
commit | d7ee7b589f0535c21301f38e93b0cabc0cf288d4 (patch) | |
tree | fcd7d110dc66d5e175f1030d10e0bbd5624bbf3c /synapse/handlers/search.py | |
parent | Don't complain if /make_join response lacks 'prev_state' list (SYN-517) (diff) | |
parent | Merge pull request #432 from matrix-org/pushrules_refactor (diff) | |
download | synapse-d7ee7b589f0535c21301f38e93b0cabc0cf288d4.tar.xz |
Merge branch 'develop' into paul/tiny-fixes
Diffstat (limited to 'synapse/handlers/search.py')
-rw-r--r-- | synapse/handlers/search.py | 210 |
1 files changed, 138 insertions, 72 deletions
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index b7545c111f..bc79564287 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,13 +17,14 @@ from twisted.internet import defer from ._base import BaseHandler -from synapse.api.constants import Membership +from synapse.api.constants import Membership, EventTypes from synapse.api.filtering import Filter from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event from unpaddedbase64 import decode_base64, encode_base64 +import itertools import logging @@ -79,6 +80,9 @@ class SearchHandler(BaseHandler): # What to order results by (impacts whether pagination can be doen) order_by = room_cat.get("order_by", "rank") + # Return the current state of the rooms? + include_state = room_cat.get("include_state", False) + # Include context around each event? event_context = room_cat.get( "event_context", None @@ -96,6 +100,10 @@ class SearchHandler(BaseHandler): after_limit = int(event_context.get( "after_limit", 5 )) + + # Return the historic display name and avatar for the senders + # of the events? + include_profile = bool(event_context.get("include_profile", False)) except KeyError: raise SynapseError(400, "Invalid search query") @@ -123,6 +131,17 @@ class SearchHandler(BaseHandler): if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) + if not room_ids: + defer.returnValue({ + "search_categories": { + "room_events": { + "results": [], + "count": 0, + "highlights": [], + } + } + }) + rank_map = {} # event_id -> rank of event allowed_events = [] room_groups = {} # Holds result of grouping by room, if applicable @@ -131,11 +150,18 @@ class SearchHandler(BaseHandler): # Holds the next_batch for the entire result set if one of those exists global_next_batch = None + highlights = set() + if order_by == "rank": - results = yield self.store.search_msgs( + search_result = yield self.store.search_msgs( room_ids, search_term, keys ) + if search_result["highlights"]: + highlights.update(search_result["highlights"]) + + results = search_result["results"] + results_map = {r["event"].event_id: r for r in results} rank_map.update({r["event"].event_id: r["rank"] for r in results}) @@ -163,80 +189,76 @@ class SearchHandler(BaseHandler): s["results"].append(e.event_id) elif order_by == "recent": - # In this case we specifically loop through each room as the given - # limit applies to each room, rather than a global list. - # This is not necessarilly a good idea. - for room_id in room_ids: - room_events = [] - if batch_group == "room_id" and batch_group_key == room_id: - pagination_token = batch_token - else: - pagination_token = None - i = 0 - - # We keep looping and we keep filtering until we reach the limit - # or we run out of things. - # But only go around 5 times since otherwise synapse will be sad. - while len(room_events) < search_filter.limit() and i < 5: - i += 1 - results = yield self.store.search_room( - room_id, search_term, keys, search_filter.limit() * 2, - pagination_token=pagination_token, - ) + room_events = [] + i = 0 + + pagination_token = batch_token + + # We keep looping and we keep filtering until we reach the limit + # or we run out of things. + # But only go around 5 times since otherwise synapse will be sad. + while len(room_events) < search_filter.limit() and i < 5: + i += 1 + search_result = yield self.store.search_rooms( + room_ids, search_term, keys, search_filter.limit() * 2, + pagination_token=pagination_token, + ) - results_map = {r["event"].event_id: r for r in results} + if search_result["highlights"]: + highlights.update(search_result["highlights"]) - rank_map.update({r["event"].event_id: r["rank"] for r in results}) + results = search_result["results"] - filtered_events = search_filter.filter([ - r["event"] for r in results - ]) + results_map = {r["event"].event_id: r for r in results} - events = yield self._filter_events_for_client( - user.to_string(), filtered_events - ) + rank_map.update({r["event"].event_id: r["rank"] for r in results}) - room_events.extend(events) - room_events = room_events[:search_filter.limit()] + filtered_events = search_filter.filter([ + r["event"] for r in results + ]) - if len(results) < search_filter.limit() * 2: - pagination_token = None - break - else: - pagination_token = results[-1]["pagination_token"] - - if room_events: - res = results_map[room_events[-1].event_id] - pagination_token = res["pagination_token"] - - group = room_groups.setdefault(room_id, {}) - if pagination_token: - next_batch = encode_base64("%s\n%s\n%s" % ( - "room_id", room_id, pagination_token - )) - group["next_batch"] = next_batch - - if batch_token: - global_next_batch = next_batch - - group["results"] = [e.event_id for e in room_events] - group["order"] = max( - e.origin_server_ts/1000 for e in room_events - if hasattr(e, "origin_server_ts") - ) + events = yield self._filter_events_for_client( + user.to_string(), filtered_events + ) - allowed_events.extend(room_events) + room_events.extend(events) + room_events = room_events[:search_filter.limit()] - # Normalize the group orders - if room_groups: - if len(room_groups) > 1: - mx = max(g["order"] for g in room_groups.values()) - mn = min(g["order"] for g in room_groups.values()) + if len(results) < search_filter.limit() * 2: + pagination_token = None + break + else: + pagination_token = results[-1]["pagination_token"] - for g in room_groups.values(): - g["order"] = (g["order"] - mn) * 1.0 / (mx - mn) + for event in room_events: + group = room_groups.setdefault(event.room_id, { + "results": [], + }) + group["results"].append(event.event_id) + + if room_events and len(room_events) >= search_filter.limit(): + last_event_id = room_events[-1].event_id + pagination_token = results_map[last_event_id]["pagination_token"] + + # We want to respect the given batch group and group keys so + # that if people blindly use the top level `next_batch` token + # it returns more from the same group (if applicable) rather + # than reverting to searching all results again. + if batch_group and batch_group_key: + global_next_batch = encode_base64("%s\n%s\n%s" % ( + batch_group, batch_group_key, pagination_token + )) else: - room_groups.values()[0]["order"] = 1 + global_next_batch = encode_base64("%s\n%s\n%s" % ( + "all", "", pagination_token + )) + + for room_id, group in room_groups.items(): + group["next_batch"] = encode_base64("%s\n%s\n%s" % ( + "room_id", room_id, pagination_token + )) + + allowed_events.extend(room_events) else: # We should never get here due to the guard earlier. @@ -269,6 +291,33 @@ class SearchHandler(BaseHandler): "room_key", res["end"] ).to_string() + if include_profile: + senders = set( + ev.sender + for ev in itertools.chain( + res["events_before"], [event], res["events_after"] + ) + ) + + if res["events_after"]: + last_event_id = res["events_after"][-1].event_id + else: + last_event_id = event.event_id + + state = yield self.store.get_state_for_event( + last_event_id, + types=[(EventTypes.Member, sender) for sender in senders] + ) + + res["profile_info"] = { + s.state_key: { + "displayname": s.content.get("displayname", None), + "avatar_url": s.content.get("avatar_url", None), + } + for s in state.values() + if s.type == EventTypes.Member and s.state_key in senders + } + contexts[event.event_id] = res else: contexts = {} @@ -287,22 +336,39 @@ class SearchHandler(BaseHandler): for e in context["events_after"] ] - results = { - e.event_id: { + state_results = {} + if include_state: + rooms = set(e.room_id for e in allowed_events) + for room_id in rooms: + state = yield self.state_handler.get_current_state(room_id) + state_results[room_id] = state.values() + + state_results.values() + + # We're now about to serialize the events. We should not make any + # blocking calls after this. Otherwise the 'age' will be wrong + + results = [ + { "rank": rank_map[e.event_id], "result": serialize_event(e, time_now), "context": contexts.get(e.event_id, {}), } for e in allowed_events - } - - logger.info("Found %d results", len(results)) + ] rooms_cat_res = { "results": results, - "count": len(results) + "count": len(results), + "highlights": list(highlights), } + if state_results: + rooms_cat_res["state"] = { + room_id: [serialize_event(e, time_now) for e in state] + for room_id, state in state_results.items() + } + if room_groups and "room_id" in group_keys: rooms_cat_res.setdefault("groups", {})["room_id"] = room_groups |