From 004234f03abad58b2de28abff406391a6d182e48 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 18 Mar 2021 18:24:16 +0000 Subject: Initial spaces summary API (#9643) This is very bare-bones for now: federation will come soon, while pagination is descoped for now but will come later. --- synapse/handlers/space_summary.py | 199 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 synapse/handlers/space_summary.py (limited to 'synapse/handlers/space_summary.py') diff --git a/synapse/handlers/space_summary.py b/synapse/handlers/space_summary.py new file mode 100644 index 0000000000..513dc0c71a --- /dev/null +++ b/synapse/handlers/space_summary.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import logging +from collections import deque +from typing import TYPE_CHECKING, Iterable, List, Optional, Set + +from synapse.api.constants import EventContentFields, EventTypes, HistoryVisibility +from synapse.api.errors import AuthError +from synapse.events import EventBase +from synapse.events.utils import format_event_for_client_v2 +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + +# number of rooms to return. We'll stop once we hit this limit. +# TODO: allow clients to reduce this with a request param. +MAX_ROOMS = 50 + +# max number of events to return per room. +MAX_ROOMS_PER_SPACE = 50 + + +class SpaceSummaryHandler: + def __init__(self, hs: "HomeServer"): + self._clock = hs.get_clock() + self._auth = hs.get_auth() + self._room_list_handler = hs.get_room_list_handler() + self._state_handler = hs.get_state_handler() + self._store = hs.get_datastore() + self._event_serializer = hs.get_event_client_serializer() + + async def get_space_summary( + self, + requester: str, + room_id: str, + suggested_only: bool = False, + max_rooms_per_space: Optional[int] = None, + ) -> JsonDict: + """ + Implementation of the space summary API + + Args: + requester: user id of the user making this request + + room_id: room id to start the summary at + + suggested_only: whether we should only return children with the "suggested" + flag set. + + max_rooms_per_space: an optional limit on the number of child rooms we will + return. This does not apply to the root room (ie, room_id), and + is overridden by ROOMS_PER_SPACE_LIMIT. + + Returns: + summary dict to return + """ + # first of all, check that the user is in the room in question (or it's + # world-readable) + await self._auth.check_user_in_room_or_world_readable(room_id, requester) + + # the queue of rooms to process + room_queue = deque((room_id,)) + + processed_rooms = set() # type: Set[str] + + rooms_result = [] # type: List[JsonDict] + events_result = [] # type: List[JsonDict] + + now = self._clock.time_msec() + + while room_queue and len(rooms_result) < MAX_ROOMS: + room_id = room_queue.popleft() + logger.debug("Processing room %s", room_id) + processed_rooms.add(room_id) + + try: + await self._auth.check_user_in_room_or_world_readable( + room_id, requester + ) + except AuthError: + logger.info( + "user %s cannot view room %s, omitting from summary", + requester, + room_id, + ) + continue + + room_entry = await self._build_room_entry(room_id) + rooms_result.append(room_entry) + + # look for child rooms/spaces. + child_events = await self._get_child_events(room_id) + + if suggested_only: + # we only care about suggested children + child_events = filter(_is_suggested_child_event, child_events) + + # The client-specified max_rooms_per_space limit doesn't apply to the + # room_id specified in the request, so we ignore it if this is the + # first room we are processing. Otherwise, apply any client-specified + # limit, capping to our built-in limit. + if max_rooms_per_space is not None and len(processed_rooms) > 1: + max_rooms = min(MAX_ROOMS_PER_SPACE, max_rooms_per_space) + else: + max_rooms = MAX_ROOMS_PER_SPACE + + for edge_event in itertools.islice(child_events, max_rooms): + edge_room_id = edge_event.state_key + + events_result.append( + await self._event_serializer.serialize_event( + edge_event, + time_now=now, + event_format=format_event_for_client_v2, + ) + ) + + # if we haven't yet visited the target of this link, add it to the queue + if edge_room_id not in processed_rooms: + room_queue.append(edge_room_id) + + return {"rooms": rooms_result, "events": events_result} + + async def _build_room_entry(self, room_id: str) -> JsonDict: + """Generate en entry suitable for the 'rooms' list in the summary response""" + stats = await self._store.get_room_with_stats(room_id) + + # currently this should be impossible because we call + # check_user_in_room_or_world_readable on the room before we get here, so + # there should always be an entry + assert stats is not None, "unable to retrieve stats for %s" % (room_id,) + + current_state_ids = await self._store.get_current_state_ids(room_id) + create_event = await self._store.get_event( + current_state_ids[(EventTypes.Create, "")] + ) + + # TODO: update once MSC1772 lands + room_type = create_event.content.get(EventContentFields.MSC1772_ROOM_TYPE) + + entry = { + "room_id": stats["room_id"], + "name": stats["name"], + "topic": stats["topic"], + "canonical_alias": stats["canonical_alias"], + "num_joined_members": stats["joined_members"], + "avatar_url": stats["avatar"], + "world_readable": ( + stats["history_visibility"] == HistoryVisibility.WORLD_READABLE + ), + "guest_can_join": stats["guest_access"] == "can_join", + "room_type": room_type, + } + + # Filter out Nones – rather omit the field altogether + room_entry = {k: v for k, v in entry.items() if v is not None} + + return room_entry + + async def _get_child_events(self, room_id: str) -> Iterable[EventBase]: + # look for child rooms/spaces. + current_state_ids = await self._store.get_current_state_ids(room_id) + + events = await self._store.get_events_as_list( + [ + event_id + for key, event_id in current_state_ids.items() + # TODO: update once MSC1772 lands + if key[0] == EventTypes.MSC1772_SPACE_CHILD + ] + ) + + # filter out any events without a "via" (which implies it has been redacted) + return (e for e in events if e.content.get("via")) + + +def _is_suggested_child_event(edge_event: EventBase) -> bool: + suggested = edge_event.content.get("suggested") + if isinstance(suggested, bool) and suggested: + return True + logger.debug("Ignorning not-suggested child %s", edge_event.state_key) + return False -- cgit 1.5.1 From 4ecba9bd5cb9c094da864d05e3a10456c30bc409 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 23 Mar 2021 11:51:12 +0000 Subject: Federation API for Space summary (#9652) Builds on the work done in #9643 to add a federation API for space summaries. There's a bit of refactoring of the existing client-server code first, to avoid too much duplication. --- changelog.d/9652.feature | 1 + synapse/federation/transport/server.py | 67 ++++++++++-- synapse/handlers/space_summary.py | 183 +++++++++++++++++++++++++-------- 3 files changed, 197 insertions(+), 54 deletions(-) create mode 100644 changelog.d/9652.feature (limited to 'synapse/handlers/space_summary.py') diff --git a/changelog.d/9652.feature b/changelog.d/9652.feature new file mode 100644 index 0000000000..2f7ccedcfb --- /dev/null +++ b/changelog.d/9652.feature @@ -0,0 +1 @@ +Add initial experimental support for a "space summary" API. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2cf935f38d..84e39c5a46 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -18,7 +18,7 @@ import functools import logging import re -from typing import Optional, Tuple, Type +from typing import Container, Mapping, Optional, Sequence, Tuple, Type import synapse from synapse.api.constants import MAX_GROUP_CATEGORYID_LENGTH, MAX_GROUP_ROLEID_LENGTH @@ -29,7 +29,7 @@ from synapse.api.urls import ( FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX, ) -from synapse.http.server import JsonResource +from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import ( parse_boolean_from_args, parse_integer_from_args, @@ -44,7 +44,8 @@ from synapse.logging.opentracing import ( whitelisted_homeserver, ) from synapse.server import HomeServer -from synapse.types import ThirdPartyInstanceID, get_domain_from_id +from synapse.types import JsonDict, ThirdPartyInstanceID, get_domain_from_id +from synapse.util.ratelimitutils import FederationRateLimiter from synapse.util.stringutils import parse_and_validate_server_name from synapse.util.versionstring import get_version_string @@ -1376,6 +1377,40 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): return 200, new_content +class FederationSpaceSummaryServlet(BaseFederationServlet): + PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" + PATH = "/spaces/(?P[^/]*)" + + async def on_POST( + self, + origin: str, + content: JsonDict, + query: Mapping[bytes, Sequence[bytes]], + room_id: str, + ) -> Tuple[int, JsonDict]: + suggested_only = content.get("suggested_only", False) + if not isinstance(suggested_only, bool): + raise SynapseError( + 400, "'suggested_only' must be a boolean", Codes.BAD_JSON + ) + + exclude_rooms = content.get("exclude_rooms", []) + if not isinstance(exclude_rooms, list) or any( + not isinstance(x, str) for x in exclude_rooms + ): + raise SynapseError(400, "bad value for 'exclude_rooms'", Codes.BAD_JSON) + + max_rooms_per_space = content.get("max_rooms_per_space") + if max_rooms_per_space is not None and not isinstance(max_rooms_per_space, int): + raise SynapseError( + 400, "bad value for 'max_rooms_per_space'", Codes.BAD_JSON + ) + + return 200, await self.handler.federation_space_summary( + room_id, suggested_only, max_rooms_per_space, exclude_rooms + ) + + class RoomComplexityServlet(BaseFederationServlet): """ Indicates to other servers how complex (and therefore likely @@ -1474,18 +1509,24 @@ DEFAULT_SERVLET_GROUPS = ( ) -def register_servlets(hs, resource, authenticator, ratelimiter, servlet_groups=None): +def register_servlets( + hs: HomeServer, + resource: HttpServer, + authenticator: Authenticator, + ratelimiter: FederationRateLimiter, + servlet_groups: Optional[Container[str]] = None, +): """Initialize and register servlet classes. Will by default register all servlets. For custom behaviour, pass in a list of servlet_groups to register. Args: - hs (synapse.server.HomeServer): homeserver - resource (JsonResource): resource class to register to - authenticator (Authenticator): authenticator to use - ratelimiter (util.ratelimitutils.FederationRateLimiter): ratelimiter to use - servlet_groups (list[str], optional): List of servlet groups to register. + hs: homeserver + resource: resource class to register to + authenticator: authenticator to use + ratelimiter: ratelimiter to use + servlet_groups: List of servlet groups to register. Defaults to ``DEFAULT_SERVLET_GROUPS``. """ if not servlet_groups: @@ -1500,6 +1541,14 @@ def register_servlets(hs, resource, authenticator, ratelimiter, servlet_groups=N server_name=hs.hostname, ).register(resource) + if hs.config.experimental.spaces_enabled: + FederationSpaceSummaryServlet( + handler=hs.get_space_summary_handler(), + authenticator=authenticator, + ratelimiter=ratelimiter, + server_name=hs.hostname, + ).register(resource) + if "openid" in servlet_groups: for servletclass in OPENID_SERVLET_CLASSES: servletclass( diff --git a/synapse/handlers/space_summary.py b/synapse/handlers/space_summary.py index 513dc0c71a..f5ead9447f 100644 --- a/synapse/handlers/space_summary.py +++ b/synapse/handlers/space_summary.py @@ -16,7 +16,9 @@ import itertools import logging from collections import deque -from typing import TYPE_CHECKING, Iterable, List, Optional, Set +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple + +import attr from synapse.api.constants import EventContentFields, EventTypes, HistoryVisibility from synapse.api.errors import AuthError @@ -54,7 +56,7 @@ class SpaceSummaryHandler: max_rooms_per_space: Optional[int] = None, ) -> JsonDict: """ - Implementation of the space summary API + Implementation of the space summary C-S API Args: requester: user id of the user making this request @@ -66,7 +68,7 @@ class SpaceSummaryHandler: max_rooms_per_space: an optional limit on the number of child rooms we will return. This does not apply to the root room (ie, room_id), and - is overridden by ROOMS_PER_SPACE_LIMIT. + is overridden by MAX_ROOMS_PER_SPACE. Returns: summary dict to return @@ -76,67 +78,153 @@ class SpaceSummaryHandler: await self._auth.check_user_in_room_or_world_readable(room_id, requester) # the queue of rooms to process - room_queue = deque((room_id,)) + room_queue = deque((_RoomQueueEntry(room_id),)) processed_rooms = set() # type: Set[str] rooms_result = [] # type: List[JsonDict] events_result = [] # type: List[JsonDict] - now = self._clock.time_msec() + while room_queue and len(rooms_result) < MAX_ROOMS: + queue_entry = room_queue.popleft() + room_id = queue_entry.room_id + logger.debug("Processing room %s", room_id) + processed_rooms.add(room_id) + + # The client-specified max_rooms_per_space limit doesn't apply to the + # room_id specified in the request, so we ignore it if this is the + # first room we are processing. + max_children = max_rooms_per_space if processed_rooms else None + + rooms, events = await self._summarize_local_room( + requester, room_id, suggested_only, max_children + ) + + rooms_result.extend(rooms) + events_result.extend(events) + + # add any children that we haven't already processed to the queue + for edge_event in events: + if edge_event["state_key"] not in processed_rooms: + room_queue.append(_RoomQueueEntry(edge_event["state_key"])) + + return {"rooms": rooms_result, "events": events_result} + + async def federation_space_summary( + self, + room_id: str, + suggested_only: bool, + max_rooms_per_space: Optional[int], + exclude_rooms: Iterable[str], + ) -> JsonDict: + """ + Implementation of the space summary Federation API + + Args: + room_id: room id to start the summary at + + suggested_only: whether we should only return children with the "suggested" + flag set. + + max_rooms_per_space: an optional limit on the number of child rooms we will + return. Unlike the C-S API, this applies to the root room (room_id). + It is clipped to MAX_ROOMS_PER_SPACE. + + exclude_rooms: a list of rooms to skip over (presumably because the + calling server has already seen them). + + Returns: + summary dict to return + """ + # the queue of rooms to process + room_queue = deque((room_id,)) + + # the set of rooms that we should not walk further. Initialise it with the + # excluded-rooms list; we will add other rooms as we process them so that + # we do not loop. + processed_rooms = set(exclude_rooms) # type: Set[str] + + rooms_result = [] # type: List[JsonDict] + events_result = [] # type: List[JsonDict] while room_queue and len(rooms_result) < MAX_ROOMS: room_id = room_queue.popleft() logger.debug("Processing room %s", room_id) processed_rooms.add(room_id) - try: - await self._auth.check_user_in_room_or_world_readable( - room_id, requester - ) - except AuthError: - logger.info( - "user %s cannot view room %s, omitting from summary", - requester, - room_id, - ) - continue + rooms, events = await self._summarize_local_room( + None, room_id, suggested_only, max_rooms_per_space + ) - room_entry = await self._build_room_entry(room_id) - rooms_result.append(room_entry) + rooms_result.extend(rooms) + events_result.extend(events) - # look for child rooms/spaces. - child_events = await self._get_child_events(room_id) + # add any children that we haven't already processed to the queue + for edge_event in events: + if edge_event["state_key"] not in processed_rooms: + room_queue.append(edge_event["state_key"]) - if suggested_only: - # we only care about suggested children - child_events = filter(_is_suggested_child_event, child_events) + return {"rooms": rooms_result, "events": events_result} - # The client-specified max_rooms_per_space limit doesn't apply to the - # room_id specified in the request, so we ignore it if this is the - # first room we are processing. Otherwise, apply any client-specified - # limit, capping to our built-in limit. - if max_rooms_per_space is not None and len(processed_rooms) > 1: - max_rooms = min(MAX_ROOMS_PER_SPACE, max_rooms_per_space) - else: - max_rooms = MAX_ROOMS_PER_SPACE - - for edge_event in itertools.islice(child_events, max_rooms): - edge_room_id = edge_event.state_key - - events_result.append( - await self._event_serializer.serialize_event( - edge_event, - time_now=now, - event_format=format_event_for_client_v2, - ) + async def _summarize_local_room( + self, + requester: Optional[str], + room_id: str, + suggested_only: bool, + max_children: Optional[int], + ) -> Tuple[Sequence[JsonDict], Sequence[JsonDict]]: + if not await self._is_room_accessible(room_id, requester): + return (), () + + room_entry = await self._build_room_entry(room_id) + + # look for child rooms/spaces. + child_events = await self._get_child_events(room_id) + + if suggested_only: + # we only care about suggested children + child_events = filter(_is_suggested_child_event, child_events) + + if max_children is None or max_children > MAX_ROOMS_PER_SPACE: + max_children = MAX_ROOMS_PER_SPACE + + now = self._clock.time_msec() + events_result = [] # type: List[JsonDict] + for edge_event in itertools.islice(child_events, max_children): + events_result.append( + await self._event_serializer.serialize_event( + edge_event, + time_now=now, + event_format=format_event_for_client_v2, ) + ) + return (room_entry,), events_result - # if we haven't yet visited the target of this link, add it to the queue - if edge_room_id not in processed_rooms: - room_queue.append(edge_room_id) + async def _is_room_accessible(self, room_id: str, requester: Optional[str]) -> bool: + # if we have an authenticated requesting user, first check if they are in the + # room + if requester: + try: + await self._auth.check_user_in_room(room_id, requester) + return True + except AuthError: + pass - return {"rooms": rooms_result, "events": events_result} + # otherwise, check if the room is peekable + hist_vis_ev = await self._state_handler.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if hist_vis_ev: + hist_vis = hist_vis_ev.content.get("history_visibility") + if hist_vis == HistoryVisibility.WORLD_READABLE: + return True + + logger.info( + "room %s is unpeekable and user %s is not a member, omitting from summary", + room_id, + requester, + ) + return False async def _build_room_entry(self, room_id: str) -> JsonDict: """Generate en entry suitable for the 'rooms' list in the summary response""" @@ -191,6 +279,11 @@ class SpaceSummaryHandler: return (e for e in events if e.content.get("via")) +@attr.s(frozen=True, slots=True) +class _RoomQueueEntry: + room_id = attr.ib(type=str) + + def _is_suggested_child_event(edge_event: EventBase) -> bool: suggested = edge_event.content.get("suggested") if isinstance(suggested, bool) and suggested: -- cgit 1.5.1 From c73cc2c2ad7244a0080f35d9710cedfe11917e69 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 24 Mar 2021 12:45:39 +0000 Subject: Spaces summary: call out to other servers (#9653) When we hit an unknown room in the space tree, see if there are other servers that we might be able to poll to get the data. Fixes: #9447 --- changelog.d/9653.feature | 1 + synapse/federation/federation_client.py | 180 ++++++++++++++++++++++++++++++-- synapse/federation/transport/client.py | 35 ++++++- synapse/handlers/space_summary.py | 135 +++++++++++++++++++++--- 4 files changed, 324 insertions(+), 27 deletions(-) create mode 100644 changelog.d/9653.feature (limited to 'synapse/handlers/space_summary.py') diff --git a/changelog.d/9653.feature b/changelog.d/9653.feature new file mode 100644 index 0000000000..2f7ccedcfb --- /dev/null +++ b/changelog.d/9653.feature @@ -0,0 +1 @@ +Add initial experimental support for a "space summary" API. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 3b2f51baab..afdb5bf2fa 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -27,11 +27,13 @@ from typing import ( List, Mapping, Optional, + Sequence, Tuple, TypeVar, Union, ) +import attr from prometheus_client import Counter from twisted.internet import defer @@ -455,6 +457,7 @@ class FederationClient(FederationBase): description: str, destinations: Iterable[str], callback: Callable[[str], Awaitable[T]], + failover_on_unknown_endpoint: bool = False, ) -> T: """Try an operation on a series of servers, until it succeeds @@ -474,6 +477,10 @@ class FederationClient(FederationBase): next server tried. Normally the stacktrace is logged but this is suppressed if the exception is an InvalidResponseError. + failover_on_unknown_endpoint: if True, we will try other servers if it looks + like a server doesn't support the endpoint. This is typically useful + if the endpoint in question is new or experimental. + Returns: The result of callback, if it succeeds @@ -493,16 +500,31 @@ class FederationClient(FederationBase): except UnsupportedRoomVersionError: raise except HttpResponseException as e: - if not 500 <= e.code < 600: - raise e.to_synapse_error() - else: - logger.warning( - "Failed to %s via %s: %i %s", - description, - destination, - e.code, - e.args[0], - ) + synapse_error = e.to_synapse_error() + failover = False + + if 500 <= e.code < 600: + failover = True + + elif failover_on_unknown_endpoint: + # there is no good way to detect an "unknown" endpoint. Dendrite + # returns a 404 (with no body); synapse returns a 400 + # with M_UNRECOGNISED. + if e.code == 404 or ( + e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED + ): + failover = True + + if not failover: + raise synapse_error from e + + logger.warning( + "Failed to %s via %s: %i %s", + description, + destination, + e.code, + e.args[0], + ) except Exception: logger.warning( "Failed to %s via %s", description, destination, exc_info=True @@ -1042,3 +1064,141 @@ class FederationClient(FederationBase): # If we don't manage to find it, return None. It's not an error if a # server doesn't give it to us. return None + + async def get_space_summary( + self, + destinations: Iterable[str], + room_id: str, + suggested_only: bool, + max_rooms_per_space: Optional[int], + exclude_rooms: List[str], + ) -> "FederationSpaceSummaryResult": + """ + Call other servers to get a summary of the given space + + + Args: + destinations: The remote servers. We will try them in turn, omitting any + that have been blacklisted. + + room_id: ID of the space to be queried + + suggested_only: If true, ask the remote server to only return children + with the "suggested" flag set + + max_rooms_per_space: A limit on the number of children to return for each + space + + exclude_rooms: A list of room IDs to tell the remote server to skip + + Returns: + a parsed FederationSpaceSummaryResult + + Raises: + SynapseError if we were unable to get a valid summary from any of the + remote servers + """ + + async def send_request(destination: str) -> FederationSpaceSummaryResult: + res = await self.transport_layer.get_space_summary( + destination=destination, + room_id=room_id, + suggested_only=suggested_only, + max_rooms_per_space=max_rooms_per_space, + exclude_rooms=exclude_rooms, + ) + + try: + return FederationSpaceSummaryResult.from_json_dict(res) + except ValueError as e: + raise InvalidResponseError(str(e)) + + return await self._try_destination_list( + "fetch space summary", + destinations, + send_request, + failover_on_unknown_endpoint=True, + ) + + +@attr.s(frozen=True, slots=True) +class FederationSpaceSummaryEventResult: + """Represents a single event in the result of a successful get_space_summary call. + + It's essentially just a serialised event object, but we do a bit of parsing and + validation in `from_json_dict` and store some of the validated properties in + object attributes. + """ + + event_type = attr.ib(type=str) + state_key = attr.ib(type=str) + via = attr.ib(type=Sequence[str]) + + # the raw data, including the above keys + data = attr.ib(type=JsonDict) + + @classmethod + def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryEventResult": + """Parse an event within the result of a /spaces/ request + + Args: + d: json object to be parsed + + Raises: + ValueError if d is not a valid event + """ + + event_type = d.get("type") + if not isinstance(event_type, str): + raise ValueError("Invalid event: 'event_type' must be a str") + + state_key = d.get("state_key") + if not isinstance(state_key, str): + raise ValueError("Invalid event: 'state_key' must be a str") + + content = d.get("content") + if not isinstance(content, dict): + raise ValueError("Invalid event: 'content' must be a dict") + + via = content.get("via") + if not isinstance(via, Sequence): + raise ValueError("Invalid event: 'via' must be a list") + if any(not isinstance(v, str) for v in via): + raise ValueError("Invalid event: 'via' must be a list of strings") + + return cls(event_type, state_key, via, d) + + +@attr.s(frozen=True, slots=True) +class FederationSpaceSummaryResult: + """Represents the data returned by a successful get_space_summary call.""" + + rooms = attr.ib(type=Sequence[JsonDict]) + events = attr.ib(type=Sequence[FederationSpaceSummaryEventResult]) + + @classmethod + def from_json_dict(cls, d: JsonDict) -> "FederationSpaceSummaryResult": + """Parse the result of a /spaces/ request + + Args: + d: json object to be parsed + + Raises: + ValueError if d is not a valid /spaces/ response + """ + rooms = d.get("rooms") + if not isinstance(rooms, Sequence): + raise ValueError("'rooms' must be a list") + if any(not isinstance(r, dict) for r in rooms): + raise ValueError("Invalid room in 'rooms' list") + + events = d.get("events") + if not isinstance(events, Sequence): + raise ValueError("'events' must be a list") + if any(not isinstance(e, dict) for e in events): + raise ValueError("Invalid event in 'events' list") + parsed_events = [ + FederationSpaceSummaryEventResult.from_json_dict(e) for e in events + ] + + return cls(rooms, parsed_events) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 10c4747f97..6aee47c431 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -16,7 +16,7 @@ import logging import urllib -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from synapse.api.constants import Membership from synapse.api.errors import Codes, HttpResponseException, SynapseError @@ -26,6 +26,7 @@ from synapse.api.urls import ( FEDERATION_V2_PREFIX, ) from synapse.logging.utils import log_function +from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -978,6 +979,38 @@ class TransportLayerClient: return self.client.get_json(destination=destination, path=path) + async def get_space_summary( + self, + destination: str, + room_id: str, + suggested_only: bool, + max_rooms_per_space: Optional[int], + exclude_rooms: List[str], + ) -> JsonDict: + """ + Args: + destination: The remote server + room_id: The room ID to ask about. + suggested_only: if True, only suggested rooms will be returned + max_rooms_per_space: an optional limit to the number of children to be + returned per space + exclude_rooms: a list of any rooms we can skip + """ + path = _create_path( + FEDERATION_UNSTABLE_PREFIX, "/org.matrix.msc2946/spaces/%s", room_id + ) + + params = { + "suggested_only": suggested_only, + "exclude_rooms": exclude_rooms, + } + if max_rooms_per_space is not None: + params["max_rooms_per_space"] = max_rooms_per_space + + return await self.client.post_json( + destination=destination, path=path, data=params + ) + def _create_path(federation_prefix, path, *args): """ diff --git a/synapse/handlers/space_summary.py b/synapse/handlers/space_summary.py index f5ead9447f..5d9418969d 100644 --- a/synapse/handlers/space_summary.py +++ b/synapse/handlers/space_summary.py @@ -16,7 +16,7 @@ import itertools import logging from collections import deque -from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple, cast import attr @@ -38,6 +38,9 @@ MAX_ROOMS = 50 # max number of events to return per room. MAX_ROOMS_PER_SPACE = 50 +# max number of federation servers to hit per room +MAX_SERVERS_PER_SPACE = 3 + class SpaceSummaryHandler: def __init__(self, hs: "HomeServer"): @@ -47,6 +50,8 @@ class SpaceSummaryHandler: self._state_handler = hs.get_state_handler() self._store = hs.get_datastore() self._event_serializer = hs.get_event_client_serializer() + self._server_name = hs.hostname + self._federation_client = hs.get_federation_client() async def get_space_summary( self, @@ -78,35 +83,81 @@ class SpaceSummaryHandler: await self._auth.check_user_in_room_or_world_readable(room_id, requester) # the queue of rooms to process - room_queue = deque((_RoomQueueEntry(room_id),)) + room_queue = deque((_RoomQueueEntry(room_id, ()),)) + # rooms we have already processed processed_rooms = set() # type: Set[str] + # events we have already processed. We don't necessarily have their event ids, + # so instead we key on (room id, state key) + processed_events = set() # type: Set[Tuple[str, str]] + rooms_result = [] # type: List[JsonDict] events_result = [] # type: List[JsonDict] while room_queue and len(rooms_result) < MAX_ROOMS: queue_entry = room_queue.popleft() room_id = queue_entry.room_id + if room_id in processed_rooms: + # already done this room + continue + logger.debug("Processing room %s", room_id) - processed_rooms.add(room_id) + + is_in_room = await self._store.is_host_joined(room_id, self._server_name) # The client-specified max_rooms_per_space limit doesn't apply to the # room_id specified in the request, so we ignore it if this is the # first room we are processing. max_children = max_rooms_per_space if processed_rooms else None - rooms, events = await self._summarize_local_room( - requester, room_id, suggested_only, max_children + if is_in_room: + rooms, events = await self._summarize_local_room( + requester, room_id, suggested_only, max_children + ) + else: + rooms, events = await self._summarize_remote_room( + queue_entry, + suggested_only, + max_children, + exclude_rooms=processed_rooms, + ) + + logger.debug( + "Query of %s returned rooms %s, events %s", + queue_entry.room_id, + [room.get("room_id") for room in rooms], + ["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events], ) rooms_result.extend(rooms) - events_result.extend(events) - # add any children that we haven't already processed to the queue - for edge_event in events: - if edge_event["state_key"] not in processed_rooms: - room_queue.append(_RoomQueueEntry(edge_event["state_key"])) + # any rooms returned don't need visiting again + processed_rooms.update(cast(str, room.get("room_id")) for room in rooms) + + # the room we queried may or may not have been returned, but don't process + # it again, anyway. + processed_rooms.add(room_id) + + # XXX: is it ok that we blindly iterate through any events returned by + # a remote server, whether or not they actually link to any rooms in our + # tree? + for ev in events: + # remote servers might return events we have already processed + # (eg, Dendrite returns inward pointers as well as outward ones), so + # we need to filter them out, to avoid returning duplicate links to the + # client. + ev_key = (ev["room_id"], ev["state_key"]) + if ev_key in processed_events: + continue + events_result.append(ev) + + # add the child to the queue. we have already validated + # that the vias are a list of server names. + room_queue.append( + _RoomQueueEntry(ev["state_key"], ev["content"]["via"]) + ) + processed_events.add(ev_key) return {"rooms": rooms_result, "events": events_result} @@ -149,20 +200,23 @@ class SpaceSummaryHandler: while room_queue and len(rooms_result) < MAX_ROOMS: room_id = room_queue.popleft() + if room_id in processed_rooms: + # already done this room + continue + logger.debug("Processing room %s", room_id) - processed_rooms.add(room_id) rooms, events = await self._summarize_local_room( None, room_id, suggested_only, max_rooms_per_space ) + processed_rooms.add(room_id) + rooms_result.extend(rooms) events_result.extend(events) - # add any children that we haven't already processed to the queue - for edge_event in events: - if edge_event["state_key"] not in processed_rooms: - room_queue.append(edge_event["state_key"]) + # add any children to the queue + room_queue.extend(edge_event["state_key"] for edge_event in events) return {"rooms": rooms_result, "events": events_result} @@ -200,6 +254,43 @@ class SpaceSummaryHandler: ) return (room_entry,), events_result + async def _summarize_remote_room( + self, + room: "_RoomQueueEntry", + suggested_only: bool, + max_children: Optional[int], + exclude_rooms: Iterable[str], + ) -> Tuple[Sequence[JsonDict], Sequence[JsonDict]]: + room_id = room.room_id + logger.info("Requesting summary for %s via %s", room_id, room.via) + + # we need to make the exclusion list json-serialisable + exclude_rooms = list(exclude_rooms) + + via = itertools.islice(room.via, MAX_SERVERS_PER_SPACE) + try: + res = await self._federation_client.get_space_summary( + via, + room_id, + suggested_only=suggested_only, + max_rooms_per_space=max_children, + exclude_rooms=exclude_rooms, + ) + except Exception as e: + logger.warning( + "Unable to get summary of %s via federation: %s", + room_id, + e, + exc_info=logger.isEnabledFor(logging.DEBUG), + ) + return (), () + + return res.rooms, tuple( + ev.data + for ev in res.events + if ev.event_type == EventTypes.MSC1772_SPACE_CHILD + ) + async def _is_room_accessible(self, room_id: str, requester: Optional[str]) -> bool: # if we have an authenticated requesting user, first check if they are in the # room @@ -276,12 +367,24 @@ class SpaceSummaryHandler: ) # filter out any events without a "via" (which implies it has been redacted) - return (e for e in events if e.content.get("via")) + return (e for e in events if _has_valid_via(e)) @attr.s(frozen=True, slots=True) class _RoomQueueEntry: room_id = attr.ib(type=str) + via = attr.ib(type=Sequence[str]) + + +def _has_valid_via(e: EventBase) -> bool: + via = e.content.get("via") + if not via or not isinstance(via, Sequence): + return False + for v in via: + if not isinstance(v, str): + logger.debug("Ignoring edge event %s with invalid via entry", e.event_id) + return False + return True def _is_suggested_child_event(edge_event: EventBase) -> bool: -- cgit 1.5.1