From 64887f06fcac63e069364d625d984b4951bf1ffc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 May 2021 16:11:48 +0100 Subject: Use ijson to parse the response to `/send_join`, reducing memory usage. (#9958) Instead of parsing the full response to `/send_join` into Python objects (which can be huge for large rooms) and *then* parsing that into events, we instead use ijson to stream parse the response directly into `EventBase` objects. --- synapse/federation/transport/client.py | 85 ++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) (limited to 'synapse/federation/transport/client.py') diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 497848a2b7..e93ab83f7f 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -17,13 +17,19 @@ import logging import urllib from typing import Any, Dict, List, Optional +import attr +import ijson + from synapse.api.constants import Membership from synapse.api.errors import Codes, HttpResponseException, SynapseError +from synapse.api.room_versions import RoomVersion from synapse.api.urls import ( FEDERATION_UNSTABLE_PREFIX, FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX, ) +from synapse.events import EventBase, make_event_from_dict +from synapse.http.matrixfederationclient import ByteParser from synapse.logging.utils import log_function from synapse.types import JsonDict @@ -240,21 +246,36 @@ class TransportLayerClient: return content @log_function - async def send_join_v1(self, destination, room_id, event_id, content): + async def send_join_v1( + self, + room_version, + destination, + room_id, + event_id, + content, + ) -> "SendJoinResponse": path = _create_v1_path("/send_join/%s/%s", room_id, event_id) response = await self.client.put_json( - destination=destination, path=path, data=content + destination=destination, + path=path, + data=content, + parser=SendJoinParser(room_version, v1_api=True), ) return response @log_function - async def send_join_v2(self, destination, room_id, event_id, content): + async def send_join_v2( + self, room_version, destination, room_id, event_id, content + ) -> "SendJoinResponse": path = _create_v2_path("/send_join/%s/%s", room_id, event_id) response = await self.client.put_json( - destination=destination, path=path, data=content + destination=destination, + path=path, + data=content, + parser=SendJoinParser(room_version, v1_api=False), ) return response @@ -1053,3 +1074,59 @@ def _create_v2_path(path, *args): str """ return _create_path(FEDERATION_V2_PREFIX, path, *args) + + +@attr.s(slots=True, auto_attribs=True) +class SendJoinResponse: + """The parsed response of a `/send_join` request.""" + + auth_events: List[EventBase] + state: List[EventBase] + + +@ijson.coroutine +def _event_list_parser(room_version: RoomVersion, events: List[EventBase]): + """Helper function for use with `ijson.items_coro` to parse an array of + events and add them to the given list. + """ + + while True: + obj = yield + event = make_event_from_dict(obj, room_version) + events.append(event) + + +class SendJoinParser(ByteParser[SendJoinResponse]): + """A parser for the response to `/send_join` requests. + + Args: + room_version: The version of the room. + v1_api: Whether the response is in the v1 format. + """ + + CONTENT_TYPE = "application/json" + + def __init__(self, room_version: RoomVersion, v1_api: bool): + self._response = SendJoinResponse([], []) + + # The V1 API has the shape of `[200, {...}]`, which we handle by + # prefixing with `item.*`. + prefix = "item." if v1_api else "" + + self._coro_state = ijson.items_coro( + _event_list_parser(room_version, self._response.state), + prefix + "state.item", + ) + self._coro_auth = ijson.items_coro( + _event_list_parser(room_version, self._response.auth_events), + prefix + "auth_chain.item", + ) + + def write(self, data: bytes) -> int: + self._coro_state.send(data) + self._coro_auth.send(data) + + return len(data) + + def finish(self) -> SendJoinResponse: + return self._response -- cgit 1.5.1 From 84cf3e47a0318aba51d9f830d5e724182c5d93c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 28 May 2021 16:28:01 +0100 Subject: Allow response of `/send_join` to be larger. (#10093) Fixes #10087. --- changelog.d/10093.bugfix | 1 + synapse/federation/transport/client.py | 7 +++++++ synapse/http/matrixfederationclient.py | 14 +++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10093.bugfix (limited to 'synapse/federation/transport/client.py') diff --git a/changelog.d/10093.bugfix b/changelog.d/10093.bugfix new file mode 100644 index 0000000000..e50de4b2ea --- /dev/null +++ b/changelog.d/10093.bugfix @@ -0,0 +1 @@ +Fix HTTP response size limit to allow joining very large rooms over federation. diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index e93ab83f7f..5b4f5d17f7 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -35,6 +35,11 @@ from synapse.types import JsonDict logger = logging.getLogger(__name__) +# Send join responses can be huge, so we set a separate limit here. The response +# is parsed in a streaming manner, which helps alleviate the issue of memory +# usage a bit. +MAX_RESPONSE_SIZE_SEND_JOIN = 500 * 1024 * 1024 + class TransportLayerClient: """Sends federation HTTP requests to other servers""" @@ -261,6 +266,7 @@ class TransportLayerClient: path=path, data=content, parser=SendJoinParser(room_version, v1_api=True), + max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN, ) return response @@ -276,6 +282,7 @@ class TransportLayerClient: path=path, data=content, parser=SendJoinParser(room_version, v1_api=False), + max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN, ) return response diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index f5503b394b..1998990a14 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -205,6 +205,7 @@ async def _handle_response( response: IResponse, start_ms: int, parser: ByteParser[T], + max_response_size: Optional[int] = None, ) -> T: """ Reads the body of a response with a timeout and sends it to a parser @@ -216,15 +217,20 @@ async def _handle_response( response: response to the request start_ms: Timestamp when request was made parser: The parser for the response + max_response_size: The maximum size to read from the response, if None + uses the default. Returns: The parsed response """ + if max_response_size is None: + max_response_size = MAX_RESPONSE_SIZE + try: check_content_type_is(response.headers, parser.CONTENT_TYPE) - d = read_body_with_max_size(response, parser, MAX_RESPONSE_SIZE) + d = read_body_with_max_size(response, parser, max_response_size) d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) length = await make_deferred_yieldable(d) @@ -735,6 +741,7 @@ class MatrixFederationHttpClient: backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Literal[None] = None, + max_response_size: Optional[int] = None, ) -> Union[JsonDict, list]: ... @@ -752,6 +759,7 @@ class MatrixFederationHttpClient: backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Optional[ByteParser[T]] = None, + max_response_size: Optional[int] = None, ) -> T: ... @@ -768,6 +776,7 @@ class MatrixFederationHttpClient: backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Optional[ByteParser] = None, + max_response_size: Optional[int] = None, ): """Sends the specified json data using PUT @@ -803,6 +812,8 @@ class MatrixFederationHttpClient: enabled. parser: The parser to use to decode the response. Defaults to parsing as JSON. + max_response_size: The maximum size to read from the response, if None + uses the default. Returns: Succeeds when we get a 2xx HTTP response. The @@ -853,6 +864,7 @@ class MatrixFederationHttpClient: response, start_ms, parser=parser, + max_response_size=max_response_size, ) return body -- cgit 1.5.1