From 4b965c862dc66c0da5d3240add70e9b5f0aa720b Mon Sep 17 00:00:00 2001 From: Jonathan de Jong Date: Wed, 14 Apr 2021 16:34:27 +0200 Subject: Remove redundant "coding: utf-8" lines (#9786) Part of #9744 Removes all redundant `# -*- coding: utf-8 -*-` lines from files, as python 3 automatically reads source code as utf-8 now. `Signed-off-by: Jonathan de Jong ` --- tests/http/__init__.py | 1 - tests/http/federation/__init__.py | 1 - tests/http/federation/test_matrix_federation_agent.py | 1 - tests/http/federation/test_srv_resolver.py | 1 - tests/http/test_additional_resource.py | 1 - tests/http/test_endpoint.py | 1 - tests/http/test_fedclient.py | 1 - tests/http/test_proxyagent.py | 1 - tests/http/test_servlet.py | 1 - tests/http/test_simple_client.py | 1 - 10 files changed, 10 deletions(-) (limited to 'tests/http') diff --git a/tests/http/__init__.py b/tests/http/__init__.py index 3e5a856584..e74f7f5b48 100644 --- a/tests/http/__init__.py +++ b/tests/http/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/federation/__init__.py b/tests/http/federation/__init__.py index 1453d04571..743fb9904a 100644 --- a/tests/http/federation/__init__.py +++ b/tests/http/federation/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index ae9d4504a8..e45980316b 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2019 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py index 466ce722d9..c49be33b9f 100644 --- a/tests/http/federation/test_srv_resolver.py +++ b/tests/http/federation/test_srv_resolver.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd # Copyright 2019 New Vector Ltd # diff --git a/tests/http/test_additional_resource.py b/tests/http/test_additional_resource.py index 453391a5a5..768c2ba4ea 100644 --- a/tests/http/test_additional_resource.py +++ b/tests/http/test_additional_resource.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py index d06ea518ce..1f9a2f9b1d 100644 --- a/tests/http/test_endpoint.py +++ b/tests/http/test_endpoint.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 21c1297171..9e97185507 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index 3ea8b5bec7..fefc8099c9 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index f979c96f7c..a80bfb9f4e 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py index cc4cae320d..c85a3665c1 100644 --- a/tests/http/test_simple_client.py +++ b/tests/http/test_simple_client.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); -- cgit 1.5.1 From 51a20914a863ac24387c424ccee14aa877e218f8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 23 Apr 2021 11:08:41 +0100 Subject: Limit the size of HTTP responses read over federation. (#9833) --- changelog.d/9833.bugfix | 1 + synapse/http/client.py | 15 +++++++-- synapse/http/matrixfederationclient.py | 43 +++++++++++++++++++++---- tests/http/test_fedclient.py | 59 ++++++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 8 deletions(-) create mode 100644 changelog.d/9833.bugfix (limited to 'tests/http') diff --git a/changelog.d/9833.bugfix b/changelog.d/9833.bugfix new file mode 100644 index 0000000000..56f9c9626b --- /dev/null +++ b/changelog.d/9833.bugfix @@ -0,0 +1 @@ +Limit the size of HTTP responses read over federation. diff --git a/synapse/http/client.py b/synapse/http/client.py index 1730187ffa..5f40f16e24 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -33,6 +33,7 @@ import treq from canonicaljson import encode_canonical_json from netaddr import AddrFormatError, IPAddress, IPSet from prometheus_client import Counter +from typing_extensions import Protocol from zope.interface import implementer, provider from OpenSSL import SSL @@ -754,6 +755,16 @@ def _timeout_to_request_timed_out_error(f: Failure): return f +class ByteWriteable(Protocol): + """The type of object which must be passed into read_body_with_max_size. + + Typically this is a file object. + """ + + def write(self, data: bytes) -> int: + pass + + class BodyExceededMaxSize(Exception): """The maximum allowed size of the HTTP body was exceeded.""" @@ -790,7 +801,7 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): transport = None # type: Optional[ITCPTransport] def __init__( - self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int] + self, stream: ByteWriteable, deferred: defer.Deferred, max_size: Optional[int] ): self.stream = stream self.deferred = deferred @@ -830,7 +841,7 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): def read_body_with_max_size( - response: IResponse, stream: BinaryIO, max_size: Optional[int] + response: IResponse, stream: ByteWriteable, max_size: Optional[int] ) -> defer.Deferred: """ Read a HTTP response body to a file-object. Optionally enforcing a maximum file size. diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index d48721a4e2..bb837b7b19 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1,5 +1,4 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vector Ltd +# Copyright 2014-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import cgi +import codecs import logging import random import sys +import typing import urllib.parse -from io import BytesIO +from io import BytesIO, StringIO from typing import Callable, Dict, List, Optional, Tuple, Union import attr @@ -72,6 +73,9 @@ incoming_responses_counter = Counter( "synapse_http_matrixfederationclient_responses", "", ["method", "code"] ) +# a federation response can be rather large (eg a big state_ids is 50M or so), so we +# need a generous limit here. +MAX_RESPONSE_SIZE = 100 * 1024 * 1024 MAX_LONG_RETRIES = 10 MAX_SHORT_RETRIES = 3 @@ -167,12 +171,27 @@ async def _handle_json_response( try: check_content_type_is_json(response.headers) - # Use the custom JSON decoder (partially re-implements treq.json_content). - d = treq.text_content(response, encoding="utf-8") - d.addCallback(json_decoder.decode) + buf = StringIO() + d = read_body_with_max_size(response, BinaryIOWrapper(buf), MAX_RESPONSE_SIZE) d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) + def parse(_len: int): + return json_decoder.decode(buf.getvalue()) + + d.addCallback(parse) + body = await make_deferred_yieldable(d) + except BodyExceededMaxSize as e: + # The response was too big. + logger.warning( + "{%s} [%s] JSON response exceeded max size %i - %s %s", + request.txn_id, + request.destination, + MAX_RESPONSE_SIZE, + request.method, + request.uri.decode("ascii"), + ) + raise RequestSendFailed(e, can_retry=False) from e except ValueError as e: # The JSON content was invalid. logger.warning( @@ -218,6 +237,18 @@ async def _handle_json_response( return body +class BinaryIOWrapper: + """A wrapper for a TextIO which converts from bytes on the fly.""" + + def __init__(self, file: typing.TextIO, encoding="utf-8", errors="strict"): + self.decoder = codecs.getincrementaldecoder(encoding)(errors) + self.file = file + + def write(self, b: Union[bytes, bytearray]) -> int: + self.file.write(self.decoder.decode(b)) + return len(b) + + class MatrixFederationHttpClient: """HTTP client used to talk to other homeservers over the federation protocol. Send client certificates and signs requests. diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 9e97185507..ed9a884d76 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -26,6 +26,7 @@ from twisted.web.http import HTTPChannel from synapse.api.errors import RequestSendFailed from synapse.http.matrixfederationclient import ( + MAX_RESPONSE_SIZE, MatrixFederationHttpClient, MatrixFederationRequest, ) @@ -560,3 +561,61 @@ class FederationClientTests(HomeserverTestCase): f = self.failureResultOf(test_d) self.assertIsInstance(f.value, RequestSendFailed) + + def test_too_big(self): + """ + Test what happens if a huge response is returned from the remote endpoint. + """ + + test_d = defer.ensureDeferred(self.cl.get_json("testserv:8008", "foo/bar")) + + self.pump() + + # Nothing happened yet + self.assertNoResult(test_d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8008) + + # complete the connection and wire it up to a fake transport + protocol = factory.buildProtocol(None) + transport = StringTransport() + protocol.makeConnection(transport) + + # that should have made it send the request to the transport + self.assertRegex(transport.value(), b"^GET /foo/bar") + self.assertRegex(transport.value(), b"Host: testserv:8008") + + # Deferred is still without a result + self.assertNoResult(test_d) + + # Send it a huge HTTP response + protocol.dataReceived( + b"HTTP/1.1 200 OK\r\n" + b"Server: Fake\r\n" + b"Content-Type: application/json\r\n" + b"\r\n" + ) + + self.pump() + + # should still be waiting + self.assertNoResult(test_d) + + sent = 0 + chunk_size = 1024 * 512 + while not test_d.called: + protocol.dataReceived(b"a" * chunk_size) + sent += chunk_size + self.assertLessEqual(sent, MAX_RESPONSE_SIZE) + + self.assertEqual(sent, MAX_RESPONSE_SIZE) + + f = self.failureResultOf(test_d) + self.assertIsInstance(f.value, RequestSendFailed) + + self.assertTrue(transport.disconnecting) -- cgit 1.5.1 From 3ff225175462dde8376aa584e3a47c43b1f0e790 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 23 Apr 2021 19:20:44 +0100 Subject: Improved validation for received requests (#9817) * Simplify `start_listening` callpath * Correctly check the size of uploaded files --- changelog.d/9817.misc | 1 + synapse/api/constants.py | 3 ++ synapse/app/_base.py | 30 ++++++++++-- synapse/app/admin_cmd.py | 8 +-- synapse/app/generic_worker.py | 11 +++-- synapse/app/homeserver.py | 17 +++++-- synapse/config/logger.py | 3 +- synapse/event_auth.py | 4 +- synapse/http/site.py | 32 ++++++++++-- synapse/rest/media/v1/upload_resource.py | 2 - synapse/server.py | 8 +++ tests/http/test_site.py | 83 ++++++++++++++++++++++++++++++++ tests/replication/_base.py | 1 + tests/test_server.py | 1 + tests/unittest.py | 1 + 15 files changed, 174 insertions(+), 31 deletions(-) create mode 100644 changelog.d/9817.misc create mode 100644 tests/http/test_site.py (limited to 'tests/http') diff --git a/changelog.d/9817.misc b/changelog.d/9817.misc new file mode 100644 index 0000000000..8aa8895f05 --- /dev/null +++ b/changelog.d/9817.misc @@ -0,0 +1 @@ +Fix a long-standing bug which caused `max_upload_size` to not be correctly enforced. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 31a59bceec..936b6534b4 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -17,6 +17,9 @@ """Contains constants from the specification.""" +# the max size of a (canonical-json-encoded) event +MAX_PDU_SIZE = 65536 + # the "depth" field on events is limited to 2**63 - 1 MAX_DEPTH = 2 ** 63 - 1 diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 2113c4f370..638e01c1b2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -30,9 +30,10 @@ from twisted.internet import defer, error, reactor from twisted.protocols.tls import TLSMemoryBIOFactory import synapse +from synapse.api.constants import MAX_PDU_SIZE from synapse.app import check_bind_error from synapse.app.phone_stats_home import start_phone_stats_home -from synapse.config.server import ListenerConfig +from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory from synapse.logging.context import PreserveLoggingContext from synapse.metrics.background_process_metrics import wrap_as_background_process @@ -288,7 +289,7 @@ def refresh_certificate(hs): logger.info("Context factories updated.") -async def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]): +async def start(hs: "synapse.server.HomeServer"): """ Start a Synapse server or worker. @@ -300,7 +301,6 @@ async def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerCon Args: hs: homeserver instance - listeners: Listener configuration ('listeners' in homeserver.yaml) """ # Set up the SIGHUP machinery. if hasattr(signal, "SIGHUP"): @@ -336,7 +336,7 @@ async def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerCon synapse.logging.opentracing.init_tracer(hs) # type: ignore[attr-defined] # noqa # It is now safe to start your Synapse. - hs.start_listening(listeners) + hs.start_listening() hs.get_datastore().db_pool.start_profiling() hs.get_pusherpool().start() @@ -530,3 +530,25 @@ def sdnotify(state): # this is a bit surprising, since we don't expect to have a NOTIFY_SOCKET # unless systemd is expecting us to notify it. logger.warning("Unable to send notification to systemd: %s", e) + + +def max_request_body_size(config: HomeServerConfig) -> int: + """Get a suitable maximum size for incoming HTTP requests""" + + # Other than media uploads, the biggest request we expect to see is a fully-loaded + # /federation/v1/send request. + # + # The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are + # limited to 65536 bytes (possibly slightly more if the sender didn't use canonical + # json encoding); there is no specced limit to EDUs (see + # https://github.com/matrix-org/matrix-doc/issues/3121). + # + # in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M) + # + max_request_size = 200 * MAX_PDU_SIZE + + # if we have a media repo enabled, we may need to allow larger uploads than that + if config.media.can_load_media_repo: + max_request_size = max(max_request_size, config.media.max_upload_size) + + return max_request_size diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index eb256db749..68ae19c977 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -70,12 +70,6 @@ class AdminCmdSlavedStore( class AdminCmdServer(HomeServer): DATASTORE_CLASS = AdminCmdSlavedStore - def _listen_http(self, listener_config): - pass - - def start_listening(self, listeners): - pass - async def export_data_command(hs, args): """Export data for a user. @@ -232,7 +226,7 @@ def start(config_options): async def run(): with LoggingContext("command"): - _base.start(ss, []) + _base.start(ss) await args.func(ss, args) _base.start_worker_reactor( diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 70e07d0574..1a15ceee81 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -15,7 +15,7 @@ # limitations under the License. import logging import sys -from typing import Dict, Iterable, Optional +from typing import Dict, Optional from twisted.internet import address from twisted.web.resource import IResource @@ -32,7 +32,7 @@ from synapse.api.urls import ( SERVER_KEY_V2_PREFIX, ) from synapse.app import _base -from synapse.app._base import register_start +from synapse.app._base import max_request_body_size, register_start from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging @@ -367,6 +367,7 @@ class GenericWorkerServer(HomeServer): listener_config, root_resource, self.version_string, + max_request_body_size=max_request_body_size(self.config), reactor=self.get_reactor(), ), reactor=self.get_reactor(), @@ -374,8 +375,8 @@ class GenericWorkerServer(HomeServer): logger.info("Synapse worker now listening on port %d", port) - def start_listening(self, listeners: Iterable[ListenerConfig]): - for listener in listeners: + def start_listening(self): + for listener in self.config.worker_listeners: if listener.type == "http": self._listen_http(listener) elif listener.type == "manhole": @@ -468,7 +469,7 @@ def start(config_options): # streams. Will no-op if no streams can be written to by this worker. hs.get_replication_streamer() - register_start(_base.start, hs, config.worker_listeners) + register_start(_base.start, hs) _base.start_worker_reactor("synapse-generic-worker", config) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 140f6bcdee..8e78134bbe 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -17,7 +17,7 @@ import logging import os import sys -from typing import Iterable, Iterator +from typing import Iterator from twisted.internet import reactor from twisted.web.resource import EncodingResourceWrapper, IResource @@ -36,7 +36,13 @@ from synapse.api.urls import ( WEB_CLIENT_PREFIX, ) from synapse.app import _base -from synapse.app._base import listen_ssl, listen_tcp, quit_with_error, register_start +from synapse.app._base import ( + listen_ssl, + listen_tcp, + max_request_body_size, + quit_with_error, + register_start, +) from synapse.config._base import ConfigError from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig @@ -132,6 +138,7 @@ class SynapseHomeServer(HomeServer): listener_config, create_resource_tree(resources, root_resource), self.version_string, + max_request_body_size=max_request_body_size(self.config), reactor=self.get_reactor(), ) @@ -268,14 +275,14 @@ class SynapseHomeServer(HomeServer): return resources - def start_listening(self, listeners: Iterable[ListenerConfig]): + def start_listening(self): if self.config.redis_enabled: # If redis is enabled we connect via the replication command handler # in the same way as the workers (since we're effectively a client # rather than a server). self.get_tcp_replication().start_replication(self) - for listener in listeners: + for listener in self.config.server.listeners: if listener.type == "http": self._listening_services.extend( self._listener_http(self.config, listener) @@ -407,7 +414,7 @@ def setup(config_options): # Loading the provider metadata also ensures the provider config is valid. await oidc.load_metadata() - await _base.start(hs, config.listeners) + await _base.start(hs) hs.get_datastore().db_pool.updates.start_doing_background_updates() diff --git a/synapse/config/logger.py b/synapse/config/logger.py index b174e0df6d..813076dfe2 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -31,7 +31,6 @@ from twisted.logger import ( ) import synapse -from synapse.app import _base as appbase from synapse.logging._structured import setup_structured_logging from synapse.logging.context import LoggingContextFilter from synapse.logging.filter import MetadataFilter @@ -318,6 +317,8 @@ def setup_logging( # Perform one-time logging configuration. _setup_stdlib_logging(config, log_config_path, logBeginner=logBeginner) # Add a SIGHUP handler to reload the logging configuration, if one is available. + from synapse.app import _base as appbase + appbase.register_sighup(_reload_logging_config, log_config_path) # Log immediately so we can grep backwards. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index afc2bc8267..70c556566e 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -21,7 +21,7 @@ from signedjson.key import decode_verify_key_bytes from signedjson.sign import SignatureVerifyException, verify_signed_json from unpaddedbase64 import decode_base64 -from synapse.api.constants import EventTypes, JoinRules, Membership +from synapse.api.constants import MAX_PDU_SIZE, EventTypes, JoinRules, Membership from synapse.api.errors import AuthError, EventSizeError, SynapseError from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, @@ -205,7 +205,7 @@ def _check_size_limits(event: EventBase) -> None: too_big("type") if len(event.event_id) > 255: too_big("event_id") - if len(encode_canonical_json(event.get_pdu_json())) > 65536: + if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE: too_big("event") diff --git a/synapse/http/site.py b/synapse/http/site.py index e911ee4809..671fd3fbcc 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,7 +14,7 @@ import contextlib import logging import time -from typing import Optional, Tuple, Type, Union +from typing import Optional, Tuple, Union import attr from zope.interface import implementer @@ -50,6 +50,7 @@ class SynapseRequest(Request): * Redaction of access_token query-params in __repr__ * Logging at start and end * Metrics to record CPU, wallclock and DB time by endpoint. + * A limit to the size of request which will be accepted It also provides a method `processing`, which returns a context manager. If this method is called, the request won't be logged until the context manager is closed; @@ -60,8 +61,9 @@ class SynapseRequest(Request): logcontext: the log context for this request """ - def __init__(self, channel, *args, **kw): + def __init__(self, channel, *args, max_request_body_size=1024, **kw): Request.__init__(self, channel, *args, **kw) + self._max_request_body_size = max_request_body_size self.site = channel.site # type: SynapseSite self._channel = channel # this is used by the tests self.start_time = 0.0 @@ -98,6 +100,18 @@ class SynapseRequest(Request): self.site.site_tag, ) + def handleContentChunk(self, data): + # we should have a `content` by now. + assert self.content, "handleContentChunk() called before gotLength()" + if self.content.tell() + len(data) > self._max_request_body_size: + logger.warning( + "Aborting connection from %s because the request exceeds maximum size", + self.client, + ) + self.transport.abortConnection() + return + super().handleContentChunk(data) + @property def requester(self) -> Optional[Union[Requester, str]]: return self._requester @@ -505,6 +519,7 @@ class SynapseSite(Site): config: ListenerConfig, resource: IResource, server_version_string, + max_request_body_size: int, reactor: IReactorTime, ): """ @@ -516,6 +531,8 @@ class SynapseSite(Site): resource: The base of the resource tree to be used for serving requests on this site server_version_string: A string to present for the Server header + max_request_body_size: Maximum request body length to allow before + dropping the connection reactor: reactor to be used to manage connection timeouts """ Site.__init__(self, resource, reactor=reactor) @@ -524,9 +541,14 @@ class SynapseSite(Site): assert config.http_options is not None proxied = config.http_options.x_forwarded - self.requestFactory = ( - XForwardedForRequest if proxied else SynapseRequest - ) # type: Type[Request] + request_class = XForwardedForRequest if proxied else SynapseRequest + + def request_factory(channel, queued) -> Request: + return request_class( + channel, max_request_body_size=max_request_body_size, queued=queued + ) + + self.requestFactory = request_factory # type: ignore self.access_logger = logging.getLogger(logger_name) self.server_version_string = server_version_string.encode("ascii") diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 80f017a4dd..024a105bf2 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -51,8 +51,6 @@ class UploadResource(DirectServeJsonResource): async def _async_render_POST(self, request: SynapseRequest) -> None: requester = await self.auth.get_user_by_req(request) - # TODO: The checks here are a bit late. The content will have - # already been uploaded to a tmp file at this point content_length = request.getHeader("Content-Length") if content_length is None: raise SynapseError(msg="Request must specify a Content-Length", code=400) diff --git a/synapse/server.py b/synapse/server.py index 8c147be2b3..06570bb1ce 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -287,6 +287,14 @@ class HomeServer(metaclass=abc.ABCMeta): if self.config.run_background_tasks: self.setup_background_tasks() + def start_listening(self) -> None: + """Start the HTTP, manhole, metrics, etc listeners + + Does nothing in this base class; overridden in derived classes to start the + appropriate listeners. + """ + pass + def setup_background_tasks(self) -> None: """ Some handlers have side effects on instantiation (like registering diff --git a/tests/http/test_site.py b/tests/http/test_site.py new file mode 100644 index 0000000000..8c13b4f693 --- /dev/null +++ b/tests/http/test_site.py @@ -0,0 +1,83 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet.address import IPv6Address +from twisted.test.proto_helpers import StringTransport + +from synapse.app.homeserver import SynapseHomeServer + +from tests.unittest import HomeserverTestCase + + +class SynapseRequestTestCase(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + return self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer) + + def test_large_request(self): + """overlarge HTTP requests should be rejected""" + self.hs.start_listening() + + # find the HTTP server which is configured to listen on port 0 + (port, factory, _backlog, interface) = self.reactor.tcpServers[0] + self.assertEqual(interface, "::") + self.assertEqual(port, 0) + + # as a control case, first send a regular request. + + # complete the connection and wire it up to a fake transport + client_address = IPv6Address("TCP", "::1", "2345") + protocol = factory.buildProtocol(client_address) + transport = StringTransport() + protocol.makeConnection(transport) + + protocol.dataReceived( + b"POST / HTTP/1.1\r\n" + b"Connection: close\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + b"0\r\n" + b"\r\n" + ) + + while not transport.disconnecting: + self.reactor.advance(1) + + # we should get a 404 + self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 404 ") + + # now send an oversized request + protocol = factory.buildProtocol(client_address) + transport = StringTransport() + protocol.makeConnection(transport) + + protocol.dataReceived( + b"POST / HTTP/1.1\r\n" + b"Connection: close\r\n" + b"Transfer-Encoding: chunked\r\n" + b"\r\n" + ) + + # we deliberately send all the data in one big chunk, to ensure that + # twisted isn't buffering the data in the chunked transfer decoder. + # we start with the chunk size, in hex. (We won't actually send this much) + protocol.dataReceived(b"10000000\r\n") + sent = 0 + while not transport.disconnected: + self.assertLess(sent, 0x10000000, "connection did not drop") + protocol.dataReceived(b"\0" * 1024) + sent += 1024 + + # default max upload size is 50M, so it should drop on the next buffer after + # that. + self.assertEqual(sent, 50 * 1024 * 1024 + 1024) diff --git a/tests/replication/_base.py b/tests/replication/_base.py index dc3519ea13..624bd1b927 100644 --- a/tests/replication/_base.py +++ b/tests/replication/_base.py @@ -359,6 +359,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase): config=worker_hs.config.server.listeners[0], resource=resource, server_version_string="1", + max_request_body_size=4096, reactor=self.reactor, ) diff --git a/tests/test_server.py b/tests/test_server.py index 45400be367..407e172e41 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -202,6 +202,7 @@ class OptionsResourceTests(unittest.TestCase): parse_listener_def({"type": "http", "port": 0}), self.resource, "1.0", + max_request_body_size=1234, reactor=self.reactor, ) diff --git a/tests/unittest.py b/tests/unittest.py index 5353e75c7c..9bd02bd9c4 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -247,6 +247,7 @@ class HomeserverTestCase(TestCase): config=self.hs.config.server.listeners[0], resource=self.resource, server_version_string="1", + max_request_body_size=1234, reactor=self.reactor, ) -- cgit 1.5.1