From 8ec221710329c157a0c83d401e592a325ae02c20 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 18 Feb 2021 09:01:29 -0500 Subject: Reduce the memory usage of previewing media files. (#9421) This reduces the memory usage of previewing media files which end up larger than the `max_spider_size` by avoiding buffering content internally in treq. It also checks the `Content-Length` header in additional places instead of streaming the content to check the body length. --- synapse/http/client.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/client.py b/synapse/http/client.py index 73b414ccff..e54d9bd213 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -56,7 +56,7 @@ from twisted.web.client import ( ) from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers -from twisted.web.iweb import IAgent, IBodyProducer, IResponse +from twisted.web.iweb import UNKNOWN_LENGTH, IAgent, IBodyProducer, IResponse from synapse.api.errors import Codes, HttpResponseException, SynapseError from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri @@ -408,6 +408,9 @@ class SimpleHttpClient: agent=self.agent, data=body_producer, headers=headers, + # Avoid buffering the body in treq since we do not reuse + # response bodies. + unbuffered=True, **self._extra_treq_args, ) # type: defer.Deferred @@ -702,18 +705,6 @@ class SimpleHttpClient: resp_headers = dict(response.headers.getAllRawHeaders()) - if ( - b"Content-Length" in resp_headers - and max_size - and int(resp_headers[b"Content-Length"][0]) > max_size - ): - logger.warning("Requested URL is too large > %r bytes" % (max_size,)) - raise SynapseError( - 502, - "Requested file is too large > %r bytes" % (max_size,), - Codes.TOO_LARGE, - ) - if response.code > 299: logger.warning("Got %d when downloading %s" % (response.code, url)) raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN) @@ -780,7 +771,9 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): # in the meantime. if self.max_size is not None and self.length >= self.max_size: self.deferred.errback(BodyExceededMaxSize()) - self.transport.loseConnection() + # Close the connection (forcefully) since all the data will get + # discarded anyway. + self.transport.abortConnection() def connectionLost(self, reason: Failure) -> None: # If the maximum size was already exceeded, there's nothing to do. @@ -814,6 +807,11 @@ def read_body_with_max_size( Returns: A Deferred which resolves to the length of the read body. """ + # If the Content-Length header gives a size larger than the maximum allowed + # size, do not bother downloading the body. + if max_size is not None and response.length != UNKNOWN_LENGTH: + if response.length > max_size: + return defer.fail(BodyExceededMaxSize()) d = defer.Deferred() response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size)) -- cgit 1.4.1