diff --git a/synapse/http/client.py b/synapse/http/client.py
index 743a7ffcb1..d617055617 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -20,6 +20,7 @@ from typing import (
TYPE_CHECKING,
Any,
BinaryIO,
+ Callable,
Dict,
Iterable,
List,
@@ -693,12 +694,18 @@ class SimpleHttpClient:
output_stream: BinaryIO,
max_size: Optional[int] = None,
headers: Optional[RawHeaders] = None,
+ is_allowed_content_type: Optional[Callable[[str], bool]] = None,
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
"""GETs a file from a given URL
Args:
url: The URL to GET
output_stream: File to write the response body to.
headers: A map from header name to a list of values for that header
+ is_allowed_content_type: A predicate to determine whether the
+ content type of the file we're downloading is allowed. If set and
+ it evaluates to False when called with the content type, the
+ request will be terminated before completing the download by
+ raising SynapseError.
Returns:
A tuple of the file length, dict of the response
headers, absolute URI of the response and HTTP response code.
@@ -726,6 +733,17 @@ class SimpleHttpClient:
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
)
+ if is_allowed_content_type and b"Content-Type" in resp_headers:
+ content_type = resp_headers[b"Content-Type"][0].decode("ascii")
+ if not is_allowed_content_type(content_type):
+ raise SynapseError(
+ HTTPStatus.BAD_GATEWAY,
+ (
+ "Requested file's content type not allowed for this operation: %s"
+ % content_type
+ ),
+ )
+
# TODO: if our Content-Type is HTML or something, just read the first
# N bytes into RAM rather than saving it all to disk only to read it
# straight back in again
|