diff --git a/synapse/http/client.py b/synapse/http/client.py
index 743a7ffcb1..d617055617 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -20,6 +20,7 @@ from typing import (
TYPE_CHECKING,
Any,
BinaryIO,
+ Callable,
Dict,
Iterable,
List,
@@ -693,12 +694,18 @@ class SimpleHttpClient:
output_stream: BinaryIO,
max_size: Optional[int] = None,
headers: Optional[RawHeaders] = None,
+ is_allowed_content_type: Optional[Callable[[str], bool]] = None,
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
"""GETs a file from a given URL
Args:
url: The URL to GET
output_stream: File to write the response body to.
headers: A map from header name to a list of values for that header
+ is_allowed_content_type: A predicate to determine whether the
+ content type of the file we're downloading is allowed. If set and
+ it evaluates to False when called with the content type, the
+ request will be terminated before completing the download by
+ raising SynapseError.
Returns:
A tuple of the file length, dict of the response
headers, absolute URI of the response and HTTP response code.
@@ -726,6 +733,17 @@ class SimpleHttpClient:
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
)
+ if is_allowed_content_type and b"Content-Type" in resp_headers:
+ content_type = resp_headers[b"Content-Type"][0].decode("ascii")
+ if not is_allowed_content_type(content_type):
+ raise SynapseError(
+ HTTPStatus.BAD_GATEWAY,
+ (
+ "Requested file's content type not allowed for this operation: %s"
+ % content_type
+ ),
+ )
+
# TODO: if our Content-Type is HTML or something, just read the first
# N bytes into RAM rather than saving it all to disk only to read it
# straight back in again
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index efd84ced8f..8d3d1e54dc 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -403,6 +403,7 @@ class PreviewUrlResource(DirectServeJsonResource):
output_stream=output_stream,
max_size=self.max_spider_size,
headers={"Accept-Language": self.url_preview_accept_language},
+ is_allowed_content_type=_is_previewable,
)
except SynapseError:
# Pass SynapseErrors through directly, so that the servlet
@@ -761,3 +762,10 @@ def _is_html(content_type: str) -> bool:
def _is_json(content_type: str) -> bool:
return content_type.lower().startswith("application/json")
+
+
+def _is_previewable(content_type: str) -> bool:
+ """Returns True if we will perform URL preview for the given content type, i.e.
+ if it is accepted by _is_html, _is_media or _is_json, and False otherwise."""
+
+ return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
|