summary refs log tree commit diff
path: root/synapse/http
diff options
context:
space:
mode:
authorDenis Kasak <dkasak@termina.org.uk>2022-02-10 15:43:01 +0000
committerGitHub <noreply@github.com>2022-02-10 15:43:01 +0000
commit337f38cac38bc57bc6a3cc8b319e3079c60c4549 (patch)
tree15d58bea7ff37ac3d515c93984fc49417d670941 /synapse/http
parentFix broken link in README to admin API. (#11955) (diff)
downloadsynapse-337f38cac38bc57bc6a3cc8b319e3079c60c4549.tar.xz
Implement a content type allow list for URL previews (#11936)
This implements an allow list for content types for which Synapse will attempt URL preview. If a URL resolves to a resource with a content type which isn't in the list, the download will terminate immediately.

This makes sense given that Synapse would never successfully generate a URL preview for such files in the first place, and helps prevent issues with streaming media servers, such as #8302.

Signed-off-by: Denis Kasak dkasak@termina.org.uk
Diffstat (limited to 'synapse/http')
-rw-r--r--synapse/http/client.py18
1 files changed, 18 insertions, 0 deletions
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 743a7ffcb1..d617055617 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -20,6 +20,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     BinaryIO,
+    Callable,
     Dict,
     Iterable,
     List,
@@ -693,12 +694,18 @@ class SimpleHttpClient:
         output_stream: BinaryIO,
         max_size: Optional[int] = None,
         headers: Optional[RawHeaders] = None,
+        is_allowed_content_type: Optional[Callable[[str], bool]] = None,
     ) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
         """GETs a file from a given URL
         Args:
             url: The URL to GET
             output_stream: File to write the response body to.
             headers: A map from header name to a list of values for that header
+            is_allowed_content_type: A predicate to determine whether the
+                content type of the file we're downloading is allowed. If set and
+                it evaluates to False when called with the content type, the
+                request will be terminated before completing the download by
+                raising SynapseError.
         Returns:
             A tuple of the file length, dict of the response
             headers, absolute URI of the response and HTTP response code.
@@ -726,6 +733,17 @@ class SimpleHttpClient:
                 HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
             )
 
+        if is_allowed_content_type and b"Content-Type" in resp_headers:
+            content_type = resp_headers[b"Content-Type"][0].decode("ascii")
+            if not is_allowed_content_type(content_type):
+                raise SynapseError(
+                    HTTPStatus.BAD_GATEWAY,
+                    (
+                        "Requested file's content type not allowed for this operation: %s"
+                        % content_type
+                    ),
+                )
+
         # TODO: if our Content-Type is HTML or something, just read the first
         # N bytes into RAM rather than saving it all to disk only to read it
         # straight back in again