summary refs log tree commit diff
diff options
context:
space:
mode:
authorShay <shaysquared@gmail.com>2021-11-01 10:26:02 -0700
committerGitHub <noreply@github.com>2021-11-01 10:26:02 -0700
commitf5c6a80886ac00482aaffa8e8ce3d98b31eab661 (patch)
treef6fc7ab3cc185fa37e145eea1c9b23089c99b280
parentAdd `use_float=true` to ijson calls in Synapse (#11217) (diff)
downloadsynapse-f5c6a80886ac00482aaffa8e8ce3d98b31eab661.tar.xz
Handle missing Content-Type header when accessing remote media (#11200)
* add code to handle missing content-type header and a test to verify that it works

* add handling for missing content-type in the /upload endpoint as well

* slightly refactor test code to put private method in approriate place

* handle possible null value for content-type when pulling from the local db

* add changelog

* refactor test and add code to handle missing content-type in cached remote media

* requested changes

* Update changelog.d/11200.bugfix

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
-rw-r--r--changelog.d/11200.bugfix1
-rw-r--r--synapse/rest/media/v1/media_repository.py12
-rw-r--r--synapse/rest/media/v1/upload_resource.py2
-rw-r--r--tests/rest/media/v1/test_media_storage.py18
4 files changed, 29 insertions, 4 deletions
diff --git a/changelog.d/11200.bugfix b/changelog.d/11200.bugfix
new file mode 100644
index 0000000000..c855081986
--- /dev/null
+++ b/changelog.d/11200.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug wherein a missing `Content-Type` header when downloading remote media would cause Synapse to throw an error.
\ No newline at end of file
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index abd88a2d4f..244ba261bb 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -215,6 +215,8 @@ class MediaRepository:
         self.mark_recently_accessed(None, media_id)
 
         media_type = media_info["media_type"]
+        if not media_type:
+            media_type = "application/octet-stream"
         media_length = media_info["media_length"]
         upload_name = name if name else media_info["upload_name"]
         url_cache = media_info["url_cache"]
@@ -333,6 +335,9 @@ class MediaRepository:
                 logger.info("Media is quarantined")
                 raise NotFoundError()
 
+            if not media_info["media_type"]:
+                media_info["media_type"] = "application/octet-stream"
+
             responder = await self.media_storage.fetch_media(file_info)
             if responder:
                 return responder, media_info
@@ -354,6 +359,8 @@ class MediaRepository:
                 raise e
 
         file_id = media_info["filesystem_id"]
+        if not media_info["media_type"]:
+            media_info["media_type"] = "application/octet-stream"
         file_info = FileInfo(server_name, file_id)
 
         # We generate thumbnails even if another process downloaded the media
@@ -445,7 +452,10 @@ class MediaRepository:
 
             await finish()
 
-            media_type = headers[b"Content-Type"][0].decode("ascii")
+            if b"Content-Type" in headers:
+                media_type = headers[b"Content-Type"][0].decode("ascii")
+            else:
+                media_type = "application/octet-stream"
             upload_name = get_filename_from_headers(headers)
             time_now_ms = self.clock.time_msec()
 
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 7dcb1428e4..8162094cf6 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -80,7 +80,7 @@ class UploadResource(DirectServeJsonResource):
             assert content_type_headers  # for mypy
             media_type = content_type_headers[0].decode("ascii")
         else:
-            raise SynapseError(msg="Upload request missing 'Content-Type'", code=400)
+            media_type = "application/octet-stream"
 
         # if headers.hasHeader(b"Content-Disposition"):
         #     disposition = headers.getRawHeaders(b"Content-Disposition")[0]
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 4ae00755c9..4cf1ed5ddf 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -248,7 +248,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
 
         self.media_id = "example.com/12345"
 
-    def _req(self, content_disposition):
+    def _req(self, content_disposition, include_content_type=True):
 
         channel = make_request(
             self.reactor,
@@ -271,8 +271,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
 
         headers = {
             b"Content-Length": [b"%d" % (len(self.test_image.data))],
-            b"Content-Type": [self.test_image.content_type],
         }
+
+        if include_content_type:
+            headers[b"Content-Type"] = [self.test_image.content_type]
+
         if content_disposition:
             headers[b"Content-Disposition"] = [content_disposition]
 
@@ -285,6 +288,17 @@ class MediaRepoTests(unittest.HomeserverTestCase):
 
         return channel
 
+    def test_handle_missing_content_type(self):
+        channel = self._req(
+            b"inline; filename=out" + self.test_image.extension,
+            include_content_type=False,
+        )
+        headers = channel.headers
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
+        )
+
     def test_disposition_filename_ascii(self):
         """
         If the filename is filename=<ascii> then Synapse will decode it as an