summary refs log tree commit diff
diff options
context:
space:
mode:
author The Stranjer <791672+TheStranjer@users.noreply.github.com> 2020-03-17 09:29:09 -0400
committer GitHub <noreply@github.com> 2020-03-17 13:29:09 +0000
commit 5e477c1debfd932ced56ec755204d6ead4ce8ec8 (patch)
tree 5d65bb8be10ca3ebbd0f8b3934445af6d0a9e768
parent Remove unused federation endpoint (`query_auth`) (#7026) (diff)
download synapse-5e477c1debfd932ced56ec755204d6ead4ce8ec8.tar.xz
Set charset to utf-8 when adding headers for certain text content types (#7044)
Fixes #7043
-rw-r--r-- changelog.d/7044.bugfix 1
-rw-r--r-- synapse/rest/media/v1/_base.py 25
2 files changed, 25 insertions, 1 deletions
diff --git a/changelog.d/7044.bugfix b/changelog.d/7044.bugfix
new file mode 100644
index 0000000000..790088ddb4
--- /dev/null
+++ b/changelog.d/7044.bugfix
@@ -0,0 +1 @@
+Fix a bug that renders UTF-8 text files incorrectly when loaded from media. Contributed by @TheStranjer.
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index ba28dd089d..503f2bed98 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -30,6 +30,22 @@ from synapse.util.stringutils import is_ascii
 
 logger = logging.getLogger(__name__)
 
+# list all text content types that will have the charset default to UTF-8 when
+# none is given
+TEXT_CONTENT_TYPES = [
+    "text/css",
+    "text/csv",
+    "text/html",
+    "text/calendar",
+    "text/plain",
+    "text/javascript",
+    "application/json",
+    "application/ld+json",
+    "application/rtf",
+    "image/svg+xml",
+    "text/xml",
+]
+
 
 def parse_media_id(request):
     try:
@@ -96,7 +112,14 @@ def add_file_headers(request, media_type, file_size, upload_name):
     def _quote(x):
         return urllib.parse.quote(x.encode("utf-8"))
 
-    request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
+    # Default to a UTF-8 charset for text content types.
+    # ex, uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16'
+    if media_type.lower() in TEXT_CONTENT_TYPES:
+        content_type = media_type + "; charset=UTF-8"
+    else:
+        content_type = media_type
+
+    request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
     if upload_name:
         # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
         #