summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2015-08-27 10:03:58 +0100
committer Erik Johnston <erik@matrix.org> 2015-08-27 10:03:58 +0100
commit e330c802e4d653e2ccae83fbcd4c2feaaeffdfe1 (patch)
tree 96518f02f3b845794c270da3c026d0d157dd52e2
parent Merge pull request #252 from matrix-org/erikj/typing_loop (diff)
parent Give a sensible error message if the filename is invalid UTF-8 (diff)
download synapse-e330c802e4d653e2ccae83fbcd4c2feaaeffdfe1.tar.xz
Merge pull request #259 from matrix-org/markjh/unicode_content_disposition
Support unicode attachment filenames
-rw-r--r-- synapse/rest/media/v1/base_resource.py 41
-rw-r--r-- synapse/rest/media/v1/upload_resource.py 12
-rw-r--r-- synapse/util/stringutils.py 2
3 files changed, 43 insertions, 12 deletions
diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py
index 4e21527c3d..60751da1d1 100644
--- a/synapse/rest/media/v1/base_resource.py
+++ b/synapse/rest/media/v1/base_resource.py
@@ -33,6 +33,8 @@ import os
 
 import cgi
 import logging
+import urllib
+import urlparse
 
 logger = logging.getLogger(__name__)
 
@@ -42,10 +44,13 @@ def parse_media_id(request):
         # This allows users to append e.g. /test.png to the URL. Useful for
         # clients that parse the URL to see content type.
         server_name, media_id = request.postpath[:2]
-        if len(request.postpath) > 2 and is_ascii(request.postpath[-1]):
-            return server_name, media_id, request.postpath[-1]
-        else:
-            return server_name, media_id, None
+        file_name = None
+        if len(request.postpath) > 2:
+            try:
+                file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8")
+            except UnicodeDecodeError:
+                pass
+        return server_name, media_id, file_name
     except:
         raise SynapseError(
             404,
@@ -143,6 +148,16 @@ class BaseMediaResource(Resource):
                 upload_name = params.get("filename", None)
                 if upload_name and not is_ascii(upload_name):
                     upload_name = None
+                else:
+                    upload_name_utf8 = params.get("filename*", None)
+                    if upload_name_utf8.lower().startswith("utf-8''"):
+                        upload_name = upload_name_utf8[7:]
+                if upload_name:
+                    upload_name = urlparse.unquote(upload_name)
+                    try:
+                        upload_name = upload_name.decode("utf-8")
+                    except UnicodeDecodeError:
+                        upload_name = None
             else:
                 upload_name = None
 
@@ -181,10 +196,20 @@ class BaseMediaResource(Resource):
         if os.path.isfile(file_path):
             request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
             if upload_name:
-                request.setHeader(
-                    b"Content-Disposition",
-                    b"inline; filename=%s" % (upload_name.encode("utf-8"),),
-                )
+                if is_ascii(upload_name):
+                    request.setHeader(
+                        b"Content-Disposition",
+                        b"inline; filename=%s" % (
+                            urllib.quote(upload_name.encode("utf-8")),
+                        ),
+                    )
+                else:
+                    request.setHeader(
+                        b"Content-Disposition",
+                        b"inline; filename*=utf-8''%s" % (
+                            urllib.quote(upload_name.encode("utf-8")),
+                        ),
+                    )
 
             # cache for at least a day.
             # XXX: we might want to turn this off for data we don't want to
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index cdd1d44e07..031bfa80f8 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -15,7 +15,7 @@
 
 from synapse.http.server import respond_with_json, request_handler
 
-from synapse.util.stringutils import random_string, is_ascii
+from synapse.util.stringutils import random_string
 from synapse.api.errors import SynapseError
 
 from twisted.web.server import NOT_DONE_YET
@@ -86,9 +86,13 @@ class UploadResource(BaseMediaResource):
 
         upload_name = request.args.get("filename", None)
         if upload_name:
-            upload_name = upload_name[0]
-            if upload_name and not is_ascii(upload_name):
-                raise SynapseError(400, "filename must be ascii")
+            try:
+                upload_name = upload_name[0].decode('UTF-8')
+            except UnicodeDecodeError:
+                raise SynapseError(
+                    msg="Invalid UTF-8 filename parameter: %r" % (upload_name),
+                    code=400,
+                )
 
         headers = request.requestHeaders
 
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 7a1e96af37..f3a36340e4 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -38,6 +38,8 @@ def random_string_with_symbols(length):
 def is_ascii(s):
     try:
         s.encode("ascii")
+    except UnicodeEncodeError:
+        return False
     except UnicodeDecodeError:
         return False
     else: