From e85c7873dc885c18705c2a77d8487517379d64fb Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 16:26:37 +0100 Subject: Allow non-ascii filenames for attachments --- synapse/rest/media/v1/base_resource.py | 17 +++++++++++++---- synapse/rest/media/v1/upload_resource.py | 6 ++---- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 4e21527c3d..24297b20f1 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -33,6 +33,7 @@ import os import cgi import logging +import urllib logger = logging.getLogger(__name__) @@ -181,10 +182,18 @@ class BaseMediaResource(Resource): if os.path.isfile(file_path): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), - ) + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % (upload_name.encode("utf-8"),), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index cdd1d44e07..21d8fb9ce9 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -15,7 +15,7 @@ from synapse.http.server import respond_with_json, request_handler -from synapse.util.stringutils import random_string, is_ascii +from synapse.util.stringutils import random_string from synapse.api.errors import SynapseError from twisted.web.server import NOT_DONE_YET @@ -86,9 +86,7 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0] - if upload_name and not is_ascii(upload_name): - raise SynapseError(400, "filename must be ascii") + upload_name = upload_name[0].decode('UTF-8') headers = request.requestHeaders -- cgit 1.4.1 From 5a9e0c36824ffc8bb365cdb30a273d427f997bd9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:08:47 +0100 Subject: Handle unicode filenames given when downloading or received over federation --- synapse/rest/media/v1/base_resource.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 24297b20f1..ad2c9d4e74 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -34,6 +34,7 @@ import os import cgi import logging import urllib +import urlparse logger = logging.getLogger(__name__) @@ -43,10 +44,13 @@ def parse_media_id(request): # This allows users to append e.g. /test.png to the URL. Useful for # clients that parse the URL to see content type. server_name, media_id = request.postpath[:2] - if len(request.postpath) > 2 and is_ascii(request.postpath[-1]): - return server_name, media_id, request.postpath[-1] - else: - return server_name, media_id, None + file_name = None + if len(request.postpath) > 2: + try: + file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") + except UnicodeDecodeError: + pass + return server_name, media_id, file_name except: raise SynapseError( 404, @@ -144,6 +148,16 @@ class BaseMediaResource(Resource): upload_name = params.get("filename", None) if upload_name and not is_ascii(upload_name): upload_name = None + else: + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + if upload_name: + upload_name = urlparse.unquote(upload_name) + try: + upload_name = upload_name.decode("utf-8"); + except UnicodeDecodeError: + upload_name = None else: upload_name = None @@ -185,7 +199,9 @@ class BaseMediaResource(Resource): if is_ascii(upload_name): request.setHeader( b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), ) else: request.setHeader( -- cgit 1.4.1 From c9cb354b58972b9e0e91cd6d6398e9bb02f7b967 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:27:23 +0100 Subject: Give a sensible error message if the filename is invalid UTF-8 --- synapse/rest/media/v1/base_resource.py | 2 +- synapse/rest/media/v1/upload_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index ad2c9d4e74..60751da1d1 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -155,7 +155,7 @@ class BaseMediaResource(Resource): if upload_name: upload_name = urlparse.unquote(upload_name) try: - upload_name = upload_name.decode("utf-8"); + upload_name = upload_name.decode("utf-8") except UnicodeDecodeError: upload_name = None else: diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 21d8fb9ce9..031bfa80f8 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -86,7 +86,13 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0].decode('UTF-8') + try: + upload_name = upload_name[0].decode('UTF-8') + except UnicodeDecodeError: + raise SynapseError( + msg="Invalid UTF-8 filename parameter: %r" % (upload_name), + code=400, + ) headers = request.requestHeaders -- cgit 1.4.1 From f02532baadc4fbd95bec6cb7f45019d2c46c1324 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:37:02 +0100 Subject: Check for None --- synapse/rest/media/v1/base_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 60751da1d1..b0e997b478 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -150,7 +150,7 @@ class BaseMediaResource(Resource): upload_name = None else: upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8.lower().startswith("utf-8''"): + if upload_name and upload_name_utf8.lower().startswith("utf-8''"): upload_name = upload_name_utf8[7:] if upload_name: upload_name = urlparse.unquote(upload_name) -- cgit 1.4.1 From 53c2eed862c2c2fc90ee4b51bed624be5fcec9f3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:38:22 +0100 Subject: None check the correct variable --- synapse/rest/media/v1/base_resource.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index b0e997b478..610cb3ef82 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -150,8 +150,9 @@ class BaseMediaResource(Resource): upload_name = None else: upload_name_utf8 = params.get("filename*", None) - if upload_name and upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] if upload_name: upload_name = urlparse.unquote(upload_name) try: -- cgit 1.4.1 From 66ec6cf9b892cd22dd75d9b66f10b120ebe233ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:48:58 +0100 Subject: Check for an internationalised filename first --- synapse/rest/media/v1/base_resource.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 610cb3ef82..03ebbbefe9 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -145,14 +145,20 @@ class BaseMediaResource(Resource): content_disposition = headers.get("Content-Disposition", None) if content_disposition: _, params = cgi.parse_header(content_disposition[0],) - upload_name = params.get("filename", None) - if upload_name and not is_ascii(upload_name): - upload_name = None - else: - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name = params.get("filename", None) + if upload_name and not is_ascii(upload_name): + upload_name = None + if upload_name: upload_name = urlparse.unquote(upload_name) try: -- cgit 1.4.1 From ddf4d2bd981cbc4079b2bff0a2bba500b1aad208 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 27 Aug 2015 10:50:49 +0100 Subject: Consistency --- synapse/rest/media/v1/base_resource.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/rest/media') diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 03ebbbefe9..b2aeb8c909 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -155,9 +155,9 @@ class BaseMediaResource(Resource): # If there isn't check for an ascii name. if not upload_name: - upload_name = params.get("filename", None) - if upload_name and not is_ascii(upload_name): - upload_name = None + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii if upload_name: upload_name = urlparse.unquote(upload_name) -- cgit 1.4.1