diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index d16a30acd8..953d89bd82 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 New Vector Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -99,10 +100,29 @@ def add_file_headers(request, media_type, file_size, upload_name):
request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
if upload_name:
- if is_ascii(upload_name):
- disposition = "inline; filename=%s" % (_quote(upload_name),)
+ # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
+ #
+ # `filename` is defined to be a `value`, which is defined by RFC2616
+ # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token`
+ # is (essentially) a single US-ASCII word, and a `quoted-string` is a
+ # US-ASCII string surrounded by double-quotes, using backslash as an
+ # escape charater. Note that %-encoding is *not* permitted.
+ #
+ # `filename*` is defined to be an `ext-value`, which is defined in
+ # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`,
+ # where `value-chars` is essentially a %-encoded string in the given charset.
+ #
+ # [1]: https://tools.ietf.org/html/rfc6266#section-4.1
+ # [2]: https://tools.ietf.org/html/rfc2616#section-3.6
+ # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1
+
+ # We avoid the quoted-string version of `filename`, because (a) synapse didn't
+ # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we
+ # may as well just do the filename* version.
+ if _can_encode_filename_as_token(upload_name):
+ disposition = 'inline; filename=%s' % (upload_name, )
else:
- disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
+ disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name), )
request.setHeader(b"Content-Disposition", disposition.encode('ascii'))
@@ -115,6 +135,35 @@ def add_file_headers(request, media_type, file_size, upload_name):
request.setHeader(b"Content-Length", b"%d" % (file_size,))
+# separators as defined in RFC2616. SP and HT are handled separately.
+# see _can_encode_filename_as_token.
+_FILENAME_SEPARATOR_CHARS = set((
+ "(", ")", "<", ">", "@", ",", ";", ":", "\\", '"',
+ "/", "[", "]", "?", "=", "{", "}",
+))
+
+
+def _can_encode_filename_as_token(x):
+ for c in x:
+ # from RFC2616:
+ #
+ # token = 1*<any CHAR except CTLs or separators>
+ #
+ # separators = "(" | ")" | "<" | ">" | "@"
+ # | "," | ";" | ":" | "\" | <">
+ # | "/" | "[" | "]" | "?" | "="
+ # | "{" | "}" | SP | HT
+ #
+ # CHAR = <any US-ASCII character (octets 0 - 127)>
+ #
+ # CTL = <any US-ASCII control character
+ # (octets 0 - 31) and DEL (127)>
+ #
+ if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS:
+ return False
+ return True
+
+
@defer.inlineCallbacks
def respond_with_responder(request, responder, media_type, file_size, upload_name=None):
"""Responds to the request with given responder. If responder is None then
@@ -213,8 +262,7 @@ def get_filename_from_headers(headers):
Content-Disposition HTTP header.
Args:
- headers (twisted.web.http_headers.Headers): The HTTP
- request headers.
+ headers (dict[bytes, list[bytes]]): The HTTP request headers.
Returns:
A Unicode string of the filename, or None.
@@ -225,23 +273,12 @@ def get_filename_from_headers(headers):
if not content_disposition[0]:
return
- # dict of unicode: bytes, corresponding to the key value sections of the
- # Content-Disposition header.
- params = {}
- parts = content_disposition[0].split(b";")
- for i in parts:
- # Split into key-value pairs, if able
- # We don't care about things like `inline`, so throw it out
- if b"=" not in i:
- continue
-
- key, value = i.strip().split(b"=")
- params[key.decode('ascii')] = value
+ _, params = _parse_header(content_disposition[0])
upload_name = None
# First check if there is a valid UTF-8 filename
- upload_name_utf8 = params.get("filename*", None)
+ upload_name_utf8 = params.get(b"filename*", None)
if upload_name_utf8:
if upload_name_utf8.lower().startswith(b"utf-8''"):
upload_name_utf8 = upload_name_utf8[7:]
@@ -267,12 +304,68 @@ def get_filename_from_headers(headers):
# If there isn't check for an ascii name.
if not upload_name:
- upload_name_ascii = params.get("filename", None)
+ upload_name_ascii = params.get(b"filename", None)
if upload_name_ascii and is_ascii(upload_name_ascii):
- # Make sure there's no %-quoted bytes. If there is, reject it as
- # non-valid ASCII.
- if b"%" not in upload_name_ascii:
- upload_name = upload_name_ascii.decode('ascii')
+ upload_name = upload_name_ascii.decode('ascii')
# This may be None here, indicating we did not find a matching name.
return upload_name
+
+
+def _parse_header(line):
+ """Parse a Content-type like header.
+
+ Cargo-culted from `cgi`, but works on bytes rather than strings.
+
+ Args:
+ line (bytes): header to be parsed
+
+ Returns:
+ Tuple[bytes, dict[bytes, bytes]]:
+ the main content-type, followed by the parameter dictionary
+ """
+ parts = _parseparam(b';' + line)
+ key = next(parts)
+ pdict = {}
+ for p in parts:
+ i = p.find(b'=')
+ if i >= 0:
+ name = p[:i].strip().lower()
+ value = p[i + 1:].strip()
+
+ # strip double-quotes
+ if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
+ value = value[1:-1]
+ value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
+ pdict[name] = value
+
+ return key, pdict
+
+
+def _parseparam(s):
+ """Generator which splits the input on ;, respecting double-quoted sequences
+
+ Cargo-culted from `cgi`, but works on bytes rather than strings.
+
+ Args:
+ s (bytes): header to be parsed
+
+ Returns:
+ Iterable[bytes]: the split input
+ """
+ while s[:1] == b';':
+ s = s[1:]
+
+ # look for the next ;
+ end = s.find(b';')
+
+ # if there is an odd number of " marks between here and the next ;, skip to the
+ # next ; instead
+ while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
+ end = s.find(b';', end + 1)
+
+ if end < 0:
+ end = len(s)
+ f = s[:end]
+ yield f.strip()
+ s = s[end:]
|