summary refs log tree commit diff
path: root/synapse/rest/media/v1/_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/rest/media/v1/_base.py')
-rw-r--r--synapse/rest/media/v1/_base.py139
1 files changed, 116 insertions, 23 deletions
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py

index d16a30acd8..953d89bd82 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py
@@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -99,10 +100,29 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - if is_ascii(upload_name): - disposition = "inline; filename=%s" % (_quote(upload_name),) + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape charater. Note that %-encoding is *not* permitted. + # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = 'inline; filename=%s' % (upload_name, ) else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name), ) request.setHeader(b"Content-Disposition", disposition.encode('ascii')) @@ -115,6 +135,35 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Length", b"%d" % (file_size,)) +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = set(( + "(", ")", "<", ">", "@", ",", ";", ":", "\\", '"', + "/", "[", "]", "?", "=", "{", "}", +)) + + +def _can_encode_filename_as_token(x): + for c in x: + # from RFC2616: + # + # token = 1*<any CHAR except CTLs or separators> + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = <any US-ASCII character (octets 0 - 127)> + # + # CTL = <any US-ASCII control character + # (octets 0 - 31) and DEL (127)> + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + @defer.inlineCallbacks def respond_with_responder(request, responder, media_type, file_size, upload_name=None): """Responds to the request with given responder. If responder is None then @@ -213,8 +262,7 @@ def get_filename_from_headers(headers): Content-Disposition HTTP header. Args: - headers (twisted.web.http_headers.Headers): The HTTP - request headers. + headers (dict[bytes, list[bytes]]): The HTTP request headers. Returns: A Unicode string of the filename, or None. @@ -225,23 +273,12 @@ def get_filename_from_headers(headers): if not content_disposition[0]: return - # dict of unicode: bytes, corresponding to the key value sections of the - # Content-Disposition header. - params = {} - parts = content_disposition[0].split(b";") - for i in parts: - # Split into key-value pairs, if able - # We don't care about things like `inline`, so throw it out - if b"=" not in i: - continue - - key, value = i.strip().split(b"=") - params[key.decode('ascii')] = value + _, params = _parse_header(content_disposition[0]) upload_name = None # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) + upload_name_utf8 = params.get(b"filename*", None) if upload_name_utf8: if upload_name_utf8.lower().startswith(b"utf-8''"): upload_name_utf8 = upload_name_utf8[7:] @@ -267,12 +304,68 @@ def get_filename_from_headers(headers): # If there isn't check for an ascii name. if not upload_name: - upload_name_ascii = params.get("filename", None) + upload_name_ascii = params.get(b"filename", None) if upload_name_ascii and is_ascii(upload_name_ascii): - # Make sure there's no %-quoted bytes. If there is, reject it as - # non-valid ASCII. - if b"%" not in upload_name_ascii: - upload_name = upload_name_ascii.decode('ascii') + upload_name = upload_name_ascii.decode('ascii') # This may be None here, indicating we did not find a matching name. return upload_name + + +def _parse_header(line): + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + line (bytes): header to be parsed + + Returns: + Tuple[bytes, dict[bytes, bytes]]: + the main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b';' + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b'=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s): + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s (bytes): header to be parsed + + Returns: + Iterable[bytes]: the split input + """ + while s[:1] == b';': + s = s[1:] + + # look for the next ; + end = s.find(b';') + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b';', end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:]