From e0581ccf0e7ffc4855ff57b63cd6ff5a1a6507d2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 15 Nov 2018 15:55:58 -0600 Subject: Fix Content-Disposition in media repository (#4176) --- synapse/rest/media/v1/_base.py | 122 ++++++++++++++++++++------ synapse/rest/media/v1/media_repository.py | 48 +++------- synapse/rest/media/v1/preview_url_resource.py | 30 +------ 3 files changed, 110 insertions(+), 90 deletions(-) (limited to 'synapse/rest') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 76e479afa3..efe42a429d 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os +from six import PY3 from six.moves import urllib from twisted.internet import defer @@ -48,26 +49,21 @@ def parse_media_id(request): return server_name, media_id, file_name except Exception: raise SynapseError( - 404, - "Invalid media id token %r" % (request.postpath,), - Codes.UNKNOWN, + 404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN ) def respond_404(request): respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True + request, + 404, + cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND), + send_cors=True, ) @defer.inlineCallbacks -def respond_with_file(request, media_type, file_path, - file_size=None, upload_name=None): +def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None): logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): @@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name): file_size (int): Size in bytes of the media, if known. upload_name (str): The name of the requested file, if any. """ + def _quote(x): return urllib.parse.quote(x.encode("utf-8")) request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: if is_ascii(upload_name): - disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename=%s" % (_quote(upload_name),) else: - disposition = ( - "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii") + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) - request.setHeader(b"Content-Disposition", disposition) + request.setHeader(b"Content-Disposition", disposition.encode('ascii')) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to # recommend caching as it's sensitive or private - or at least # select private. don't bother setting Expires as all our # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") + request.setHeader(b"Content-Length", b"%d" % (file_size,)) @defer.inlineCallbacks @@ -153,6 +144,7 @@ class Responder(object): Responder is a context manager which *must* be used, so that any resources held can be cleaned up. """ + def write_to_consumer(self, consumer): """Stream response into consumer @@ -186,9 +178,18 @@ class FileInfo(object): thumbnail_method (str) thumbnail_type (str): Content type of thumbnail, e.g. image/png """ - def __init__(self, server_name, file_id, url_cache=False, - thumbnail=False, thumbnail_width=None, thumbnail_height=None, - thumbnail_method=None, thumbnail_type=None): + + def __init__( + self, + server_name, + file_id, + url_cache=False, + thumbnail=False, + thumbnail_width=None, + thumbnail_height=None, + thumbnail_method=None, + thumbnail_type=None, + ): self.server_name = server_name self.file_id = file_id self.url_cache = url_cache @@ -197,3 +198,74 @@ class FileInfo(object): self.thumbnail_height = thumbnail_height self.thumbnail_method = thumbnail_method self.thumbnail_type = thumbnail_type + + +def get_filename_from_headers(headers): + """ + Get the filename of the downloaded file by inspecting the + Content-Disposition HTTP header. + + Args: + headers (twisted.web.http_headers.Headers): The HTTP + request headers. + + Returns: + A Unicode string of the filename, or None. + """ + content_disposition = headers.get(b"Content-Disposition", [b'']) + + # No header, bail out. + if not content_disposition[0]: + return + + # dict of unicode: bytes, corresponding to the key value sections of the + # Content-Disposition header. + params = {} + parts = content_disposition[0].split(b";") + for i in parts: + # Split into key-value pairs, if able + # We don't care about things like `inline`, so throw it out + if b"=" not in i: + continue + + key, value = i.strip().split(b"=") + params[key.decode('ascii')] = value + + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith(b"utf-8''"): + upload_name_utf8 = upload_name_utf8[7:] + # We have a filename*= section. This MUST be ASCII, and any UTF-8 + # bytes are %-quoted. + if PY3: + try: + # Once it is decoded, we can then unquote the %-encoded + # parts strictly into a unicode string. + upload_name = urllib.parse.unquote( + upload_name_utf8.decode('ascii'), errors="strict" + ) + except UnicodeDecodeError: + # Incorrect UTF-8. + pass + else: + # On Python 2, we first unquote the %-encoded parts and then + # decode it strictly using UTF-8. + try: + upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8') + except UnicodeDecodeError: + pass + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + # Make sure there's no %-quoted bytes. If there is, reject it as + # non-valid ASCII. + if b"%" not in upload_name_ascii: + upload_name = upload_name_ascii.decode('ascii') + + # This may be None here, indicating we did not find a matching name. + return upload_name diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d6c5f07af0..e117836e9a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -14,14 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import errno import logging import os import shutil -from six import PY3, iteritems -from six.moves.urllib import parse as urlparse +from six import iteritems import twisted.internet.error import twisted.web.http @@ -34,14 +32,18 @@ from synapse.api.errors import ( NotFoundError, SynapseError, ) -from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util import logcontext from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string -from ._base import FileInfo, respond_404, respond_with_responder +from ._base import ( + FileInfo, + get_filename_from_headers, + respond_404, + respond_with_responder, +) from .config_resource import MediaConfigResource from .download_resource import DownloadResource from .filepath import MediaFilePaths @@ -62,7 +64,7 @@ class MediaRepository(object): def __init__(self, hs): self.hs = hs self.auth = hs.get_auth() - self.client = MatrixFederationHttpClient(hs) + self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -397,39 +399,9 @@ class MediaRepository(object): yield finish() media_type = headers[b"Content-Type"][0].decode('ascii') - + upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0].decode('ascii'),) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - if PY3: - upload_name = urlparse.unquote(upload_name) - else: - upload_name = urlparse.unquote(upload_name.encode('ascii')) - try: - if isinstance(upload_name, bytes): - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - logger.info("Stored remote media in file %r", fname) yield self.store.store_cached_remote_media( diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 9b15699e4d..d0ecf241b6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cgi import datetime import errno import fnmatch @@ -44,10 +43,11 @@ from synapse.http.server import ( ) from synapse.http.servlet import parse_integer, parse_string from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.media.v1._base import get_filename_from_headers from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from synapse.util.stringutils import is_ascii, random_string +from synapse.util.stringutils import random_string from ._base import FileInfo @@ -336,31 +336,7 @@ class PreviewUrlResource(Resource): media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() - content_disposition = headers.get(b"Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - download_name = None - - # First check if there is a valid UTF-8 filename - download_name_utf8 = params.get("filename*", None) - if download_name_utf8: - if download_name_utf8.lower().startswith("utf-8''"): - download_name = download_name_utf8[7:] - - # If there isn't check for an ascii name. - if not download_name: - download_name_ascii = params.get("filename", None) - if download_name_ascii and is_ascii(download_name_ascii): - download_name = download_name_ascii - - if download_name: - download_name = urlparse.unquote(download_name) - try: - download_name = download_name.decode("utf-8") - except UnicodeDecodeError: - download_name = None - else: - download_name = None + download_name = get_filename_from_headers(headers) yield self.store.store_local_media( media_id=file_id, -- cgit 1.4.1