diff options
author | Erik Johnston <erik@matrix.org> | 2016-06-09 14:21:23 +0100 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2016-06-09 14:21:23 +0100 |
commit | ba0406d10da32ebebf4185f01841f236371e0ae8 (patch) | |
tree | 70f492094b7fb962a8161bd2304c6846b3ac3f40 /synapse/rest/media | |
parent | Merge pull request #801 from ruma/readme-history-storage (diff) | |
parent | Change CHANGELOG (diff) | |
download | synapse-ba0406d10da32ebebf4185f01841f236371e0ae8.tar.xz |
Merge branch 'release-v0.16.0' of github.com:matrix-org/synapse v0.16.0
Diffstat (limited to 'synapse/rest/media')
-rw-r--r-- | synapse/rest/media/v1/_base.py | 110 | ||||
-rw-r--r-- | synapse/rest/media/v1/base_resource.py | 459 | ||||
-rw-r--r-- | synapse/rest/media/v1/download_resource.py | 27 | ||||
-rw-r--r-- | synapse/rest/media/v1/media_repository.py | 396 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_url_resource.py | 451 | ||||
-rw-r--r-- | synapse/rest/media/v1/thumbnail_resource.py | 68 | ||||
-rw-r--r-- | synapse/rest/media/v1/upload_resource.py | 54 |
7 files changed, 1044 insertions, 521 deletions
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py new file mode 100644 index 0000000000..b9600f2167 --- /dev/null +++ b/synapse/rest/media/v1/_base.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import respond_with_json, finish_request +from synapse.api.errors import ( + cs_error, Codes, SynapseError +) + +from twisted.internet import defer +from twisted.protocols.basic import FileSender + +from synapse.util.stringutils import is_ascii + +import os + +import logging +import urllib +import urlparse + +logger = logging.getLogger(__name__) + + +def parse_media_id(request): + try: + # This allows users to append e.g. /test.png to the URL. Useful for + # clients that parse the URL to see content type. + server_name, media_id = request.postpath[:2] + file_name = None + if len(request.postpath) > 2: + try: + file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") + except UnicodeDecodeError: + pass + return server_name, media_id, file_name + except: + raise SynapseError( + 404, + "Invalid media id token %r" % (request.postpath,), + Codes.UNKNOWN, + ) + + +def respond_404(request): + respond_with_json( + request, 404, + cs_error( + "Not found %r" % (request.postpath,), + code=Codes.NOT_FOUND, + ), + send_cors=True + ) + + +@defer.inlineCallbacks +def respond_with_file(request, media_type, file_path, + file_size=None, upload_name=None): + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + if upload_name: + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + if file_size is None: + stat = os.stat(file_path) + file_size = stat.st_size + + request.setHeader( + b"Content-Length", b"%d" % (file_size,) + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + finish_request(request) + else: + respond_404(request) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py deleted file mode 100644 index 58ef91c0b8..0000000000 --- a/synapse/rest/media/v1/base_resource.py +++ /dev/null @@ -1,459 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .thumbnailer import Thumbnailer - -from synapse.http.matrixfederationclient import MatrixFederationHttpClient -from synapse.http.server import respond_with_json, finish_request -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_error, Codes, SynapseError -) - -from twisted.internet import defer, threads -from twisted.web.resource import Resource -from twisted.protocols.basic import FileSender - -from synapse.util.async import ObservableDeferred -from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn - -import os - -import cgi -import logging -import urllib -import urlparse - -logger = logging.getLogger(__name__) - - -def parse_media_id(request): - try: - # This allows users to append e.g. /test.png to the URL. Useful for - # clients that parse the URL to see content type. - server_name, media_id = request.postpath[:2] - file_name = None - if len(request.postpath) > 2: - try: - file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") - except UnicodeDecodeError: - pass - return server_name, media_id, file_name - except: - raise SynapseError( - 404, - "Invalid media id token %r" % (request.postpath,), - Codes.UNKNOWN, - ) - - -class BaseMediaResource(Resource): - isLeaf = True - - def __init__(self, hs, filepaths): - Resource.__init__(self) - self.auth = hs.get_auth() - self.client = MatrixFederationHttpClient(hs) - self.clock = hs.get_clock() - self.server_name = hs.hostname - self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size - self.max_image_pixels = hs.config.max_image_pixels - self.filepaths = filepaths - self.version_string = hs.version_string - self.downloads = {} - self.dynamic_thumbnails = hs.config.dynamic_thumbnails - self.thumbnail_requirements = hs.config.thumbnail_requirements - - def _respond_404(self, request): - respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True - ) - - @staticmethod - def _makedirs(filepath): - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - - def _get_remote_media(self, server_name, media_id): - key = (server_name, media_id) - download = self.downloads.get(key) - if download is None: - download = self._get_remote_media_impl(server_name, media_id) - download = ObservableDeferred( - download, - consumeErrors=True - ) - self.downloads[key] = download - - @download.addBoth - def callback(media_info): - del self.downloads[key] - return media_info - return download.observe() - - @defer.inlineCallbacks - def _get_remote_media_impl(self, server_name, media_id): - media_info = yield self.store.get_cached_remote_media( - server_name, media_id - ) - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) - defer.returnValue(media_info) - - @defer.inlineCallbacks - def _download_remote_file(self, server_name, media_id): - file_id = random_string(24) - - fname = self.filepaths.remote_media_filepath( - server_name, file_id - ) - self._makedirs(fname) - - try: - with open(fname, "wb") as f: - request_path = "/".join(( - "/_matrix/media/v1/download", server_name, media_id, - )) - length, headers = yield self.client.get_file( - server_name, request_path, output_stream=f, - max_size=self.max_upload_size, - ) - media_type = headers["Content-Type"][0] - time_now_ms = self.clock.time_msec() - - content_disposition = headers.get("Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - upload_name = urlparse.unquote(upload_name) - try: - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - - yield self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=length, - filesystem_id=file_id, - ) - except: - os.remove(fname) - raise - - media_info = { - "media_type": media_type, - "media_length": length, - "upload_name": upload_name, - "created_ts": time_now_ms, - "filesystem_id": file_id, - } - - yield self._generate_remote_thumbnails( - server_name, media_id, media_info - ) - - defer.returnValue(media_info) - - @defer.inlineCallbacks - def _respond_with_file(self, request, media_type, file_path, - file_size=None, upload_name=None): - logger.debug("Responding with %r", file_path) - - if os.path.isfile(file_path): - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - if upload_name: - if is_ascii(upload_name): - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - else: - request.setHeader( - b"Content-Disposition", - b"inline; filename*=utf-8''%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - if file_size is None: - stat = os.stat(file_path) - file_size = stat.st_size - - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) - - with open(file_path, "rb") as f: - yield FileSender().beginFileTransfer(f, request) - - finish_request(request) - else: - self._respond_404(request) - - def _get_thumbnail_requirements(self, media_type): - return self.thumbnail_requirements.get(media_type, ()) - - def _generate_thumbnail(self, input_path, t_path, t_width, t_height, - t_method, t_type): - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - elif t_method == "scale": - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - else: - t_len = None - - return t_len - - @defer.inlineCallbacks - def _generate_local_exact_thumbnail(self, media_id, t_width, t_height, - t_method, t_type): - input_path = self.filepaths.local_media_filepath(media_id) - - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( - threads.deferToThread, - self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type - ) - - if t_len: - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) - - defer.returnValue(t_path) - - @defer.inlineCallbacks - def _generate_remote_exact_thumbnail(self, server_name, file_id, media_id, - t_width, t_height, t_method, t_type): - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( - threads.deferToThread, - self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type - ) - - if t_len: - yield self.store.store_remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ) - - defer.returnValue(t_path) - - @defer.inlineCallbacks - def _generate_local_thumbnails(self, media_id, media_info): - media_type = media_info["media_type"] - requirements = self._get_thumbnail_requirements(media_type) - if not requirements: - return - - input_path = self.filepaths.local_media_filepath(media_id) - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - local_thumbnails = [] - - def generate_thumbnails(): - scales = set() - crops = set() - for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) - - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - - for l in local_thumbnails: - yield self.store.store_local_thumbnail(*l) - - defer.returnValue({ - "width": m_width, - "height": m_height, - }) - - @defer.inlineCallbacks - def _generate_remote_thumbnails(self, server_name, media_id, media_info): - media_type = media_info["media_type"] - file_id = media_info["filesystem_id"] - requirements = self._get_thumbnail_requirements(media_type) - if not requirements: - return - - remote_thumbnails = [] - - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - def generate_thumbnails(): - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - scales = set() - crops = set() - for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - - for r in remote_thumbnails: - yield self.store.store_remote_media_thumbnail(*r) - - defer.returnValue({ - "width": m_width, - "height": m_height, - }) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 1aad6b3551..9f69620772 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base_resource import BaseMediaResource, parse_media_id +from ._base import parse_media_id, respond_with_file, respond_404 +from twisted.web.resource import Resource from synapse.http.server import request_handler from twisted.web.server import NOT_DONE_YET @@ -24,12 +25,24 @@ import logging logger = logging.getLogger(__name__) -class DownloadResource(BaseMediaResource): +class DownloadResource(Resource): + isLeaf = True + + def __init__(self, hs, media_repo): + Resource.__init__(self) + + self.filepaths = media_repo.filepaths + self.media_repo = media_repo + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.version_string = hs.version_string + self.clock = hs.get_clock() + def render_GET(self, request): self._async_render_GET(request) return NOT_DONE_YET - @request_handler + @request_handler() @defer.inlineCallbacks def _async_render_GET(self, request): server_name, media_id, name = parse_media_id(request) @@ -44,7 +57,7 @@ class DownloadResource(BaseMediaResource): def _respond_local_file(self, request, media_id, name): media_info = yield self.store.get_local_media(media_id) if not media_info: - self._respond_404(request) + respond_404(request) return media_type = media_info["media_type"] @@ -52,14 +65,14 @@ class DownloadResource(BaseMediaResource): upload_name = name if name else media_info["upload_name"] file_path = self.filepaths.local_media_filepath(media_id) - yield self._respond_with_file( + yield respond_with_file( request, media_type, file_path, media_length, upload_name=upload_name, ) @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): - media_info = yield self._get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media(server_name, media_id) media_type = media_info["media_type"] media_length = media_info["media_length"] @@ -70,7 +83,7 @@ class DownloadResource(BaseMediaResource): server_name, filesystem_id ) - yield self._respond_with_file( + yield respond_with_file( request, media_type, file_path, media_length, upload_name=upload_name, ) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 7dfb027dd1..d96bf9afe2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -17,15 +17,400 @@ from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource from .identicon_resource import IdenticonResource +from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from twisted.web.resource import Resource +from .thumbnailer import Thumbnailer + +from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.util.stringutils import random_string + +from twisted.internet import defer, threads + +from synapse.util.async import ObservableDeferred +from synapse.util.stringutils import is_ascii +from synapse.util.logcontext import preserve_context_over_fn + +import os + +import cgi import logging +import urlparse logger = logging.getLogger(__name__) +class MediaRepository(object): + def __init__(self, hs, filepaths): + self.auth = hs.get_auth() + self.client = MatrixFederationHttpClient(hs) + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.max_upload_size = hs.config.max_upload_size + self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = filepaths + self.downloads = {} + self.dynamic_thumbnails = hs.config.dynamic_thumbnails + self.thumbnail_requirements = hs.config.thumbnail_requirements + + @staticmethod + def _makedirs(filepath): + dirname = os.path.dirname(filepath) + if not os.path.exists(dirname): + os.makedirs(dirname) + + @defer.inlineCallbacks + def create_content(self, media_type, upload_name, content, content_length, + auth_user): + media_id = random_string(24) + + fname = self.filepaths.local_media_filepath(media_id) + self._makedirs(fname) + + # This shouldn't block for very long because the content will have + # already been uploaded at this point. + with open(fname, "wb") as f: + f.write(content) + + yield self.store.store_local_media( + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=content_length, + user_id=auth_user, + ) + media_info = { + "media_type": media_type, + "media_length": content_length, + } + + yield self._generate_local_thumbnails(media_id, media_info) + + defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) + + def get_remote_media(self, server_name, media_id): + key = (server_name, media_id) + download = self.downloads.get(key) + if download is None: + download = self._get_remote_media_impl(server_name, media_id) + download = ObservableDeferred( + download, + consumeErrors=True + ) + self.downloads[key] = download + + @download.addBoth + def callback(media_info): + del self.downloads[key] + return media_info + return download.observe() + + @defer.inlineCallbacks + def _get_remote_media_impl(self, server_name, media_id): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + if not media_info: + media_info = yield self._download_remote_file( + server_name, media_id + ) + defer.returnValue(media_info) + + @defer.inlineCallbacks + def _download_remote_file(self, server_name, media_id): + file_id = random_string(24) + + fname = self.filepaths.remote_media_filepath( + server_name, file_id + ) + self._makedirs(fname) + + try: + with open(fname, "wb") as f: + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )) + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + max_size=self.max_upload_size, + ) + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii + + if upload_name: + upload_name = urlparse.unquote(upload_name) + try: + upload_name = upload_name.decode("utf-8") + except UnicodeDecodeError: + upload_name = None + else: + upload_name = None + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) + except: + os.remove(fname) + raise + + media_info = { + "media_type": media_type, + "media_length": length, + "upload_name": upload_name, + "created_ts": time_now_ms, + "filesystem_id": file_id, + } + + yield self._generate_remote_thumbnails( + server_name, media_id, media_info + ) + + defer.returnValue(media_info) + + def _get_thumbnail_requirements(self, media_type): + return self.thumbnail_requirements.get(media_type, ()) + + def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + t_method, t_type): + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + if t_method == "crop": + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + elif t_method == "scale": + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + else: + t_len = None + + return t_len + + @defer.inlineCallbacks + def generate_local_exact_thumbnail(self, media_id, t_width, t_height, + t_method, t_type): + input_path = self.filepaths.local_media_filepath(media_id) + + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + + t_len = yield preserve_context_over_fn( + threads.deferToThread, + self._generate_thumbnail, + input_path, t_path, t_width, t_height, t_method, t_type + ) + + if t_len: + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue(t_path) + + @defer.inlineCallbacks + def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, + t_width, t_height, t_method, t_type): + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + + t_len = yield preserve_context_over_fn( + threads.deferToThread, + self._generate_thumbnail, + input_path, t_path, t_width, t_height, t_method, t_type + ) + + if t_len: + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue(t_path) + + @defer.inlineCallbacks + def _generate_local_thumbnails(self, media_id, media_info): + media_type = media_info["media_type"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.local_media_filepath(media_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + local_thumbnails = [] + + def generate_thumbnails(): + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len + )) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + local_thumbnails.append(( + media_id, t_width, t_height, t_type, t_method, t_len + )) + + yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + + for l in local_thumbnails: + yield self.store.store_local_thumbnail(*l) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) + + @defer.inlineCallbacks + def _generate_remote_thumbnails(self, server_name, media_id, media_info): + media_type = media_info["media_type"] + file_id = media_info["filesystem_id"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + remote_thumbnails = [] + + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + def generate_thumbnails(): + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + remote_thumbnails.append([ + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ]) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + remote_thumbnails.append([ + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ]) + + yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + + for r in remote_thumbnails: + yield self.store.store_remote_media_thumbnail(*r) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) + + class MediaRepositoryResource(Resource): """File uploading and downloading. @@ -74,7 +459,12 @@ class MediaRepositoryResource(Resource): def __init__(self, hs): Resource.__init__(self) filepaths = MediaFilePaths(hs.config.media_store_path) - self.putChild("upload", UploadResource(hs, filepaths)) - self.putChild("download", DownloadResource(hs, filepaths)) - self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) + + media_repo = MediaRepository(hs, filepaths) + + self.putChild("upload", UploadResource(hs, media_repo)) + self.putChild("download", DownloadResource(hs, media_repo)) + self.putChild("thumbnail", ThumbnailResource(hs, media_repo)) self.putChild("identicon", IdenticonResource()) + if hs.config.url_preview_enabled: + self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py new file mode 100644 index 0000000000..37dd1de899 --- /dev/null +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -0,0 +1,451 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer +from twisted.web.resource import Resource + +from synapse.api.errors import ( + SynapseError, Codes, +) +from synapse.util.stringutils import random_string +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.http.client import SpiderHttpClient +from synapse.http.server import ( + request_handler, respond_with_json_bytes +) +from synapse.util.async import ObservableDeferred +from synapse.util.stringutils import is_ascii + +import os +import re +import fnmatch +import cgi +import ujson as json +import urlparse + +import logging +logger = logging.getLogger(__name__) + + +class PreviewUrlResource(Resource): + isLeaf = True + + def __init__(self, hs, media_repo): + Resource.__init__(self) + + self.auth = hs.get_auth() + self.clock = hs.get_clock() + self.version_string = hs.version_string + self.filepaths = media_repo.filepaths + self.max_spider_size = hs.config.max_spider_size + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.client = SpiderHttpClient(hs) + self.media_repo = media_repo + + self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist + + # simple memory cache mapping urls to OG metadata + self.cache = ExpiringCache( + cache_name="url_previews", + clock=self.clock, + # don't spider URLs more often than once an hour + expiry_ms=60 * 60 * 1000, + ) + self.cache.start() + + self.downloads = {} + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @request_handler() + @defer.inlineCallbacks + def _async_render_GET(self, request): + + # XXX: if get_user_by_req fails, what should we do in an async render? + requester = yield self.auth.get_user_by_req(request) + url = request.args.get("url")[0] + if "ts" in request.args: + ts = int(request.args.get("ts")[0]) + else: + ts = self.clock.time_msec() + + url_tuple = urlparse.urlsplit(url) + for entry in self.url_preview_url_blacklist: + match = True + for attrib in entry: + pattern = entry[attrib] + value = getattr(url_tuple, attrib) + logger.debug(( + "Matching attrib '%s' with value '%s' against" + " pattern '%s'" + ) % (attrib, value, pattern)) + + if value is None: + match = False + continue + + if pattern.startswith('^'): + if not re.match(pattern, getattr(url_tuple, attrib)): + match = False + continue + else: + if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern): + match = False + continue + if match: + logger.warn( + "URL %s blocked by url_blacklist entry %s", url, entry + ) + raise SynapseError( + 403, "URL blocked by url pattern blacklist entry", + Codes.UNKNOWN + ) + + # first check the memory cache - good to handle all the clients on this + # HS thundering away to preview the same URL at the same time. + og = self.cache.get(url) + if og: + respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True) + return + + # then check the URL cache in the DB (which will also provide us with + # historical previews, if we have any) + cache_result = yield self.store.get_url_cache(url, ts) + if ( + cache_result and + cache_result["download_ts"] + cache_result["expires"] > ts and + cache_result["response_code"] / 100 == 2 + ): + respond_with_json_bytes( + request, 200, cache_result["og"].encode('utf-8'), + send_cors=True + ) + return + + # Ensure only one download for a given URL is active at a time + download = self.downloads.get(url) + if download is None: + download = self._download_url(url, requester.user) + download = ObservableDeferred( + download, + consumeErrors=True + ) + self.downloads[url] = download + + @download.addBoth + def callback(media_info): + del self.downloads[url] + return media_info + media_info = yield download.observe() + + # FIXME: we should probably update our cache now anyway, so that + # even if the OG calculation raises, we don't keep hammering on the + # remote server. For now, leave it uncached to aid debugging OG + # calculation problems + + logger.debug("got media_info of '%s'" % media_info) + + if self._is_media(media_info['media_type']): + dims = yield self.media_repo._generate_local_thumbnails( + media_info['filesystem_id'], media_info + ) + + og = { + "og:description": media_info['download_name'], + "og:image": "mxc://%s/%s" % ( + self.server_name, media_info['filesystem_id'] + ), + "og:image:type": media_info['media_type'], + "matrix:image:size": media_info['media_length'], + } + + if dims: + og["og:image:width"] = dims['width'] + og["og:image:height"] = dims['height'] + else: + logger.warn("Couldn't get dims for %s" % url) + + # define our OG response for this media + elif self._is_html(media_info['media_type']): + # TODO: somehow stop a big HTML tree from exploding synapse's RAM + + from lxml import etree + + file = open(media_info['filename']) + body = file.read() + file.close() + + # clobber the encoding from the content-type, or default to utf-8 + # XXX: this overrides any <meta/> or XML charset headers in the body + # which may pose problems, but so far seems to work okay. + match = re.match(r'.*; *charset=(.*?)(;|$)', media_info['media_type'], re.I) + encoding = match.group(1) if match else "utf-8" + + try: + parser = etree.HTMLParser(recover=True, encoding=encoding) + tree = etree.fromstring(body, parser) + og = yield self._calc_og(tree, media_info, requester) + except UnicodeDecodeError: + # blindly try decoding the body as utf-8, which seems to fix + # the charset mismatches on https://google.com + parser = etree.HTMLParser(recover=True, encoding=encoding) + tree = etree.fromstring(body.decode('utf-8', 'ignore'), parser) + og = yield self._calc_og(tree, media_info, requester) + + else: + logger.warn("Failed to find any OG data in %s", url) + og = {} + + logger.debug("Calculated OG for %s as %s" % (url, og)) + + # store OG in ephemeral in-memory cache + self.cache[url] = og + + # store OG in history-aware DB cache + yield self.store.store_url_cache( + url, + media_info["response_code"], + media_info["etag"], + media_info["expires"], + json.dumps(og), + media_info["filesystem_id"], + media_info["created_ts"], + ) + + respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True) + + @defer.inlineCallbacks + def _calc_og(self, tree, media_info, requester): + # suck our tree into lxml and define our OG response. + + # if we see any image URLs in the OG response, then spider them + # (although the client could choose to do this by asking for previews of those + # URLs to avoid DoSing the server) + + # "og:type" : "video", + # "og:url" : "https://www.youtube.com/watch?v=LXDBoHyjmtw", + # "og:site_name" : "YouTube", + # "og:video:type" : "application/x-shockwave-flash", + # "og:description" : "Fun stuff happening here", + # "og:title" : "RemoteJam - Matrix team hack for Disrupt Europe Hackathon", + # "og:image" : "https://i.ytimg.com/vi/LXDBoHyjmtw/maxresdefault.jpg", + # "og:video:url" : "http://www.youtube.com/v/LXDBoHyjmtw?version=3&autohide=1", + # "og:video:width" : "1280" + # "og:video:height" : "720", + # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", + + og = {} + for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): + og[tag.attrib['property']] = tag.attrib['content'] + + # TODO: grab article: meta tags too, e.g.: + + # "article:publisher" : "https://www.facebook.com/thethudonline" /> + # "article:author" content="https://www.facebook.com/thethudonline" /> + # "article:tag" content="baby" /> + # "article:section" content="Breaking News" /> + # "article:published_time" content="2016-03-31T19:58:24+00:00" /> + # "article:modified_time" content="2016-04-01T18:31:53+00:00" /> + + if 'og:title' not in og: + # do some basic spidering of the HTML + title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]") + og['og:title'] = title[0].text.strip() if title else None + + if 'og:image' not in og: + # TODO: extract a favicon failing all else + meta_image = tree.xpath( + "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image']/@content" + ) + if meta_image: + og['og:image'] = self._rebase_url(meta_image[0], media_info['uri']) + else: + # TODO: consider inlined CSS styles as well as width & height attribs + images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]") + images = sorted(images, key=lambda i: ( + -1 * int(i.attrib['width']) * int(i.attrib['height']) + )) + if not images: + images = tree.xpath("//img[@src]") + if images: + og['og:image'] = images[0].attrib['src'] + + # pre-cache the image for posterity + # FIXME: it might be cleaner to use the same flow as the main /preview_url request + # itself and benefit from the same caching etc. But for now we just rely on the + # caching on the master request to speed things up. + if 'og:image' in og and og['og:image']: + image_info = yield self._download_url( + self._rebase_url(og['og:image'], media_info['uri']), requester.user + ) + + if self._is_media(image_info['media_type']): + # TODO: make sure we don't choke on white-on-transparent images + dims = yield self.media_repo._generate_local_thumbnails( + image_info['filesystem_id'], image_info + ) + if dims: + og["og:image:width"] = dims['width'] + og["og:image:height"] = dims['height'] + else: + logger.warn("Couldn't get dims for %s" % og["og:image"]) + + og["og:image"] = "mxc://%s/%s" % ( + self.server_name, image_info['filesystem_id'] + ) + og["og:image:type"] = image_info['media_type'] + og["matrix:image:size"] = image_info['media_length'] + else: + del og["og:image"] + + if 'og:description' not in og: + meta_description = tree.xpath( + "//*/meta" + "[translate(@name, 'DESCRIPTION', 'description')='description']" + "/@content") + if meta_description: + og['og:description'] = meta_description[0] + else: + # grab any text nodes which are inside the <body/> tag... + # unless they are within an HTML5 semantic markup tag... + # <header/>, <nav/>, <aside/>, <footer/> + # ...or if they are within a <script/> or <style/> tag. + # This is a very very very coarse approximation to a plain text + # render of the page. + text_nodes = tree.xpath("//text()[not(ancestor::header | ancestor::nav | " + "ancestor::aside | ancestor::footer | " + "ancestor::script | ancestor::style)]" + + "[ancestor::body]") + text = '' + for text_node in text_nodes: + if len(text) < 500: + text += text_node + ' ' + else: + break + text = re.sub(r'[\t ]+', ' ', text) + text = re.sub(r'[\t \r\n]*[\r\n]+', '\n', text) + text = text.strip()[:500] + og['og:description'] = text if text else None + + # TODO: delete the url downloads to stop diskfilling, + # as we only ever cared about its OG + defer.returnValue(og) + + def _rebase_url(self, url, base): + base = list(urlparse.urlparse(base)) + url = list(urlparse.urlparse(url)) + if not url[0]: # fix up schema + url[0] = base[0] or "http" + if not url[1]: # fix up hostname + url[1] = base[1] + if not url[2].startswith('/'): + url[2] = re.sub(r'/[^/]+$', '/', base[2]) + url[2] + return urlparse.urlunparse(url) + + @defer.inlineCallbacks + def _download_url(self, url, user): + # TODO: we should probably honour robots.txt... except in practice + # we're most likely being explicitly triggered by a human rather than a + # bot, so are we really a robot? + + # XXX: horrible duplication with base_resource's _download_remote_file() + file_id = random_string(24) + + fname = self.filepaths.local_media_filepath(file_id) + self.media_repo._makedirs(fname) + + try: + with open(fname, "wb") as f: + logger.debug("Trying to get url '%s'" % url) + length, headers, uri, code = yield self.client.get_file( + url, output_stream=f, max_size=self.max_spider_size, + ) + # FIXME: pass through 404s and other error messages nicely + + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + download_name = None + + # First check if there is a valid UTF-8 filename + download_name_utf8 = params.get("filename*", None) + if download_name_utf8: + if download_name_utf8.lower().startswith("utf-8''"): + download_name = download_name_utf8[7:] + + # If there isn't check for an ascii name. + if not download_name: + download_name_ascii = params.get("filename", None) + if download_name_ascii and is_ascii(download_name_ascii): + download_name = download_name_ascii + + if download_name: + download_name = urlparse.unquote(download_name) + try: + download_name = download_name.decode("utf-8") + except UnicodeDecodeError: + download_name = None + else: + download_name = None + + yield self.store.store_local_media( + media_id=file_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=download_name, + media_length=length, + user_id=user, + ) + + except Exception as e: + os.remove(fname) + raise SynapseError( + 500, ("Failed to download content: %s" % e), + Codes.UNKNOWN + ) + + defer.returnValue({ + "media_type": media_type, + "media_length": length, + "download_name": download_name, + "created_ts": time_now_ms, + "filesystem_id": file_id, + "filename": fname, + "uri": uri, + "response_code": code, + # FIXME: we should calculate a proper expiration based on the + # Cache-Control and Expire headers. But for now, assume 1 hour. + "expires": 60 * 60 * 1000, + "etag": headers["ETag"][0] if "ETag" in headers else None, + }) + + def _is_media(self, content_type): + if content_type.lower().startswith("image/"): + return True + + def _is_html(self, content_type): + content_type = content_type.lower() + if ( + content_type.startswith("text/html") or + content_type.startswith("application/xhtml") + ): + return True diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index ab52499785..0b9e1de1a7 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -14,7 +14,8 @@ # limitations under the License. -from .base_resource import BaseMediaResource, parse_media_id +from ._base import parse_media_id, respond_404, respond_with_file +from twisted.web.resource import Resource from synapse.http.servlet import parse_string, parse_integer from synapse.http.server import request_handler @@ -26,14 +27,25 @@ import logging logger = logging.getLogger(__name__) -class ThumbnailResource(BaseMediaResource): +class ThumbnailResource(Resource): isLeaf = True + def __init__(self, hs, media_repo): + Resource.__init__(self) + + self.store = hs.get_datastore() + self.filepaths = media_repo.filepaths + self.media_repo = media_repo + self.dynamic_thumbnails = hs.config.dynamic_thumbnails + self.server_name = hs.hostname + self.version_string = hs.version_string + self.clock = hs.get_clock() + def render_GET(self, request): self._async_render_GET(request) return NOT_DONE_YET - @request_handler + @request_handler() @defer.inlineCallbacks def _async_render_GET(self, request): server_name, media_id, _ = parse_media_id(request) @@ -69,9 +81,14 @@ class ThumbnailResource(BaseMediaResource): media_info = yield self.store.get_local_media(media_id) if not media_info: - self._respond_404(request) + respond_404(request) return + # if media_info["media_type"] == "image/svg+xml": + # file_path = self.filepaths.local_media_filepath(media_id) + # yield respond_with_file(request, media_info["media_type"], file_path) + # return + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: @@ -86,7 +103,7 @@ class ThumbnailResource(BaseMediaResource): file_path = self.filepaths.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, t_type, file_path) + yield respond_with_file(request, t_type, file_path) else: yield self._respond_default_thumbnail( @@ -100,9 +117,14 @@ class ThumbnailResource(BaseMediaResource): media_info = yield self.store.get_local_media(media_id) if not media_info: - self._respond_404(request) + respond_404(request) return + # if media_info["media_type"] == "image/svg+xml": + # file_path = self.filepaths.local_media_filepath(media_id) + # yield respond_with_file(request, media_info["media_type"], file_path) + # return + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) for info in thumbnail_infos: t_w = info["thumbnail_width"] == desired_width @@ -114,18 +136,18 @@ class ThumbnailResource(BaseMediaResource): file_path = self.filepaths.local_media_thumbnail( media_id, desired_width, desired_height, desired_type, desired_method, ) - yield self._respond_with_file(request, desired_type, file_path) + yield respond_with_file(request, desired_type, file_path) return logger.debug("We don't have a local thumbnail of that size. Generating") # Okay, so we generate one. - file_path = yield self._generate_local_exact_thumbnail( + file_path = yield self.media_repo.generate_local_exact_thumbnail( media_id, desired_width, desired_height, desired_method, desired_type ) if file_path: - yield self._respond_with_file(request, desired_type, file_path) + yield respond_with_file(request, desired_type, file_path) else: yield self._respond_default_thumbnail( request, media_info, desired_width, desired_height, @@ -136,7 +158,12 @@ class ThumbnailResource(BaseMediaResource): def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, desired_width, desired_height, desired_method, desired_type): - media_info = yield self._get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media(server_name, media_id) + + # if media_info["media_type"] == "image/svg+xml": + # file_path = self.filepaths.remote_media_filepath(server_name, media_id) + # yield respond_with_file(request, media_info["media_type"], file_path) + # return thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -155,19 +182,19 @@ class ThumbnailResource(BaseMediaResource): server_name, file_id, desired_width, desired_height, desired_type, desired_method, ) - yield self._respond_with_file(request, desired_type, file_path) + yield respond_with_file(request, desired_type, file_path) return logger.debug("We don't have a local thumbnail of that size. Generating") # Okay, so we generate one. - file_path = yield self._generate_remote_exact_thumbnail( + file_path = yield self.media_repo.generate_remote_exact_thumbnail( server_name, file_id, media_id, desired_width, desired_height, desired_method, desired_type ) if file_path: - yield self._respond_with_file(request, desired_type, file_path) + yield respond_with_file(request, desired_type, file_path) else: yield self._respond_default_thumbnail( request, media_info, desired_width, desired_height, @@ -179,7 +206,12 @@ class ThumbnailResource(BaseMediaResource): height, method, m_type): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead. - media_info = yield self._get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media(server_name, media_id) + + # if media_info["media_type"] == "image/svg+xml": + # file_path = self.filepaths.remote_media_filepath(server_name, media_id) + # yield respond_with_file(request, media_info["media_type"], file_path) + # return thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -199,7 +231,7 @@ class ThumbnailResource(BaseMediaResource): file_path = self.filepaths.remote_media_thumbnail( server_name, file_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, t_type, file_path, t_length) + yield respond_with_file(request, t_type, file_path, t_length) else: yield self._respond_default_thumbnail( request, media_info, width, height, method, m_type, @@ -208,6 +240,8 @@ class ThumbnailResource(BaseMediaResource): @defer.inlineCallbacks def _respond_default_thumbnail(self, request, media_info, width, height, method, m_type): + # XXX: how is this meant to work? store.get_default_thumbnails + # appears to always return [] so won't this always 404? media_type = media_info["media_type"] top_level_type = media_type.split("/")[0] sub_type = media_type.split("/")[-1].split(";")[0] @@ -223,7 +257,7 @@ class ThumbnailResource(BaseMediaResource): "_default", "_default", ) if not thumbnail_infos: - self._respond_404(request) + respond_404(request) return thumbnail_info = self._select_thumbnail( @@ -239,7 +273,7 @@ class ThumbnailResource(BaseMediaResource): file_path = self.filepaths.default_thumbnail( top_level_type, sub_type, t_width, t_height, t_type, t_method, ) - yield self.respond_with_file(request, t_type, file_path, t_length) + yield respond_with_file(request, t_type, file_path, t_length) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 9c7ad4ae85..b716d1d892 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -15,20 +15,34 @@ from synapse.http.server import respond_with_json, request_handler -from synapse.util.stringutils import random_string from synapse.api.errors import SynapseError from twisted.web.server import NOT_DONE_YET from twisted.internet import defer -from .base_resource import BaseMediaResource +from twisted.web.resource import Resource import logging logger = logging.getLogger(__name__) -class UploadResource(BaseMediaResource): +class UploadResource(Resource): + isLeaf = True + + def __init__(self, hs, media_repo): + Resource.__init__(self) + + self.media_repo = media_repo + self.filepaths = media_repo.filepaths + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.auth = hs.get_auth() + self.max_upload_size = hs.config.max_upload_size + self.version_string = hs.version_string + self.clock = hs.get_clock() + def render_POST(self, request): self._async_render_POST(request) return NOT_DONE_YET @@ -37,37 +51,7 @@ class UploadResource(BaseMediaResource): respond_with_json(request, 200, {}, send_cors=True) return NOT_DONE_YET - @defer.inlineCallbacks - def create_content(self, media_type, upload_name, content, content_length, - auth_user): - media_id = random_string(24) - - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(content) - - yield self.store.store_local_media( - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=content_length, - user_id=auth_user, - ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } - - yield self._generate_local_thumbnails(media_id, media_info) - - defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) - - @request_handler + @request_handler() @defer.inlineCallbacks def _async_render_POST(self, request): requester = yield self.auth.get_user_by_req(request) @@ -108,7 +92,7 @@ class UploadResource(BaseMediaResource): # disposition = headers.getRawHeaders("Content-Disposition")[0] # TODO(markjh): parse content-dispostion - content_uri = yield self.create_content( + content_uri = yield self.media_repo.create_content( media_type, upload_name, request.content.read(), content_length, requester.user ) |