From 2442e9876c2622e345ab62a414e509d4f5cecb4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 14:36:07 +0000 Subject: Make PreviewUrlResource use MediaStorage --- synapse/rest/media/v1/preview_url_resource.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1/preview_url_resource.py') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 40d2e664eb..f3dbbb3fec 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -17,6 +17,8 @@ from twisted.web.server import NOT_DONE_YET from twisted.internet import defer from twisted.web.resource import Resource +from ._base import FileInfo + from synapse.api.errors import ( SynapseError, Codes, ) @@ -49,7 +51,7 @@ logger = logging.getLogger(__name__) class PreviewUrlResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.auth = hs.get_auth() @@ -62,6 +64,7 @@ class PreviewUrlResource(Resource): self.client = SpiderHttpClient(hs) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path + self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -273,19 +276,21 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fpath = self.filepaths.url_cache_filepath_rel(file_id) - fname = os.path.join(self.primary_base_path, fpath) - self.media_repo._makedirs(fname) + file_info = FileInfo( + server_name=None, + file_id=file_id, + url_cache=True, + ) try: - with open(fname, "wb") as f: + with self.media_storage.store_into_file(file_info) as (f, fname, finish): logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) # FIXME: pass through 404s and other error messages nicely - yield self.media_repo.copy_to_backup(fpath) + yield finish() media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -327,7 +332,6 @@ class PreviewUrlResource(Resource): ) except Exception as e: - os.remove(fname) raise SynapseError( 500, ("Failed to download content: %s" % e), Codes.UNKNOWN -- cgit 1.5.1 From 0a90d9ede4a39d720afd131866f98b51aa591cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:03:05 +0000 Subject: Move setting of file_id up to caller --- synapse/rest/media/v1/media_repository.py | 21 ++++++++++----------- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'synapse/rest/media/v1/preview_url_resource.py') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d191f9d2f5..35c1dcbc97 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +413,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, media_info + server_name, media_id, file_id, media_info, ) defer.returnValue(media_info) @@ -525,24 +525,23 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + url_cache=False): """Generate and store thumbnails for an image. Args: - server_name(str|None): The server name if remote media, else None if local - media_id(str) - media_info(dict) - url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + server_name (str|None): The server name if remote media, else None if local + media_id (str): The media ID of the content. (This is the same as + the file_id for local content) + file_id (str): Local file ID + media_info (dict) + url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - if server_name: - file_id = media_info["filesystem_id"] - else: - file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f3dbbb3fec..5ddf581bd2 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -185,8 +185,9 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): + file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, media_info['filesystem_id'], media_info, url_cache=True, + None, file_id, file_id, media_info, url_cache=True, ) og = { @@ -231,8 +232,9 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, image_info['filesystem_id'], image_info, url_cache=True, + None, file_id, file_id, image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] -- cgit 1.5.1 From 6368e5c0ab39e43ee6950ba94a826c44db7d43f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:17:38 +0000 Subject: Change _generate_thumbnails to take media_type --- synapse/rest/media/v1/media_repository.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1/preview_url_resource.py') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 35c1dcbc97..e08cf8af46 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,9 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_id, media_info) + yield self._generate_thumbnails( + None, media_id, media_id, media_info["media_type"], + ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +415,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info, + server_name, media_id, file_id, media_info["media_type"], ) defer.returnValue(media_info) @@ -525,7 +527,7 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + def _generate_thumbnails(self, server_name, media_id, file_id, media_type, url_cache=False): """Generate and store thumbnails for an image. @@ -534,14 +536,13 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_info (dict) + media_type (str) url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ - media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5ddf581bd2..981f01e417 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -187,7 +187,8 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info, url_cache=True, + None, file_id, file_id, media_info["media_type"], + url_cache=True, ) og = { @@ -234,7 +235,8 @@ class PreviewUrlResource(Resource): # TODO: make sure we don't choke on white-on-transparent images file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info, url_cache=True, + None, file_id, file_id, image_info["media_type"], + url_cache=True, ) if dims: og["og:image:width"] = dims['width'] -- cgit 1.5.1 From d5352cbba80005fb226563211c9e7179edef2d65 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 00:35:18 +0000 Subject: Handle url_previews with no content-type avoid failing with an exception if the remote server doesn't give us a Content-Type header. Also, clean up the exception handling a bit. --- synapse/rest/media/v1/preview_url_resource.py | 55 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 21 deletions(-) (limited to 'synapse/rest/media/v1/preview_url_resource.py') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 981f01e417..31fe7aa75c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cgi +import datetime +import errno +import fnmatch +import itertools +import logging +import os +import re +import shutil +import sys +import traceback +import ujson as json +import urlparse from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -33,18 +46,6 @@ from synapse.http.server import ( from synapse.util.async import ObservableDeferred from synapse.util.stringutils import is_ascii -import os -import re -import fnmatch -import cgi -import ujson as json -import urlparse -import itertools -import datetime -import errno -import shutil - -import logging logger = logging.getLogger(__name__) @@ -286,17 +287,28 @@ class PreviewUrlResource(Resource): url_cache=True, ) - try: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + try: logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except Exception as e: # FIXME: pass through 404s and other error messages nicely + logger.warn("Error downloading %s: %r", url, e) + raise SynapseError( + 500, "Failed to download content: %s" % ( + traceback.format_exception_only(sys.exc_type, e), + ), + Codes.UNKNOWN, + ) + yield finish() - yield finish() - - media_type = headers["Content-Type"][0] + try: + if "Content-Type" in headers: + media_type = headers["Content-Type"][0] + else: + media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() content_disposition = headers.get("Content-Disposition", None) @@ -336,10 +348,11 @@ class PreviewUrlResource(Resource): ) except Exception as e: - raise SynapseError( - 500, ("Failed to download content: %s" % e), - Codes.UNKNOWN - ) + logger.error("Error handling downloaded %s: %r", url, e) + # TODO: we really ought to delete the downloaded file in this + # case, since we won't have recorded it in the db, and will + # therefore not expire it. + raise defer.returnValue({ "media_type": media_type, -- cgit 1.5.1