From cc84d3ea78eaf50c20ad84b3df99ecf4547e08a8 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 10 Dec 2014 14:46:55 +0000 Subject: Thumbnail uploaded and cached images --- synapse/media/v1/base_resource.py | 318 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 synapse/media/v1/base_resource.py (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py new file mode 100644 index 0000000000..1e57a1465f --- /dev/null +++ b/synapse/media/v1/base_resource.py @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .thumbnailer import Thumbnailer + +from synapse.http.server import respond_with_json +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, CodeMessageException, cs_error, Codes, SynapseError +) + +from twisted.internet import defer +from twisted.web.resource import Resource +from twisted.protocols.basic import FileSender + +import os + +import logging + +logger = logging.getLogger(__name__) + + +class BaseMediaResource(Resource): + isLeaf = True + + def __init__(self, hs, filepaths): + Resource.__init__(self) + self.client = hs.get_http_client() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.max_upload_size = hs.config.max_upload_size + self.filepaths = filepaths + + @staticmethod + def catch_errors(request_handler): + @defer.inlineCallbacks + def wrapped_request_handler(self, request): + try: + yield request_handler(self, request) + except CodeMessageException as e: + logger.exception(e) + respond_with_json( + request, e.code, cs_exception(e), send_cors=True + ) + except: + logger.exception( + "Failed handle request %s.%s on %r", + request_handler.__module__, + request_handler.__name__, + self, + ) + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) + return wrapped_request_handler + + @staticmethod + def _parse_media_id(request): + try: + server_name, media_id = request.postpath + return (server_name, media_id) + except: + raise SynapseError( + 404, + "Invalid media id token %r" % (request.postpath,), + Codes.UNKKOWN, + ) + + @staticmethod + def _parse_integer(request, arg_name, default=None): + try: + if default is None: + return int(request.args[arg_name][0]) + else: + return int(request.args.get(arg_name, [default])[0]) + except: + raise SynapseError( + 400, + "Missing integer argument %r" % (arg_name), + Codes.UNKNOWN, + ) + + @staticmethod + def _parse_string(request, arg_name, default=None): + try: + if default is None: + return request.args[arg_name][0] + else: + return request.args.get(arg_name, [default])[0] + except: + raise SynapseError( + 400, + "Missing string argument %r" % (arg_name), + Codes.UNKNOWN, + ) + + def _respond_404(self, request): + respond_with_json( + request, 404, + cs_error( + "Not found %r" % (request.postpath,), + code=Codes.NOT_FOUND, + ), + send_cors=True + ) + + @defer.inlineCallbacks + def _download_remote_file(self, server_name, media_id): + file_id = random_string(24) + + fname = self.filepaths.remote_media_filepath( + server_name, file_id + ) + os.makedirs(os.path.dirname(fname)) + + try: + with open(fname, "wb") as f: + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )), + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + ) + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=length, + file_id=file_id, + ) + except: + os.remove(fname) + raise + + media_info = { + "media_type": media_type, + "media_length": length, + "upload_name": None, + "created_ts": time_now_ms, + "file_id": file_id, + } + + yield self._generate_remote_thumbnails( + server_name, media_id, media_info + ) + + defer.returnValue(media_info) + + @defer.inlineCallbacks + def _respond_with_file(self, request, media_type, file_path): + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + request.finish() + else: + self._respond_404() + + def _get_thumbnail_requirements(self, media_type): + if media_type == "image/jpeg": + return ( + (32, 32, "crop", "image/jpeg"), + (96, 96, "crop", "image/jpeg"), + (320, 240, "scale", "image/jpeg"), + (640, 480, "scale", "image/jpeg"), + ) + elif (media_type == "image/png") or (media_type == "image/gif"): + return ( + (32, 32, "crop", "image/png"), + (96, 96, "crop", "image/png"), + (320, 240, "scale", "image/png"), + (640, 480, "scale", "image/png"), + ) + else: + return () + + @defer.inlineCallbacks + def _generate_local_thumbnails(self, media_id, media_info): + media_type = media_info["media_type"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.local_media_path(media_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) + + @defer.inlineCallbacks + def _generate_remote_thumbnails(self, server_name, media_id, media_info): + media_type = media_info["media_type"] + file_id = media_info["filesystem_id"] + requirements = self._get_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.remote_media_path(server_name, file_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.remote_media_thumbnail( + server_name, media_id, file_id, + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) -- cgit 1.4.1 From e5275d856ee7a1d7aeccd3ea6ab97b49456d24c9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 10 Dec 2014 15:46:18 +0000 Subject: Get the code actually working --- synapse/media/v1/base_resource.py | 31 +++++++++++++------- synapse/media/v1/download_resource.py | 6 ++-- synapse/media/v1/media_repository.py | 2 ++ synapse/media/v1/thumbnail_resource.py | 52 ++++++++++++++++------------------ synapse/media/v1/upload_resource.py | 8 ++---- synapse/storage/media_repository.py | 14 +++++---- 6 files changed, 61 insertions(+), 52 deletions(-) (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 1e57a1465f..81f2456343 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -37,6 +37,7 @@ class BaseMediaResource(Resource): def __init__(self, hs, filepaths): Resource.__init__(self) + self.auth = hs.get_auth() self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname @@ -120,6 +121,12 @@ class BaseMediaResource(Resource): send_cors=True ) + @staticmethod + def _makedirs(filepath): + dirname = os.path.dirname(filepath) + if not os.path.exists(dirname): + os.makedirs(dirname) + @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id): file_id = random_string(24) @@ -127,13 +134,13 @@ class BaseMediaResource(Resource): fname = self.filepaths.remote_media_filepath( server_name, file_id ) - os.makedirs(os.path.dirname(fname)) + self._makedirs(fname) try: with open(fname, "wb") as f: request_path = "/".join(( "/_matrix/media/v1/download", server_name, media_id, - )), + )) length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, ) @@ -147,7 +154,7 @@ class BaseMediaResource(Resource): time_now_ms=self.clock.time_msec(), upload_name=None, media_length=length, - file_id=file_id, + filesystem_id=file_id, ) except: os.remove(fname) @@ -158,7 +165,7 @@ class BaseMediaResource(Resource): "media_length": length, "upload_name": None, "created_ts": time_now_ms, - "file_id": file_id, + "filesystem_id": file_id, } yield self._generate_remote_thumbnails( @@ -215,7 +222,7 @@ class BaseMediaResource(Resource): if not requirements: return - input_path = self.filepaths.local_media_path(media_id) + input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -235,6 +242,7 @@ class BaseMediaResource(Resource): t_path = self.filepaths.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -250,6 +258,7 @@ class BaseMediaResource(Resource): t_path = self.filepaths.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -264,11 +273,11 @@ class BaseMediaResource(Resource): def _generate_remote_thumbnails(self, server_name, media_id, media_info): media_type = media_info["media_type"] file_id = media_info["filesystem_id"] - requirements = self._get_requirements(media_type) + requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - input_path = self.filepaths.remote_media_path(server_name, file_id) + input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -286,9 +295,9 @@ class BaseMediaResource(Resource): for t_width, t_height, t_type in scales: t_method = "scale" t_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, file_id, - media_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, @@ -303,9 +312,9 @@ class BaseMediaResource(Resource): continue t_method = "crop" t_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py index 31c6f25968..6de0932ba3 100644 --- a/synapse/media/v1/download_resource.py +++ b/synapse/media/v1/download_resource.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base_media_resource import BaseMediaResource +from .base_resource import BaseMediaResource from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -52,7 +52,7 @@ class DownloadResource(BaseMediaResource): media_type = media_info["media_type"] file_path = self.filepaths.local_media_filepath(media_id) - yield self.respond_with_file(request, media_type, file_path) + yield self._respond_with_file(request, media_type, file_path) @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id): @@ -72,4 +72,4 @@ class DownloadResource(BaseMediaResource): server_name, filesystem_id ) - yield self.respond_with_file(request, media_type, file_path) + yield self._respond_with_file(request, media_type, file_path) diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index e0a4cd01ee..2bd29d2288 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -15,6 +15,7 @@ from .upload_resource import UploadResource from .download_resource import DownloadResource +from .thumbnail_resource import ThumbnailResource from .filepath import MediaFilePaths from twisted.web.resource import Resource @@ -64,3 +65,4 @@ class MediaRepositoryResource(Resource): filepaths = MediaFilePaths(hs.config.media_store_path) self.putChild("upload", UploadResource(hs, filepaths)) self.putChild("download", DownloadResource(hs, filepaths)) + self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py index 331ba87e00..fd08c7ecd2 100644 --- a/synapse/media/v1/thumbnail_resource.py +++ b/synapse/media/v1/thumbnail_resource.py @@ -14,7 +14,7 @@ # limitations under the License. -from .base_media_resource import BaseMediaResource +from .base_resource import BaseMediaResource from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -59,26 +59,25 @@ class ThumbnailResource(BaseMediaResource): self._respond_404(request) return - thumbnail_infos = yield self.store.get_local_thumbnail(media_id) + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] file_path = self.filepaths.local_media_thumbnail( - media_id, thumbnail_width, thumbnail_height, thumbnail_type, - thumbnail_method, + media_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, thumbnail_type, file_path) + yield self._respond_with_file(request, t_type, file_path) else: yield self._respond_default_thumbnail( - self, request, media_info, width, height, method, m_type, + request, media_info, width, height, method, m_type, ) @defer.inlineCallbacks @@ -103,19 +102,19 @@ class ThumbnailResource(BaseMediaResource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] + file_id = thumbnail_info["filesystem_id"] file_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, thumbnail_width, thumbnail_height, - thumbnail_type, thumbnail_method, + server_name, file_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, thumbnail_type, file_path) + yield self._respond_with_file(request, t_type, file_path) else: yield self._respond_default_thumbnail( - self, request, media_info, width, height, method, m_type, + request, media_info, width, height, method, m_type, ) @defer.inlineCallbacks @@ -143,16 +142,15 @@ class ThumbnailResource(BaseMediaResource): width, height, "crop", m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] file_path = self.filepaths.default_thumbnail( - top_level_type, sub_type, thumbnail_width, thumbnail_height, - thumbnail_type, thumbnail_method, + top_level_type, sub_type, t_width, t_height, t_type, t_method, ) - yield self.respond_with_file(request, thumbnail_type, file_path) + yield self.respond_with_file(request, t_type, file_path) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): @@ -164,7 +162,7 @@ class ThumbnailResource(BaseMediaResource): for info in thumbnail_infos: t_w = info["thumbnail_width"] t_h = info["thumbnail_height"] - t_method = info["thumnail_method"] + t_method = info["thumbnail_method"] if t_method == "scale" or t_method == "crop": aspect_quality = abs(d_w * t_h - d_h * t_w) size_quality = abs((d_w - t_w) * (d_h - t_h)) @@ -180,7 +178,7 @@ class ThumbnailResource(BaseMediaResource): for info in thumbnail_infos: t_w = info["thumbnail_width"] t_h = info["thumbnail_height"] - t_method = info["thumnail_method"] + t_method = info["thumbnail_method"] if t_method == "scale" and (t_w >= d_w or t_h >= d_h): size_quality = abs((d_w - t_w) * (d_h - t_h)) type_quality = desired_type != info["thumbnail_type"] diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index a78cc3cff3..b2449ff03d 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -23,9 +23,7 @@ from synapse.api.errors import ( from twisted.web.server import NOT_DONE_YET from twisted.internet import defer -from .baseresource import BaseMediaResource - -import os +from .base_resource import BaseMediaResource import logging @@ -75,7 +73,7 @@ class UploadResource(BaseMediaResource): media_id = random_string(24) fname = self.filepaths.local_media_filepath(media_id) - os.makedirs(os.path.dirname(fname)) + self._makedirs(fname) # This shouldn't block for very long because the content will have # already been uploaded at this point. @@ -95,7 +93,7 @@ class UploadResource(BaseMediaResource): "media_length": content_length, } - yield self._generate_local_thumbnails(self, media_id, media_info) + yield self._generate_local_thumbnails(media_id, media_info) respond_with_json( request, 200, {"content_token": media_id}, send_cors=True diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index a848662716..18c068d3d9 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -19,6 +19,9 @@ from _base import SQLBaseStore class MediaRepositoryStore(SQLBaseStore): """Persistence for attachments and avatars""" + def get_default_thumbnails(self, top_level_type, sub_type): + return [] + def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: @@ -47,7 +50,7 @@ class MediaRepositoryStore(SQLBaseStore): def get_local_media_thumbnails(self, media_id): return self._simple_select_list( - "local_media_thumbnails", + "local_media_repository_thumbnails", {"media_id": media_id}, ( "thumbnail_width", "thumbnail_height", "thumbnail_method", @@ -59,7 +62,7 @@ class MediaRepositoryStore(SQLBaseStore): thumbnail_height, thumbnail_type, thumbnail_method, thumbnail_length): return self._simple_insert( - "local_media_thumbnails", + "local_media_repository_thumbnails", { "media_id": media_id, "thumbnail_width": thumbnail_width, @@ -100,11 +103,10 @@ class MediaRepositoryStore(SQLBaseStore): def get_remote_media_thumbnails(self, origin, media_id): return self._simple_select_list( "remote_media_cache_thumbnails", - {"origin": origin, "media_id": media_id}, + {"media_origin": origin, "media_id": media_id}, ( - "thumbnail_width", "thumbnail_height", "thumbnail_method" - "thumbnail_type", "thumbnail_length", - "filesystem_id" + "thumbnail_width", "thumbnail_height", "thumbnail_method", + "thumbnail_type", "thumbnail_length", "filesystem_id", ) ) -- cgit 1.4.1 From b5eb9124f70a0f5196f8508f24e4566e8a5c0e90 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 10:08:09 +0000 Subject: Make sure we pass a tuple to string '%' formatting --- synapse/media/v1/base_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 81f2456343..8c62ecd597 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -93,7 +93,7 @@ class BaseMediaResource(Resource): except: raise SynapseError( 400, - "Missing integer argument %r" % (arg_name), + "Missing integer argument %r" % (arg_name,), Codes.UNKNOWN, ) @@ -107,7 +107,7 @@ class BaseMediaResource(Resource): except: raise SynapseError( 400, - "Missing string argument %r" % (arg_name), + "Missing string argument %r" % (arg_name,), Codes.UNKNOWN, ) -- cgit 1.4.1 From d80d505b1f70eae128990ce1a9517e5c5edead73 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 14:19:32 +0000 Subject: Limit the size of images that are thumbnailed serverside. Limit the size of file that a server will download from a remote server --- synapse/api/errors.py | 1 + synapse/config/repository.py | 5 +++++ synapse/http/matrixfederationclient.py | 25 +++++++++++++++++++------ synapse/media/v1/base_resource.py | 18 ++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 581439ceb3..e250b9b211 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -34,6 +34,7 @@ class Codes(object): LIMIT_EXCEEDED = "M_LIMIT_EXCEEDED" CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED" CAPTCHA_INVALID = "M_CAPTCHA_INVALID" + TOO_LARGE = "M_TOO_LARGE" class CodeMessageException(Exception): diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 6eec930a03..f1b7b1b74e 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,7 @@ class ContentRepositoryConfig(Config): def __init__(self, args): super(ContentRepositoryConfig, self).__init__(args) self.max_upload_size = self.parse_size(args.max_upload_size) + self.max_image_pixels = self.parse_size(args.max_image_pixels) self.media_store_path = self.ensure_directory(args.media_store_path) def parse_size(self, string): @@ -41,3 +42,7 @@ class ContentRepositoryConfig(Config): db_group.add_argument( "--media-store-path", default=cls.default_path("media_store") ) + db_group.add_argument( + "--max-image-pixels", default="32M", + help="Maximum number of pixels that will be thumbnailed" + ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index f05269cdfb..8f4db59c75 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -26,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext from syutil.jsonutil import encode_canonical_json -from synapse.api.errors import CodeMessageException, SynapseError +from synapse.api.errors import CodeMessageException, SynapseError, Codes from syutil.crypto.jsonsign import sign_json @@ -289,7 +289,7 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_file(self, destination, path, output_stream, args={}, - retry_on_dns_fail=True): + retry_on_dns_fail=True, max_size=None): """GETs a file from a given homeserver Args: destination (str): The remote server to send the HTTP request to. @@ -325,7 +325,11 @@ class MatrixFederationHttpClient(object): headers = dict(response.headers.getAllRawHeaders()) - length = yield _readBodyToFile(response, output_stream) + try: + length = yield _readBodyToFile(response, output_stream, max_size) + except: + logger.exception("Failed to download body") + raise defer.returnValue((length, headers)) @@ -337,14 +341,23 @@ class MatrixFederationHttpClient(object): class _ReadBodyToFileProtocol(protocol.Protocol): - def __init__(self, stream, deferred): + def __init__(self, stream, deferred, max_size): self.stream = stream self.deferred = deferred self.length = 0 + self.max_size = max_size def dataReceived(self, data): self.stream.write(data) self.length += len(data) + if self.max_size is not None and self.length >= self.max_size: + self.deferred.errback(SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + )) + self.deferred = defer.Deferred() + self.transport.loseConnection() def connectionLost(self, reason): if reason.check(ResponseDone): @@ -353,9 +366,9 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.deferred.errback(reason) -def _readBodyToFile(response, stream): +def _readBodyToFile(response, stream, max_size): d = defer.Deferred() - response.deliverBody(_ReadBodyToFileProtocol(stream, d)) + response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) return d diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 8c62ecd597..77b05c6548 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -43,6 +43,7 @@ class BaseMediaResource(Resource): self.server_name = hs.hostname self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size + self.max_image_pixels = hs.config.max_image_pixels self.filepaths = filepaths @staticmethod @@ -143,6 +144,7 @@ class BaseMediaResource(Resource): )) length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, + max_size=self.max_upload_size, ) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -226,6 +228,14 @@ class BaseMediaResource(Resource): thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r" + m_width, m_height, self.max_image_pixels + ) + return + scales = set() crops = set() for r_width, r_height, r_method, r_type in requirements: @@ -281,6 +291,14 @@ class BaseMediaResource(Resource): thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r" + m_width, m_height, self.max_image_pixels + ) + return + scales = set() crops = set() for r_width, r_height, r_method, r_type in requirements: -- cgit 1.4.1 From 03d9024cbcb4957b223d1a36e7ae2ad668b1859d Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 16:48:11 +0000 Subject: Allow only one download for a given image at a time, so that we don't end up downloading the same image twice if two clients request a remote image at the same time --- synapse/media/v1/base_resource.py | 27 +++++++++++++++++++++++++-- synapse/media/v1/download_resource.py | 9 +-------- synapse/media/v1/thumbnail_resource.py | 13 +++---------- 3 files changed, 29 insertions(+), 20 deletions(-) (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 77b05c6548..14735ff375 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -45,6 +45,7 @@ class BaseMediaResource(Resource): self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels self.filepaths = filepaths + self.downloads = {} @staticmethod def catch_errors(request_handler): @@ -128,6 +129,28 @@ class BaseMediaResource(Resource): if not os.path.exists(dirname): os.makedirs(dirname) + def _get_remote_media(self, server_name, media_id): + key = (server_name, media_id) + download = self.downloads.get(key) + if download is None: + download = self._get_remote_media_impl(server_name, media_id) + self.downloads[key] = download + @download.addBoth + def callback(media_info): + del self.downloads[key] + return download + + @defer.inlineCallbacks + def _get_remote_media_impl(self, server_name, media_id): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + if not media_info: + media_info = yield self._download_remote_file( + server_name, media_id + ) + defer.returnValue(media_info) + @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id): file_id = random_string(24) @@ -231,7 +254,7 @@ class BaseMediaResource(Resource): if m_width * m_height >= self.max_image_pixels: logger.info( - "Image too large to thumbnail %r x %r > %r" + "Image too large to thumbnail %r x %r > %r", m_width, m_height, self.max_image_pixels ) return @@ -294,7 +317,7 @@ class BaseMediaResource(Resource): if m_width * m_height >= self.max_image_pixels: logger.info( - "Image too large to thumbnail %r x %r > %r" + "Image too large to thumbnail %r x %r > %r", m_width, m_height, self.max_image_pixels ) return diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py index 6de0932ba3..f3a6804e05 100644 --- a/synapse/media/v1/download_resource.py +++ b/synapse/media/v1/download_resource.py @@ -56,14 +56,7 @@ class DownloadResource(BaseMediaResource): @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id): - media_info = yield self.store.get_cached_remote_media( - server_name, media_id - ) - - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) + media_info = yield self._get_remote_media(server_name, media_id) media_type = media_info["media_type"] filesystem_id = media_info["filesystem_id"] diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py index fd08c7ecd2..e19620d456 100644 --- a/synapse/media/v1/thumbnail_resource.py +++ b/synapse/media/v1/thumbnail_resource.py @@ -83,16 +83,9 @@ class ThumbnailResource(BaseMediaResource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): - media_info = yield self.store.get_cached_remote_media( - server_name, media_id - ) - - if not media_info: - # TODO: Don't download the whole remote file - # We should proxy the thumbnail from the remote server instead. - media_info = yield self._download_remote_file( - server_name, media_id - ) + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead. + media_info = yield self._get_remote_media(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, -- cgit 1.4.1 From 3c7857e49b8dcad723d52174aba77c47453c0298 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 16 Dec 2014 15:24:03 +0000 Subject: clean up coding style a bit --- synapse/events/__init__.py | 22 +++++++++++----------- synapse/handlers/_base.py | 4 ++-- synapse/handlers/directory.py | 1 - synapse/handlers/federation.py | 11 ++++------- synapse/handlers/typing.py | 4 ++-- synapse/media/v1/base_resource.py | 1 + synapse/rest/room.py | 8 +++++--- synapse/rest/transactions.py | 1 + 8 files changed, 26 insertions(+), 26 deletions(-) (limited to 'synapse/media/v1/base_resource.py') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 984f14fce4..34b1b944ab 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -30,20 +30,20 @@ class _EventInternalMetadata(object): def _event_dict_property(key): - def getter(self): - return self._event_dict[key] + def getter(self): + return self._event_dict[key] - def setter(self, v): - self._event_dict[key] = v + def setter(self, v): + self._event_dict[key] = v - def delete(self): - del self._event_dict[key] + def delete(self): + del self._event_dict[key] - return property( - getter, - setter, - delete, - ) + return property( + getter, + setter, + delete, + ) class EventBase(object): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index af8eb5f0f5..567769253e 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -61,8 +61,6 @@ class BaseHandler(object): def _create_new_client_event(self, builder): yield run_on_reactor() - context = EventContext() - latest_ret = yield self.store.get_latest_events_in_room( builder.room_id, ) @@ -78,6 +76,8 @@ class BaseHandler(object): builder.depth = depth state_handler = self.state_handler + + context = EventContext() ret = yield state_handler.annotate_context_with_state( builder, context, diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 404baea796..66d3b533d9 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -156,4 +156,3 @@ class DirectoryHandler(BaseHandler): "sender": user_id, "content": {"aliases": aliases}, }) - diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 16a104c0e2..d80a54bdea 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -202,7 +202,7 @@ class FederationHandler(BaseHandler): e.msg, affected=event.event_id, ) - + # if we're receiving valid events from an origin, # it's probably a good idea to mark it as not in retry-state # for sending (although this is a bit of a leap) @@ -263,9 +263,7 @@ class FederationHandler(BaseHandler): context = EventContext() yield self.state_handler.annotate_context_with_state(event, context) - events.append( - (event, context) - ) + events.append((event, context)) yield self.store.persist_event( event, @@ -547,8 +545,6 @@ class FederationHandler(BaseHandler): """ event = pdu - context = EventContext() - event.internal_metadata.outlier = True event.signatures.update( @@ -559,6 +555,7 @@ class FederationHandler(BaseHandler): ) ) + context = EventContext() yield self.state_handler.annotate_context_with_state(event, context) yield self.store.persist_event( @@ -685,13 +682,13 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks def _handle_new_event(self, event, state=None, backfilled=False, current_state=None, fetch_missing=True): - context = EventContext() logger.debug( "_handle_new_event: Before annotate: %s, sigs: %s", event.event_id, event.signatures, ) + context = EventContext() yield self.state_handler.annotate_context_with_state( event, context, diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 77d66f66ff..7626b07280 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -45,8 +45,8 @@ class TypingNotificationHandler(BaseHandler): hs.get_distributor().observe("user_left_room", self.user_left_room) - self._member_typing_until = {} # clock time we expect to stop - self._member_typing_timer = {} # deferreds to manage theabove + self._member_typing_until = {} # clock time we expect to stop + self._member_typing_timer = {} # deferreds to manage theabove # map room IDs to serial numbers self._room_serials = {} diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 14735ff375..2f5440ab64 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -135,6 +135,7 @@ class BaseMediaResource(Resource): if download is None: download = self._get_remote_media_impl(server_name, media_id) self.downloads[key] = download + @download.addBoth def callback(media_info): del self.downloads[key] diff --git a/synapse/rest/room.py b/synapse/rest/room.py index 0e2d5fbaae..005a9f6f44 100644 --- a/synapse/rest/room.py +++ b/synapse/rest/room.py @@ -310,8 +310,8 @@ class RoomMessageListRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request, room_id): user = yield self.auth.get_user_by_req(request) - pagination_config = PaginationConfig.from_request(request, - default_limit=10, + pagination_config = PaginationConfig.from_request( + request, default_limit=10, ) with_feedback = "feedback" in request.args handler = self.handlers.message_handler @@ -466,7 +466,9 @@ class RoomRedactEventRestServlet(RestServlet): class RoomTypingRestServlet(RestServlet): - PATTERN = client_path_pattern("/rooms/(?P[^/]*)/typing/(?P[^/]*)$") + PATTERN = client_path_pattern( + "/rooms/(?P[^/]*)/typing/(?P[^/]*)$" + ) @defer.inlineCallbacks def on_PUT(self, request, room_id, user_id): diff --git a/synapse/rest/transactions.py b/synapse/rest/transactions.py index 8c41ab4edb..31377bd41d 100644 --- a/synapse/rest/transactions.py +++ b/synapse/rest/transactions.py @@ -19,6 +19,7 @@ import logging logger = logging.getLogger(__name__) + # FIXME: elsewhere we use FooStore to indicate something in the storage layer... class HttpTransactionStore(object): -- cgit 1.4.1