From 279c48c8b442ec726fb5088e56ce9c1d2ed4bfb5 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 2 Dec 2014 15:09:51 +0000 Subject: Write the upload portion of version 1 of the media repository --- synapse/http/content_repository.py | 212 ---------------------------- synapse/http/server.py | 21 ++- synapse/media/v0/content_repository.py | 212 ++++++++++++++++++++++++++++ synapse/media/v1/filepath.py | 53 +++++++ synapse/media/v1/media_repository.py | 72 ++++++++++ synapse/media/v1/upload_resource.py | 110 +++++++++++++++ synapse/storage/media_repository.py | 116 +++++++++++++++ synapse/storage/schema/media_repository.sql | 66 +++++++++ 8 files changed, 643 insertions(+), 219 deletions(-) delete mode 100644 synapse/http/content_repository.py create mode 100644 synapse/media/v0/content_repository.py create mode 100644 synapse/media/v1/filepath.py create mode 100644 synapse/media/v1/media_repository.py create mode 100644 synapse/media/v1/upload_resource.py create mode 100644 synapse/storage/media_repository.py create mode 100644 synapse/storage/schema/media_repository.sql (limited to 'synapse') diff --git a/synapse/http/content_repository.py b/synapse/http/content_repository.py deleted file mode 100644 index 64ecb5346e..0000000000 --- a/synapse/http/content_repository.py +++ /dev/null @@ -1,212 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .server import respond_with_json_bytes - -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_exception, SynapseError, CodeMessageException, Codes, cs_error -) - -from twisted.protocols.basic import FileSender -from twisted.web import server, resource -from twisted.internet import defer - -import base64 -import json -import logging -import os -import re - -logger = logging.getLogger(__name__) - - -class ContentRepoResource(resource.Resource): - """Provides file uploading and downloading. - - Uploads are POSTed to wherever this Resource is linked to. This resource - returns a "content token" which can be used to GET this content again. The - token is typically a path, but it may not be. Tokens can expire, be - one-time uses, etc. - - In this case, the token is a path to the file and contains 3 interesting - sections: - - User ID base64d (for namespacing content to each user) - - random 24 char string - - Content type base64d (so we can return it when clients GET it) - - """ - isLeaf = True - - def __init__(self, hs, directory, auth, external_addr): - resource.Resource.__init__(self) - self.hs = hs - self.directory = directory - self.auth = auth - self.external_addr = external_addr.rstrip('/') - self.max_upload_size = hs.config.max_upload_size - - if not os.path.isdir(self.directory): - os.mkdir(self.directory) - logger.info("ContentRepoResource : Created %s directory.", - self.directory) - - @defer.inlineCallbacks - def map_request_to_name(self, request): - # auth the user - auth_user = yield self.auth.get_user_by_req(request) - - # namespace all file uploads on the user - prefix = base64.urlsafe_b64encode( - auth_user.to_string() - ).replace('=', '') - - # use a random string for the main portion - main_part = random_string(24) - - # suffix with a file extension if we can make one. This is nice to - # provide a hint to clients on the file information. We will also reuse - # this info to spit back the content type to the client. - suffix = "" - if request.requestHeaders.hasHeader("Content-Type"): - content_type = request.requestHeaders.getRawHeaders( - "Content-Type")[0] - suffix = "." + base64.urlsafe_b64encode(content_type) - if (content_type.split("/")[0].lower() in - ["image", "video", "audio"]): - file_ext = content_type.split("/")[-1] - # be a little paranoid and only allow a-z - file_ext = re.sub("[^a-z]", "", file_ext) - suffix += "." + file_ext - - file_name = prefix + main_part + suffix - file_path = os.path.join(self.directory, file_name) - logger.info("User %s is uploading a file to path %s", - auth_user.to_string(), - file_path) - - # keep trying to make a non-clashing file, with a sensible max attempts - attempts = 0 - while os.path.exists(file_path): - main_part = random_string(24) - file_name = prefix + main_part + suffix - file_path = os.path.join(self.directory, file_name) - attempts += 1 - if attempts > 25: # really? Really? - raise SynapseError(500, "Unable to create file.") - - defer.returnValue(file_path) - - def render_GET(self, request): - # no auth here on purpose, to allow anyone to view, even across home - # servers. - - # TODO: A little crude here, we could do this better. - filename = request.path.split('/')[-1] - # be paranoid - filename = re.sub("[^0-9A-z.-_]", "", filename) - - file_path = self.directory + "/" + filename - - logger.debug("Searching for %s", file_path) - - if os.path.isfile(file_path): - # filename has the content type - base64_contentype = filename.split(".")[1] - content_type = base64.urlsafe_b64decode(base64_contentype) - logger.info("Sending file %s", file_path) - f = open(file_path, 'rb') - request.setHeader('Content-Type', content_type) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our matrix - # clients are smart enough to be happy with Cache-Control (right?) - request.setHeader( - "Cache-Control", "public,max-age=86400,s-maxage=86400" - ) - - d = FileSender().beginFileTransfer(f, request) - - # after the file has been sent, clean up and finish the request - def cbFinished(ignored): - f.close() - request.finish() - d.addCallback(cbFinished) - else: - respond_with_json_bytes( - request, - 404, - json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)), - send_cors=True) - - return server.NOT_DONE_YET - - def render_POST(self, request): - self._async_render(request) - return server.NOT_DONE_YET - - def render_OPTIONS(self, request): - respond_with_json_bytes(request, 200, {}, send_cors=True) - return server.NOT_DONE_YET - - @defer.inlineCallbacks - def _async_render(self, request): - try: - # TODO: The checks here are a bit late. The content will have - # already been uploaded to a tmp file at this point - content_length = request.getHeader("Content-Length") - if content_length is None: - raise SynapseError( - msg="Request must specify a Content-Length", code=400 - ) - if int(content_length) > self.max_upload_size: - raise SynapseError( - msg="Upload request body is too large", - code=413, - ) - - fname = yield self.map_request_to_name(request) - - # TODO I have a suspicious feeling this is just going to block - with open(fname, "wb") as f: - f.write(request.content.read()) - - # FIXME (erikj): These should use constants. - file_name = os.path.basename(fname) - # FIXME: we can't assume what the repo's public mounted path is - # ...plus self-signed SSL won't work to remote clients anyway - # ...and we can't assume that it's SSL anyway, as we might want to - # serve it via the non-SSL listener... - url = "%s/_matrix/content/%s" % ( - self.external_addr, file_name - ) - - respond_with_json_bytes(request, 200, - json.dumps({"content_token": url}), - send_cors=True) - - except CodeMessageException as e: - logger.exception(e) - respond_with_json_bytes(request, e.code, - json.dumps(cs_exception(e))) - except Exception as e: - logger.error("Failed to store file: %s" % e) - respond_with_json_bytes( - request, - 500, - json.dumps({"error": "Internal server error"}), - send_cors=True) diff --git a/synapse/http/server.py b/synapse/http/server.py index 8024ff5bde..046e230361 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -166,14 +166,10 @@ class JsonResource(HttpServer, resource.Resource): request) return - if not self._request_user_agent_is_curl(request): - json_bytes = encode_canonical_json(response_json_object) - else: - json_bytes = encode_pretty_printed_json(response_json_object) - # TODO: Only enable CORS for the requests that need it. - respond_with_json_bytes(request, code, json_bytes, send_cors=True, - response_code_message=response_code_message) + respond_with_json(request, code, response_json_object, send_cors=True, + response_code_message=response_code_message, + pretty_print=self._request_user_agent_is_curl) @staticmethod def _request_user_agent_is_curl(request): @@ -202,6 +198,17 @@ class RootRedirect(resource.Resource): return resource.Resource.getChild(self, name, request) +def respond_with_json(request, code, json_object, send_cors=False, + response_code_message=None, pretty_print=False): + if not pretty_print: + json_bytes = encode_pretty_printed_json(response_json_object) + else: + json_bytes = encode_canonical_json(response_json_object) + + return respond_with_json_bytes(request, code, json_bytes, send_cors, + response_code_message=response_code_message) + + def respond_with_json_bytes(request, code, json_bytes, send_cors=False, response_code_message=None): """Sends encoded JSON in response to the given request. diff --git a/synapse/media/v0/content_repository.py b/synapse/media/v0/content_repository.py new file mode 100644 index 0000000000..64ecb5346e --- /dev/null +++ b/synapse/media/v0/content_repository.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .server import respond_with_json_bytes + +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, SynapseError, CodeMessageException, Codes, cs_error +) + +from twisted.protocols.basic import FileSender +from twisted.web import server, resource +from twisted.internet import defer + +import base64 +import json +import logging +import os +import re + +logger = logging.getLogger(__name__) + + +class ContentRepoResource(resource.Resource): + """Provides file uploading and downloading. + + Uploads are POSTed to wherever this Resource is linked to. This resource + returns a "content token" which can be used to GET this content again. The + token is typically a path, but it may not be. Tokens can expire, be + one-time uses, etc. + + In this case, the token is a path to the file and contains 3 interesting + sections: + - User ID base64d (for namespacing content to each user) + - random 24 char string + - Content type base64d (so we can return it when clients GET it) + + """ + isLeaf = True + + def __init__(self, hs, directory, auth, external_addr): + resource.Resource.__init__(self) + self.hs = hs + self.directory = directory + self.auth = auth + self.external_addr = external_addr.rstrip('/') + self.max_upload_size = hs.config.max_upload_size + + if not os.path.isdir(self.directory): + os.mkdir(self.directory) + logger.info("ContentRepoResource : Created %s directory.", + self.directory) + + @defer.inlineCallbacks + def map_request_to_name(self, request): + # auth the user + auth_user = yield self.auth.get_user_by_req(request) + + # namespace all file uploads on the user + prefix = base64.urlsafe_b64encode( + auth_user.to_string() + ).replace('=', '') + + # use a random string for the main portion + main_part = random_string(24) + + # suffix with a file extension if we can make one. This is nice to + # provide a hint to clients on the file information. We will also reuse + # this info to spit back the content type to the client. + suffix = "" + if request.requestHeaders.hasHeader("Content-Type"): + content_type = request.requestHeaders.getRawHeaders( + "Content-Type")[0] + suffix = "." + base64.urlsafe_b64encode(content_type) + if (content_type.split("/")[0].lower() in + ["image", "video", "audio"]): + file_ext = content_type.split("/")[-1] + # be a little paranoid and only allow a-z + file_ext = re.sub("[^a-z]", "", file_ext) + suffix += "." + file_ext + + file_name = prefix + main_part + suffix + file_path = os.path.join(self.directory, file_name) + logger.info("User %s is uploading a file to path %s", + auth_user.to_string(), + file_path) + + # keep trying to make a non-clashing file, with a sensible max attempts + attempts = 0 + while os.path.exists(file_path): + main_part = random_string(24) + file_name = prefix + main_part + suffix + file_path = os.path.join(self.directory, file_name) + attempts += 1 + if attempts > 25: # really? Really? + raise SynapseError(500, "Unable to create file.") + + defer.returnValue(file_path) + + def render_GET(self, request): + # no auth here on purpose, to allow anyone to view, even across home + # servers. + + # TODO: A little crude here, we could do this better. + filename = request.path.split('/')[-1] + # be paranoid + filename = re.sub("[^0-9A-z.-_]", "", filename) + + file_path = self.directory + "/" + filename + + logger.debug("Searching for %s", file_path) + + if os.path.isfile(file_path): + # filename has the content type + base64_contentype = filename.split(".")[1] + content_type = base64.urlsafe_b64decode(base64_contentype) + logger.info("Sending file %s", file_path) + f = open(file_path, 'rb') + request.setHeader('Content-Type', content_type) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our matrix + # clients are smart enough to be happy with Cache-Control (right?) + request.setHeader( + "Cache-Control", "public,max-age=86400,s-maxage=86400" + ) + + d = FileSender().beginFileTransfer(f, request) + + # after the file has been sent, clean up and finish the request + def cbFinished(ignored): + f.close() + request.finish() + d.addCallback(cbFinished) + else: + respond_with_json_bytes( + request, + 404, + json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)), + send_cors=True) + + return server.NOT_DONE_YET + + def render_POST(self, request): + self._async_render(request) + return server.NOT_DONE_YET + + def render_OPTIONS(self, request): + respond_with_json_bytes(request, 200, {}, send_cors=True) + return server.NOT_DONE_YET + + @defer.inlineCallbacks + def _async_render(self, request): + try: + # TODO: The checks here are a bit late. The content will have + # already been uploaded to a tmp file at this point + content_length = request.getHeader("Content-Length") + if content_length is None: + raise SynapseError( + msg="Request must specify a Content-Length", code=400 + ) + if int(content_length) > self.max_upload_size: + raise SynapseError( + msg="Upload request body is too large", + code=413, + ) + + fname = yield self.map_request_to_name(request) + + # TODO I have a suspicious feeling this is just going to block + with open(fname, "wb") as f: + f.write(request.content.read()) + + # FIXME (erikj): These should use constants. + file_name = os.path.basename(fname) + # FIXME: we can't assume what the repo's public mounted path is + # ...plus self-signed SSL won't work to remote clients anyway + # ...and we can't assume that it's SSL anyway, as we might want to + # serve it via the non-SSL listener... + url = "%s/_matrix/content/%s" % ( + self.external_addr, file_name + ) + + respond_with_json_bytes(request, 200, + json.dumps({"content_token": url}), + send_cors=True) + + except CodeMessageException as e: + logger.exception(e) + respond_with_json_bytes(request, e.code, + json.dumps(cs_exception(e))) + except Exception as e: + logger.error("Failed to store file: %s" % e) + respond_with_json_bytes( + request, + 500, + json.dumps({"error": "Internal server error"}), + send_cors=True) diff --git a/synapse/media/v1/filepath.py b/synapse/media/v1/filepath.py new file mode 100644 index 0000000000..d23564e03e --- /dev/null +++ b/synapse/media/v1/filepath.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + +class MediaFilePaths(object): + + def __init__(self, base_path): + self.base_path = base_path + + def local_media_filepath(self, media_id): + return os.path.join( + self.base_path, "local", "content", + media_id[0:2], media_id[2:4], media_id[4:] + ) + + def local_media_thumbnail(self, media_id, width, height, content_type): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) + return os.path.join( + self.base_path, "local", "thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + file_name + ) + + def remote_media_filepath(self, server_name, file_id): + return os.path.join( + self.base_path, "remote", "content", server_name, + file_id[0:2], file_id[2:4], file_id[4:] + ) + + def remote_media_thumbnail(self, server_name, file_id, width, height, + content_type): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) + return os.path.join( + self.base_path, "remote", "content", server_name, + file_id[0:2], file_id[2:4], file_id[4:], + file_name + ) diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py new file mode 100644 index 0000000000..9c36a8e933 --- /dev/null +++ b/synapse/media/v1/media_repository.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import respond_with_json_bytes + +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, SynapseError, CodeMessageException, Codes, cs_error +) + +from twisted.protocols.basic import FileSender +from twisted.web import server, resource +from twisted.internet import defer + +import base64 +import json +import logging +import os +import re + +logger = logging.getLogger(__name__) + + +class MediaRepository(): + """Profiles file uploading and downloading. + + Uploads are POSTed to a resource which returns a token which is used to GET + the download:: + + => POST /_matrix/media/v1/upload HTTP/1.1 + Content-Type: + + + + <= HTTP/1.1 200 OK + Content-Type: application/json + + { "token": } + + => GET /_matrix/media/v1/download/ HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: + Content-Disposition: attachment;filename= + + + + Clients can get thumbnails by supplying a desired width and height:: + + => GET /_matrix/media/v1/thumbnail/?width=&height= HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: image/jpeg or image/png + + + """ + + def __init__(self, hs): + filepaths = MediaFilePaths + diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py new file mode 100644 index 0000000000..3721a0173d --- /dev/null +++ b/synapse/media/v1/upload_resource.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import respond_with_json + +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, SynapseError, CodeMessageException +) + +from twisted.web import server, resource +from twisted.internet import defer + +import logging + +logger = logging.getLogger(__name__) + +class UploadResource(resource.Resource): + + def __init__(self, hs, filepaths): + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.max_upload_size = hs.config.max_upload_size() + self.filepaths = filepaths + + def render_POST(self, request): + self._async_render_POST(request) + return server.NOT_DONE_YET + + def render_OPTIONS(self, request): + respond_with_json(request, 200, {}, send_cors=True) + return server.NOT_DONE_YET + + @defer.inlineCallbacks + def _async_render_POST(self, request): + + auth_user = yield self.auth.get_user_by_req(request) + + try: + # TODO: The checks here are a bit late. The content will have + # already been uploaded to a tmp file at this point + content_length = request.getHeader("Content-Length") + if content_length is None: + raise SynapseError( + msg="Request must specify a Content-Length", code=400 + ) + if int(content_length) > self.max_upload_size: + raise SynapseError( + msg="Upload request body is too large", + code=413, + ) + + headers = request.requestHeaders() + + if headers.hasHeader("Content-Type"): + media_type = headers.getRawHeaders("Content-Type")[0] + else: + raise SynapseError( + msg="Upload request missing 'Content-Type'", + code=400, + ) + + #if headers.hasHeader("Content-Disposition"): + # disposition = headers.getRawHeaders("Content-Disposition")[0] + # TODO(markjh): parse content-dispostion + + media_id = random_string(24) + + fname = self.filepaths.local_media_file_path(media_id) + + # This shouldn't block for very long because the content will have + # already been uploaded at this point. + with open(fname, "wb") as f: + f.write(request.content.read()) + + yield self.store.store_local_media( + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=content_length, + user_id=auth_user, + ) + + respond_with_json( + request, 200, {"content_token": media_id}, send_cors=True + ) + except CodeMessageException as e: + logger.exception(e) + respond_with_json(request, e.code, cs_exception(e), send_cors=True) + except: + logger.exception("Failed to store file") + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py new file mode 100644 index 0000000000..73ceba3f2c --- /dev/null +++ b/synapse/storage/media_repository.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from _base import SQLBaseStore + + +class MediaRepositoryStore(SQLBaseStore): + """Persistence for attachments and avatars""" + + def get_local_media(self, media_id): + return self._simple_select_one( + "local_media_repository", + {"media_id": media_id}, + ("media_type", "media_length", "upload_name", "created_ts"), + ) + + def store_local_media(self, media_id, media_type, time_now_ms, upload_name, + media_length, user_id): + return self._simple_insert( + "local_media_repository", + { + "media_id": media_id, + "media_type": media_type, + "created_ts": time_now_ms, + "upload_name": upload_name, + "media_length": media_length, + "user_id": user_id, + } + ) + + def get_local_media_thumbnails(self, media_id): + return self._simple_select_list( + "local_media_thumbnails", + {"media_id": media_id}, + ( + "thumbnail_width", "thumbnail_height", + "thumbnail_type", "thumbnail_length", + ) + ) + + def store_local_thumbnail(self, media_id, thumbnail_width, + thumbnail_height, thumbnail_type, + thumbnail_length): + return self._simple_insert( + "local_media_thumbnails", + { + "media_id": media_id, + "thumbnail_width": thumbnail_width, + "thumbnail_height": thumbnail_height, + "thumbnail_type": thumbnail_type, + "thumbnail_length": thumbnail_length, + } + ) + + def get_cached_remote_media(self, origin, media_id): + return self._simple_select_one( + "remote_media_cache", + {"media_origin": origin, "media_id": media_id}, + ("media_type", "media_length", "upload_name", "created_ts"), + ) + + def store_cached_remote_media(self, origin, media_id, media_type, + media_length, time_now_ms, upload_name, + filesytem_id): + return self._simple_insert( + "remote_media_cache", + { + "media_origin": origin, + "media_id": media_id, + "media_type": media_type, + "media_length": media_length, + "created_ts": time_now_ms, + "upload_name": upload_name, + "filesystem_id": filesystem_id, + } + ) + + def get_remote_media_thumbnails(self, origin, media_id): + return self._simple_select_list( + "remote_media_cache_thumbnails", + {"origin": origin, "media_id": media_id}, + ( + "thumbnail_width", "thumbnail_height", + "thumbnail_type", "thumbnail_length", + "filesystem_id" + ) + ) + + + def store_remote_media_thumbnail(self, origin, media_id, thumbnail_width, + thumbnail_height, thumbnail_type, + thumbnail_length, filesystem_id): + return self._simple_insert( + "remote_media_cache_thumbnails", + { + "media_origin": origin, + "media_id": media_id, + "thumbnail_width": thumbnail_width, + "thumbnail_height": thumbnail_height, + "thumbnail_type": thumbnail_type, + "thumbnail_length": thumbnail_length, + "filesystem_id": filesystem_id, + } + ) diff --git a/synapse/storage/schema/media_repository.sql b/synapse/storage/schema/media_repository.sql new file mode 100644 index 0000000000..768752296f --- /dev/null +++ b/synapse/storage/schema/media_repository.sql @@ -0,0 +1,66 @@ +/* Copyright 2014 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE IF NOT EXISTS local_media_repository ( + media_id TEXT, -- The id used to refer to the media. + media_type TEXT, -- The MIME-type of the media. + media_length INTEGER, -- Length of the media in bytes. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + user_id TEXT, -- The user who uploaded the file. + CONSTRAINT uniqueness UNIQUE (media_id) +); + +CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails ( + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + CONSTRAINT uniqueness UNIQUE ( + media_id, thumbnail_width, thumbnail_height, thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS local_media_repository_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); + +CREATE TABLE IF NOT EXISTS remote_media_cache ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media on that server. + media_type TEXT, -- The MIME-type of the media. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + media_length INTEGER, -- Length of the media in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE (media_origin, media_id) +); + +CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE ( + media_origin, media_id, thumbnail_width, thumbnail_height, + thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); -- cgit 1.4.1 From 5da65085d106e98cf7b762836cb300d01226bf92 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 2 Dec 2014 19:51:47 +0000 Subject: Get uploads working with new media repo --- synapse/api/urls.py | 1 + synapse/app/homeserver.py | 9 +++++++-- synapse/config/_base.py | 14 ++++++++++++++ synapse/config/repository.py | 4 ++++ synapse/http/server.py | 4 ++-- synapse/media/__init__.py | 0 synapse/media/v0/__init__.py | 0 synapse/media/v0/content_repository.py | 2 +- synapse/media/v1/__init__.py | 0 synapse/media/v1/media_repository.py | 23 +++++++---------------- synapse/media/v1/upload_resource.py | 14 ++++++++------ synapse/server.py | 1 + synapse/storage/__init__.py | 6 +++++- synapse/storage/media_repository.py | 7 ++++++- 14 files changed, 56 insertions(+), 29 deletions(-) create mode 100644 synapse/media/__init__.py create mode 100644 synapse/media/v0/__init__.py create mode 100644 synapse/media/v1/__init__.py (limited to 'synapse') diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 6dc19305b7..d7625127f8 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -20,3 +20,4 @@ FEDERATION_PREFIX = "/_matrix/federation/v1" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" SERVER_KEY_PREFIX = "/_matrix/key/v1" +MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 855fe8e170..a6e29c0860 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -24,12 +24,13 @@ from twisted.web.resource import Resource from twisted.web.static import File from twisted.web.server import Site from synapse.http.server import JsonResource, RootRedirect -from synapse.http.content_repository import ContentRepoResource +from synapse.media.v0.content_repository import ContentRepoResource +from synapse.media.v1.media_repository import MediaRepositoryResource from synapse.http.server_key_resource import LocalKey from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.api.urls import ( CLIENT_PREFIX, FEDERATION_PREFIX, WEB_CLIENT_PREFIX, CONTENT_REPO_PREFIX, - SERVER_KEY_PREFIX, + SERVER_KEY_PREFIX, MEDIA_PREFIX ) from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory @@ -69,6 +70,9 @@ class SynapseHomeServer(HomeServer): self, self.upload_dir, self.auth, self.content_addr ) + def build_resource_for_media_repository(self): + return MediaRepositoryResource(self) + def build_resource_for_server_key(self): return LocalKey(self) @@ -99,6 +103,7 @@ class SynapseHomeServer(HomeServer): (FEDERATION_PREFIX, self.get_resource_for_federation()), (CONTENT_REPO_PREFIX, self.get_resource_for_content_repo()), (SERVER_KEY_PREFIX, self.get_resource_for_server_key()), + (MEDIA_PREFIX, self.get_resource_for_media_repository()), ] if web_client: logger.info("Adding the web client.") diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 6870af10e8..1426436dcb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -50,12 +50,26 @@ class Config(object): ) return cls.abspath(file_path) + @staticmethod + def ensure_directory(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + if not os.path.isdir(dir_path): + raise ConfigError( + "%s is not a directory" % (dir_path,) + ) + return dir_path + @classmethod def read_file(cls, file_path, config_name): cls.check_file(file_path, config_name) with open(file_path) as file_stream: return file_stream.read() + @staticmethod + def default_path(name): + return os.path.abspath(os.path.join(os.path.curdir, name)) + @staticmethod def read_config_file(file_path): with open(file_path) as file_stream: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 743bc26474..6eec930a03 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,7 @@ class ContentRepositoryConfig(Config): def __init__(self, args): super(ContentRepositoryConfig, self).__init__(args) self.max_upload_size = self.parse_size(args.max_upload_size) + self.media_store_path = self.ensure_directory(args.media_store_path) def parse_size(self, string): sizes = {"K": 1024, "M": 1024 * 1024} @@ -37,3 +38,6 @@ class ContentRepositoryConfig(Config): db_group.add_argument( "--max-upload-size", default="1M" ) + db_group.add_argument( + "--media-store-path", default=cls.default_path("media_store") + ) diff --git a/synapse/http/server.py b/synapse/http/server.py index 046e230361..02277c4998 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -201,9 +201,9 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False): if not pretty_print: - json_bytes = encode_pretty_printed_json(response_json_object) + json_bytes = encode_pretty_printed_json(json_object) else: - json_bytes = encode_canonical_json(response_json_object) + json_bytes = encode_canonical_json(json_object) return respond_with_json_bytes(request, code, json_bytes, send_cors, response_code_message=response_code_message) diff --git a/synapse/media/__init__.py b/synapse/media/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v0/__init__.py b/synapse/media/v0/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v0/content_repository.py b/synapse/media/v0/content_repository.py index 64ecb5346e..ce5d3d153e 100644 --- a/synapse/media/v0/content_repository.py +++ b/synapse/media/v0/content_repository.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .server import respond_with_json_bytes +from synapse.http.server import respond_with_json_bytes from synapse.util.stringutils import random_string from synapse.api.errors import ( diff --git a/synapse/media/v1/__init__.py b/synapse/media/v1/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index 9c36a8e933..0f4eeef278 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -13,27 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import respond_with_json_bytes +from .upload_resource import UploadResource +from .filepath import MediaFilePaths -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_exception, SynapseError, CodeMessageException, Codes, cs_error -) +from twisted.web.resource import Resource -from twisted.protocols.basic import FileSender -from twisted.web import server, resource -from twisted.internet import defer - -import base64 -import json import logging -import os -import re logger = logging.getLogger(__name__) -class MediaRepository(): +class MediaRepositoryResource(Resource): """Profiles file uploading and downloading. Uploads are POSTed to a resource which returns a token which is used to GET @@ -68,5 +58,6 @@ class MediaRepository(): """ def __init__(self, hs): - filepaths = MediaFilePaths - + Resource.__init__(self) + filepaths = MediaFilePaths(hs.config.media_store_path) + self.putChild("upload", UploadResource(hs, filepaths)) diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index 3721a0173d..d9d7825b2b 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -23,6 +23,8 @@ from synapse.api.errors import ( from twisted.web import server, resource from twisted.internet import defer +import os + import logging logger = logging.getLogger(__name__) @@ -31,8 +33,9 @@ class UploadResource(resource.Resource): def __init__(self, hs, filepaths): self.auth = hs.get_auth() + self.clock = hs.get_clock() self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size() + self.max_upload_size = hs.config.max_upload_size self.filepaths = filepaths def render_POST(self, request): @@ -45,10 +48,8 @@ class UploadResource(resource.Resource): @defer.inlineCallbacks def _async_render_POST(self, request): - - auth_user = yield self.auth.get_user_by_req(request) - try: + auth_user = yield self.auth.get_user_by_req(request) # TODO: The checks here are a bit late. The content will have # already been uploaded to a tmp file at this point content_length = request.getHeader("Content-Length") @@ -62,7 +63,7 @@ class UploadResource(resource.Resource): code=413, ) - headers = request.requestHeaders() + headers = request.requestHeaders if headers.hasHeader("Content-Type"): media_type = headers.getRawHeaders("Content-Type")[0] @@ -78,7 +79,8 @@ class UploadResource(resource.Resource): media_id = random_string(24) - fname = self.filepaths.local_media_file_path(media_id) + fname = self.filepaths.local_media_filepath(media_id) + os.makedirs(os.path.dirname(fname)) # This shouldn't block for very long because the content will have # already been uploaded at this point. diff --git a/synapse/server.py b/synapse/server.py index da0a44433a..7eb15270fc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -78,6 +78,7 @@ class BaseHomeServer(object): 'resource_for_web_client', 'resource_for_content_repo', 'resource_for_server_key', + 'resource_for_media_repository', 'event_sources', 'ratelimiter', 'keyring', diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 1231794de0..f6811a8117 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -33,6 +33,7 @@ from .stream import StreamStore from .transactions import TransactionStore from .keys import KeyStore from .event_federation import EventFederationStore +from .media_repository import MediaRepositoryStore from .state import StateStore from .signatures import SignatureStore @@ -62,6 +63,7 @@ SCHEMAS = [ "state", "event_edges", "event_signatures", + "media_repository", ] @@ -81,7 +83,9 @@ class DataStore(RoomMemberStore, RoomStore, RegistrationStore, StreamStore, ProfileStore, FeedbackStore, PresenceStore, TransactionStore, DirectoryStore, KeyStore, StateStore, SignatureStore, - EventFederationStore, ): + EventFederationStore, + MediaRepositoryStore, + ): def __init__(self, hs): super(DataStore, self).__init__(hs) diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 73ceba3f2c..db03619a80 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -20,10 +20,15 @@ class MediaRepositoryStore(SQLBaseStore): """Persistence for attachments and avatars""" def get_local_media(self, media_id): + """Get the metadata for a local piece of media + Returns: + None if the media_id doesn't exist. + """ return self._simple_select_one( "local_media_repository", {"media_id": media_id}, ("media_type", "media_length", "upload_name", "created_ts"), + True, ) def store_local_media(self, media_id, media_type, time_now_ms, upload_name, @@ -36,7 +41,7 @@ class MediaRepositoryStore(SQLBaseStore): "created_ts": time_now_ms, "upload_name": upload_name, "media_length": media_length, - "user_id": user_id, + "user_id": user_id.to_string(), } ) -- cgit 1.4.1 From 2f804a70723a911bbc1d1bafbeb33f8462980151 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 2 Dec 2014 19:55:18 +0000 Subject: Fix pyflakes and pep8 warnings --- synapse/media/v1/media_repository.py | 3 ++- synapse/media/v1/upload_resource.py | 1 + synapse/storage/media_repository.py | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index 0f4eeef278..afd92874cf 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -49,7 +49,8 @@ class MediaRepositoryResource(Resource): Clients can get thumbnails by supplying a desired width and height:: - => GET /_matrix/media/v1/thumbnail/?width=&height= HTTP/1.1 + => GET /_matrix/media/v1 + /thumbnail/?width=&height= HTTP/1.1 <= HTTP/1.1 200 OK Content-Type: image/jpeg or image/png diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index d9d7825b2b..2919fee12f 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -29,6 +29,7 @@ import logging logger = logging.getLogger(__name__) + class UploadResource(resource.Resource): def __init__(self, hs, filepaths): diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index db03619a80..eda191ad5b 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -78,7 +78,7 @@ class MediaRepositoryStore(SQLBaseStore): def store_cached_remote_media(self, origin, media_id, media_type, media_length, time_now_ms, upload_name, - filesytem_id): + filesystem_id): return self._simple_insert( "remote_media_cache", { @@ -103,7 +103,6 @@ class MediaRepositoryStore(SQLBaseStore): ) ) - def store_remote_media_thumbnail(self, origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_length, filesystem_id): -- cgit 1.4.1 From c01fd5573c92c7c6da258bac7ff377a91cbebfd1 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 4 Dec 2014 14:22:31 +0000 Subject: Implement download support for media_repository --- synapse/http/matrixfederationclient.py | 73 ++++++++++++- synapse/media/v1/download_resource.py | 194 +++++++++++++++++++++++++++++++++ synapse/media/v1/media_repository.py | 2 + synapse/media/v1/upload_resource.py | 11 +- synapse/storage/media_repository.py | 10 +- 5 files changed, 278 insertions(+), 12 deletions(-) create mode 100644 synapse/media/v1/download_resource.py (limited to 'synapse') diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 510f07dd7b..c7082b83a7 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -14,10 +14,11 @@ # limitations under the License. -from twisted.internet import defer, reactor +from twisted.internet import defer, reactor, protocol from twisted.internet.error import DNSLookupError from twisted.web.client import readBody, _AgentBase, _URI from twisted.web.http_headers import Headers +from twisted.web._newclient import ResponseDone from synapse.http.endpoint import matrix_federation_endpoint from synapse.util.async import sleep @@ -227,7 +228,7 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_json(self, destination, path, args={}, retry_on_dns_fail=True): - """ Get's some json from the given host homeserver and path + """ GETs some json from the given host homeserver and path Args: destination (str): The remote server to send the HTTP request @@ -235,9 +236,6 @@ class MatrixFederationHttpClient(object): path (str): The HTTP path. args (dict): A dictionary used to create query strings, defaults to None. - **Note**: The value of each key is assumed to be an iterable - and *not* a string. - Returns: Deferred: Succeeds when we get *any* HTTP response. @@ -272,6 +270,48 @@ class MatrixFederationHttpClient(object): defer.returnValue(json.loads(body)) + @defer.inlineCallbacks + def get_file(self, destination, path, output_stream, args={}, + retry_on_dns_fail=True): + """GETs a file from a given homeserver + Args: + destination (str): The remote server to send the HTTP request to. + path (str): The HTTP path to GET. + output_stream (file): File to write the response body to. + args (dict): Optional dictionary used to create the query string. + Returns: + A (int,dict) tuple of the file length and a dict of the response + headers. + """ + + encoded_args = {} + for k, vs in args.items(): + if isinstance(vs, basestring): + vs = [vs] + encoded_args[k] = [v.encode("UTF-8") for v in vs] + + query_bytes = urllib.urlencode(encoded_args, True) + logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) + + def body_callback(method, url_bytes, headers_dict): + self.sign_request(destination, method, url_bytes, headers_dict) + return None + + response = yield self._create_request( + destination.encode("ascii"), + "GET", + path.encode("ascii"), + query_bytes=query_bytes, + body_callback=body_callback, + retry_on_dns_fail=retry_on_dns_fail + ) + + headers = dict(response.headers.getAllRawHeaders()) + + length = yield _readBodyToFile(response, output_stream) + + defer.returnValue((length, headers)) + def _getEndpoint(self, reactor, destination): return matrix_federation_endpoint( reactor, destination, timeout=10, @@ -279,6 +319,29 @@ class MatrixFederationHttpClient(object): ) +class _ReadBodyToFileProtocol(protocol.Protocol): + def __init__(self, stream, deferred): + self.stream = stream + self.deferred = deferred + self.length = 0 + + def dataReceived(self, data): + self.stream.write(data) + self.length += len(data) + + def connectionLost(self, reason): + if reason.check(ResponseDone): + self.deferred.callback(self.length) + else: + self.deferred.errback(reason) + + +def _readBodyToFile(response, stream): + d = defer.Deferred() + response.deliverBody(_ReadBodyToFileProtocol(stream, d)) + return d + + def _print_ex(e): if hasattr(e, "reasons") and e.reasons: for ex in e.reasons: diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py new file mode 100644 index 0000000000..c243f16a74 --- /dev/null +++ b/synapse/media/v1/download_resource.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import respond_with_json +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, CodeMessageException, cs_error, Codes +) + +from twisted.protocols.basic import FileSender +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer + +import os + +import logging + +logger = logging.getLogger(__name__) + + +class DownloadResource(Resource): + isLeaf = True + + def __init__(self, hs, filepaths): + Resource.__init__(self) + self.client = hs.get_http_client() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.filepaths = filepaths + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + def _respond_404(self, request): + respond_with_json( + request, 404, + cs_error( + "Not found %r" % (request.postpath,), + code=Codes.NOT_FOUND, + ), + send_cors=True + ) + + @defer.inlineCallbacks + def _async_render_GET(self, request): + + try: + server_name, media_id = request.postpath + except: + self._respond_404(request) + return + + try: + if server_name == self.server_name: + yield self._respond_local_file(request, media_id) + else: + yield self._respond_remote_file(request, server_name, media_id) + except CodeMessageException as e: + logger.exception(e) + respond_with_json(request, e.code, cs_exception(e), send_cors=True) + except: + logger.exception("Failed to serve file") + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) + + @defer.inlineCallbacks + def _download_remote_file(self, server_name, media_id): + filesystem_id = random_string(24) + + fname = self.filepaths.remote_media_filepath( + server_name, filesystem_id + ) + os.makedirs(os.path.dirname(fname)) + + try: + with open(fname, "wb") as f: + length, headers = yield self.client.get_file( + server_name, + "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )), + output_stream=f, + ) + except: + os.remove(fname) + raise + + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=length, + filesystem_id=filesystem_id, + ) + + defer.returnValue({ + "media_type": media_type, + "media_length": length, + "upload_name": None, + "created_ts": time_now_ms, + "filesystem_id": filesystem_id, + }) + + @defer.inlineCallbacks + def _respond_remote_file(self, request, server_name, media_id): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + + if not media_info: + media_info = yield self._download_remote_file( + server_name, media_id + ) + + filesystem_id = media_info["filesystem_id"] + + file_path = self.filepaths.remote_media_filepath( + server_name, filesystem_id + ) + + if os.path.isfile(file_path): + media_type = media_info["media_type"] + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + request.finish() + else: + self._respond_404() + + @defer.inlineCallbacks + def _respond_local_file(self, request, media_id): + media_info = yield self.store.get_local_media(media_id) + if not media_info: + self._respond_404() + return + + file_path = self.filepaths.local_media_filepath(media_id) + + logger.debug("Searching for %s", file_path) + + if os.path.isfile(file_path): + media_type = media_info["media_type"] + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + request.finish() + else: + self._respond_404() diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index afd92874cf..e0a4cd01ee 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -14,6 +14,7 @@ # limitations under the License. from .upload_resource import UploadResource +from .download_resource import DownloadResource from .filepath import MediaFilePaths from twisted.web.resource import Resource @@ -62,3 +63,4 @@ class MediaRepositoryResource(Resource): Resource.__init__(self) filepaths = MediaFilePaths(hs.config.media_store_path) self.putChild("upload", UploadResource(hs, filepaths)) + self.putChild("download", DownloadResource(hs, filepaths)) diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index 2919fee12f..91bcc5caff 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -20,7 +20,8 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException ) -from twisted.web import server, resource +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET from twisted.internet import defer import os @@ -30,9 +31,11 @@ import logging logger = logging.getLogger(__name__) -class UploadResource(resource.Resource): +class UploadResource(Resource): + isLeaf = True def __init__(self, hs, filepaths): + Resource.__init__(self) self.auth = hs.get_auth() self.clock = hs.get_clock() self.store = hs.get_datastore() @@ -41,11 +44,11 @@ class UploadResource(resource.Resource): def render_POST(self, request): self._async_render_POST(request) - return server.NOT_DONE_YET + return NOT_DONE_YET def render_OPTIONS(self, request): respond_with_json(request, 200, {}, send_cors=True) - return server.NOT_DONE_YET + return NOT_DONE_YET @defer.inlineCallbacks def _async_render_POST(self, request): diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index eda191ad5b..2d3a2d1ccb 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -22,13 +22,13 @@ class MediaRepositoryStore(SQLBaseStore): def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: - None if the media_id doesn't exist. + None if the meia_id doesn't exist. """ return self._simple_select_one( "local_media_repository", {"media_id": media_id}, ("media_type", "media_length", "upload_name", "created_ts"), - True, + allow_none=True, ) def store_local_media(self, media_id, media_type, time_now_ms, upload_name, @@ -73,7 +73,11 @@ class MediaRepositoryStore(SQLBaseStore): return self._simple_select_one( "remote_media_cache", {"media_origin": origin, "media_id": media_id}, - ("media_type", "media_length", "upload_name", "created_ts"), + ( + "media_type", "media_length", "upload_name", "created_ts", + "filesystem_id", + ), + allow_none=True, ) def store_cached_remote_media(self, origin, media_id, media_type, -- cgit 1.4.1 From 036382012227916718f28a63b882dd341a309b68 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Fri, 5 Dec 2014 16:12:37 +0000 Subject: Add a class for generating thumbnails using PIL --- synapse/media/v1/thumbnailer.py | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 synapse/media/v1/thumbnailer.py (limited to 'synapse') diff --git a/synapse/media/v1/thumbnailer.py b/synapse/media/v1/thumbnailer.py new file mode 100644 index 0000000000..ed09283b27 --- /dev/null +++ b/synapse/media/v1/thumbnailer.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import PIL.Image + +class Thumbnailer(object): + + FORMAT_JPEG="JPEG" + FORMAT_PNG="PNG" + + def __init__(self, input_path): + self.image = PIL.Image.open(input_path) + self.width, self.height = self.image.size + + def size_preserve(self, max_width, max_height): + """Calculate the largest size that preserves aspect ratio which + fits within the given rectangle:: + + (w_in / h_in) = (w_out / h_out) + w_out = min(w_max, h_max * (w_in / h_in)) + h_out = min(h_max, w_max * (h_in / w_in)) + + Args: + max_width: The largest possible width. + max_height: The larget possible height. + """ + + if max_width * self.height < max_height * self.width: + return (max_width, (max_width * self.height) // self.width) + else: + return ((max_height * self.width) // self.height, max_height) + + def thumbnail_scale(self, output_path, output_format, width, height): + """Rescales the image to the given dimensions""" + output = self.image.resize((width, height), PIL.Image.BILINEAR) + output.save(output_path, output_format) + + def thumbnail_crop(self, output_path, output_format, width, height): + """Rescales and crops the image to the given dimensions preserving + aspect:: + (w_in / h_in) = (w_scaled / h_scaled) + w_scaled = max(w_out, h_out * (w_in / h_in)) + h_scaled = max(h_out, w_out * (h_in / w_in)) + + Args: + max_width: The largest possible width. + max_height: The larget possible height. + """ + if width * self.height > height * self.width: + scaled_height = (width * self.height) // self.width + scaled_image = self.image.resize( + (width, scaled_height), PIL.Image.BILINEAR + ) + crop_top = (scaled_height - height) // 2 + crop_bottom = height + crop_top + cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) + cropped.save(output_path, output_format) + else: + scaled_width = (height * self.width) // self.height + scaled_image = self.image.resize( + (scaled_width, height), PIL.Image.BILINEAR + ) + crop_left = (scaled_width - width) // 2 + crop_right = width + crop_left + cropped = scaled_image.crop((crop_left, 0, crop_right, height)) + cropped.save(output_path, output_format) -- cgit 1.4.1 From a953be097f3431481875dc04662adcc5ba2226a9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Fri, 5 Dec 2014 16:30:18 +0000 Subject: Add a method field to thumbnail storage --- synapse/storage/media_repository.py | 15 +++++++++------ synapse/storage/schema/media_repository.sql | 4 +++- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 2d3a2d1ccb..b3f1fc087b 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -50,20 +50,21 @@ class MediaRepositoryStore(SQLBaseStore): "local_media_thumbnails", {"media_id": media_id}, ( - "thumbnail_width", "thumbnail_height", + "thumbnail_width", "thumbnail_height", "thumbnail_method", "thumbnail_type", "thumbnail_length", ) ) def store_local_thumbnail(self, media_id, thumbnail_width, - thumbnail_height, thumbnail_type, - thumbnail_length): + thumbnail_height, thumbnail_method, + thumbnail_type, thumbnail_length): return self._simple_insert( "local_media_thumbnails", { "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, + "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, "thumbnail_length": thumbnail_length, } @@ -101,15 +102,16 @@ class MediaRepositoryStore(SQLBaseStore): "remote_media_cache_thumbnails", {"origin": origin, "media_id": media_id}, ( - "thumbnail_width", "thumbnail_height", + "thumbnail_width", "thumbnail_height", "thumbnail_method" "thumbnail_type", "thumbnail_length", "filesystem_id" ) ) def store_remote_media_thumbnail(self, origin, media_id, thumbnail_width, - thumbnail_height, thumbnail_type, - thumbnail_length, filesystem_id): + thumbnail_height, thumbnail_method, + thumbnail_type, thumbnail_length, + filesystem_id): return self._simple_insert( "remote_media_cache_thumbnails", { @@ -117,6 +119,7 @@ class MediaRepositoryStore(SQLBaseStore): "media_id": media_id, "thumbnail_width": thumbnail_width, "thumbnail_height": thumbnail_height, + "thumbnail_method": thumbnail_method, "thumbnail_type": thumbnail_type, "thumbnail_length": thumbnail_length, "filesystem_id": filesystem_id, diff --git a/synapse/storage/schema/media_repository.sql b/synapse/storage/schema/media_repository.sql index 768752296f..b785fa0208 100644 --- a/synapse/storage/schema/media_repository.sql +++ b/synapse/storage/schema/media_repository.sql @@ -28,6 +28,7 @@ CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails ( thumbnail_width INTEGER, -- The width of the thumbnail in pixels. thumbnail_height INTEGER, -- The height of the thumbnail in pixels. thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_method TEXT, -- The method used to make the thumbnail. thumbnail_length INTEGER, -- The length of the thumbnail in bytes. CONSTRAINT uniqueness UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type @@ -53,12 +54,13 @@ CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails ( media_id TEXT, -- The id used to refer to the media. thumbnail_width INTEGER, -- The width of the thumbnail in pixels. thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_method TEXT, -- The method used to make the thumbnail thumbnail_type TEXT, -- The MIME-type of the thumbnail. thumbnail_length INTEGER, -- The length of the thumbnail in bytes. filesystem_id TEXT, -- The name used to store the media on disk. CONSTRAINT uniqueness UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, - thumbnail_type + thumbnail_type, thumbnail_type ) ); -- cgit 1.4.1 From cc84d3ea78eaf50c20ad84b3df99ecf4547e08a8 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 10 Dec 2014 14:46:55 +0000 Subject: Thumbnail uploaded and cached images --- synapse/media/v1/base_resource.py | 318 +++++++++++++++++++++++++++++++++ synapse/media/v1/download_resource.py | 155 ++-------------- synapse/media/v1/filepath.py | 28 ++- synapse/media/v1/thumbnail_resource.py | 191 ++++++++++++++++++++ synapse/media/v1/thumbnailer.py | 37 ++-- synapse/media/v1/upload_resource.py | 21 +-- synapse/storage/media_repository.py | 12 +- 7 files changed, 587 insertions(+), 175 deletions(-) create mode 100644 synapse/media/v1/base_resource.py create mode 100644 synapse/media/v1/thumbnail_resource.py (limited to 'synapse') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py new file mode 100644 index 0000000000..1e57a1465f --- /dev/null +++ b/synapse/media/v1/base_resource.py @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .thumbnailer import Thumbnailer + +from synapse.http.server import respond_with_json +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, CodeMessageException, cs_error, Codes, SynapseError +) + +from twisted.internet import defer +from twisted.web.resource import Resource +from twisted.protocols.basic import FileSender + +import os + +import logging + +logger = logging.getLogger(__name__) + + +class BaseMediaResource(Resource): + isLeaf = True + + def __init__(self, hs, filepaths): + Resource.__init__(self) + self.client = hs.get_http_client() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.max_upload_size = hs.config.max_upload_size + self.filepaths = filepaths + + @staticmethod + def catch_errors(request_handler): + @defer.inlineCallbacks + def wrapped_request_handler(self, request): + try: + yield request_handler(self, request) + except CodeMessageException as e: + logger.exception(e) + respond_with_json( + request, e.code, cs_exception(e), send_cors=True + ) + except: + logger.exception( + "Failed handle request %s.%s on %r", + request_handler.__module__, + request_handler.__name__, + self, + ) + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) + return wrapped_request_handler + + @staticmethod + def _parse_media_id(request): + try: + server_name, media_id = request.postpath + return (server_name, media_id) + except: + raise SynapseError( + 404, + "Invalid media id token %r" % (request.postpath,), + Codes.UNKKOWN, + ) + + @staticmethod + def _parse_integer(request, arg_name, default=None): + try: + if default is None: + return int(request.args[arg_name][0]) + else: + return int(request.args.get(arg_name, [default])[0]) + except: + raise SynapseError( + 400, + "Missing integer argument %r" % (arg_name), + Codes.UNKNOWN, + ) + + @staticmethod + def _parse_string(request, arg_name, default=None): + try: + if default is None: + return request.args[arg_name][0] + else: + return request.args.get(arg_name, [default])[0] + except: + raise SynapseError( + 400, + "Missing string argument %r" % (arg_name), + Codes.UNKNOWN, + ) + + def _respond_404(self, request): + respond_with_json( + request, 404, + cs_error( + "Not found %r" % (request.postpath,), + code=Codes.NOT_FOUND, + ), + send_cors=True + ) + + @defer.inlineCallbacks + def _download_remote_file(self, server_name, media_id): + file_id = random_string(24) + + fname = self.filepaths.remote_media_filepath( + server_name, file_id + ) + os.makedirs(os.path.dirname(fname)) + + try: + with open(fname, "wb") as f: + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )), + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + ) + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=length, + file_id=file_id, + ) + except: + os.remove(fname) + raise + + media_info = { + "media_type": media_type, + "media_length": length, + "upload_name": None, + "created_ts": time_now_ms, + "file_id": file_id, + } + + yield self._generate_remote_thumbnails( + server_name, media_id, media_info + ) + + defer.returnValue(media_info) + + @defer.inlineCallbacks + def _respond_with_file(self, request, media_type, file_path): + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + request.finish() + else: + self._respond_404() + + def _get_thumbnail_requirements(self, media_type): + if media_type == "image/jpeg": + return ( + (32, 32, "crop", "image/jpeg"), + (96, 96, "crop", "image/jpeg"), + (320, 240, "scale", "image/jpeg"), + (640, 480, "scale", "image/jpeg"), + ) + elif (media_type == "image/png") or (media_type == "image/gif"): + return ( + (32, 32, "crop", "image/png"), + (96, 96, "crop", "image/png"), + (320, 240, "scale", "image/png"), + (640, 480, "scale", "image/png"), + ) + else: + return () + + @defer.inlineCallbacks + def _generate_local_thumbnails(self, media_id, media_info): + media_type = media_info["media_type"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.local_media_path(media_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) + + @defer.inlineCallbacks + def _generate_remote_thumbnails(self, server_name, media_id, media_info): + media_type = media_info["media_type"] + file_id = media_info["filesystem_id"] + requirements = self._get_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.remote_media_path(server_name, file_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.remote_media_thumbnail( + server_name, media_id, file_id, + media_id, t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method + ) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py index c243f16a74..31c6f25968 100644 --- a/synapse/media/v1/download_resource.py +++ b/synapse/media/v1/download_resource.py @@ -13,117 +13,46 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import respond_with_json -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_exception, CodeMessageException, cs_error, Codes -) - -from twisted.protocols.basic import FileSender -from twisted.web.resource import Resource +from .base_media_resource import BaseMediaResource + from twisted.web.server import NOT_DONE_YET from twisted.internet import defer -import os - import logging logger = logging.getLogger(__name__) -class DownloadResource(Resource): - isLeaf = True - - def __init__(self, hs, filepaths): - Resource.__init__(self) - self.client = hs.get_http_client() - self.clock = hs.get_clock() - self.server_name = hs.hostname - self.store = hs.get_datastore() - self.filepaths = filepaths - +class DownloadResource(BaseMediaResource): def render_GET(self, request): self._async_render_GET(request) return NOT_DONE_YET - def _respond_404(self, request): - respond_with_json( - request, 404, - cs_error( - "Not found %r" % (request.postpath,), - code=Codes.NOT_FOUND, - ), - send_cors=True - ) - + @BaseMediaResource.catch_errors @defer.inlineCallbacks def _async_render_GET(self, request): - try: server_name, media_id = request.postpath except: self._respond_404(request) return - try: - if server_name == self.server_name: - yield self._respond_local_file(request, media_id) - else: - yield self._respond_remote_file(request, server_name, media_id) - except CodeMessageException as e: - logger.exception(e) - respond_with_json(request, e.code, cs_exception(e), send_cors=True) - except: - logger.exception("Failed to serve file") - respond_with_json( - request, - 500, - {"error": "Internal server error"}, - send_cors=True - ) + if server_name == self.server_name: + yield self._respond_local_file(request, media_id) + else: + yield self._respond_remote_file(request, server_name, media_id) @defer.inlineCallbacks - def _download_remote_file(self, server_name, media_id): - filesystem_id = random_string(24) - - fname = self.filepaths.remote_media_filepath( - server_name, filesystem_id - ) - os.makedirs(os.path.dirname(fname)) + def _respond_local_file(self, request, media_id): + media_info = yield self.store.get_local_media(media_id) + if not media_info: + self._respond_404() + return - try: - with open(fname, "wb") as f: - length, headers = yield self.client.get_file( - server_name, - "/".join(( - "/_matrix/media/v1/download", server_name, media_id, - )), - output_stream=f, - ) - except: - os.remove(fname) - raise - - media_type = headers["Content-Type"][0] - time_now_ms = self.clock.time_msec() - - yield self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=None, - media_length=length, - filesystem_id=filesystem_id, - ) + media_type = media_info["media_type"] + file_path = self.filepaths.local_media_filepath(media_id) - defer.returnValue({ - "media_type": media_type, - "media_length": length, - "upload_name": None, - "created_ts": time_now_ms, - "filesystem_id": filesystem_id, - }) + yield self.respond_with_file(request, media_type, file_path) @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id): @@ -136,59 +65,11 @@ class DownloadResource(Resource): server_name, media_id ) + media_type = media_info["media_type"] filesystem_id = media_info["filesystem_id"] file_path = self.filepaths.remote_media_filepath( server_name, filesystem_id ) - if os.path.isfile(file_path): - media_type = media_info["media_type"] - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - with open(file_path, "rb") as f: - yield FileSender().beginFileTransfer(f, request) - - request.finish() - else: - self._respond_404() - - @defer.inlineCallbacks - def _respond_local_file(self, request, media_id): - media_info = yield self.store.get_local_media(media_id) - if not media_info: - self._respond_404() - return - - file_path = self.filepaths.local_media_filepath(media_id) - - logger.debug("Searching for %s", file_path) - - if os.path.isfile(file_path): - media_type = media_info["media_type"] - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) - - with open(file_path, "rb") as f: - yield FileSender().beginFileTransfer(f, request) - - request.finish() - else: - self._respond_404() + yield self.respond_with_file(request, media_type, file_path) diff --git a/synapse/media/v1/filepath.py b/synapse/media/v1/filepath.py index d23564e03e..0078bc3d40 100644 --- a/synapse/media/v1/filepath.py +++ b/synapse/media/v1/filepath.py @@ -21,33 +21,47 @@ class MediaFilePaths(object): def __init__(self, base_path): self.base_path = base_path + def default_thumbnail(self, default_top_level, default_sub_type, width, + height, content_type, method): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % ( + width, height, top_level_type, sub_type, method + ) + return os.path.join( + self.base_path, "default_thumbnails", default_top_level, + default_sub_type, file_name + ) + def local_media_filepath(self, media_id): return os.path.join( - self.base_path, "local", "content", + self.base_path, "local_content", media_id[0:2], media_id[2:4], media_id[4:] ) - def local_media_thumbnail(self, media_id, width, height, content_type): + def local_media_thumbnail(self, media_id, width, height, content_type, + method): top_level_type, sub_type = content_type.split("/") - file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) + file_name = "%i-%i-%s-%s-%s" % ( + width, height, top_level_type, sub_type, method + ) return os.path.join( - self.base_path, "local", "thumbnails", + self.base_path, "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) def remote_media_filepath(self, server_name, file_id): return os.path.join( - self.base_path, "remote", "content", server_name, + self.base_path, "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] ) def remote_media_thumbnail(self, server_name, file_id, width, height, - content_type): + content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( - self.base_path, "remote", "content", server_name, + self.base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], file_name ) diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py new file mode 100644 index 0000000000..331ba87e00 --- /dev/null +++ b/synapse/media/v1/thumbnail_resource.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .base_media_resource import BaseMediaResource + +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer + +import logging + +logger = logging.getLogger(__name__) + + +class ThumbnailResource(BaseMediaResource): + isLeaf = True + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @BaseMediaResource.catch_errors + @defer.inlineCallbacks + def _async_render_GET(self, request): + server_name, media_id = self._parse_media_id(request) + width = self._parse_integer(request, "width") + height = self._parse_integer(request, "height") + method = self._parse_string(request, "method", "scale") + m_type = self._parse_string(request, "type", "image/png") + + if server_name == self.server_name: + yield self._respond_local_thumbnail( + request, media_id, width, height, method, m_type + ) + else: + yield self._respond_remote_thumbnail( + request, server_name, media_id, + width, height, method, m_type + ) + + @defer.inlineCallbacks + def _respond_local_thumbnail(self, request, media_id, width, height, + method, m_type): + media_info = yield self.store.get_local_media(media_id) + + if not media_info: + self._respond_404(request) + return + + thumbnail_infos = yield self.store.get_local_thumbnail(media_id) + + if thumbnail_infos: + thumbnail_info = self._select_thumbnail( + width, height, method, m_type, thumbnail_infos + ) + thumbnail_width = thumbnail_info["thumbnail_width"] + thumbnail_height = thumbnail_info["thumbnail_height"] + thumbnail_type = thumbnail_info["thumbnail_type"] + thumbnail_method = thumbnail_info["thumbnail_method"] + + file_path = self.filepaths.local_media_thumbnail( + media_id, thumbnail_width, thumbnail_height, thumbnail_type, + thumbnail_method, + ) + yield self._respond_with_file(request, thumbnail_type, file_path) + + else: + yield self._respond_default_thumbnail( + self, request, media_info, width, height, method, m_type, + ) + + @defer.inlineCallbacks + def _respond_remote_thumbnail(self, request, server_name, media_id, width, + height, method, m_type): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + + if not media_info: + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead. + media_info = yield self._download_remote_file( + server_name, media_id + ) + + thumbnail_infos = yield self.store.get_remote_media_thumbnails( + server_name, media_id, + ) + + if thumbnail_infos: + thumbnail_info = self._select_thumbnail( + width, height, method, m_type, thumbnail_infos + ) + thumbnail_width = thumbnail_info["thumbnail_width"] + thumbnail_height = thumbnail_info["thumbnail_height"] + thumbnail_type = thumbnail_info["thumbnail_type"] + thumbnail_method = thumbnail_info["thumbnail_method"] + + file_path = self.filepaths.remote_media_thumbnail( + server_name, media_id, thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_method, + ) + yield self._respond_with_file(request, thumbnail_type, file_path) + else: + yield self._respond_default_thumbnail( + self, request, media_info, width, height, method, m_type, + ) + + @defer.inlineCallbacks + def _respond_default_thumbnail(self, request, media_info, width, height, + method, m_type): + media_type = media_info["media_type"] + top_level_type = media_type.split("/")[0] + sub_type = media_type.split("/")[-1].split(";")[0] + thumbnail_infos = yield self.store.get_default_thumbnails( + top_level_type, sub_type, + ) + if not thumbnail_infos: + thumbnail_infos = yield self.store.get_default_thumbnails( + top_level_type, "_default", + ) + if not thumbnail_infos: + thumbnail_infos = yield self.store.get_default_thumbnails( + "_default", "_default", + ) + if not thumbnail_infos: + self._respond_404(request) + return + + thumbnail_info = self._select_thumbnail( + width, height, "crop", m_type, thumbnail_infos + ) + + thumbnail_width = thumbnail_info["thumbnail_width"] + thumbnail_height = thumbnail_info["thumbnail_height"] + thumbnail_type = thumbnail_info["thumbnail_type"] + thumbnail_method = thumbnail_info["thumbnail_method"] + + file_path = self.filepaths.default_thumbnail( + top_level_type, sub_type, thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_method, + ) + yield self.respond_with_file(request, thumbnail_type, file_path) + + def _select_thumbnail(self, desired_width, desired_height, desired_method, + desired_type, thumbnail_infos): + d_w = desired_width + d_h = desired_height + + if desired_method.lower() == "crop": + info_list = [] + for info in thumbnail_infos: + t_w = info["thumbnail_width"] + t_h = info["thumbnail_height"] + t_method = info["thumnail_method"] + if t_method == "scale" or t_method == "crop": + aspect_quality = abs(d_w * t_h - d_h * t_w) + size_quality = abs((d_w - t_w) * (d_h - t_h)) + type_quality = desired_type != info["thumbnail_type"] + length_quality = info["thumbnail_length"] + info_list.append(( + aspect_quality, size_quality, type_quality, + length_quality, info + )) + return min(info_list)[-1] + else: + info_list = [] + for info in thumbnail_infos: + t_w = info["thumbnail_width"] + t_h = info["thumbnail_height"] + t_method = info["thumnail_method"] + if t_method == "scale" and (t_w >= d_w or t_h >= d_h): + size_quality = abs((d_w - t_w) * (d_h - t_h)) + type_quality = desired_type != info["thumbnail_type"] + length_quality = info["thumbnail_length"] + info_list.append(( + size_quality, type_quality, length_quality, info + )) + return min(info_list)[-1] diff --git a/synapse/media/v1/thumbnailer.py b/synapse/media/v1/thumbnailer.py index ed09283b27..47160721e7 100644 --- a/synapse/media/v1/thumbnailer.py +++ b/synapse/media/v1/thumbnailer.py @@ -13,18 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import PIL.Image +import Image +from io import BytesIO + class Thumbnailer(object): - FORMAT_JPEG="JPEG" - FORMAT_PNG="PNG" + FORMATS = { + "image/jpeg": "JPEG", + "image/png": "PNG", + } def __init__(self, input_path): - self.image = PIL.Image.open(input_path) + self.image = Image.open(input_path) self.width, self.height = self.image.size - def size_preserve(self, max_width, max_height): + def aspect(self, max_width, max_height): """Calculate the largest size that preserves aspect ratio which fits within the given rectangle:: @@ -42,12 +46,12 @@ class Thumbnailer(object): else: return ((max_height * self.width) // self.height, max_height) - def thumbnail_scale(self, output_path, output_format, width, height): + def scale(self, output_path, width, height, output_type): """Rescales the image to the given dimensions""" - output = self.image.resize((width, height), PIL.Image.BILINEAR) - output.save(output_path, output_format) + scaled = self.image.resize((width, height), Image.BILINEAR) + return self.save_image(scaled, output_type, output_path) - def thumbnail_crop(self, output_path, output_format, width, height): + def crop(self, output_path, width, height, output_type): """Rescales and crops the image to the given dimensions preserving aspect:: (w_in / h_in) = (w_scaled / h_scaled) @@ -61,18 +65,25 @@ class Thumbnailer(object): if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width scaled_image = self.image.resize( - (width, scaled_height), PIL.Image.BILINEAR + (width, scaled_height), Image.BILINEAR ) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) - cropped.save(output_path, output_format) else: scaled_width = (height * self.width) // self.height scaled_image = self.image.resize( - (scaled_width, height), PIL.Image.BILINEAR + (scaled_width, height), Image.BILINEAR ) crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - cropped.save(output_path, output_format) + return self.save_image(cropped, output_type, output_path) + + def save_image(self, output_image, output_type, output_path): + output_bytes_io = BytesIO() + output_image.save(output_bytes_io, self.FORMATS[output_type]) + output_bytes = output_bytes_io.getvalue() + with open(output_path, "wb") as output_file: + output_file.write(output_bytes) + return len(output_bytes) diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index 91bcc5caff..a78cc3cff3 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -20,10 +20,11 @@ from synapse.api.errors import ( cs_exception, SynapseError, CodeMessageException ) -from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET from twisted.internet import defer +from .baseresource import BaseMediaResource + import os import logging @@ -31,17 +32,7 @@ import logging logger = logging.getLogger(__name__) -class UploadResource(Resource): - isLeaf = True - - def __init__(self, hs, filepaths): - Resource.__init__(self) - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size - self.filepaths = filepaths - +class UploadResource(BaseMediaResource): def render_POST(self, request): self._async_render_POST(request) return NOT_DONE_YET @@ -99,6 +90,12 @@ class UploadResource(Resource): media_length=content_length, user_id=auth_user, ) + media_info = { + "media_type": media_type, + "media_length": content_length, + } + + yield self._generate_local_thumbnails(self, media_id, media_info) respond_with_json( request, 200, {"content_token": media_id}, send_cors=True diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index b3f1fc087b..a848662716 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -56,8 +56,8 @@ class MediaRepositoryStore(SQLBaseStore): ) def store_local_thumbnail(self, media_id, thumbnail_width, - thumbnail_height, thumbnail_method, - thumbnail_type, thumbnail_length): + thumbnail_height, thumbnail_type, + thumbnail_method, thumbnail_length): return self._simple_insert( "local_media_thumbnails", { @@ -108,10 +108,10 @@ class MediaRepositoryStore(SQLBaseStore): ) ) - def store_remote_media_thumbnail(self, origin, media_id, thumbnail_width, - thumbnail_height, thumbnail_method, - thumbnail_type, thumbnail_length, - filesystem_id): + def store_remote_media_thumbnail(self, origin, media_id, filesystem_id, + thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_method, + thumbnail_length): return self._simple_insert( "remote_media_cache_thumbnails", { -- cgit 1.4.1 From e5275d856ee7a1d7aeccd3ea6ab97b49456d24c9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 10 Dec 2014 15:46:18 +0000 Subject: Get the code actually working --- synapse/media/v1/base_resource.py | 31 +++++++++++++------- synapse/media/v1/download_resource.py | 6 ++-- synapse/media/v1/media_repository.py | 2 ++ synapse/media/v1/thumbnail_resource.py | 52 ++++++++++++++++------------------ synapse/media/v1/upload_resource.py | 8 ++---- synapse/storage/media_repository.py | 14 +++++---- 6 files changed, 61 insertions(+), 52 deletions(-) (limited to 'synapse') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 1e57a1465f..81f2456343 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -37,6 +37,7 @@ class BaseMediaResource(Resource): def __init__(self, hs, filepaths): Resource.__init__(self) + self.auth = hs.get_auth() self.client = hs.get_http_client() self.clock = hs.get_clock() self.server_name = hs.hostname @@ -120,6 +121,12 @@ class BaseMediaResource(Resource): send_cors=True ) + @staticmethod + def _makedirs(filepath): + dirname = os.path.dirname(filepath) + if not os.path.exists(dirname): + os.makedirs(dirname) + @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id): file_id = random_string(24) @@ -127,13 +134,13 @@ class BaseMediaResource(Resource): fname = self.filepaths.remote_media_filepath( server_name, file_id ) - os.makedirs(os.path.dirname(fname)) + self._makedirs(fname) try: with open(fname, "wb") as f: request_path = "/".join(( "/_matrix/media/v1/download", server_name, media_id, - )), + )) length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, ) @@ -147,7 +154,7 @@ class BaseMediaResource(Resource): time_now_ms=self.clock.time_msec(), upload_name=None, media_length=length, - file_id=file_id, + filesystem_id=file_id, ) except: os.remove(fname) @@ -158,7 +165,7 @@ class BaseMediaResource(Resource): "media_length": length, "upload_name": None, "created_ts": time_now_ms, - "file_id": file_id, + "filesystem_id": file_id, } yield self._generate_remote_thumbnails( @@ -215,7 +222,7 @@ class BaseMediaResource(Resource): if not requirements: return - input_path = self.filepaths.local_media_path(media_id) + input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -235,6 +242,7 @@ class BaseMediaResource(Resource): t_path = self.filepaths.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -250,6 +258,7 @@ class BaseMediaResource(Resource): t_path = self.filepaths.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -264,11 +273,11 @@ class BaseMediaResource(Resource): def _generate_remote_thumbnails(self, server_name, media_id, media_info): media_type = media_info["media_type"] file_id = media_info["filesystem_id"] - requirements = self._get_requirements(media_type) + requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - input_path = self.filepaths.remote_media_path(server_name, file_id) + input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -286,9 +295,9 @@ class BaseMediaResource(Resource): for t_width, t_height, t_type in scales: t_method = "scale" t_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, file_id, - media_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, @@ -303,9 +312,9 @@ class BaseMediaResource(Resource): continue t_method = "crop" t_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) + self._makedirs(t_path) t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py index 31c6f25968..6de0932ba3 100644 --- a/synapse/media/v1/download_resource.py +++ b/synapse/media/v1/download_resource.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base_media_resource import BaseMediaResource +from .base_resource import BaseMediaResource from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -52,7 +52,7 @@ class DownloadResource(BaseMediaResource): media_type = media_info["media_type"] file_path = self.filepaths.local_media_filepath(media_id) - yield self.respond_with_file(request, media_type, file_path) + yield self._respond_with_file(request, media_type, file_path) @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id): @@ -72,4 +72,4 @@ class DownloadResource(BaseMediaResource): server_name, filesystem_id ) - yield self.respond_with_file(request, media_type, file_path) + yield self._respond_with_file(request, media_type, file_path) diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index e0a4cd01ee..2bd29d2288 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -15,6 +15,7 @@ from .upload_resource import UploadResource from .download_resource import DownloadResource +from .thumbnail_resource import ThumbnailResource from .filepath import MediaFilePaths from twisted.web.resource import Resource @@ -64,3 +65,4 @@ class MediaRepositoryResource(Resource): filepaths = MediaFilePaths(hs.config.media_store_path) self.putChild("upload", UploadResource(hs, filepaths)) self.putChild("download", DownloadResource(hs, filepaths)) + self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py index 331ba87e00..fd08c7ecd2 100644 --- a/synapse/media/v1/thumbnail_resource.py +++ b/synapse/media/v1/thumbnail_resource.py @@ -14,7 +14,7 @@ # limitations under the License. -from .base_media_resource import BaseMediaResource +from .base_resource import BaseMediaResource from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -59,26 +59,25 @@ class ThumbnailResource(BaseMediaResource): self._respond_404(request) return - thumbnail_infos = yield self.store.get_local_thumbnail(media_id) + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] file_path = self.filepaths.local_media_thumbnail( - media_id, thumbnail_width, thumbnail_height, thumbnail_type, - thumbnail_method, + media_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, thumbnail_type, file_path) + yield self._respond_with_file(request, t_type, file_path) else: yield self._respond_default_thumbnail( - self, request, media_info, width, height, method, m_type, + request, media_info, width, height, method, m_type, ) @defer.inlineCallbacks @@ -103,19 +102,19 @@ class ThumbnailResource(BaseMediaResource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] + file_id = thumbnail_info["filesystem_id"] file_path = self.filepaths.remote_media_thumbnail( - server_name, media_id, thumbnail_width, thumbnail_height, - thumbnail_type, thumbnail_method, + server_name, file_id, t_width, t_height, t_type, t_method, ) - yield self._respond_with_file(request, thumbnail_type, file_path) + yield self._respond_with_file(request, t_type, file_path) else: yield self._respond_default_thumbnail( - self, request, media_info, width, height, method, m_type, + request, media_info, width, height, method, m_type, ) @defer.inlineCallbacks @@ -143,16 +142,15 @@ class ThumbnailResource(BaseMediaResource): width, height, "crop", m_type, thumbnail_infos ) - thumbnail_width = thumbnail_info["thumbnail_width"] - thumbnail_height = thumbnail_info["thumbnail_height"] - thumbnail_type = thumbnail_info["thumbnail_type"] - thumbnail_method = thumbnail_info["thumbnail_method"] + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] file_path = self.filepaths.default_thumbnail( - top_level_type, sub_type, thumbnail_width, thumbnail_height, - thumbnail_type, thumbnail_method, + top_level_type, sub_type, t_width, t_height, t_type, t_method, ) - yield self.respond_with_file(request, thumbnail_type, file_path) + yield self.respond_with_file(request, t_type, file_path) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): @@ -164,7 +162,7 @@ class ThumbnailResource(BaseMediaResource): for info in thumbnail_infos: t_w = info["thumbnail_width"] t_h = info["thumbnail_height"] - t_method = info["thumnail_method"] + t_method = info["thumbnail_method"] if t_method == "scale" or t_method == "crop": aspect_quality = abs(d_w * t_h - d_h * t_w) size_quality = abs((d_w - t_w) * (d_h - t_h)) @@ -180,7 +178,7 @@ class ThumbnailResource(BaseMediaResource): for info in thumbnail_infos: t_w = info["thumbnail_width"] t_h = info["thumbnail_height"] - t_method = info["thumnail_method"] + t_method = info["thumbnail_method"] if t_method == "scale" and (t_w >= d_w or t_h >= d_h): size_quality = abs((d_w - t_w) * (d_h - t_h)) type_quality = desired_type != info["thumbnail_type"] diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index a78cc3cff3..b2449ff03d 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -23,9 +23,7 @@ from synapse.api.errors import ( from twisted.web.server import NOT_DONE_YET from twisted.internet import defer -from .baseresource import BaseMediaResource - -import os +from .base_resource import BaseMediaResource import logging @@ -75,7 +73,7 @@ class UploadResource(BaseMediaResource): media_id = random_string(24) fname = self.filepaths.local_media_filepath(media_id) - os.makedirs(os.path.dirname(fname)) + self._makedirs(fname) # This shouldn't block for very long because the content will have # already been uploaded at this point. @@ -95,7 +93,7 @@ class UploadResource(BaseMediaResource): "media_length": content_length, } - yield self._generate_local_thumbnails(self, media_id, media_info) + yield self._generate_local_thumbnails(media_id, media_info) respond_with_json( request, 200, {"content_token": media_id}, send_cors=True diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index a848662716..18c068d3d9 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -19,6 +19,9 @@ from _base import SQLBaseStore class MediaRepositoryStore(SQLBaseStore): """Persistence for attachments and avatars""" + def get_default_thumbnails(self, top_level_type, sub_type): + return [] + def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: @@ -47,7 +50,7 @@ class MediaRepositoryStore(SQLBaseStore): def get_local_media_thumbnails(self, media_id): return self._simple_select_list( - "local_media_thumbnails", + "local_media_repository_thumbnails", {"media_id": media_id}, ( "thumbnail_width", "thumbnail_height", "thumbnail_method", @@ -59,7 +62,7 @@ class MediaRepositoryStore(SQLBaseStore): thumbnail_height, thumbnail_type, thumbnail_method, thumbnail_length): return self._simple_insert( - "local_media_thumbnails", + "local_media_repository_thumbnails", { "media_id": media_id, "thumbnail_width": thumbnail_width, @@ -100,11 +103,10 @@ class MediaRepositoryStore(SQLBaseStore): def get_remote_media_thumbnails(self, origin, media_id): return self._simple_select_list( "remote_media_cache_thumbnails", - {"origin": origin, "media_id": media_id}, + {"media_origin": origin, "media_id": media_id}, ( - "thumbnail_width", "thumbnail_height", "thumbnail_method" - "thumbnail_type", "thumbnail_length", - "filesystem_id" + "thumbnail_width", "thumbnail_height", "thumbnail_method", + "thumbnail_type", "thumbnail_length", "filesystem_id", ) ) -- cgit 1.4.1 From 2d265ef3bd99e84ad37057e8fb69c5cddc91f0ce Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 10 Dec 2014 16:09:09 +0000 Subject: import Image as PIL.Image. --- synapse/media/v1/thumbnailer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/media/v1/thumbnailer.py b/synapse/media/v1/thumbnailer.py index 47160721e7..774ae4538f 100644 --- a/synapse/media/v1/thumbnailer.py +++ b/synapse/media/v1/thumbnailer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import Image +import PIL.Image as Image from io import BytesIO -- cgit 1.4.1 From b5eb9124f70a0f5196f8508f24e4566e8a5c0e90 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 10:08:09 +0000 Subject: Make sure we pass a tuple to string '%' formatting --- synapse/media/v1/base_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 81f2456343..8c62ecd597 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -93,7 +93,7 @@ class BaseMediaResource(Resource): except: raise SynapseError( 400, - "Missing integer argument %r" % (arg_name), + "Missing integer argument %r" % (arg_name,), Codes.UNKNOWN, ) @@ -107,7 +107,7 @@ class BaseMediaResource(Resource): except: raise SynapseError( 400, - "Missing string argument %r" % (arg_name), + "Missing string argument %r" % (arg_name,), Codes.UNKNOWN, ) -- cgit 1.4.1 From ead8fc5e388cf03b83b5470430a1c3d70af00df3 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 10:41:43 +0000 Subject: doc the thumbnail methods --- synapse/media/v1/media_repository.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index 2bd29d2288..a0dc56be4b 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -49,15 +49,24 @@ class MediaRepositoryResource(Resource): - Clients can get thumbnails by supplying a desired width and height:: + Clients can get thumbnails by supplying a desired width and height and + thumbnailing method:: => GET /_matrix/media/v1 - /thumbnail/?width=&height= HTTP/1.1 + /thumbnail/?width=&height=&method= HTTP/1.1 <= HTTP/1.1 200 OK Content-Type: image/jpeg or image/png + + The thumbnail methods are "crop" and "scale". "scale" trys to return an + image where either the width or the height is smaller than the requested + size. The client should then scale and letterbox the image if it needs to + fit within a given rectangle. "crop" trys to return an image where the + width and height are close to the requested size and the aspect matches + the requested size. The client should scale the image if it needs to fit + within a given rectangle. """ def __init__(self, hs): -- cgit 1.4.1 From d80d505b1f70eae128990ce1a9517e5c5edead73 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 14:19:32 +0000 Subject: Limit the size of images that are thumbnailed serverside. Limit the size of file that a server will download from a remote server --- synapse/api/errors.py | 1 + synapse/config/repository.py | 5 +++++ synapse/http/matrixfederationclient.py | 25 +++++++++++++++++++------ synapse/media/v1/base_resource.py | 18 ++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 581439ceb3..e250b9b211 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -34,6 +34,7 @@ class Codes(object): LIMIT_EXCEEDED = "M_LIMIT_EXCEEDED" CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED" CAPTCHA_INVALID = "M_CAPTCHA_INVALID" + TOO_LARGE = "M_TOO_LARGE" class CodeMessageException(Exception): diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 6eec930a03..f1b7b1b74e 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,7 @@ class ContentRepositoryConfig(Config): def __init__(self, args): super(ContentRepositoryConfig, self).__init__(args) self.max_upload_size = self.parse_size(args.max_upload_size) + self.max_image_pixels = self.parse_size(args.max_image_pixels) self.media_store_path = self.ensure_directory(args.media_store_path) def parse_size(self, string): @@ -41,3 +42,7 @@ class ContentRepositoryConfig(Config): db_group.add_argument( "--media-store-path", default=cls.default_path("media_store") ) + db_group.add_argument( + "--max-image-pixels", default="32M", + help="Maximum number of pixels that will be thumbnailed" + ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index f05269cdfb..8f4db59c75 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -26,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext from syutil.jsonutil import encode_canonical_json -from synapse.api.errors import CodeMessageException, SynapseError +from synapse.api.errors import CodeMessageException, SynapseError, Codes from syutil.crypto.jsonsign import sign_json @@ -289,7 +289,7 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_file(self, destination, path, output_stream, args={}, - retry_on_dns_fail=True): + retry_on_dns_fail=True, max_size=None): """GETs a file from a given homeserver Args: destination (str): The remote server to send the HTTP request to. @@ -325,7 +325,11 @@ class MatrixFederationHttpClient(object): headers = dict(response.headers.getAllRawHeaders()) - length = yield _readBodyToFile(response, output_stream) + try: + length = yield _readBodyToFile(response, output_stream, max_size) + except: + logger.exception("Failed to download body") + raise defer.returnValue((length, headers)) @@ -337,14 +341,23 @@ class MatrixFederationHttpClient(object): class _ReadBodyToFileProtocol(protocol.Protocol): - def __init__(self, stream, deferred): + def __init__(self, stream, deferred, max_size): self.stream = stream self.deferred = deferred self.length = 0 + self.max_size = max_size def dataReceived(self, data): self.stream.write(data) self.length += len(data) + if self.max_size is not None and self.length >= self.max_size: + self.deferred.errback(SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + )) + self.deferred = defer.Deferred() + self.transport.loseConnection() def connectionLost(self, reason): if reason.check(ResponseDone): @@ -353,9 +366,9 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.deferred.errback(reason) -def _readBodyToFile(response, stream): +def _readBodyToFile(response, stream, max_size): d = defer.Deferred() - response.deliverBody(_ReadBodyToFileProtocol(stream, d)) + response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) return d diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 8c62ecd597..77b05c6548 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -43,6 +43,7 @@ class BaseMediaResource(Resource): self.server_name = hs.hostname self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size + self.max_image_pixels = hs.config.max_image_pixels self.filepaths = filepaths @staticmethod @@ -143,6 +144,7 @@ class BaseMediaResource(Resource): )) length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, + max_size=self.max_upload_size, ) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -226,6 +228,14 @@ class BaseMediaResource(Resource): thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r" + m_width, m_height, self.max_image_pixels + ) + return + scales = set() crops = set() for r_width, r_height, r_method, r_type in requirements: @@ -281,6 +291,14 @@ class BaseMediaResource(Resource): thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r" + m_width, m_height, self.max_image_pixels + ) + return + scales = set() crops = set() for r_width, r_height, r_method, r_type in requirements: -- cgit 1.4.1 From 03d9024cbcb4957b223d1a36e7ae2ad668b1859d Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Thu, 11 Dec 2014 16:48:11 +0000 Subject: Allow only one download for a given image at a time, so that we don't end up downloading the same image twice if two clients request a remote image at the same time --- synapse/media/v1/base_resource.py | 27 +++++++++++++++++++++++++-- synapse/media/v1/download_resource.py | 9 +-------- synapse/media/v1/thumbnail_resource.py | 13 +++---------- 3 files changed, 29 insertions(+), 20 deletions(-) (limited to 'synapse') diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py index 77b05c6548..14735ff375 100644 --- a/synapse/media/v1/base_resource.py +++ b/synapse/media/v1/base_resource.py @@ -45,6 +45,7 @@ class BaseMediaResource(Resource): self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels self.filepaths = filepaths + self.downloads = {} @staticmethod def catch_errors(request_handler): @@ -128,6 +129,28 @@ class BaseMediaResource(Resource): if not os.path.exists(dirname): os.makedirs(dirname) + def _get_remote_media(self, server_name, media_id): + key = (server_name, media_id) + download = self.downloads.get(key) + if download is None: + download = self._get_remote_media_impl(server_name, media_id) + self.downloads[key] = download + @download.addBoth + def callback(media_info): + del self.downloads[key] + return download + + @defer.inlineCallbacks + def _get_remote_media_impl(self, server_name, media_id): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + if not media_info: + media_info = yield self._download_remote_file( + server_name, media_id + ) + defer.returnValue(media_info) + @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id): file_id = random_string(24) @@ -231,7 +254,7 @@ class BaseMediaResource(Resource): if m_width * m_height >= self.max_image_pixels: logger.info( - "Image too large to thumbnail %r x %r > %r" + "Image too large to thumbnail %r x %r > %r", m_width, m_height, self.max_image_pixels ) return @@ -294,7 +317,7 @@ class BaseMediaResource(Resource): if m_width * m_height >= self.max_image_pixels: logger.info( - "Image too large to thumbnail %r x %r > %r" + "Image too large to thumbnail %r x %r > %r", m_width, m_height, self.max_image_pixels ) return diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py index 6de0932ba3..f3a6804e05 100644 --- a/synapse/media/v1/download_resource.py +++ b/synapse/media/v1/download_resource.py @@ -56,14 +56,7 @@ class DownloadResource(BaseMediaResource): @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id): - media_info = yield self.store.get_cached_remote_media( - server_name, media_id - ) - - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) + media_info = yield self._get_remote_media(server_name, media_id) media_type = media_info["media_type"] filesystem_id = media_info["filesystem_id"] diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py index fd08c7ecd2..e19620d456 100644 --- a/synapse/media/v1/thumbnail_resource.py +++ b/synapse/media/v1/thumbnail_resource.py @@ -83,16 +83,9 @@ class ThumbnailResource(BaseMediaResource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): - media_info = yield self.store.get_cached_remote_media( - server_name, media_id - ) - - if not media_info: - # TODO: Don't download the whole remote file - # We should proxy the thumbnail from the remote server instead. - media_info = yield self._download_remote_file( - server_name, media_id - ) + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead. + media_info = yield self._get_remote_media(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, -- cgit 1.4.1