From 279c48c8b442ec726fb5088e56ce9c1d2ed4bfb5 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 2 Dec 2014 15:09:51 +0000 Subject: Write the upload portion of version 1 of the media repository --- synapse/media/v0/content_repository.py | 212 +++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 synapse/media/v0/content_repository.py (limited to 'synapse/media/v0') diff --git a/synapse/media/v0/content_repository.py b/synapse/media/v0/content_repository.py new file mode 100644 index 0000000000..64ecb5346e --- /dev/null +++ b/synapse/media/v0/content_repository.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .server import respond_with_json_bytes + +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, SynapseError, CodeMessageException, Codes, cs_error +) + +from twisted.protocols.basic import FileSender +from twisted.web import server, resource +from twisted.internet import defer + +import base64 +import json +import logging +import os +import re + +logger = logging.getLogger(__name__) + + +class ContentRepoResource(resource.Resource): + """Provides file uploading and downloading. + + Uploads are POSTed to wherever this Resource is linked to. This resource + returns a "content token" which can be used to GET this content again. The + token is typically a path, but it may not be. Tokens can expire, be + one-time uses, etc. + + In this case, the token is a path to the file and contains 3 interesting + sections: + - User ID base64d (for namespacing content to each user) + - random 24 char string + - Content type base64d (so we can return it when clients GET it) + + """ + isLeaf = True + + def __init__(self, hs, directory, auth, external_addr): + resource.Resource.__init__(self) + self.hs = hs + self.directory = directory + self.auth = auth + self.external_addr = external_addr.rstrip('/') + self.max_upload_size = hs.config.max_upload_size + + if not os.path.isdir(self.directory): + os.mkdir(self.directory) + logger.info("ContentRepoResource : Created %s directory.", + self.directory) + + @defer.inlineCallbacks + def map_request_to_name(self, request): + # auth the user + auth_user = yield self.auth.get_user_by_req(request) + + # namespace all file uploads on the user + prefix = base64.urlsafe_b64encode( + auth_user.to_string() + ).replace('=', '') + + # use a random string for the main portion + main_part = random_string(24) + + # suffix with a file extension if we can make one. This is nice to + # provide a hint to clients on the file information. We will also reuse + # this info to spit back the content type to the client. + suffix = "" + if request.requestHeaders.hasHeader("Content-Type"): + content_type = request.requestHeaders.getRawHeaders( + "Content-Type")[0] + suffix = "." + base64.urlsafe_b64encode(content_type) + if (content_type.split("/")[0].lower() in + ["image", "video", "audio"]): + file_ext = content_type.split("/")[-1] + # be a little paranoid and only allow a-z + file_ext = re.sub("[^a-z]", "", file_ext) + suffix += "." + file_ext + + file_name = prefix + main_part + suffix + file_path = os.path.join(self.directory, file_name) + logger.info("User %s is uploading a file to path %s", + auth_user.to_string(), + file_path) + + # keep trying to make a non-clashing file, with a sensible max attempts + attempts = 0 + while os.path.exists(file_path): + main_part = random_string(24) + file_name = prefix + main_part + suffix + file_path = os.path.join(self.directory, file_name) + attempts += 1 + if attempts > 25: # really? Really? + raise SynapseError(500, "Unable to create file.") + + defer.returnValue(file_path) + + def render_GET(self, request): + # no auth here on purpose, to allow anyone to view, even across home + # servers. + + # TODO: A little crude here, we could do this better. + filename = request.path.split('/')[-1] + # be paranoid + filename = re.sub("[^0-9A-z.-_]", "", filename) + + file_path = self.directory + "/" + filename + + logger.debug("Searching for %s", file_path) + + if os.path.isfile(file_path): + # filename has the content type + base64_contentype = filename.split(".")[1] + content_type = base64.urlsafe_b64decode(base64_contentype) + logger.info("Sending file %s", file_path) + f = open(file_path, 'rb') + request.setHeader('Content-Type', content_type) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our matrix + # clients are smart enough to be happy with Cache-Control (right?) + request.setHeader( + "Cache-Control", "public,max-age=86400,s-maxage=86400" + ) + + d = FileSender().beginFileTransfer(f, request) + + # after the file has been sent, clean up and finish the request + def cbFinished(ignored): + f.close() + request.finish() + d.addCallback(cbFinished) + else: + respond_with_json_bytes( + request, + 404, + json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)), + send_cors=True) + + return server.NOT_DONE_YET + + def render_POST(self, request): + self._async_render(request) + return server.NOT_DONE_YET + + def render_OPTIONS(self, request): + respond_with_json_bytes(request, 200, {}, send_cors=True) + return server.NOT_DONE_YET + + @defer.inlineCallbacks + def _async_render(self, request): + try: + # TODO: The checks here are a bit late. The content will have + # already been uploaded to a tmp file at this point + content_length = request.getHeader("Content-Length") + if content_length is None: + raise SynapseError( + msg="Request must specify a Content-Length", code=400 + ) + if int(content_length) > self.max_upload_size: + raise SynapseError( + msg="Upload request body is too large", + code=413, + ) + + fname = yield self.map_request_to_name(request) + + # TODO I have a suspicious feeling this is just going to block + with open(fname, "wb") as f: + f.write(request.content.read()) + + # FIXME (erikj): These should use constants. + file_name = os.path.basename(fname) + # FIXME: we can't assume what the repo's public mounted path is + # ...plus self-signed SSL won't work to remote clients anyway + # ...and we can't assume that it's SSL anyway, as we might want to + # serve it via the non-SSL listener... + url = "%s/_matrix/content/%s" % ( + self.external_addr, file_name + ) + + respond_with_json_bytes(request, 200, + json.dumps({"content_token": url}), + send_cors=True) + + except CodeMessageException as e: + logger.exception(e) + respond_with_json_bytes(request, e.code, + json.dumps(cs_exception(e))) + except Exception as e: + logger.error("Failed to store file: %s" % e) + respond_with_json_bytes( + request, + 500, + json.dumps({"error": "Internal server error"}), + send_cors=True) -- cgit 1.4.1 From 5da65085d106e98cf7b762836cb300d01226bf92 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Tue, 2 Dec 2014 19:51:47 +0000 Subject: Get uploads working with new media repo --- synapse/api/urls.py | 1 + synapse/app/homeserver.py | 9 +++++++-- synapse/config/_base.py | 14 ++++++++++++++ synapse/config/repository.py | 4 ++++ synapse/http/server.py | 4 ++-- synapse/media/__init__.py | 0 synapse/media/v0/__init__.py | 0 synapse/media/v0/content_repository.py | 2 +- synapse/media/v1/__init__.py | 0 synapse/media/v1/media_repository.py | 23 +++++++---------------- synapse/media/v1/upload_resource.py | 14 ++++++++------ synapse/server.py | 1 + synapse/storage/__init__.py | 6 +++++- synapse/storage/media_repository.py | 7 ++++++- 14 files changed, 56 insertions(+), 29 deletions(-) create mode 100644 synapse/media/__init__.py create mode 100644 synapse/media/v0/__init__.py create mode 100644 synapse/media/v1/__init__.py (limited to 'synapse/media/v0') diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 6dc19305b7..d7625127f8 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -20,3 +20,4 @@ FEDERATION_PREFIX = "/_matrix/federation/v1" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" SERVER_KEY_PREFIX = "/_matrix/key/v1" +MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 855fe8e170..a6e29c0860 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -24,12 +24,13 @@ from twisted.web.resource import Resource from twisted.web.static import File from twisted.web.server import Site from synapse.http.server import JsonResource, RootRedirect -from synapse.http.content_repository import ContentRepoResource +from synapse.media.v0.content_repository import ContentRepoResource +from synapse.media.v1.media_repository import MediaRepositoryResource from synapse.http.server_key_resource import LocalKey from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.api.urls import ( CLIENT_PREFIX, FEDERATION_PREFIX, WEB_CLIENT_PREFIX, CONTENT_REPO_PREFIX, - SERVER_KEY_PREFIX, + SERVER_KEY_PREFIX, MEDIA_PREFIX ) from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory @@ -69,6 +70,9 @@ class SynapseHomeServer(HomeServer): self, self.upload_dir, self.auth, self.content_addr ) + def build_resource_for_media_repository(self): + return MediaRepositoryResource(self) + def build_resource_for_server_key(self): return LocalKey(self) @@ -99,6 +103,7 @@ class SynapseHomeServer(HomeServer): (FEDERATION_PREFIX, self.get_resource_for_federation()), (CONTENT_REPO_PREFIX, self.get_resource_for_content_repo()), (SERVER_KEY_PREFIX, self.get_resource_for_server_key()), + (MEDIA_PREFIX, self.get_resource_for_media_repository()), ] if web_client: logger.info("Adding the web client.") diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 6870af10e8..1426436dcb 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -50,12 +50,26 @@ class Config(object): ) return cls.abspath(file_path) + @staticmethod + def ensure_directory(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + if not os.path.isdir(dir_path): + raise ConfigError( + "%s is not a directory" % (dir_path,) + ) + return dir_path + @classmethod def read_file(cls, file_path, config_name): cls.check_file(file_path, config_name) with open(file_path) as file_stream: return file_stream.read() + @staticmethod + def default_path(name): + return os.path.abspath(os.path.join(os.path.curdir, name)) + @staticmethod def read_config_file(file_path): with open(file_path) as file_stream: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 743bc26474..6eec930a03 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,7 @@ class ContentRepositoryConfig(Config): def __init__(self, args): super(ContentRepositoryConfig, self).__init__(args) self.max_upload_size = self.parse_size(args.max_upload_size) + self.media_store_path = self.ensure_directory(args.media_store_path) def parse_size(self, string): sizes = {"K": 1024, "M": 1024 * 1024} @@ -37,3 +38,6 @@ class ContentRepositoryConfig(Config): db_group.add_argument( "--max-upload-size", default="1M" ) + db_group.add_argument( + "--media-store-path", default=cls.default_path("media_store") + ) diff --git a/synapse/http/server.py b/synapse/http/server.py index 046e230361..02277c4998 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -201,9 +201,9 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False): if not pretty_print: - json_bytes = encode_pretty_printed_json(response_json_object) + json_bytes = encode_pretty_printed_json(json_object) else: - json_bytes = encode_canonical_json(response_json_object) + json_bytes = encode_canonical_json(json_object) return respond_with_json_bytes(request, code, json_bytes, send_cors, response_code_message=response_code_message) diff --git a/synapse/media/__init__.py b/synapse/media/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v0/__init__.py b/synapse/media/v0/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v0/content_repository.py b/synapse/media/v0/content_repository.py index 64ecb5346e..ce5d3d153e 100644 --- a/synapse/media/v0/content_repository.py +++ b/synapse/media/v0/content_repository.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .server import respond_with_json_bytes +from synapse.http.server import respond_with_json_bytes from synapse.util.stringutils import random_string from synapse.api.errors import ( diff --git a/synapse/media/v1/__init__.py b/synapse/media/v1/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py index 9c36a8e933..0f4eeef278 100644 --- a/synapse/media/v1/media_repository.py +++ b/synapse/media/v1/media_repository.py @@ -13,27 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import respond_with_json_bytes +from .upload_resource import UploadResource +from .filepath import MediaFilePaths -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_exception, SynapseError, CodeMessageException, Codes, cs_error -) +from twisted.web.resource import Resource -from twisted.protocols.basic import FileSender -from twisted.web import server, resource -from twisted.internet import defer - -import base64 -import json import logging -import os -import re logger = logging.getLogger(__name__) -class MediaRepository(): +class MediaRepositoryResource(Resource): """Profiles file uploading and downloading. Uploads are POSTed to a resource which returns a token which is used to GET @@ -68,5 +58,6 @@ class MediaRepository(): """ def __init__(self, hs): - filepaths = MediaFilePaths - + Resource.__init__(self) + filepaths = MediaFilePaths(hs.config.media_store_path) + self.putChild("upload", UploadResource(hs, filepaths)) diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py index 3721a0173d..d9d7825b2b 100644 --- a/synapse/media/v1/upload_resource.py +++ b/synapse/media/v1/upload_resource.py @@ -23,6 +23,8 @@ from synapse.api.errors import ( from twisted.web import server, resource from twisted.internet import defer +import os + import logging logger = logging.getLogger(__name__) @@ -31,8 +33,9 @@ class UploadResource(resource.Resource): def __init__(self, hs, filepaths): self.auth = hs.get_auth() + self.clock = hs.get_clock() self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size() + self.max_upload_size = hs.config.max_upload_size self.filepaths = filepaths def render_POST(self, request): @@ -45,10 +48,8 @@ class UploadResource(resource.Resource): @defer.inlineCallbacks def _async_render_POST(self, request): - - auth_user = yield self.auth.get_user_by_req(request) - try: + auth_user = yield self.auth.get_user_by_req(request) # TODO: The checks here are a bit late. The content will have # already been uploaded to a tmp file at this point content_length = request.getHeader("Content-Length") @@ -62,7 +63,7 @@ class UploadResource(resource.Resource): code=413, ) - headers = request.requestHeaders() + headers = request.requestHeaders if headers.hasHeader("Content-Type"): media_type = headers.getRawHeaders("Content-Type")[0] @@ -78,7 +79,8 @@ class UploadResource(resource.Resource): media_id = random_string(24) - fname = self.filepaths.local_media_file_path(media_id) + fname = self.filepaths.local_media_filepath(media_id) + os.makedirs(os.path.dirname(fname)) # This shouldn't block for very long because the content will have # already been uploaded at this point. diff --git a/synapse/server.py b/synapse/server.py index da0a44433a..7eb15270fc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -78,6 +78,7 @@ class BaseHomeServer(object): 'resource_for_web_client', 'resource_for_content_repo', 'resource_for_server_key', + 'resource_for_media_repository', 'event_sources', 'ratelimiter', 'keyring', diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 1231794de0..f6811a8117 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -33,6 +33,7 @@ from .stream import StreamStore from .transactions import TransactionStore from .keys import KeyStore from .event_federation import EventFederationStore +from .media_repository import MediaRepositoryStore from .state import StateStore from .signatures import SignatureStore @@ -62,6 +63,7 @@ SCHEMAS = [ "state", "event_edges", "event_signatures", + "media_repository", ] @@ -81,7 +83,9 @@ class DataStore(RoomMemberStore, RoomStore, RegistrationStore, StreamStore, ProfileStore, FeedbackStore, PresenceStore, TransactionStore, DirectoryStore, KeyStore, StateStore, SignatureStore, - EventFederationStore, ): + EventFederationStore, + MediaRepositoryStore, + ): def __init__(self, hs): super(DataStore, self).__init__(hs) diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 73ceba3f2c..db03619a80 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -20,10 +20,15 @@ class MediaRepositoryStore(SQLBaseStore): """Persistence for attachments and avatars""" def get_local_media(self, media_id): + """Get the metadata for a local piece of media + Returns: + None if the media_id doesn't exist. + """ return self._simple_select_one( "local_media_repository", {"media_id": media_id}, ("media_type", "media_length", "upload_name", "created_ts"), + True, ) def store_local_media(self, media_id, media_type, time_now_ms, upload_name, @@ -36,7 +41,7 @@ class MediaRepositoryStore(SQLBaseStore): "created_ts": time_now_ms, "upload_name": upload_name, "media_length": media_length, - "user_id": user_id, + "user_id": user_id.to_string(), } ) -- cgit 1.4.1