summary refs log tree commit diff
path: root/synapse/media
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2014-12-11 17:46:23 +0000
committerErik Johnston <erik@matrix.org>2014-12-11 17:46:23 +0000
commit85574cfbf06ffab46b511ec684b40fb8239b7807 (patch)
tree6596757f5eabfd708df0fc42de2320bb27517de0 /synapse/media
parentImplement .cancel_call_later() in MockClock (diff)
parentAllow only one download for a given image at a time, so that we don't end up ... (diff)
downloadsynapse-85574cfbf06ffab46b511ec684b40fb8239b7807.tar.xz
Merge pull request #23 from matrix-org/media_repository
Media repository
Diffstat (limited to 'synapse/media')
-rw-r--r--synapse/media/__init__.py0
-rw-r--r--synapse/media/v0/__init__.py0
-rw-r--r--synapse/media/v0/content_repository.py212
-rw-r--r--synapse/media/v1/__init__.py0
-rw-r--r--synapse/media/v1/base_resource.py368
-rw-r--r--synapse/media/v1/download_resource.py68
-rw-r--r--synapse/media/v1/filepath.py67
-rw-r--r--synapse/media/v1/media_repository.py77
-rw-r--r--synapse/media/v1/thumbnail_resource.py182
-rw-r--r--synapse/media/v1/thumbnailer.py89
-rw-r--r--synapse/media/v1/upload_resource.py111
11 files changed, 1174 insertions, 0 deletions
diff --git a/synapse/media/__init__.py b/synapse/media/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/media/__init__.py
diff --git a/synapse/media/v0/__init__.py b/synapse/media/v0/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/media/v0/__init__.py
diff --git a/synapse/media/v0/content_repository.py b/synapse/media/v0/content_repository.py
new file mode 100644
index 0000000000..ce5d3d153e
--- /dev/null
+++ b/synapse/media/v0/content_repository.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.http.server import respond_with_json_bytes
+
+from synapse.util.stringutils import random_string
+from synapse.api.errors import (
+    cs_exception, SynapseError, CodeMessageException, Codes, cs_error
+)
+
+from twisted.protocols.basic import FileSender
+from twisted.web import server, resource
+from twisted.internet import defer
+
+import base64
+import json
+import logging
+import os
+import re
+
+logger = logging.getLogger(__name__)
+
+
+class ContentRepoResource(resource.Resource):
+    """Provides file uploading and downloading.
+
+    Uploads are POSTed to wherever this Resource is linked to. This resource
+    returns a "content token" which can be used to GET this content again. The
+    token is typically a path, but it may not be. Tokens can expire, be
+    one-time uses, etc.
+
+    In this case, the token is a path to the file and contains 3 interesting
+    sections:
+        - User ID base64d (for namespacing content to each user)
+        - random 24 char string
+        - Content type base64d (so we can return it when clients GET it)
+
+    """
+    isLeaf = True
+
+    def __init__(self, hs, directory, auth, external_addr):
+        """Set up the resource and make sure the storage directory exists.
+
+        Args:
+            hs: the home server; its config provides max_upload_size.
+            directory: directory that uploaded files are written to.
+            auth: object used to authenticate uploading users.
+            external_addr: base address used when building content URLs;
+                trailing slashes are removed.
+        """
+        resource.Resource.__init__(self)
+
+        self.hs = hs
+        self.auth = auth
+        self.directory = directory
+        self.max_upload_size = hs.config.max_upload_size
+        self.external_addr = external_addr.rstrip('/')
+
+        # nothing more to do when the storage directory is already there
+        if os.path.isdir(self.directory):
+            return
+
+        os.mkdir(self.directory)
+        logger.info("ContentRepoResource : Created %s directory.",
+                    self.directory)
+
+    @defer.inlineCallbacks
+    def map_request_to_name(self, request):
+        """Choose an unused on-disk path for the file being uploaded.
+
+        The file name is built from the url-safe base64 of the authenticated
+        user's ID (with '=' removed), a random 24 character string and, when
+        the request carries a Content-Type header, the url-safe base64 of
+        that header plus an a-z only extension for image, video and audio
+        types.
+
+        Returns (via the Deferred) a path inside self.directory.
+
+        Raises SynapseError (500) when no free name turns up after repeated
+        attempts.
+        """
+        # the uploader has to be authenticated
+        auth_user = yield self.auth.get_user_by_req(request)
+
+        # every upload is namespaced by the user that made it
+        encoded_user = base64.urlsafe_b64encode(auth_user.to_string())
+        prefix = encoded_user.replace('=', '')
+
+        # random middle section of the name
+        main_part = random_string(24)
+
+        # The suffix is a hint to clients about the file, and is also how
+        # render_GET recovers the content type later on.
+        suffix = ""
+        req_headers = request.requestHeaders
+        if req_headers.hasHeader("Content-Type"):
+            content_type = req_headers.getRawHeaders("Content-Type")[0]
+            suffix = "." + base64.urlsafe_b64encode(content_type)
+            type_parts = content_type.split("/")
+            if type_parts[0].lower() in ["image", "video", "audio"]:
+                # be a little paranoid and only allow a-z
+                suffix += "." + re.sub("[^a-z]", "", type_parts[-1])
+
+        file_path = os.path.join(
+            self.directory, prefix + main_part + suffix
+        )
+        logger.info("User %s is uploading a file to path %s",
+                    auth_user.to_string(),
+                    file_path)
+
+        # re-roll the random section until the name is free, but give up
+        # after a sensible number of attempts
+        retries = 0
+        while os.path.exists(file_path):
+            retries += 1
+            main_part = random_string(24)
+            file_path = os.path.join(
+                self.directory, prefix + main_part + suffix
+            )
+            if retries > 25:  # really? Really?
+                raise SynapseError(500, "Unable to create file.")
+
+        defer.returnValue(file_path)
+
+    def render_GET(self, request):
+        """Stream a previously uploaded file back to the client.
+
+        The last path segment of the request is taken as the stored file
+        name. Its second "."-separated section is the url-safe base64 of the
+        content type recorded at upload time; when that section is missing
+        or cannot be decoded the file is served as application/octet-stream.
+
+        Responds with a JSON 404 when no such file exists. Always returns
+        server.NOT_DONE_YET because the response is written asynchronously.
+        """
+        # no auth here on purpose, to allow anyone to view, even across home
+        # servers.
+
+        # TODO: A little crude here, we could do this better.
+        filename = request.path.split('/')[-1]
+        # be paranoid: keep only the characters an upload name can contain
+        # (url-safe base64 alphabet, '=' padding and the '.' separators).
+        # '-' goes last in the class so it is a literal rather than a range.
+        filename = re.sub("[^0-9A-Za-z._=-]", "", filename)
+
+        file_path = self.directory + "/" + filename
+
+        logger.debug("Searching for %s", file_path)
+
+        if not os.path.isfile(file_path):
+            respond_with_json_bytes(
+                request,
+                404,
+                json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)),
+                send_cors=True)
+            return server.NOT_DONE_YET
+
+        # filename has the content type
+        try:
+            content_type = base64.urlsafe_b64decode(filename.split(".")[1])
+        except (IndexError, TypeError, ValueError):
+            # uploaded without a Content-Type, or the section is not base64
+            content_type = "application/octet-stream"
+
+        logger.info("Sending file %s", file_path)
+        f = open(file_path, 'rb')
+        request.setHeader('Content-Type', content_type)
+
+        # cache for at least a day.
+        # XXX: we might want to turn this off for data we don't want to
+        # recommend caching as it's sensitive or private - or at least
+        # select private. don't bother setting Expires as all our matrix
+        # clients are smart enough to be happy with Cache-Control (right?)
+        request.setHeader(
+            "Cache-Control", "public,max-age=86400,s-maxage=86400"
+        )
+
+        d = FileSender().beginFileTransfer(f, request)
+
+        # close the file whether or not the transfer succeeded, passing the
+        # result (or failure) on unchanged
+        def cbClose(result):
+            f.close()
+            return result
+        d.addBoth(cbClose)
+
+        # only a completed transfer finishes the request
+        def cbFinished(ignored):
+            request.finish()
+        d.addCallback(cbFinished)
+
+        def ebFailed(failure):
+            logger.warning(
+                "Failed to send file %s: %s",
+                file_path, failure.getErrorMessage()
+            )
+        d.addErrback(ebFailed)
+
+        return server.NOT_DONE_YET
+
+    def render_POST(self, request):
+        """Handle an upload by handing the request to _async_render.
+
+        The result of _async_render is not waited on; server.NOT_DONE_YET is
+        returned straight away and the response is written later.
+        """
+        self._async_render(request)
+        return server.NOT_DONE_YET
+
+    def render_OPTIONS(self, request):
+        """Answer an OPTIONS request with a 200 and CORS enabled."""
+        # NOTE(review): the body passed here is a dict, whereas every other
+        # respond_with_json_bytes call in this class passes a json.dumps()
+        # string - confirm the helper copes with that.
+        respond_with_json_bytes(request, 200, {}, send_cors=True)
+        return server.NOT_DONE_YET
+
+    @defer.inlineCallbacks
+    def _async_render(self, request):
+        """Store the uploaded request body and reply with its content token.
+
+        The request must carry an integer Content-Length no larger than
+        self.max_upload_size. On success the body is written to the path
+        chosen by map_request_to_name and a JSON object holding the
+        "content_token" URL is returned to the client.
+
+        Errors are reported to the client rather than raised: a
+        CodeMessageException is sent with its own code, anything else as a
+        generic 500. All responses are sent with CORS enabled.
+        """
+        try:
+            # TODO: The checks here are a bit late. The content will have
+            # already been uploaded to a tmp file at this point
+            content_length = request.getHeader("Content-Length")
+            if content_length is None:
+                raise SynapseError(
+                    msg="Request must specify a Content-Length", code=400
+                )
+            try:
+                content_length = int(content_length)
+            except ValueError:
+                # a malformed header is the client's mistake, not a 500
+                raise SynapseError(
+                    msg="Content-Length must be an integer", code=400
+                )
+            if content_length > self.max_upload_size:
+                raise SynapseError(
+                    msg="Upload request body is too large",
+                    code=413,
+                )
+
+            fname = yield self.map_request_to_name(request)
+
+            # TODO I have a suspicious feeling this is just going to block
+            with open(fname, "wb") as f:
+                f.write(request.content.read())
+
+            # FIXME (erikj): These should use constants.
+            file_name = os.path.basename(fname)
+            # FIXME: we can't assume what the repo's public mounted path is
+            # ...plus self-signed SSL won't work to remote clients anyway
+            # ...and we can't assume that it's SSL anyway, as we might want to
+            # serve it via the non-SSL listener...
+            url = "%s/_matrix/content/%s" % (
+                self.external_addr, file_name
+            )
+
+            respond_with_json_bytes(request, 200,
+                                    json.dumps({"content_token": url}),
+                                    send_cors=True)
+
+        except CodeMessageException as e:
+            logger.exception(e)
+            # send CORS headers like every other response from this resource
+            respond_with_json_bytes(request, e.code,
+                                    json.dumps(cs_exception(e)),
+                                    send_cors=True)
+        except Exception:
+            # logger.exception keeps the traceback for the unexpected case
+            logger.exception("Failed to store file")
+            respond_with_json_bytes(
+                request,
+                500,
+                json.dumps({"error": "Internal server error"}),
+                send_cors=True)
diff --git a/synapse/media/v1/__init__.py b/synapse/media/v1/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/media/v1/__init__.py
diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py
new file mode 100644
index 0000000000..14735ff375
--- /dev/null
+++ b/synapse/media/v1/base_resource.py
@@ -0,0 +1,368 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .thumbnailer import Thumbnailer
+
+from synapse.http.server import respond_with_json
+from synapse.util.stringutils import random_string
+from synapse.api.errors import (
+    cs_exception, CodeMessageException, cs_error, Codes, SynapseError
+)
+
+from twisted.internet import defer
+from twisted.web.resource import Resource
+from twisted.protocols.basic import FileSender
+
+import os
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class BaseMediaResource(Resource):
+    isLeaf = True
+
+    def __init__(self, hs, filepaths):
+        """Collect the home server services the media resources rely on.
+
+        Args:
+            hs: the home server; supplies auth, http client, clock,
+                datastore, hostname and the media related config values.
+            filepaths: object used to work out where media and thumbnails
+                live on disk.
+        """
+        Resource.__init__(self)
+        self.auth = hs.get_auth()
+        self.client = hs.get_http_client()
+        self.clock = hs.get_clock()
+        self.server_name = hs.hostname
+        self.store = hs.get_datastore()
+        self.max_upload_size = hs.config.max_upload_size
+        self.max_image_pixels = hs.config.max_image_pixels
+        self.filepaths = filepaths
+        # in-flight remote downloads, keyed by (server_name, media_id); see
+        # _get_remote_media
+        self.downloads = {}
+
+    @staticmethod
+    def catch_errors(request_handler):
+        """Decorate a request handler so that errors become JSON responses.
+
+        Args:
+            request_handler: a method taking (self, request); whatever it
+                returns is yielded on by the wrapper.
+
+        Returns:
+            The wrapped handler. A CodeMessageException from the handler is
+            logged and sent to the client with its own code; any other
+            Exception is logged and reported as a generic 500. Both
+            responses are sent with CORS enabled.
+        """
+        @defer.inlineCallbacks
+        def wrapped_request_handler(self, request):
+            try:
+                yield request_handler(self, request)
+            except CodeMessageException as e:
+                logger.exception(e)
+                respond_with_json(
+                    request, e.code, cs_exception(e), send_cors=True
+                )
+            except Exception:
+                # Not a bare except: that would also trap KeyboardInterrupt,
+                # SystemExit and GeneratorExit, none of which should be
+                # answered with an HTTP response.
+                logger.exception(
+                    "Failed handle request %s.%s on %r",
+                    request_handler.__module__,
+                    request_handler.__name__,
+                    self,
+                )
+                respond_with_json(
+                    request,
+                    500,
+                    {"error": "Internal server error"},
+                    send_cors=True
+                )
+        return wrapped_request_handler
+
+    @staticmethod
+    def _parse_media_id(request):
+        """Split the request's post-path into (server_name, media_id).
+
+        Raises:
+            SynapseError: 404 when the post-path does not consist of exactly
+                those two segments.
+        """
+        try:
+            server_name, media_id = request.postpath
+            return (server_name, media_id)
+        except Exception:
+            # Codes.UNKNOWN matches the code used by the other _parse_*
+            # helpers; the previous spelling did not name an existing
+            # attribute, so this handler itself blew up.
+            raise SynapseError(
+                404,
+                "Invalid media id token %r" % (request.postpath,),
+                Codes.UNKNOWN,
+            )
+
+    @staticmethod
+    def _parse_integer(request, arg_name, default=None):
+        """Read the first value of a query argument as an integer.
+
+        Args:
+            request: the request whose args are inspected.
+            arg_name: name of the query argument.
+            default: value used when the argument is absent. With the
+                default of None the argument is mandatory.
+
+        Raises:
+            SynapseError: 400 when the argument is missing (and mandatory)
+                or its value cannot be converted to an int.
+        """
+        try:
+            if default is None:
+                return int(request.args[arg_name][0])
+            else:
+                return int(request.args.get(arg_name, [default])[0])
+        except (KeyError, IndexError, TypeError, ValueError):
+            # only lookup and conversion problems are the client's fault;
+            # anything else is left to surface as a server error
+            raise SynapseError(
+                400,
+                "Missing integer argument %r" % (arg_name,),
+                Codes.UNKNOWN,
+            )
+
+    @staticmethod
+    def _parse_string(request, arg_name, default=None):
+        """Read the first value of a query argument as a string.
+
+        Args:
+            request: the request whose args are inspected.
+            arg_name: name of the query argument.
+            default: value used when the argument is absent. With the
+                default of None the argument is mandatory.
+
+        Raises:
+            SynapseError: 400 when a mandatory argument is missing or has no
+                values.
+        """
+        try:
+            if default is None:
+                return request.args[arg_name][0]
+            else:
+                return request.args.get(arg_name, [default])[0]
+        except (KeyError, IndexError):
+            # only lookup problems are the client's fault; anything else is
+            # left to surface as a server error
+            raise SynapseError(
+                400,
+                "Missing string argument %r" % (arg_name,),
+                Codes.UNKNOWN,
+            )
+
+    def _respond_404(self, request):
+        """Send a JSON "not found" response naming the request's post-path.
+
+        The response is sent with CORS enabled.
+        """
+        not_found = cs_error(
+            "Not found %r" % (request.postpath,),
+            code=Codes.NOT_FOUND,
+        )
+        respond_with_json(request, 404, not_found, send_cors=True)
+
+    @staticmethod
+    def _makedirs(filepath):
+        """Create the directory that will contain filepath, if it is absent.
+
+        Only the parent directories are created; filepath itself is not
+        touched.
+        """
+        parent = os.path.dirname(filepath)
+        if os.path.exists(parent):
+            return
+        os.makedirs(parent)
+
+    def _get_remote_media(self, server_name, media_id):
+        """Fetch info for a remote media item, one fetch per item at a time.
+
+        While a fetch for (server_name, media_id) is outstanding its
+        Deferred is kept in self.downloads and handed to any further caller
+        asking for the same item. The entry is dropped once the fetch has
+        finished, whether it succeeded or failed.
+
+        Returns:
+            The Deferred produced by _get_remote_media_impl.
+        """
+        key = (server_name, media_id)
+        download = self.downloads.get(key)
+        if download is None:
+            download = self._get_remote_media_impl(server_name, media_id)
+            self.downloads[key] = download
+
+            def forget_download(result):
+                self.downloads.pop(key, None)
+                # Hand the result (or failure) on untouched. A callback
+                # that returns None would replace the media info with None
+                # and turn a failed download into an apparent success.
+                return result
+            download.addBoth(forget_download)
+            # NOTE(review): callers that attach to the same Deferred see
+            # whatever the previously attached callback returned - confirm
+            # concurrent callers all receive the media info.
+        return download
+
+    @defer.inlineCallbacks
+    def _get_remote_media_impl(self, server_name, media_id):
+        """Return remote media info from the store, downloading if needed.
+
+        The store's cached record is used when there is one; otherwise the
+        file is fetched with _download_remote_file and its info returned.
+        """
+        cached_info = yield self.store.get_cached_remote_media(
+            server_name, media_id
+        )
+        if cached_info:
+            defer.returnValue(cached_info)
+
+        downloaded_info = yield self._download_remote_file(
+            server_name, media_id
+        )
+        defer.returnValue(downloaded_info)
+
+    @defer.inlineCallbacks
+    def _download_remote_file(self, server_name, media_id):
+        """Download a remote media item, record it and thumbnail it.
+
+        The file is written under a freshly generated random filesystem id,
+        a record is added via store_cached_remote_media, and thumbnails are
+        generated. If the download or the store call fails the file is
+        removed again and the error re-raised.
+
+        Returns (via the Deferred) a dict with the keys media_type,
+        media_length, upload_name, created_ts and filesystem_id.
+        """
+        file_id = random_string(24)
+
+        fname = self.filepaths.remote_media_filepath(
+            server_name, file_id
+        )
+        self._makedirs(fname)
+
+        try:
+            with open(fname, "wb") as f:
+                request_path = "/".join((
+                    "/_matrix/media/v1/download", server_name, media_id,
+                ))
+                length, headers = yield self.client.get_file(
+                    server_name, request_path, output_stream=f,
+                    max_size=self.max_upload_size,
+                )
+            media_type = headers["Content-Type"][0]
+            # read the clock once so that the stored record and the info
+            # returned below carry the same timestamp
+            time_now_ms = self.clock.time_msec()
+
+            yield self.store.store_cached_remote_media(
+                origin=server_name,
+                media_id=media_id,
+                media_type=media_type,
+                time_now_ms=time_now_ms,
+                upload_name=None,
+                media_length=length,
+                filesystem_id=file_id,
+            )
+        except:
+            # Clean up the partial file, then let the original error
+            # propagate. The existence check keeps a failed open() from
+            # being masked by an error from os.remove().
+            if os.path.exists(fname):
+                os.remove(fname)
+            raise
+
+        media_info = {
+            "media_type": media_type,
+            "media_length": length,
+            "upload_name": None,
+            "created_ts": time_now_ms,
+            "filesystem_id": file_id,
+        }
+
+        yield self._generate_remote_thumbnails(
+            server_name, media_id, media_info
+        )
+
+        defer.returnValue(media_info)
+
+    @defer.inlineCallbacks
+    def _respond_with_file(self, request, media_type, file_path):
+        """Stream the file at file_path to the client as media_type.
+
+        Args:
+            request: the request to respond to.
+            media_type: value for the Content-Type header; it is encoded as
+                UTF-8 before being set.
+            file_path: path of the file to send.
+
+        A JSON 404 is sent instead when file_path is not an existing file.
+        """
+        logger.debug("Responding with %r", file_path)
+
+        if os.path.isfile(file_path):
+            request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
+
+            # cache for at least a day.
+            # XXX: we might want to turn this off for data we don't want to
+            # recommend caching as it's sensitive or private - or at least
+            # select private. don't bother setting Expires as all our
+            # clients are smart enough to be happy with Cache-Control
+            request.setHeader(
+                b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
+            )
+
+            with open(file_path, "rb") as f:
+                yield FileSender().beginFileTransfer(f, request)
+
+            request.finish()
+        else:
+            # _respond_404 needs the request in order to write the response
+            self._respond_404(request)
+
+    def _get_thumbnail_requirements(self, media_type):
+        """List the thumbnails to generate for media of the given type.
+
+        Returns:
+            A tuple of (width, height, method, thumbnail_type) tuples. JPEG
+            sources get JPEG thumbnails, PNG and GIF sources get PNG
+            thumbnails, and every other type gets an empty tuple.
+        """
+        if media_type == "image/jpeg":
+            thumbnail_type = "image/jpeg"
+        elif media_type in ("image/png", "image/gif"):
+            thumbnail_type = "image/png"
+        else:
+            return ()
+
+        sizes = (
+            (32, 32, "crop"),
+            (96, 96, "crop"),
+            (320, 240, "scale"),
+            (640, 480, "scale"),
+        )
+        return tuple(
+            (width, height, method, thumbnail_type)
+            for width, height, method in sizes
+        )
+
+    @defer.inlineCallbacks
+    def _generate_local_thumbnails(self, media_id, media_info):
+        """Generate and record the thumbnails for a locally stored item.
+
+        Args:
+            media_id: id of the local media item.
+            media_info: dict describing the item; only "media_type" is read.
+
+        Returns (via the Deferred) a dict with the source image's "width"
+        and "height", or None when the media type has no thumbnail
+        requirements or the image is too large to thumbnail.
+        """
+        media_type = media_info["media_type"]
+        requirements = self._get_thumbnail_requirements(media_type)
+        if not requirements:
+            return
+
+        input_path = self.filepaths.local_media_filepath(media_id)
+        thumbnailer = Thumbnailer(input_path)
+        m_width = thumbnailer.width
+        m_height = thumbnailer.height
+
+        # Note the check is ">=" although the log message prints ">".
+        if m_width * m_height >= self.max_image_pixels:
+            logger.info(
+                "Image too large to thumbnail %r x %r > %r",
+                m_width, m_height, self.max_image_pixels
+            )
+            return
+
+        # Collect the distinct sizes first; using sets means a size that is
+        # required more than once is only generated once.
+        scales = set()
+        crops = set()
+        for r_width, r_height, r_method, r_type in requirements:
+            if r_method == "scale":
+                # presumably aspect() fits the image into the requested box
+                # while keeping its proportions - confirm in Thumbnailer
+                t_width, t_height = thumbnailer.aspect(r_width, r_height)
+                # never ask for a scaled thumbnail larger than the source
+                scales.add((
+                    min(m_width, t_width), min(m_height, t_height), r_type,
+                ))
+            elif r_method == "crop":
+                crops.add((r_width, r_height, r_type))
+
+        for t_width, t_height, t_type in scales:
+            t_method = "scale"
+            t_path = self.filepaths.local_media_thumbnail(
+                media_id, t_width, t_height, t_type, t_method
+            )
+            self._makedirs(t_path)
+            # the value returned by scale() is stored as the last argument
+            # of the thumbnail record
+            t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
+            yield self.store.store_local_thumbnail(
+                media_id, t_width, t_height, t_type, t_method, t_len
+            )
+
+        for t_width, t_height, t_type in crops:
+            if (t_width, t_height, t_type) in scales:
+                # If the aspect ratio of the cropped thumbnail matches a purely
+                # scaled one then there is no point in calculating a separate
+                # thumbnail.
+                continue
+            t_method = "crop"
+            t_path = self.filepaths.local_media_thumbnail(
+                media_id, t_width, t_height, t_type, t_method
+            )
+            self._makedirs(t_path)
+            t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
+            yield self.store.store_local_thumbnail(
+                media_id, t_width, t_height, t_type, t_method, t_len
+            )
+
+        defer.returnValue({
+            "width": m_width,
+            "height": m_height,
+        })
+
+    @defer.inlineCallbacks
+    def _generate_remote_thumbnails(self, server_name, media_id, media_info):
+        """Generate and record the thumbnails for a downloaded remote item.
+
+        Args:
+            server_name: name of the server the media came from.
+            media_id: id of the media item on that server.
+            media_info: dict describing the item; "media_type" and
+                "filesystem_id" are read.
+
+        Returns (via the Deferred) a dict with the source image's "width"
+        and "height", or None when the media type has no thumbnail
+        requirements or the image is too large to thumbnail.
+        """
+        media_type = media_info["media_type"]
+        file_id = media_info["filesystem_id"]
+        requirements = self._get_thumbnail_requirements(media_type)
+        if not requirements:
+            return
+
+        # files on disk are addressed by file_id, store records by media_id
+        input_path = self.filepaths.remote_media_filepath(server_name, file_id)
+        thumbnailer = Thumbnailer(input_path)
+        m_width = thumbnailer.width
+        m_height = thumbnailer.height
+
+        # Note the check is ">=" although the log message prints ">".
+        if m_width * m_height >= self.max_image_pixels:
+            logger.info(
+                "Image too large to thumbnail %r x %r > %r",
+                m_width, m_height, self.max_image_pixels
+            )
+            return
+
+        # Collect the distinct sizes first; using sets means a size that is
+        # required more than once is only generated once.
+        scales = set()
+        crops = set()
+        for r_width, r_height, r_method, r_type in requirements:
+            if r_method == "scale":
+                # presumably aspect() fits the image into the requested box
+                # while keeping its proportions - confirm in Thumbnailer
+                t_width, t_height = thumbnailer.aspect(r_width, r_height)
+                # never ask for a scaled thumbnail larger than the source
+                scales.add((
+                    min(m_width, t_width), min(m_height, t_height), r_type,
+                ))
+            elif r_method == "crop":
+                crops.add((r_width, r_height, r_type))
+
+        for t_width, t_height, t_type in scales:
+            t_method = "scale"
+            t_path = self.filepaths.remote_media_thumbnail(
+                server_name, file_id, t_width, t_height, t_type, t_method
+            )
+            self._makedirs(t_path)
+            # the value returned by scale() is stored as the last argument
+            # of the thumbnail record
+            t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
+            yield self.store.store_remote_media_thumbnail(
+                server_name, media_id, file_id,
+                t_width, t_height, t_type, t_method, t_len
+            )
+
+        for t_width, t_height, t_type in crops:
+            if (t_width, t_height, t_type) in scales:
+                # If the aspect ratio of the cropped thumbnail matches a purely
+                # scaled one then there is no point in calculating a separate
+                # thumbnail.
+                continue
+            t_method = "crop"
+            t_path = self.filepaths.remote_media_thumbnail(
+                server_name, file_id, t_width, t_height, t_type, t_method
+            )
+            self._makedirs(t_path)
+            t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
+            yield self.store.store_remote_media_thumbnail(
+                server_name, media_id, file_id,
+                t_width, t_height, t_type, t_method, t_len
+            )
+
+        defer.returnValue({
+            "width": m_width,
+            "height": m_height,
+        })
diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py
new file mode 100644
index 0000000000..f3a6804e05
--- /dev/null
+++ b/synapse/media/v1/download_resource.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base_resource import BaseMediaResource
+
+from twisted.web.server import NOT_DONE_YET
+from twisted.internet import defer
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DownloadResource(BaseMediaResource):
+    """Serves the stored bytes of a media item.
+
+    The request's post-path must be <server-name>/<media-id>. Media whose
+    server name equals this server's is read from the local store; anything
+    else is obtained through _get_remote_media.
+    """
+
+    def render_GET(self, request):
+        """Start handling the download; the response is written later."""
+        self._async_render_GET(request)
+        return NOT_DONE_YET
+
+    @BaseMediaResource.catch_errors
+    @defer.inlineCallbacks
+    def _async_render_GET(self, request):
+        """Work out which media is wanted and respond with its file."""
+        try:
+            server_name, media_id = request.postpath
+        except (TypeError, ValueError):
+            # the post-path was not exactly <server-name>/<media-id>
+            self._respond_404(request)
+            return
+
+        if server_name == self.server_name:
+            yield self._respond_local_file(request, media_id)
+        else:
+            yield self._respond_remote_file(request, server_name, media_id)
+
+    @defer.inlineCallbacks
+    def _respond_local_file(self, request, media_id):
+        """Respond with a local media file, or a 404 if it is unknown."""
+        media_info = yield self.store.get_local_media(media_id)
+        if not media_info:
+            # _respond_404 needs the request in order to write the response
+            self._respond_404(request)
+            return
+
+        media_type = media_info["media_type"]
+        file_path = self.filepaths.local_media_filepath(media_id)
+
+        yield self._respond_with_file(request, media_type, file_path)
+
+    @defer.inlineCallbacks
+    def _respond_remote_file(self, request, server_name, media_id):
+        """Respond with a remote media file, or a 404 if there is no info."""
+        media_info = yield self._get_remote_media(server_name, media_id)
+        if not media_info:
+            # mirror the local case instead of failing on the lookups below
+            self._respond_404(request)
+            return
+
+        media_type = media_info["media_type"]
+        filesystem_id = media_info["filesystem_id"]
+
+        file_path = self.filepaths.remote_media_filepath(
+            server_name, filesystem_id
+        )
+
+        yield self._respond_with_file(request, media_type, file_path)
diff --git a/synapse/media/v1/filepath.py b/synapse/media/v1/filepath.py
new file mode 100644
index 0000000000..0078bc3d40
--- /dev/null
+++ b/synapse/media/v1/filepath.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+
+class MediaFilePaths(object):
+    """Works out where media files and thumbnails live under a base path.
+
+    IDs are spread over two levels of sub-directories named after their
+    first two and next two characters; the rest of the ID is the final
+    path component.
+    """
+
+    def __init__(self, base_path):
+        self.base_path = base_path
+
+    def default_thumbnail(self, default_top_level, default_sub_type, width,
+                          height, content_type, method):
+        """Path of a default thumbnail for the given default media type."""
+        major, minor = content_type.split("/")
+        leaf = "%i-%i-%s-%s-%s" % (width, height, major, minor, method)
+        parts = (
+            self.base_path, "default_thumbnails",
+            default_top_level, default_sub_type, leaf,
+        )
+        return os.path.join(*parts)
+
+    def local_media_filepath(self, media_id):
+        """Path of the stored file for a local media ID."""
+        parts = (
+            self.base_path, "local_content",
+            media_id[:2], media_id[2:4], media_id[4:],
+        )
+        return os.path.join(*parts)
+
+    def local_media_thumbnail(self, media_id, width, height, content_type,
+                              method):
+        """Path of one thumbnail of a local media ID."""
+        major, minor = content_type.split("/")
+        leaf = "%i-%i-%s-%s-%s" % (width, height, major, minor, method)
+        parts = (
+            self.base_path, "local_thumbnails",
+            media_id[:2], media_id[2:4], media_id[4:], leaf,
+        )
+        return os.path.join(*parts)
+
+    def remote_media_filepath(self, server_name, file_id):
+        """Path of the stored copy of a remote server's media file."""
+        parts = (
+            self.base_path, "remote_content", server_name,
+            file_id[:2], file_id[2:4], file_id[4:],
+        )
+        return os.path.join(*parts)
+
+    def remote_media_thumbnail(self, server_name, file_id, width, height,
+                               content_type, method):
+        """Path of one thumbnail of a remote server's media file.
+
+        Unlike the local and default thumbnail names, the method is not
+        part of the file name here.
+        """
+        major, minor = content_type.split("/")
+        leaf = "%i-%i-%s-%s" % (width, height, major, minor)
+        parts = (
+            self.base_path, "remote_thumbnail", server_name,
+            file_id[:2], file_id[2:4], file_id[4:], leaf,
+        )
+        return os.path.join(*parts)
diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py
new file mode 100644
index 0000000000..a0dc56be4b
--- /dev/null
+++ b/synapse/media/v1/media_repository.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .upload_resource import UploadResource
+from .download_resource import DownloadResource
+from .thumbnail_resource import ThumbnailResource
+from .filepath import MediaFilePaths
+
+from twisted.web.resource import Resource
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class MediaRepositoryResource(Resource):
+    """Provides file uploading and downloading.
+
+    Uploads are POSTed to a resource which returns a token which is used to GET
+    the download::
+
+        => POST /_matrix/media/v1/upload HTTP/1.1
+           Content-Type: <media-type>
+
+           <media>
+
+        <= HTTP/1.1 200 OK
+           Content-Type: application/json
+
+           { "token": <media-id> }
+
+        => GET /_matrix/media/v1/download/<server-name>/<media-id> HTTP/1.1
+
+        <= HTTP/1.1 200 OK
+           Content-Type: <media-type>
+           Content-Disposition: attachment;filename=<upload-filename>
+
+           <media>
+
+    Clients can get thumbnails by supplying a desired width and height and
+    thumbnailing method::
+
+        => GET /_matrix/media/v1
+                /thumbnail/<server-name>/<media-id>
+                ?width=<w>&height=<h>&method=<m> HTTP/1.1
+
+        <= HTTP/1.1 200 OK
+           Content-Type: image/jpeg or image/png
+
+           <thumbnail>
+
+    The thumbnail methods are "crop" and "scale". "scale" tries to return an
+    image where either the width or the height is smaller than the requested
+    size. The client should then scale and letterbox the image if it needs to
+    fit within a given rectangle. "crop" tries to return an image where the
+    width and height are close to the requested size and the aspect matches
+    the requested size. The client should scale the image if it needs to fit
+    within a given rectangle.
+    """
+
+    def __init__(self, hs):
+        Resource.__init__(self)
+        # all three child resources share one MediaFilePaths rooted at the
+        # configured media store path
+        filepaths = MediaFilePaths(hs.config.media_store_path)
+        self.putChild("upload", UploadResource(hs, filepaths))
+        self.putChild("download", DownloadResource(hs, filepaths))
+        self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py
new file mode 100644
index 0000000000..e19620d456
--- /dev/null
+++ b/synapse/media/v1/thumbnail_resource.py
@@ -0,0 +1,182 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .base_resource import BaseMediaResource
+
+from twisted.web.server import NOT_DONE_YET
+from twisted.internet import defer
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ThumbnailResource(BaseMediaResource):
+    """Serves thumbnails of local and remote media.
+
+    The query parameters read from the request are "width", "height",
+    "method" (defaults to "scale") and "type" (defaults to "image/png").
+    """
+    isLeaf = True
+
+    def render_GET(self, request):
+        """Start the asynchronous handler; the response is written later."""
+        self._async_render_GET(request)
+        return NOT_DONE_YET
+
+    @BaseMediaResource.catch_errors
+    @defer.inlineCallbacks
+    def _async_render_GET(self, request):
+        """Parse the request and dispatch to the local or remote handler.
+
+        Media whose server name equals ``self.server_name`` is treated as
+        local; everything else is treated as remote.
+        """
+        server_name, media_id = self._parse_media_id(request)
+        width = self._parse_integer(request, "width")
+        height = self._parse_integer(request, "height")
+        method = self._parse_string(request, "method", "scale")
+        m_type = self._parse_string(request, "type", "image/png")
+
+        if server_name == self.server_name:
+            yield self._respond_local_thumbnail(
+                request, media_id, width, height, method, m_type
+            )
+        else:
+            yield self._respond_remote_thumbnail(
+                request, server_name, media_id,
+                width, height, method, m_type
+            )
+
+    @defer.inlineCallbacks
+    def _respond_local_thumbnail(self, request, media_id, width, height,
+                                 method, m_type):
+        """Respond with a thumbnail of a locally stored media item.
+
+        Responds with a 404 if the media is unknown, with the best matching
+        stored thumbnail if there are any, and otherwise with a default
+        thumbnail chosen from the media type.
+        """
+        media_info = yield self.store.get_local_media(media_id)
+
+        if not media_info:
+            self._respond_404(request)
+            return
+
+        thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id)
+
+        if thumbnail_infos:
+            thumbnail_info = self._select_thumbnail(
+                width, height, method, m_type, thumbnail_infos
+            )
+            t_width = thumbnail_info["thumbnail_width"]
+            t_height = thumbnail_info["thumbnail_height"]
+            t_type = thumbnail_info["thumbnail_type"]
+            t_method = thumbnail_info["thumbnail_method"]
+
+            file_path = self.filepaths.local_media_thumbnail(
+                media_id, t_width, t_height, t_type, t_method,
+            )
+            yield self._respond_with_file(request, t_type, file_path)
+
+        else:
+            yield self._respond_default_thumbnail(
+                request, media_info, width, height, method, m_type,
+            )
+
+    @defer.inlineCallbacks
+    def _respond_remote_thumbnail(self, request, server_name, media_id, width,
+                                  height, method, m_type):
+        """Respond with a thumbnail of a media item from another server.
+
+        Responds with the best matching stored thumbnail if there are any,
+        and otherwise with a default thumbnail chosen from the media type.
+        """
+        # TODO: Don't download the whole remote file
+        # We should proxy the thumbnail from the remote server instead.
+        media_info = yield self._get_remote_media(server_name, media_id)
+
+        thumbnail_infos = yield self.store.get_remote_media_thumbnails(
+            server_name, media_id,
+        )
+
+        if thumbnail_infos:
+            thumbnail_info = self._select_thumbnail(
+                width, height, method, m_type, thumbnail_infos
+            )
+            t_width = thumbnail_info["thumbnail_width"]
+            t_height = thumbnail_info["thumbnail_height"]
+            t_type = thumbnail_info["thumbnail_type"]
+            t_method = thumbnail_info["thumbnail_method"]
+            # Remote thumbnails are located by their filesystem id rather
+            # than by the remote media id.
+            file_id = thumbnail_info["filesystem_id"]
+
+            file_path = self.filepaths.remote_media_thumbnail(
+                server_name, file_id, t_width, t_height, t_type, t_method,
+            )
+            yield self._respond_with_file(request, t_type, file_path)
+        else:
+            yield self._respond_default_thumbnail(
+                request, media_info, width, height, method, m_type,
+            )
+
+    @defer.inlineCallbacks
+    def _respond_default_thumbnail(self, request, media_info, width, height,
+                                   method, m_type):
+        """Respond with a default thumbnail chosen from the media type.
+
+        Default thumbnails are looked up for the exact media type first, then
+        for (top level type, "_default") and finally for
+        ("_default", "_default"). Responds with a 404 if none are found.
+        """
+        media_type = media_info["media_type"]
+        top_level_type = media_type.split("/")[0]
+        # Drop any ";parameter" suffix from the sub type.
+        sub_type = media_type.split("/")[-1].split(";")[0]
+        thumbnail_infos = yield self.store.get_default_thumbnails(
+            top_level_type, sub_type,
+        )
+        if not thumbnail_infos:
+            thumbnail_infos = yield self.store.get_default_thumbnails(
+                top_level_type, "_default",
+            )
+        if not thumbnail_infos:
+            thumbnail_infos = yield self.store.get_default_thumbnails(
+                "_default", "_default",
+            )
+        if not thumbnail_infos:
+            self._respond_404(request)
+            return
+
+        # NOTE(review): the requested method is ignored here and "crop"
+        # selection is always used; presumably deliberate, confirm.
+        thumbnail_info = self._select_thumbnail(
+            width, height, "crop", m_type, thumbnail_infos
+        )
+
+        t_width = thumbnail_info["thumbnail_width"]
+        t_height = thumbnail_info["thumbnail_height"]
+        t_type = thumbnail_info["thumbnail_type"]
+        t_method = thumbnail_info["thumbnail_method"]
+
+        file_path = self.filepaths.default_thumbnail(
+            top_level_type, sub_type, t_width, t_height, t_type, t_method,
+        )
+        yield self._respond_with_file(request, t_type, file_path)
+
+    def _select_thumbnail(self, desired_width, desired_height, desired_method,
+                          desired_type, thumbnail_infos):
+        """Pick the stored thumbnail that best matches the request.
+
+        For the "crop" method, thumbnails made with "scale" or "crop" are
+        ranked by aspect mismatch, then size mismatch, then media type
+        mismatch, then byte length. For any other method, "scale" thumbnails
+        with at least one dimension no smaller than requested are ranked by
+        size mismatch, then media type mismatch, then byte length.
+
+        If no thumbnail passes the filter for the requested method, the
+        remaining thumbnails are ranked in the same way (preferring "scale"
+        ones) so that a thumbnail is still returned rather than failing.
+
+        Args:
+            desired_width: The requested width.
+            desired_height: The requested height.
+            desired_method: The requested method; compared case-insensitively
+                against "crop".
+            desired_type: The preferred thumbnail media type.
+            thumbnail_infos: Non-empty list of dicts with the keys
+                "thumbnail_width", "thumbnail_height", "thumbnail_method",
+                "thumbnail_type" and "thumbnail_length".
+
+        Returns:
+            The chosen entry of thumbnail_infos.
+
+        Raises:
+            ValueError: If thumbnail_infos is empty.
+        """
+        d_w = desired_width
+        d_h = desired_height
+        want_crop = desired_method.lower() == "crop"
+
+        preferred = []
+        fallback = []
+        for info in thumbnail_infos:
+            t_w = info["thumbnail_width"]
+            t_h = info["thumbnail_height"]
+            t_method = info["thumbnail_method"]
+
+            size_quality = abs((d_w - t_w) * (d_h - t_h))
+            type_quality = desired_type != info["thumbnail_type"]
+            length_quality = info["thumbnail_length"]
+
+            if want_crop:
+                aspect_quality = abs(d_w * t_h - d_h * t_w)
+                score = (
+                    aspect_quality, size_quality, type_quality,
+                    length_quality,
+                )
+                suitable = t_method == "scale" or t_method == "crop"
+            else:
+                score = (size_quality, type_quality, length_quality)
+                suitable = (
+                    t_method == "scale" and (t_w >= d_w or t_h >= d_h)
+                )
+
+            if suitable:
+                preferred.append((score, info))
+            else:
+                # Among the fallbacks, prefer thumbnails made by scaling.
+                fallback.append(((t_method != "scale",) + score, info))
+
+        # Rank on the score alone so that ties never compare the info dicts.
+        candidates = preferred or fallback
+        return min(candidates, key=lambda candidate: candidate[0])[1]
diff --git a/synapse/media/v1/thumbnailer.py b/synapse/media/v1/thumbnailer.py
new file mode 100644
index 0000000000..774ae4538f
--- /dev/null
+++ b/synapse/media/v1/thumbnailer.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import PIL.Image as Image
+from io import BytesIO
+
+
+class Thumbnailer(object):
+    """Generates scaled or cropped thumbnails of a single image file."""
+
+    # Maps each supported output media type to the format name passed to
+    # the image's save method.
+    FORMATS = {
+        "image/jpeg": "JPEG",
+        "image/png": "PNG",
+    }
+
+    # Image modes that can be written as JPEG without conversion.
+    _JPEG_MODES = ("L", "RGB", "CMYK")
+
+    def __init__(self, input_path):
+        """Open the image at input_path and record its dimensions."""
+        self.image = Image.open(input_path)
+        self.width, self.height = self.image.size
+
+    def aspect(self, max_width, max_height):
+        """Calculate the largest size that preserves aspect ratio which
+        fits within the given rectangle::
+
+            (w_in / h_in) = (w_out / h_out)
+            w_out = min(w_max, h_max * (w_in / h_in))
+            h_out = min(h_max, w_max * (h_in / w_in))
+
+        Args:
+            max_width: The largest possible width.
+            max_height: The largest possible height.
+        Returns:
+            A (width, height) tuple. The derived dimension is rounded down
+            but is never less than 1, so that very wide or very tall images
+            do not produce an empty thumbnail size.
+        """
+
+        if max_width * self.height < max_height * self.width:
+            # The width is the limiting dimension.
+            return (
+                max_width,
+                max(1, (max_width * self.height) // self.width),
+            )
+        else:
+            # The height is the limiting dimension.
+            return (
+                max(1, (max_height * self.width) // self.height),
+                max_height,
+            )
+
+    def scale(self, output_path, width, height, output_type):
+        """Rescales the image to the given dimensions and saves it.
+
+        Args:
+            output_path: Where to write the thumbnail.
+            width: The width of the thumbnail.
+            height: The height of the thumbnail.
+            output_type: The media type to save as; a key of FORMATS.
+        Returns:
+            The number of bytes written.
+        """
+        scaled = self.image.resize((width, height), Image.BILINEAR)
+        return self.save_image(scaled, output_type, output_path)
+
+    def crop(self, output_path, width, height, output_type):
+        """Rescales and crops the image to the given dimensions preserving
+        aspect::
+            (w_in / h_in) = (w_scaled / h_scaled)
+            w_scaled = max(w_out, h_out * (w_in / h_in))
+            h_scaled = max(h_out, w_out * (h_in / w_in))
+
+        Args:
+            output_path: Where to write the thumbnail.
+            width: The width of the thumbnail.
+            height: The height of the thumbnail.
+            output_type: The media type to save as; a key of FORMATS.
+        Returns:
+            The number of bytes written.
+        """
+        if width * self.height > height * self.width:
+            # Scale to the requested width, then trim equal amounts from
+            # the top and bottom.
+            scaled_height = (width * self.height) // self.width
+            scaled_image = self.image.resize(
+                (width, scaled_height), Image.BILINEAR
+            )
+            crop_top = (scaled_height - height) // 2
+            crop_bottom = height + crop_top
+            cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
+        else:
+            # Scale to the requested height, then trim equal amounts from
+            # the left and right.
+            scaled_width = (height * self.width) // self.height
+            scaled_image = self.image.resize(
+                (scaled_width, height), Image.BILINEAR
+            )
+            crop_left = (scaled_width - width) // 2
+            crop_right = width + crop_left
+            cropped = scaled_image.crop((crop_left, 0, crop_right, height))
+        return self.save_image(cropped, output_type, output_path)
+
+    def save_image(self, output_image, output_type, output_path):
+        """Encode output_image and write it to output_path.
+
+        Args:
+            output_image: The image to save.
+            output_type: The media type to save as; a key of FORMATS.
+            output_path: Where to write the encoded image.
+        Returns:
+            The number of bytes written.
+        Raises:
+            KeyError: If output_type is not a key of FORMATS.
+        """
+        output_format = self.FORMATS[output_type]
+        if (output_format == "JPEG"
+                and output_image.mode not in self._JPEG_MODES):
+            # JPEG cannot store palette or alpha data, so convert such
+            # images to RGB rather than failing to encode them.
+            output_image = output_image.convert("RGB")
+        # Encode into memory first so the encoded length can be returned.
+        output_bytes_io = BytesIO()
+        output_image.save(output_bytes_io, output_format)
+        output_bytes = output_bytes_io.getvalue()
+        with open(output_path, "wb") as output_file:
+            output_file.write(output_bytes)
+        return len(output_bytes)
diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py
new file mode 100644
index 0000000000..b2449ff03d
--- /dev/null
+++ b/synapse/media/v1/upload_resource.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.http.server import respond_with_json
+
+from synapse.util.stringutils import random_string
+from synapse.api.errors import (
+    cs_exception, SynapseError, CodeMessageException
+)
+
+from twisted.web.server import NOT_DONE_YET
+from twisted.internet import defer
+
+from .base_resource import BaseMediaResource
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class UploadResource(BaseMediaResource):
+    """Stores uploaded files as local media.
+
+    A successful POST writes the request body to disk, records it in the
+    store, generates thumbnails and replies with a JSON object of the form
+    {"content_token": <media-id>}.
+    """
+
+    def render_POST(self, request):
+        """Start the asynchronous handler; the response is written later."""
+        self._async_render_POST(request)
+        return NOT_DONE_YET
+
+    def render_OPTIONS(self, request):
+        """Reply to OPTIONS requests with an empty JSON object."""
+        respond_with_json(request, 200, {}, send_cors=True)
+        return NOT_DONE_YET
+
+    @defer.inlineCallbacks
+    def _async_render_POST(self, request):
+        """Validate and store an upload, then reply with its media id.
+
+        Replies with the code of any CodeMessageException raised while
+        handling the request (400 for a missing or malformed Content-Length
+        or a missing Content-Type, 413 for an oversized upload) and with a
+        500 for any other failure.
+        """
+        try:
+            auth_user = yield self.auth.get_user_by_req(request)
+            # TODO: The checks here are a bit late. The content will have
+            # already been uploaded to a tmp file at this point
+            content_length = request.getHeader("Content-Length")
+            if content_length is None:
+                raise SynapseError(
+                    msg="Request must specify a Content-Length", code=400
+                )
+            try:
+                media_length = int(content_length)
+            except ValueError:
+                # A malformed header is the client's fault, not a server
+                # error.
+                raise SynapseError(
+                    msg="Content-Length must be an integer", code=400
+                )
+            if media_length > self.max_upload_size:
+                raise SynapseError(
+                    msg="Upload request body is too large",
+                    code=413,
+                )
+
+            headers = request.requestHeaders
+
+            if headers.hasHeader("Content-Type"):
+                media_type = headers.getRawHeaders("Content-Type")[0]
+            else:
+                raise SynapseError(
+                    msg="Upload request missing 'Content-Type'",
+                    code=400,
+                )
+
+            # TODO(markjh): parse the Content-Disposition header so that the
+            # upload name can be recorded instead of None.
+
+            media_id = random_string(24)
+
+            fname = self.filepaths.local_media_filepath(media_id)
+            self._makedirs(fname)
+
+            # This shouldn't block for very long because the content will have
+            # already been uploaded at this point.
+            with open(fname, "wb") as f:
+                f.write(request.content.read())
+
+            # Record the parsed integer length rather than the raw header
+            # string.
+            yield self.store.store_local_media(
+                media_id=media_id,
+                media_type=media_type,
+                time_now_ms=self.clock.time_msec(),
+                upload_name=None,
+                media_length=media_length,
+                user_id=auth_user,
+            )
+            media_info = {
+                "media_type": media_type,
+                "media_length": media_length,
+            }
+
+            yield self._generate_local_thumbnails(media_id, media_info)
+
+            respond_with_json(
+                request, 200, {"content_token": media_id}, send_cors=True
+            )
+        except CodeMessageException as e:
+            logger.exception(e)
+            respond_with_json(request, e.code, cs_exception(e), send_cors=True)
+        except Exception:
+            # Anything unexpected is logged and reported as a server error.
+            logger.exception("Failed to store file")
+            respond_with_json(
+                request,
+                500,
+                {"error": "Internal server error"},
+                send_cors=True
+            )