summary refs log tree commit diff
path: root/synapse/http
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2014-12-11 17:46:23 +0000
committerErik Johnston <erik@matrix.org>2014-12-11 17:46:23 +0000
commit85574cfbf06ffab46b511ec684b40fb8239b7807 (patch)
tree6596757f5eabfd708df0fc42de2320bb27517de0 /synapse/http
parentImplement .cancel_call_later() in MockClock (diff)
parentAllow only one download for a given image at a time, so that we don't end up ... (diff)
downloadsynapse-85574cfbf06ffab46b511ec684b40fb8239b7807.tar.xz
Merge pull request #23 from matrix-org/media_repository
Media repository
Diffstat (limited to 'synapse/http')
-rw-r--r--synapse/http/content_repository.py212
-rw-r--r--synapse/http/matrixfederationclient.py88
-rw-r--r--synapse/http/server.py21
3 files changed, 96 insertions, 225 deletions
diff --git a/synapse/http/content_repository.py b/synapse/http/content_repository.py
deleted file mode 100644
index 64ecb5346e..0000000000
--- a/synapse/http/content_repository.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2014 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .server import respond_with_json_bytes
-
-from synapse.util.stringutils import random_string
-from synapse.api.errors import (
-    cs_exception, SynapseError, CodeMessageException, Codes, cs_error
-)
-
-from twisted.protocols.basic import FileSender
-from twisted.web import server, resource
-from twisted.internet import defer
-
-import base64
-import json
-import logging
-import os
-import re
-
-logger = logging.getLogger(__name__)
-
-
-class ContentRepoResource(resource.Resource):
-    """Provides file uploading and downloading.
-
-    Uploads are POSTed to wherever this Resource is linked to. This resource
-    returns a "content token" which can be used to GET this content again. The
-    token is typically a path, but it may not be. Tokens can expire, be
-    one-time uses, etc.
-
-    In this case, the token is a path to the file and contains 3 interesting
-    sections:
-        - User ID base64d (for namespacing content to each user)
-        - random 24 char string
-        - Content type base64d (so we can return it when clients GET it)
-
-    """
-    isLeaf = True
-
-    def __init__(self, hs, directory, auth, external_addr):
-        resource.Resource.__init__(self)
-        self.hs = hs
-        self.directory = directory
-        self.auth = auth
-        self.external_addr = external_addr.rstrip('/')
-        self.max_upload_size = hs.config.max_upload_size
-
-        if not os.path.isdir(self.directory):
-            os.mkdir(self.directory)
-            logger.info("ContentRepoResource : Created %s directory.",
-                        self.directory)
-
-    @defer.inlineCallbacks
-    def map_request_to_name(self, request):
-        # auth the user
-        auth_user = yield self.auth.get_user_by_req(request)
-
-        # namespace all file uploads on the user
-        prefix = base64.urlsafe_b64encode(
-            auth_user.to_string()
-        ).replace('=', '')
-
-        # use a random string for the main portion
-        main_part = random_string(24)
-
-        # suffix with a file extension if we can make one. This is nice to
-        # provide a hint to clients on the file information. We will also reuse
-        # this info to spit back the content type to the client.
-        suffix = ""
-        if request.requestHeaders.hasHeader("Content-Type"):
-            content_type = request.requestHeaders.getRawHeaders(
-                "Content-Type")[0]
-            suffix = "." + base64.urlsafe_b64encode(content_type)
-            if (content_type.split("/")[0].lower() in
-                    ["image", "video", "audio"]):
-                file_ext = content_type.split("/")[-1]
-                # be a little paranoid and only allow a-z
-                file_ext = re.sub("[^a-z]", "", file_ext)
-                suffix += "." + file_ext
-
-        file_name = prefix + main_part + suffix
-        file_path = os.path.join(self.directory, file_name)
-        logger.info("User %s is uploading a file to path %s",
-                    auth_user.to_string(),
-                    file_path)
-
-        # keep trying to make a non-clashing file, with a sensible max attempts
-        attempts = 0
-        while os.path.exists(file_path):
-            main_part = random_string(24)
-            file_name = prefix + main_part + suffix
-            file_path = os.path.join(self.directory, file_name)
-            attempts += 1
-            if attempts > 25:  # really? Really?
-                raise SynapseError(500, "Unable to create file.")
-
-        defer.returnValue(file_path)
-
-    def render_GET(self, request):
-        # no auth here on purpose, to allow anyone to view, even across home
-        # servers.
-
-        # TODO: A little crude here, we could do this better.
-        filename = request.path.split('/')[-1]
-        # be paranoid
-        filename = re.sub("[^0-9A-z.-_]", "", filename)
-
-        file_path = self.directory + "/" + filename
-
-        logger.debug("Searching for %s", file_path)
-
-        if os.path.isfile(file_path):
-            # filename has the content type
-            base64_contentype = filename.split(".")[1]
-            content_type = base64.urlsafe_b64decode(base64_contentype)
-            logger.info("Sending file %s", file_path)
-            f = open(file_path, 'rb')
-            request.setHeader('Content-Type', content_type)
-
-            # cache for at least a day.
-            # XXX: we might want to turn this off for data we don't want to
-            # recommend caching as it's sensitive or private - or at least
-            # select private. don't bother setting Expires as all our matrix
-            # clients are smart enough to be happy with Cache-Control (right?)
-            request.setHeader(
-                "Cache-Control", "public,max-age=86400,s-maxage=86400"
-            )
-
-            d = FileSender().beginFileTransfer(f, request)
-
-            # after the file has been sent, clean up and finish the request
-            def cbFinished(ignored):
-                f.close()
-                request.finish()
-            d.addCallback(cbFinished)
-        else:
-            respond_with_json_bytes(
-                request,
-                404,
-                json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)),
-                send_cors=True)
-
-        return server.NOT_DONE_YET
-
-    def render_POST(self, request):
-        self._async_render(request)
-        return server.NOT_DONE_YET
-
-    def render_OPTIONS(self, request):
-        respond_with_json_bytes(request, 200, {}, send_cors=True)
-        return server.NOT_DONE_YET
-
-    @defer.inlineCallbacks
-    def _async_render(self, request):
-        try:
-            # TODO: The checks here are a bit late. The content will have
-            # already been uploaded to a tmp file at this point
-            content_length = request.getHeader("Content-Length")
-            if content_length is None:
-                raise SynapseError(
-                    msg="Request must specify a Content-Length", code=400
-                )
-            if int(content_length) > self.max_upload_size:
-                raise SynapseError(
-                    msg="Upload request body is too large",
-                    code=413,
-                )
-
-            fname = yield self.map_request_to_name(request)
-
-            # TODO I have a suspicious feeling this is just going to block
-            with open(fname, "wb") as f:
-                f.write(request.content.read())
-
-            # FIXME (erikj): These should use constants.
-            file_name = os.path.basename(fname)
-            # FIXME: we can't assume what the repo's public mounted path is
-            # ...plus self-signed SSL won't work to remote clients anyway
-            # ...and we can't assume that it's SSL anyway, as we might want to
-            # serve it via the non-SSL listener...
-            url = "%s/_matrix/content/%s" % (
-                self.external_addr, file_name
-            )
-
-            respond_with_json_bytes(request, 200,
-                                    json.dumps({"content_token": url}),
-                                    send_cors=True)
-
-        except CodeMessageException as e:
-            logger.exception(e)
-            respond_with_json_bytes(request, e.code,
-                                    json.dumps(cs_exception(e)))
-        except Exception as e:
-            logger.error("Failed to store file: %s" % e)
-            respond_with_json_bytes(
-                request,
-                500,
-                json.dumps({"error": "Internal server error"}),
-                send_cors=True)
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index fc5b5ab809..8f4db59c75 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -14,10 +14,11 @@
 # limitations under the License.
 
 
-from twisted.internet import defer, reactor
+from twisted.internet import defer, reactor, protocol
 from twisted.internet.error import DNSLookupError
 from twisted.web.client import readBody, _AgentBase, _URI
 from twisted.web.http_headers import Headers
+from twisted.web._newclient import ResponseDone
 
 from synapse.http.endpoint import matrix_federation_endpoint
 from synapse.util.async import sleep
@@ -25,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext
 
 from syutil.jsonutil import encode_canonical_json
 
-from synapse.api.errors import CodeMessageException, SynapseError
+from synapse.api.errors import CodeMessageException, SynapseError, Codes
 
 from syutil.crypto.jsonsign import sign_json
 
@@ -244,7 +245,7 @@ class MatrixFederationHttpClient(object):
 
     @defer.inlineCallbacks
     def get_json(self, destination, path, args={}, retry_on_dns_fail=True):
-        """ Get's some json from the given host homeserver and path
+        """ GETs some json from the given host homeserver and path
 
         Args:
             destination (str): The remote server to send the HTTP request
@@ -252,9 +253,6 @@ class MatrixFederationHttpClient(object):
             path (str): The HTTP path.
             args (dict): A dictionary used to create query strings, defaults to
                 None.
-                **Note**: The value of each key is assumed to be an iterable
-                and *not* a string.
-
         Returns:
             Deferred: Succeeds when we get *any* HTTP response.
 
@@ -289,6 +287,52 @@ class MatrixFederationHttpClient(object):
 
         defer.returnValue(json.loads(body))
 
+    @defer.inlineCallbacks
+    def get_file(self, destination, path, output_stream, args={},
+                 retry_on_dns_fail=True, max_size=None):
+        """GETs a file from a given homeserver
+        Args:
+            destination (str): The remote server to send the HTTP request to.
+            path (str): The HTTP path to GET.
+            output_stream (file): File to write the response body to.
+            args (dict): Optional dictionary used to create the query string.
+        Returns:
+            A (int,dict) tuple of the file length and a dict of the response
+            headers.
+        """
+
+        encoded_args = {}
+        for k, vs in args.items():
+            if isinstance(vs, basestring):
+                vs = [vs]
+            encoded_args[k] = [v.encode("UTF-8") for v in vs]
+
+        query_bytes = urllib.urlencode(encoded_args, True)
+        logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
+
+        def body_callback(method, url_bytes, headers_dict):
+            self.sign_request(destination, method, url_bytes, headers_dict)
+            return None
+
+        response = yield self._create_request(
+            destination.encode("ascii"),
+            "GET",
+            path.encode("ascii"),
+            query_bytes=query_bytes,
+            body_callback=body_callback,
+            retry_on_dns_fail=retry_on_dns_fail
+        )
+
+        headers = dict(response.headers.getAllRawHeaders())
+
+        try:
+            length = yield _readBodyToFile(response, output_stream, max_size)
+        except:
+            logger.exception("Failed to download body")
+            raise
+
+        defer.returnValue((length, headers))
+
     def _getEndpoint(self, reactor, destination):
         return matrix_federation_endpoint(
             reactor, destination, timeout=10,
@@ -296,6 +340,38 @@ class MatrixFederationHttpClient(object):
         )
 
 
+class _ReadBodyToFileProtocol(protocol.Protocol):
+    def __init__(self, stream, deferred, max_size):
+        self.stream = stream
+        self.deferred = deferred
+        self.length = 0
+        self.max_size = max_size
+
+    def dataReceived(self, data):
+        self.stream.write(data)
+        self.length += len(data)
+        if self.max_size is not None and self.length >= self.max_size:
+            self.deferred.errback(SynapseError(
+                502,
+                "Requested file is too large > %r bytes" % (self.max_size,),
+                Codes.TOO_LARGE,
+            ))
+            self.deferred = defer.Deferred()
+            self.transport.loseConnection()
+
+    def connectionLost(self, reason):
+        if reason.check(ResponseDone):
+            self.deferred.callback(self.length)
+        else:
+            self.deferred.errback(reason)
+
+
+def _readBodyToFile(response, stream, max_size):
+    d = defer.Deferred()
+    response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size))
+    return d
+
+
 def _print_ex(e):
     if hasattr(e, "reasons") and e.reasons:
         for ex in e.reasons:
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 8024ff5bde..02277c4998 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -166,14 +166,10 @@ class JsonResource(HttpServer, resource.Resource):
                 request)
             return
 
-        if not self._request_user_agent_is_curl(request):
-            json_bytes = encode_canonical_json(response_json_object)
-        else:
-            json_bytes = encode_pretty_printed_json(response_json_object)
-
         # TODO: Only enable CORS for the requests that need it.
-        respond_with_json_bytes(request, code, json_bytes, send_cors=True,
-                                response_code_message=response_code_message)
+        respond_with_json(request, code, response_json_object, send_cors=True,
+                          response_code_message=response_code_message,
+                          pretty_print=self._request_user_agent_is_curl)
 
     @staticmethod
     def _request_user_agent_is_curl(request):
@@ -202,6 +198,17 @@ class RootRedirect(resource.Resource):
         return resource.Resource.getChild(self, name, request)
 
 
+def respond_with_json(request, code, json_object, send_cors=False,
+                      response_code_message=None, pretty_print=False):
+    if not pretty_print:
+        json_bytes = encode_pretty_printed_json(json_object)
+    else:
+        json_bytes = encode_canonical_json(json_object)
+
+    return respond_with_json_bytes(request, code, json_bytes, send_cors,
+                                   response_code_message=response_code_message)
+
+
 def respond_with_json_bytes(request, code, json_bytes, send_cors=False,
                             response_code_message=None):
     """Sends encoded JSON in response to the given request.