diff options
author | Erik Johnston <erik@matrix.org> | 2014-12-11 17:46:23 +0000 |
---|---|---|
committer | Erik Johnston <erik@matrix.org> | 2014-12-11 17:46:23 +0000 |
commit | 85574cfbf06ffab46b511ec684b40fb8239b7807 (patch) | |
tree | 6596757f5eabfd708df0fc42de2320bb27517de0 /synapse/http | |
parent | Implement .cancel_call_later() in MockClock (diff) | |
parent | Allow only one download for a given image at a time, so that we don't end up ... (diff) | |
download | synapse-85574cfbf06ffab46b511ec684b40fb8239b7807.tar.xz |
Merge pull request #23 from matrix-org/media_repository
Media repository
Diffstat (limited to 'synapse/http')
-rw-r--r-- | synapse/http/content_repository.py | 212 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 88 | ||||
-rw-r--r-- | synapse/http/server.py | 21 |
3 files changed, 96 insertions, 225 deletions
diff --git a/synapse/http/content_repository.py b/synapse/http/content_repository.py deleted file mode 100644 index 64ecb5346e..0000000000 --- a/synapse/http/content_repository.py +++ /dev/null @@ -1,212 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .server import respond_with_json_bytes - -from synapse.util.stringutils import random_string -from synapse.api.errors import ( - cs_exception, SynapseError, CodeMessageException, Codes, cs_error -) - -from twisted.protocols.basic import FileSender -from twisted.web import server, resource -from twisted.internet import defer - -import base64 -import json -import logging -import os -import re - -logger = logging.getLogger(__name__) - - -class ContentRepoResource(resource.Resource): - """Provides file uploading and downloading. - - Uploads are POSTed to wherever this Resource is linked to. This resource - returns a "content token" which can be used to GET this content again. The - token is typically a path, but it may not be. Tokens can expire, be - one-time uses, etc. - - In this case, the token is a path to the file and contains 3 interesting - sections: - - User ID base64d (for namespacing content to each user) - - random 24 char string - - Content type base64d (so we can return it when clients GET it) - - """ - isLeaf = True - - def __init__(self, hs, directory, auth, external_addr): - resource.Resource.__init__(self) - self.hs = hs - self.directory = directory - self.auth = auth - self.external_addr = external_addr.rstrip('/') - self.max_upload_size = hs.config.max_upload_size - - if not os.path.isdir(self.directory): - os.mkdir(self.directory) - logger.info("ContentRepoResource : Created %s directory.", - self.directory) - - @defer.inlineCallbacks - def map_request_to_name(self, request): - # auth the user - auth_user = yield self.auth.get_user_by_req(request) - - # namespace all file uploads on the user - prefix = base64.urlsafe_b64encode( - auth_user.to_string() - ).replace('=', '') - - # use a random string for the main portion - main_part = random_string(24) - - # suffix with a file extension if we can make one. This is nice to - # provide a hint to clients on the file information. We will also reuse - # this info to spit back the content type to the client. - suffix = "" - if request.requestHeaders.hasHeader("Content-Type"): - content_type = request.requestHeaders.getRawHeaders( - "Content-Type")[0] - suffix = "." + base64.urlsafe_b64encode(content_type) - if (content_type.split("/")[0].lower() in - ["image", "video", "audio"]): - file_ext = content_type.split("/")[-1] - # be a little paranoid and only allow a-z - file_ext = re.sub("[^a-z]", "", file_ext) - suffix += "." + file_ext - - file_name = prefix + main_part + suffix - file_path = os.path.join(self.directory, file_name) - logger.info("User %s is uploading a file to path %s", - auth_user.to_string(), - file_path) - - # keep trying to make a non-clashing file, with a sensible max attempts - attempts = 0 - while os.path.exists(file_path): - main_part = random_string(24) - file_name = prefix + main_part + suffix - file_path = os.path.join(self.directory, file_name) - attempts += 1 - if attempts > 25: # really? Really? - raise SynapseError(500, "Unable to create file.") - - defer.returnValue(file_path) - - def render_GET(self, request): - # no auth here on purpose, to allow anyone to view, even across home - # servers. - - # TODO: A little crude here, we could do this better. - filename = request.path.split('/')[-1] - # be paranoid - filename = re.sub("[^0-9A-z.-_]", "", filename) - - file_path = self.directory + "/" + filename - - logger.debug("Searching for %s", file_path) - - if os.path.isfile(file_path): - # filename has the content type - base64_contentype = filename.split(".")[1] - content_type = base64.urlsafe_b64decode(base64_contentype) - logger.info("Sending file %s", file_path) - f = open(file_path, 'rb') - request.setHeader('Content-Type', content_type) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our matrix - # clients are smart enough to be happy with Cache-Control (right?) - request.setHeader( - "Cache-Control", "public,max-age=86400,s-maxage=86400" - ) - - d = FileSender().beginFileTransfer(f, request) - - # after the file has been sent, clean up and finish the request - def cbFinished(ignored): - f.close() - request.finish() - d.addCallback(cbFinished) - else: - respond_with_json_bytes( - request, - 404, - json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)), - send_cors=True) - - return server.NOT_DONE_YET - - def render_POST(self, request): - self._async_render(request) - return server.NOT_DONE_YET - - def render_OPTIONS(self, request): - respond_with_json_bytes(request, 200, {}, send_cors=True) - return server.NOT_DONE_YET - - @defer.inlineCallbacks - def _async_render(self, request): - try: - # TODO: The checks here are a bit late. The content will have - # already been uploaded to a tmp file at this point - content_length = request.getHeader("Content-Length") - if content_length is None: - raise SynapseError( - msg="Request must specify a Content-Length", code=400 - ) - if int(content_length) > self.max_upload_size: - raise SynapseError( - msg="Upload request body is too large", - code=413, - ) - - fname = yield self.map_request_to_name(request) - - # TODO I have a suspicious feeling this is just going to block - with open(fname, "wb") as f: - f.write(request.content.read()) - - # FIXME (erikj): These should use constants. - file_name = os.path.basename(fname) - # FIXME: we can't assume what the repo's public mounted path is - # ...plus self-signed SSL won't work to remote clients anyway - # ...and we can't assume that it's SSL anyway, as we might want to - # serve it via the non-SSL listener... - url = "%s/_matrix/content/%s" % ( - self.external_addr, file_name - ) - - respond_with_json_bytes(request, 200, - json.dumps({"content_token": url}), - send_cors=True) - - except CodeMessageException as e: - logger.exception(e) - respond_with_json_bytes(request, e.code, - json.dumps(cs_exception(e))) - except Exception as e: - logger.error("Failed to store file: %s" % e) - respond_with_json_bytes( - request, - 500, - json.dumps({"error": "Internal server error"}), - send_cors=True) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index fc5b5ab809..8f4db59c75 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -14,10 +14,11 @@ # limitations under the License. -from twisted.internet import defer, reactor +from twisted.internet import defer, reactor, protocol from twisted.internet.error import DNSLookupError from twisted.web.client import readBody, _AgentBase, _URI from twisted.web.http_headers import Headers +from twisted.web._newclient import ResponseDone from synapse.http.endpoint import matrix_federation_endpoint from synapse.util.async import sleep @@ -25,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext from syutil.jsonutil import encode_canonical_json -from synapse.api.errors import CodeMessageException, SynapseError +from synapse.api.errors import CodeMessageException, SynapseError, Codes from syutil.crypto.jsonsign import sign_json @@ -244,7 +245,7 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_json(self, destination, path, args={}, retry_on_dns_fail=True): - """ Get's some json from the given host homeserver and path + """ GETs some json from the given host homeserver and path Args: destination (str): The remote server to send the HTTP request @@ -252,9 +253,6 @@ class MatrixFederationHttpClient(object): path (str): The HTTP path. args (dict): A dictionary used to create query strings, defaults to None. - **Note**: The value of each key is assumed to be an iterable - and *not* a string. - Returns: Deferred: Succeeds when we get *any* HTTP response. @@ -289,6 +287,52 @@ class MatrixFederationHttpClient(object): defer.returnValue(json.loads(body)) + @defer.inlineCallbacks + def get_file(self, destination, path, output_stream, args={}, + retry_on_dns_fail=True, max_size=None): + """GETs a file from a given homeserver + Args: + destination (str): The remote server to send the HTTP request to. + path (str): The HTTP path to GET. + output_stream (file): File to write the response body to. + args (dict): Optional dictionary used to create the query string. + Returns: + A (int,dict) tuple of the file length and a dict of the response + headers. + """ + + encoded_args = {} + for k, vs in args.items(): + if isinstance(vs, basestring): + vs = [vs] + encoded_args[k] = [v.encode("UTF-8") for v in vs] + + query_bytes = urllib.urlencode(encoded_args, True) + logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) + + def body_callback(method, url_bytes, headers_dict): + self.sign_request(destination, method, url_bytes, headers_dict) + return None + + response = yield self._create_request( + destination.encode("ascii"), + "GET", + path.encode("ascii"), + query_bytes=query_bytes, + body_callback=body_callback, + retry_on_dns_fail=retry_on_dns_fail + ) + + headers = dict(response.headers.getAllRawHeaders()) + + try: + length = yield _readBodyToFile(response, output_stream, max_size) + except: + logger.exception("Failed to download body") + raise + + defer.returnValue((length, headers)) + def _getEndpoint(self, reactor, destination): return matrix_federation_endpoint( reactor, destination, timeout=10, @@ -296,6 +340,38 @@ class MatrixFederationHttpClient(object): ) +class _ReadBodyToFileProtocol(protocol.Protocol): + def __init__(self, stream, deferred, max_size): + self.stream = stream + self.deferred = deferred + self.length = 0 + self.max_size = max_size + + def dataReceived(self, data): + self.stream.write(data) + self.length += len(data) + if self.max_size is not None and self.length >= self.max_size: + self.deferred.errback(SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + )) + self.deferred = defer.Deferred() + self.transport.loseConnection() + + def connectionLost(self, reason): + if reason.check(ResponseDone): + self.deferred.callback(self.length) + else: + self.deferred.errback(reason) + + +def _readBodyToFile(response, stream, max_size): + d = defer.Deferred() + response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) + return d + + def _print_ex(e): if hasattr(e, "reasons") and e.reasons: for ex in e.reasons: diff --git a/synapse/http/server.py b/synapse/http/server.py index 8024ff5bde..02277c4998 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -166,14 +166,10 @@ class JsonResource(HttpServer, resource.Resource): request) return - if not self._request_user_agent_is_curl(request): - json_bytes = encode_canonical_json(response_json_object) - else: - json_bytes = encode_pretty_printed_json(response_json_object) - # TODO: Only enable CORS for the requests that need it. - respond_with_json_bytes(request, code, json_bytes, send_cors=True, - response_code_message=response_code_message) + respond_with_json(request, code, response_json_object, send_cors=True, + response_code_message=response_code_message, + pretty_print=self._request_user_agent_is_curl) @staticmethod def _request_user_agent_is_curl(request): @@ -202,6 +198,17 @@ class RootRedirect(resource.Resource): return resource.Resource.getChild(self, name, request) +def respond_with_json(request, code, json_object, send_cors=False, + response_code_message=None, pretty_print=False): + if not pretty_print: + json_bytes = encode_pretty_printed_json(json_object) + else: + json_bytes = encode_canonical_json(json_object) + + return respond_with_json_bytes(request, code, json_bytes, send_cors, + response_code_message=response_code_message) + + def respond_with_json_bytes(request, code, json_bytes, send_cors=False, response_code_message=None): """Sends encoded JSON in response to the given request. |