diff --git a/synapse/http/content_repository.py b/synapse/http/content_repository.py
deleted file mode 100644
index 64ecb5346e..0000000000
--- a/synapse/http/content_repository.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2014 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .server import respond_with_json_bytes
-
-from synapse.util.stringutils import random_string
-from synapse.api.errors import (
- cs_exception, SynapseError, CodeMessageException, Codes, cs_error
-)
-
-from twisted.protocols.basic import FileSender
-from twisted.web import server, resource
-from twisted.internet import defer
-
-import base64
-import json
-import logging
-import os
-import re
-
-logger = logging.getLogger(__name__)
-
-
-class ContentRepoResource(resource.Resource):
- """Provides file uploading and downloading.
-
- Uploads are POSTed to wherever this Resource is linked to. This resource
- returns a "content token" which can be used to GET this content again. The
- token is typically a path, but it may not be. Tokens can expire, be
- one-time uses, etc.
-
- In this case, the token is a path to the file and contains 3 interesting
- sections:
- - User ID base64d (for namespacing content to each user)
- - random 24 char string
- - Content type base64d (so we can return it when clients GET it)
-
- """
- isLeaf = True
-
- def __init__(self, hs, directory, auth, external_addr):
- resource.Resource.__init__(self)
- self.hs = hs
- self.directory = directory
- self.auth = auth
- self.external_addr = external_addr.rstrip('/')
- self.max_upload_size = hs.config.max_upload_size
-
- if not os.path.isdir(self.directory):
- os.mkdir(self.directory)
- logger.info("ContentRepoResource : Created %s directory.",
- self.directory)
-
- @defer.inlineCallbacks
- def map_request_to_name(self, request):
- # auth the user
- auth_user = yield self.auth.get_user_by_req(request)
-
- # namespace all file uploads on the user
- prefix = base64.urlsafe_b64encode(
- auth_user.to_string()
- ).replace('=', '')
-
- # use a random string for the main portion
- main_part = random_string(24)
-
- # suffix with a file extension if we can make one. This is nice to
- # provide a hint to clients on the file information. We will also reuse
- # this info to spit back the content type to the client.
- suffix = ""
- if request.requestHeaders.hasHeader("Content-Type"):
- content_type = request.requestHeaders.getRawHeaders(
- "Content-Type")[0]
- suffix = "." + base64.urlsafe_b64encode(content_type)
- if (content_type.split("/")[0].lower() in
- ["image", "video", "audio"]):
- file_ext = content_type.split("/")[-1]
- # be a little paranoid and only allow a-z
- file_ext = re.sub("[^a-z]", "", file_ext)
- suffix += "." + file_ext
-
- file_name = prefix + main_part + suffix
- file_path = os.path.join(self.directory, file_name)
- logger.info("User %s is uploading a file to path %s",
- auth_user.to_string(),
- file_path)
-
- # keep trying to make a non-clashing file, with a sensible max attempts
- attempts = 0
- while os.path.exists(file_path):
- main_part = random_string(24)
- file_name = prefix + main_part + suffix
- file_path = os.path.join(self.directory, file_name)
- attempts += 1
- if attempts > 25: # really? Really?
- raise SynapseError(500, "Unable to create file.")
-
- defer.returnValue(file_path)
-
- def render_GET(self, request):
- # no auth here on purpose, to allow anyone to view, even across home
- # servers.
-
- # TODO: A little crude here, we could do this better.
- filename = request.path.split('/')[-1]
- # be paranoid
- filename = re.sub("[^0-9A-z.-_]", "", filename)
-
- file_path = self.directory + "/" + filename
-
- logger.debug("Searching for %s", file_path)
-
- if os.path.isfile(file_path):
- # filename has the content type
- base64_contentype = filename.split(".")[1]
- content_type = base64.urlsafe_b64decode(base64_contentype)
- logger.info("Sending file %s", file_path)
- f = open(file_path, 'rb')
- request.setHeader('Content-Type', content_type)
-
- # cache for at least a day.
- # XXX: we might want to turn this off for data we don't want to
- # recommend caching as it's sensitive or private - or at least
- # select private. don't bother setting Expires as all our matrix
- # clients are smart enough to be happy with Cache-Control (right?)
- request.setHeader(
- "Cache-Control", "public,max-age=86400,s-maxage=86400"
- )
-
- d = FileSender().beginFileTransfer(f, request)
-
- # after the file has been sent, clean up and finish the request
- def cbFinished(ignored):
- f.close()
- request.finish()
- d.addCallback(cbFinished)
- else:
- respond_with_json_bytes(
- request,
- 404,
- json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)),
- send_cors=True)
-
- return server.NOT_DONE_YET
-
- def render_POST(self, request):
- self._async_render(request)
- return server.NOT_DONE_YET
-
- def render_OPTIONS(self, request):
- respond_with_json_bytes(request, 200, {}, send_cors=True)
- return server.NOT_DONE_YET
-
- @defer.inlineCallbacks
- def _async_render(self, request):
- try:
- # TODO: The checks here are a bit late. The content will have
- # already been uploaded to a tmp file at this point
- content_length = request.getHeader("Content-Length")
- if content_length is None:
- raise SynapseError(
- msg="Request must specify a Content-Length", code=400
- )
- if int(content_length) > self.max_upload_size:
- raise SynapseError(
- msg="Upload request body is too large",
- code=413,
- )
-
- fname = yield self.map_request_to_name(request)
-
- # TODO I have a suspicious feeling this is just going to block
- with open(fname, "wb") as f:
- f.write(request.content.read())
-
- # FIXME (erikj): These should use constants.
- file_name = os.path.basename(fname)
- # FIXME: we can't assume what the repo's public mounted path is
- # ...plus self-signed SSL won't work to remote clients anyway
- # ...and we can't assume that it's SSL anyway, as we might want to
- # serve it via the non-SSL listener...
- url = "%s/_matrix/content/%s" % (
- self.external_addr, file_name
- )
-
- respond_with_json_bytes(request, 200,
- json.dumps({"content_token": url}),
- send_cors=True)
-
- except CodeMessageException as e:
- logger.exception(e)
- respond_with_json_bytes(request, e.code,
- json.dumps(cs_exception(e)))
- except Exception as e:
- logger.error("Failed to store file: %s" % e)
- respond_with_json_bytes(
- request,
- 500,
- json.dumps({"error": "Internal server error"}),
- send_cors=True)
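For reference, the content token produced by the removed ContentRepoResource is just a filename built from three parts: the uploader's user ID (URL-safe base64, padding stripped), a 24-character random string, and the base64d Content-Type plus a best-effort extension (see map_request_to_name above). A standalone, illustrative sketch of that scheme follows; the helper name and the simplified random-string generation are not part of the original code, which used synapse.util.stringutils.random_string.

import base64
import random
import re
import string


def make_content_token(user_id, content_type):
    # namespace the upload on the (unpadded, URL-safe base64d) user ID
    prefix = base64.urlsafe_b64encode(user_id).replace('=', '')

    # 24 random characters form the main portion of the name
    # (illustrative stand-in for synapse.util.stringutils.random_string)
    main_part = ''.join(random.choice(string.ascii_letters) for _ in range(24))

    # append the base64d content type so render_GET can echo it back,
    # plus a best-effort file extension for image/video/audio uploads
    suffix = "." + base64.urlsafe_b64encode(content_type)
    if content_type.split("/")[0].lower() in ["image", "video", "audio"]:
        file_ext = re.sub("[^a-z]", "", content_type.split("/")[-1])
        suffix += "." + file_ext

    return prefix + main_part + suffix


# make_content_token("@alice:example.com", "image/png")
# -> "QGFsaWNlOmV4YW1wbGUuY29t" + 24 random chars + ".aW1hZ2UvcG5n.png"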
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 510f07dd7b..8f4db59c75 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -14,10 +14,11 @@
# limitations under the License.
-from twisted.internet import defer, reactor
+from twisted.internet import defer, reactor, protocol
from twisted.internet.error import DNSLookupError
from twisted.web.client import readBody, _AgentBase, _URI
from twisted.web.http_headers import Headers
+from twisted.web._newclient import ResponseDone
from synapse.http.endpoint import matrix_federation_endpoint
from synapse.util.async import sleep
@@ -25,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext
from syutil.jsonutil import encode_canonical_json
-from synapse.api.errors import CodeMessageException, SynapseError
+from synapse.api.errors import CodeMessageException, SynapseError, Codes
from syutil.crypto.jsonsign import sign_json
@@ -89,8 +90,8 @@ class MatrixFederationHttpClient(object):
("", "", path_bytes, param_bytes, query_bytes, "",)
)
- logger.debug("Sending request to %s: %s %s",
- destination, method, url_bytes)
+ logger.info("Sending request to %s: %s %s",
+ destination, method, url_bytes)
logger.debug(
"Types: %s",
@@ -101,6 +102,8 @@ class MatrixFederationHttpClient(object):
]
)
+ # XXX: Would be much nicer to retry only at the transaction-layer
+ # (once we have reliable transactions in place)
retries_left = 5
endpoint = self._getEndpoint(reactor, destination)
@@ -127,11 +130,20 @@ class MatrixFederationHttpClient(object):
break
except Exception as e:
if not retry_on_dns_fail and isinstance(e, DNSLookupError):
- logger.warn("DNS Lookup failed to %s with %s", destination,
- e)
+ logger.warn(
+ "DNS Lookup failed to %s with %s",
+ destination,
+ e
+ )
raise SynapseError(400, "Domain specified not found.")
- logger.exception("Got error in _create_request")
+ logger.warn(
+ "Sending request failed to %s: %s %s : %s",
+ destination,
+ method,
+ url_bytes,
+ e
+ )
_print_ex(e)
if retries_left:
@@ -140,15 +152,21 @@ class MatrixFederationHttpClient(object):
else:
raise
+ logger.info(
+ "Received response %d %s for %s: %s %s",
+ response.code,
+ response.phrase,
+ destination,
+ method,
+ url_bytes
+ )
+
if 200 <= response.code < 300:
# We need to update the transactions table to say it was sent?
pass
else:
# :'(
# Update transactions table?
- logger.error(
- "Got response %d %s", response.code, response.phrase
- )
raise CodeMessageException(
response.code, response.phrase
)
@@ -227,7 +245,7 @@ class MatrixFederationHttpClient(object):
@defer.inlineCallbacks
def get_json(self, destination, path, args={}, retry_on_dns_fail=True):
- """ Get's some json from the given host homeserver and path
+ """ GETs some json from the given host homeserver and path
Args:
destination (str): The remote server to send the HTTP request
@@ -235,9 +253,6 @@ class MatrixFederationHttpClient(object):
path (str): The HTTP path.
args (dict): A dictionary used to create query strings, defaults to
None.
- **Note**: The value of each key is assumed to be an iterable
- and *not* a string.
-
Returns:
Deferred: Succeeds when we get *any* HTTP response.
@@ -272,6 +287,52 @@ class MatrixFederationHttpClient(object):
defer.returnValue(json.loads(body))
+ @defer.inlineCallbacks
+ def get_file(self, destination, path, output_stream, args={},
+ retry_on_dns_fail=True, max_size=None):
+ """GETs a file from a given homeserver
+ Args:
+ destination (str): The remote server to send the HTTP request to.
+ path (str): The HTTP path to GET.
+ output_stream (file): File to write the response body to.
+ args (dict): Optional dictionary used to create the query string.
+ Returns:
+            Deferred: resolves to a (length, headers) tuple of the file
+            length in bytes and a dict of the response headers.
+ """
+
+ encoded_args = {}
+ for k, vs in args.items():
+ if isinstance(vs, basestring):
+ vs = [vs]
+ encoded_args[k] = [v.encode("UTF-8") for v in vs]
+
+ query_bytes = urllib.urlencode(encoded_args, True)
+ logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
+
+ def body_callback(method, url_bytes, headers_dict):
+ self.sign_request(destination, method, url_bytes, headers_dict)
+ return None
+
+ response = yield self._create_request(
+ destination.encode("ascii"),
+ "GET",
+ path.encode("ascii"),
+ query_bytes=query_bytes,
+ body_callback=body_callback,
+ retry_on_dns_fail=retry_on_dns_fail
+ )
+
+ headers = dict(response.headers.getAllRawHeaders())
+
+ try:
+ length = yield _readBodyToFile(response, output_stream, max_size)
+ except:
+ logger.exception("Failed to download body")
+ raise
+
+ defer.returnValue((length, headers))
+
def _getEndpoint(self, reactor, destination):
return matrix_federation_endpoint(
reactor, destination, timeout=10,
@@ -279,12 +340,44 @@ class MatrixFederationHttpClient(object):
)
+class _ReadBodyToFileProtocol(protocol.Protocol):
+ def __init__(self, stream, deferred, max_size):
+ self.stream = stream
+ self.deferred = deferred
+ self.length = 0
+ self.max_size = max_size
+
+ def dataReceived(self, data):
+ self.stream.write(data)
+ self.length += len(data)
+ if self.max_size is not None and self.length >= self.max_size:
+ self.deferred.errback(SynapseError(
+ 502,
+                "Requested file is too large (> %r bytes)" % (self.max_size,),
+ Codes.TOO_LARGE,
+ ))
+ self.deferred = defer.Deferred()
+ self.transport.loseConnection()
+
+ def connectionLost(self, reason):
+ if reason.check(ResponseDone):
+ self.deferred.callback(self.length)
+ else:
+ self.deferred.errback(reason)
+
+
+def _readBodyToFile(response, stream, max_size):
+ d = defer.Deferred()
+ response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size))
+ return d
+
+
def _print_ex(e):
if hasattr(e, "reasons") and e.reasons:
for ex in e.reasons:
_print_ex(ex)
else:
- logger.exception(e)
+ logger.warn(e)
class _JsonProducer(object):
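The new get_file helper streams a federation response body straight into a caller-supplied file object via _ReadBodyToFileProtocol, failing with a 502 SynapseError (Codes.TOO_LARGE) once more than max_size bytes have arrived. A minimal usage sketch follows; the destination, path and output location are hypothetical, and client is assumed to be an already-constructed MatrixFederationHttpClient.

from twisted.internet import defer


@defer.inlineCallbacks
def download_remote_content(client):
    # Write the remote body to a local file, capped at 10 MB.
    with open("/tmp/downloaded_content", "wb") as output_stream:
        length, headers = yield client.get_file(
            "remote.example.com",
            "/_matrix/content/some_token",
            output_stream=output_stream,
            max_size=10 * 1024 * 1024,
        )
    # length is the number of bytes written; headers is a dict of the
    # raw response headers (response.headers.getAllRawHeaders()).
    defer.returnValue((length, headers))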
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 8024ff5bde..f33859cf76 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -29,6 +29,7 @@ from twisted.web.util import redirectTo
import collections
import logging
+import urllib
logger = logging.getLogger(__name__)
@@ -122,9 +123,14 @@ class JsonResource(HttpServer, resource.Resource):
# We found a match! Trigger callback and then return the
# returned response. We pass both the request and any
# matched groups from the regex to the callback.
+
+ args = [
+ urllib.unquote(u).decode("UTF-8") for u in m.groups()
+ ]
+
code, response = yield path_entry.callback(
request,
- *m.groups()
+ *args
)
self._send_response(request, code, response)
@@ -166,14 +172,10 @@ class JsonResource(HttpServer, resource.Resource):
request)
return
- if not self._request_user_agent_is_curl(request):
- json_bytes = encode_canonical_json(response_json_object)
- else:
- json_bytes = encode_pretty_printed_json(response_json_object)
-
# TODO: Only enable CORS for the requests that need it.
- respond_with_json_bytes(request, code, json_bytes, send_cors=True,
- response_code_message=response_code_message)
+ respond_with_json(request, code, response_json_object, send_cors=True,
+                          response_code_message=response_code_message,
+                          pretty_print=self._request_user_agent_is_curl(request))
@staticmethod
def _request_user_agent_is_curl(request):
@@ -202,6 +204,17 @@ class RootRedirect(resource.Resource):
return resource.Resource.getChild(self, name, request)
+def respond_with_json(request, code, json_object, send_cors=False,
+ response_code_message=None, pretty_print=False):
+    if pretty_print:
+        json_bytes = encode_pretty_printed_json(json_object)
+    else:
+        json_bytes = encode_canonical_json(json_object)
+
+ return respond_with_json_bytes(request, code, json_bytes, send_cors,
+ response_code_message=response_code_message)
+
+
def respond_with_json_bytes(request, code, json_bytes, send_cors=False,
response_code_message=None):
"""Sends encoded JSON in response to the given request.
|