diff options
Diffstat (limited to 'synapse')
31 files changed, 1556 insertions, 45 deletions
diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 581439ceb3..e250b9b211 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -34,6 +34,7 @@ class Codes(object): LIMIT_EXCEEDED = "M_LIMIT_EXCEEDED" CAPTCHA_NEEDED = "M_CAPTCHA_NEEDED" CAPTCHA_INVALID = "M_CAPTCHA_INVALID" + TOO_LARGE = "M_TOO_LARGE" class CodeMessageException(Exception): diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 6dc19305b7..d7625127f8 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -20,3 +20,4 @@ FEDERATION_PREFIX = "/_matrix/federation/v1" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" SERVER_KEY_PREFIX = "/_matrix/key/v1" +MEDIA_PREFIX = "/_matrix/media/v1" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 855fe8e170..a6e29c0860 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -24,12 +24,13 @@ from twisted.web.resource import Resource from twisted.web.static import File from twisted.web.server import Site from synapse.http.server import JsonResource, RootRedirect -from synapse.http.content_repository import ContentRepoResource +from synapse.media.v0.content_repository import ContentRepoResource +from synapse.media.v1.media_repository import MediaRepositoryResource from synapse.http.server_key_resource import LocalKey from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.api.urls import ( CLIENT_PREFIX, FEDERATION_PREFIX, WEB_CLIENT_PREFIX, CONTENT_REPO_PREFIX, - SERVER_KEY_PREFIX, + SERVER_KEY_PREFIX, MEDIA_PREFIX ) from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory @@ -69,6 +70,9 @@ class SynapseHomeServer(HomeServer): self, self.upload_dir, self.auth, self.content_addr ) + def build_resource_for_media_repository(self): + return MediaRepositoryResource(self) + def build_resource_for_server_key(self): return LocalKey(self) @@ -99,6 +103,7 @@ class SynapseHomeServer(HomeServer): (FEDERATION_PREFIX, self.get_resource_for_federation()), (CONTENT_REPO_PREFIX, self.get_resource_for_content_repo()), (SERVER_KEY_PREFIX, self.get_resource_for_server_key()), + (MEDIA_PREFIX, self.get_resource_for_media_repository()), ] if web_client: logger.info("Adding the web client.") diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 846570811f..1cdd03e414 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -50,6 +50,16 @@ class Config(object): ) return cls.abspath(file_path) + @staticmethod + def ensure_directory(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + if not os.path.isdir(dir_path): + raise ConfigError( + "%s is not a directory" % (dir_path,) + ) + return dir_path + @classmethod def read_file(cls, file_path, config_name): cls.check_file(file_path, config_name) @@ -57,6 +67,10 @@ class Config(object): return file_stream.read() @staticmethod + def default_path(name): + return os.path.abspath(os.path.join(os.path.curdir, name)) + + @staticmethod def read_config_file(file_path): with open(file_path) as file_stream: return yaml.load(file_stream) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 743bc26474..f1b7b1b74e 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -20,6 +20,8 @@ class ContentRepositoryConfig(Config): def __init__(self, args): super(ContentRepositoryConfig, self).__init__(args) self.max_upload_size = self.parse_size(args.max_upload_size) + self.max_image_pixels = self.parse_size(args.max_image_pixels) + self.media_store_path = self.ensure_directory(args.media_store_path) def parse_size(self, string): sizes = {"K": 1024, "M": 1024 * 1024} @@ -37,3 +39,10 @@ class ContentRepositoryConfig(Config): db_group.add_argument( "--max-upload-size", default="1M" ) + db_group.add_argument( + "--media-store-path", default=cls.default_path("media_store") + ) + db_group.add_argument( + "--max-image-pixels", default="32M", + help="Maximum number of pixels that will be thumbnailed" + ) diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py index 01f87fe423..0cb632fb08 100644 --- a/synapse/federation/replication.py +++ b/synapse/federation/replication.py @@ -334,7 +334,7 @@ class ReplicationLayer(object): defer.returnValue(response) return - logger.debug("[%s] Transacition is new", transaction.transaction_id) + logger.debug("[%s] Transaction is new", transaction.transaction_id) with PreserveLoggingContext(): dl = [] @@ -685,6 +685,7 @@ class _TransactionQueue(object): self.transport_layer = transport_layer self._clock = hs.get_clock() + self.store = hs.get_datastore() # Is a mapping from destinations -> deferreds. Used to keep track # of which destinations have transactions in flight and when they are @@ -728,8 +729,14 @@ class _TransactionQueue(object): (pdu, deferred, order) ) + def eb(failure): + if not deferred.called: + deferred.errback(failure) + else: + logger.warn("Failed to send pdu", failure) + with PreserveLoggingContext(): - self._attempt_new_transaction(destination) + self._attempt_new_transaction(destination).addErrback(eb) deferreds.append(deferred) @@ -739,6 +746,9 @@ class _TransactionQueue(object): def enqueue_edu(self, edu): destination = edu.destination + if destination == self.server_name: + return + deferred = defer.Deferred() self.pending_edus_by_dest.setdefault(destination, []).append( (edu, deferred) @@ -748,7 +758,7 @@ class _TransactionQueue(object): if not deferred.called: deferred.errback(failure) else: - logger.exception("Failed to send edu", failure) + logger.warn("Failed to send edu", failure) with PreserveLoggingContext(): self._attempt_new_transaction(destination).addErrback(eb) @@ -770,10 +780,33 @@ class _TransactionQueue(object): @defer.inlineCallbacks @log_function def _attempt_new_transaction(self, destination): + + (retry_last_ts, retry_interval) = (0, 0) + retry_timings = yield self.store.get_destination_retry_timings( + destination + ) + if retry_timings: + (retry_last_ts, retry_interval) = ( + retry_timings.retry_last_ts, retry_timings.retry_interval + ) + if retry_last_ts + retry_interval > int(self._clock.time_msec()): + logger.info( + "TX [%s] not ready for retry yet - " + "dropping transaction for now", + destination, + ) + return + else: + logger.info("TX [%s] is ready for retry", destination) + if destination in self.pending_transactions: + # XXX: pending_transactions can get stuck on by a never-ending + # request at which point pending_pdus_by_dest just keeps growing. + # we need application-layer timeouts of some flavour of these + # requests return - # list of (pending_pdu, deferred, order) + # list of (pending_pdu, deferred, order) pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_failures = self.pending_failures_by_dest.pop(destination, []) @@ -781,7 +814,14 @@ class _TransactionQueue(object): if not pending_pdus and not pending_edus and not pending_failures: return - logger.debug("TX [%s] Attempting new transaction", destination) + logger.debug( + "TX [%s] Attempting new transaction " + "(pdus: %d, edus: %d, failures: %d)", + destination, + len(pending_pdus), + len(pending_edus), + len(pending_failures) + ) # Sort based on the order field pending_pdus.sort(key=lambda t: t[2]) @@ -814,7 +854,11 @@ class _TransactionQueue(object): yield self.transaction_actions.prepare_to_send(transaction) logger.debug("TX [%s] Persisted transaction", destination) - logger.debug("TX [%s] Sending transaction...", destination) + logger.info( + "TX [%s] Sending transaction [%s]", + destination, + transaction.transaction_id, + ) # Actually send the transaction @@ -835,6 +879,8 @@ class _TransactionQueue(object): transaction, json_data_cb ) + logger.info("TX [%s] got %d response", destination, code) + logger.debug("TX [%s] Sent transaction", destination) logger.debug("TX [%s] Marking as delivered...", destination) @@ -847,8 +893,14 @@ class _TransactionQueue(object): for deferred in deferreds: if code == 200: + if retry_last_ts: + # this host is alive! reset retry schedule + yield self.store.set_destination_retry_timings( + destination, 0, 0 + ) deferred.callback(None) else: + self.set_retrying(destination, retry_interval) deferred.errback(RuntimeError("Got status %d" % code)) # Ensures we don't continue until all callbacks on that @@ -861,11 +913,15 @@ class _TransactionQueue(object): logger.debug("TX [%s] Yielded to callbacks", destination) except Exception as e: - logger.error("TX Problem in _attempt_transaction") - # We capture this here as there as nothing actually listens # for this finishing functions deferred. - logger.exception(e) + logger.warn( + "TX [%s] Problem in _attempt_transaction: %s", + destination, + e, + ) + + self.set_retrying(destination, retry_interval) for deferred in deferreds: if not deferred.called: @@ -877,3 +933,22 @@ class _TransactionQueue(object): # Check to see if there is anything else to send. self._attempt_new_transaction(destination) + + @defer.inlineCallbacks + def set_retrying(self, destination, retry_interval): + # track that this destination is having problems and we should + # give it a chance to recover before trying it again + + if retry_interval: + retry_interval *= 2 + # plateau at hourly retries for now + if retry_interval >= 60 * 60 * 1000: + retry_interval = 60 * 60 * 1000 + else: + retry_interval = 2000 # try again at first after 2 seconds + + yield self.store.set_destination_retry_timings( + destination, + int(self._clock.time_msec()), + retry_interval + ) diff --git a/synapse/federation/transport.py b/synapse/federation/transport.py index 8d86152085..0f11c6d491 100644 --- a/synapse/federation/transport.py +++ b/synapse/federation/transport.py @@ -155,7 +155,7 @@ class TransportLayer(object): @defer.inlineCallbacks @log_function def send_transaction(self, transaction, json_data_callback=None): - """ Sends the given Transaction to it's destination + """ Sends the given Transaction to its destination Args: transaction (Transaction) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 925eb5376e..cfb5029774 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -207,6 +207,13 @@ class FederationHandler(BaseHandler): e.msg, affected=event.event_id, ) + + # if we're receiving valid events from an origin, + # it's probably a good idea to mark it as not in retry-state + # for sending (although this is a bit of a leap) + retry_timings = yield self.store.get_destination_retry_timings(origin) + if (retry_timings and retry_timings.retry_last_ts): + self.store.set_destination_retry_timings(origin, 0, 0) room = yield self.store.get_room(event.room_id) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 815d40f166..84a039489f 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -659,10 +659,6 @@ class PresenceHandler(BaseHandler): if room_ids: logger.debug(" | %d interested room IDs %r", len(room_ids), room_ids) - if not observers and not room_ids: - logger.debug(" | no interested observers or room IDs") - continue - state = dict(push) del state["user_id"] @@ -683,6 +679,10 @@ class PresenceHandler(BaseHandler): self._user_cachemap_latest_serial += 1 statuscache.update(state, serial=self._user_cachemap_latest_serial) + if not observers and not room_ids: + logger.debug(" | no interested observers or room IDs") + continue + self.push_update_to_clients( observed_user=user, users_to_push=observers, @@ -804,6 +804,7 @@ class PresenceEventSource(object): ) @defer.inlineCallbacks + @log_function def get_new_events_for_user(self, user, from_key, limit): from_key = int(from_key) @@ -816,7 +817,8 @@ class PresenceEventSource(object): # TODO(paul): use a DeferredList ? How to limit concurrency. for observed_user in cachemap.keys(): cached = cachemap[observed_user] - if not (from_key < cached.serial): + + if cached.serial <= from_key: continue if (yield self.is_visible(observer_user, observed_user)): diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 510f07dd7b..8f4db59c75 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -14,10 +14,11 @@ # limitations under the License. -from twisted.internet import defer, reactor +from twisted.internet import defer, reactor, protocol from twisted.internet.error import DNSLookupError from twisted.web.client import readBody, _AgentBase, _URI from twisted.web.http_headers import Headers +from twisted.web._newclient import ResponseDone from synapse.http.endpoint import matrix_federation_endpoint from synapse.util.async import sleep @@ -25,7 +26,7 @@ from synapse.util.logcontext import PreserveLoggingContext from syutil.jsonutil import encode_canonical_json -from synapse.api.errors import CodeMessageException, SynapseError +from synapse.api.errors import CodeMessageException, SynapseError, Codes from syutil.crypto.jsonsign import sign_json @@ -89,8 +90,8 @@ class MatrixFederationHttpClient(object): ("", "", path_bytes, param_bytes, query_bytes, "",) ) - logger.debug("Sending request to %s: %s %s", - destination, method, url_bytes) + logger.info("Sending request to %s: %s %s", + destination, method, url_bytes) logger.debug( "Types: %s", @@ -101,6 +102,8 @@ class MatrixFederationHttpClient(object): ] ) + # XXX: Would be much nicer to retry only at the transaction-layer + # (once we have reliable transactions in place) retries_left = 5 endpoint = self._getEndpoint(reactor, destination) @@ -127,11 +130,20 @@ class MatrixFederationHttpClient(object): break except Exception as e: if not retry_on_dns_fail and isinstance(e, DNSLookupError): - logger.warn("DNS Lookup failed to %s with %s", destination, - e) + logger.warn( + "DNS Lookup failed to %s with %s", + destination, + e + ) raise SynapseError(400, "Domain specified not found.") - logger.exception("Got error in _create_request") + logger.warn( + "Sending request failed to %s: %s %s : %s", + destination, + method, + url_bytes, + e + ) _print_ex(e) if retries_left: @@ -140,15 +152,21 @@ class MatrixFederationHttpClient(object): else: raise + logger.info( + "Received response %d %s for %s: %s %s", + response.code, + response.phrase, + destination, + method, + url_bytes + ) + if 200 <= response.code < 300: # We need to update the transactions table to say it was sent? pass else: # :'( # Update transactions table? - logger.error( - "Got response %d %s", response.code, response.phrase - ) raise CodeMessageException( response.code, response.phrase ) @@ -227,7 +245,7 @@ class MatrixFederationHttpClient(object): @defer.inlineCallbacks def get_json(self, destination, path, args={}, retry_on_dns_fail=True): - """ Get's some json from the given host homeserver and path + """ GETs some json from the given host homeserver and path Args: destination (str): The remote server to send the HTTP request @@ -235,9 +253,6 @@ class MatrixFederationHttpClient(object): path (str): The HTTP path. args (dict): A dictionary used to create query strings, defaults to None. - **Note**: The value of each key is assumed to be an iterable - and *not* a string. - Returns: Deferred: Succeeds when we get *any* HTTP response. @@ -272,6 +287,52 @@ class MatrixFederationHttpClient(object): defer.returnValue(json.loads(body)) + @defer.inlineCallbacks + def get_file(self, destination, path, output_stream, args={}, + retry_on_dns_fail=True, max_size=None): + """GETs a file from a given homeserver + Args: + destination (str): The remote server to send the HTTP request to. + path (str): The HTTP path to GET. + output_stream (file): File to write the response body to. + args (dict): Optional dictionary used to create the query string. + Returns: + A (int,dict) tuple of the file length and a dict of the response + headers. + """ + + encoded_args = {} + for k, vs in args.items(): + if isinstance(vs, basestring): + vs = [vs] + encoded_args[k] = [v.encode("UTF-8") for v in vs] + + query_bytes = urllib.urlencode(encoded_args, True) + logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) + + def body_callback(method, url_bytes, headers_dict): + self.sign_request(destination, method, url_bytes, headers_dict) + return None + + response = yield self._create_request( + destination.encode("ascii"), + "GET", + path.encode("ascii"), + query_bytes=query_bytes, + body_callback=body_callback, + retry_on_dns_fail=retry_on_dns_fail + ) + + headers = dict(response.headers.getAllRawHeaders()) + + try: + length = yield _readBodyToFile(response, output_stream, max_size) + except: + logger.exception("Failed to download body") + raise + + defer.returnValue((length, headers)) + def _getEndpoint(self, reactor, destination): return matrix_federation_endpoint( reactor, destination, timeout=10, @@ -279,12 +340,44 @@ class MatrixFederationHttpClient(object): ) +class _ReadBodyToFileProtocol(protocol.Protocol): + def __init__(self, stream, deferred, max_size): + self.stream = stream + self.deferred = deferred + self.length = 0 + self.max_size = max_size + + def dataReceived(self, data): + self.stream.write(data) + self.length += len(data) + if self.max_size is not None and self.length >= self.max_size: + self.deferred.errback(SynapseError( + 502, + "Requested file is too large > %r bytes" % (self.max_size,), + Codes.TOO_LARGE, + )) + self.deferred = defer.Deferred() + self.transport.loseConnection() + + def connectionLost(self, reason): + if reason.check(ResponseDone): + self.deferred.callback(self.length) + else: + self.deferred.errback(reason) + + +def _readBodyToFile(response, stream, max_size): + d = defer.Deferred() + response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) + return d + + def _print_ex(e): if hasattr(e, "reasons") and e.reasons: for ex in e.reasons: _print_ex(ex) else: - logger.exception(e) + logger.warn(e) class _JsonProducer(object): diff --git a/synapse/http/server.py b/synapse/http/server.py index 8024ff5bde..02277c4998 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -166,14 +166,10 @@ class JsonResource(HttpServer, resource.Resource): request) return - if not self._request_user_agent_is_curl(request): - json_bytes = encode_canonical_json(response_json_object) - else: - json_bytes = encode_pretty_printed_json(response_json_object) - # TODO: Only enable CORS for the requests that need it. - respond_with_json_bytes(request, code, json_bytes, send_cors=True, - response_code_message=response_code_message) + respond_with_json(request, code, response_json_object, send_cors=True, + response_code_message=response_code_message, + pretty_print=self._request_user_agent_is_curl) @staticmethod def _request_user_agent_is_curl(request): @@ -202,6 +198,17 @@ class RootRedirect(resource.Resource): return resource.Resource.getChild(self, name, request) +def respond_with_json(request, code, json_object, send_cors=False, + response_code_message=None, pretty_print=False): + if not pretty_print: + json_bytes = encode_pretty_printed_json(json_object) + else: + json_bytes = encode_canonical_json(json_object) + + return respond_with_json_bytes(request, code, json_bytes, send_cors, + response_code_message=response_code_message) + + def respond_with_json_bytes(request, code, json_bytes, send_cors=False, response_code_message=None): """Sends encoded JSON in response to the given request. diff --git a/synapse/media/__init__.py b/synapse/media/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/synapse/media/__init__.py diff --git a/synapse/media/v0/__init__.py b/synapse/media/v0/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/synapse/media/v0/__init__.py diff --git a/synapse/http/content_repository.py b/synapse/media/v0/content_repository.py index 64ecb5346e..ce5d3d153e 100644 --- a/synapse/http/content_repository.py +++ b/synapse/media/v0/content_repository.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .server import respond_with_json_bytes +from synapse.http.server import respond_with_json_bytes from synapse.util.stringutils import random_string from synapse.api.errors import ( diff --git a/synapse/media/v1/__init__.py b/synapse/media/v1/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/synapse/media/v1/__init__.py diff --git a/synapse/media/v1/base_resource.py b/synapse/media/v1/base_resource.py new file mode 100644 index 0000000000..14735ff375 --- /dev/null +++ b/synapse/media/v1/base_resource.py @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .thumbnailer import Thumbnailer + +from synapse.http.server import respond_with_json +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, CodeMessageException, cs_error, Codes, SynapseError +) + +from twisted.internet import defer +from twisted.web.resource import Resource +from twisted.protocols.basic import FileSender + +import os + +import logging + +logger = logging.getLogger(__name__) + + +class BaseMediaResource(Resource): + isLeaf = True + + def __init__(self, hs, filepaths): + Resource.__init__(self) + self.auth = hs.get_auth() + self.client = hs.get_http_client() + self.clock = hs.get_clock() + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.max_upload_size = hs.config.max_upload_size + self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = filepaths + self.downloads = {} + + @staticmethod + def catch_errors(request_handler): + @defer.inlineCallbacks + def wrapped_request_handler(self, request): + try: + yield request_handler(self, request) + except CodeMessageException as e: + logger.exception(e) + respond_with_json( + request, e.code, cs_exception(e), send_cors=True + ) + except: + logger.exception( + "Failed handle request %s.%s on %r", + request_handler.__module__, + request_handler.__name__, + self, + ) + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) + return wrapped_request_handler + + @staticmethod + def _parse_media_id(request): + try: + server_name, media_id = request.postpath + return (server_name, media_id) + except: + raise SynapseError( + 404, + "Invalid media id token %r" % (request.postpath,), + Codes.UNKKOWN, + ) + + @staticmethod + def _parse_integer(request, arg_name, default=None): + try: + if default is None: + return int(request.args[arg_name][0]) + else: + return int(request.args.get(arg_name, [default])[0]) + except: + raise SynapseError( + 400, + "Missing integer argument %r" % (arg_name,), + Codes.UNKNOWN, + ) + + @staticmethod + def _parse_string(request, arg_name, default=None): + try: + if default is None: + return request.args[arg_name][0] + else: + return request.args.get(arg_name, [default])[0] + except: + raise SynapseError( + 400, + "Missing string argument %r" % (arg_name,), + Codes.UNKNOWN, + ) + + def _respond_404(self, request): + respond_with_json( + request, 404, + cs_error( + "Not found %r" % (request.postpath,), + code=Codes.NOT_FOUND, + ), + send_cors=True + ) + + @staticmethod + def _makedirs(filepath): + dirname = os.path.dirname(filepath) + if not os.path.exists(dirname): + os.makedirs(dirname) + + def _get_remote_media(self, server_name, media_id): + key = (server_name, media_id) + download = self.downloads.get(key) + if download is None: + download = self._get_remote_media_impl(server_name, media_id) + self.downloads[key] = download + @download.addBoth + def callback(media_info): + del self.downloads[key] + return download + + @defer.inlineCallbacks + def _get_remote_media_impl(self, server_name, media_id): + media_info = yield self.store.get_cached_remote_media( + server_name, media_id + ) + if not media_info: + media_info = yield self._download_remote_file( + server_name, media_id + ) + defer.returnValue(media_info) + + @defer.inlineCallbacks + def _download_remote_file(self, server_name, media_id): + file_id = random_string(24) + + fname = self.filepaths.remote_media_filepath( + server_name, file_id + ) + self._makedirs(fname) + + try: + with open(fname, "wb") as f: + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )) + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + max_size=self.max_upload_size, + ) + media_type = headers["Content-Type"][0] + time_now_ms = self.clock.time_msec() + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=length, + filesystem_id=file_id, + ) + except: + os.remove(fname) + raise + + media_info = { + "media_type": media_type, + "media_length": length, + "upload_name": None, + "created_ts": time_now_ms, + "filesystem_id": file_id, + } + + yield self._generate_remote_thumbnails( + server_name, media_id, media_info + ) + + defer.returnValue(media_info) + + @defer.inlineCallbacks + def _respond_with_file(self, request, media_type, file_path): + logger.debug("Responding with %r", file_path) + + if os.path.isfile(file_path): + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + with open(file_path, "rb") as f: + yield FileSender().beginFileTransfer(f, request) + + request.finish() + else: + self._respond_404() + + def _get_thumbnail_requirements(self, media_type): + if media_type == "image/jpeg": + return ( + (32, 32, "crop", "image/jpeg"), + (96, 96, "crop", "image/jpeg"), + (320, 240, "scale", "image/jpeg"), + (640, 480, "scale", "image/jpeg"), + ) + elif (media_type == "image/png") or (media_type == "image/gif"): + return ( + (32, 32, "crop", "image/png"), + (96, 96, "crop", "image/png"), + (320, 240, "scale", "image/png"), + (640, 480, "scale", "image/png"), + ) + else: + return () + + @defer.inlineCallbacks + def _generate_local_thumbnails(self, media_id, media_info): + media_type = media_info["media_type"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.local_media_filepath(media_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) + + @defer.inlineCallbacks + def _generate_remote_thumbnails(self, server_name, media_id, media_info): + media_type = media_info["media_type"] + file_id = media_info["filesystem_id"] + requirements = self._get_thumbnail_requirements(media_type) + if not requirements: + return + + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + thumbnailer = Thumbnailer(input_path) + m_width = thumbnailer.width + m_height = thumbnailer.height + + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, m_height, self.max_image_pixels + ) + return + + scales = set() + crops = set() + for r_width, r_height, r_method, r_type in requirements: + if r_method == "scale": + t_width, t_height = thumbnailer.aspect(r_width, r_height) + scales.add(( + min(m_width, t_width), min(m_height, t_height), r_type, + )) + elif r_method == "crop": + crops.add((r_width, r_height, r_type)) + + for t_width, t_height, t_type in scales: + t_method = "scale" + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + for t_width, t_height, t_type in crops: + if (t_width, t_height, t_type) in scales: + # If the aspect ratio of the cropped thumbnail matches a purely + # scaled one then there is no point in calculating a separate + # thumbnail. + continue + t_method = "crop" + t_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + self._makedirs(t_path) + t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) + + defer.returnValue({ + "width": m_width, + "height": m_height, + }) diff --git a/synapse/media/v1/download_resource.py b/synapse/media/v1/download_resource.py new file mode 100644 index 0000000000..f3a6804e05 --- /dev/null +++ b/synapse/media/v1/download_resource.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base_resource import BaseMediaResource + +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer + +import logging + +logger = logging.getLogger(__name__) + + +class DownloadResource(BaseMediaResource): + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @BaseMediaResource.catch_errors + @defer.inlineCallbacks + def _async_render_GET(self, request): + try: + server_name, media_id = request.postpath + except: + self._respond_404(request) + return + + if server_name == self.server_name: + yield self._respond_local_file(request, media_id) + else: + yield self._respond_remote_file(request, server_name, media_id) + + @defer.inlineCallbacks + def _respond_local_file(self, request, media_id): + media_info = yield self.store.get_local_media(media_id) + if not media_info: + self._respond_404() + return + + media_type = media_info["media_type"] + file_path = self.filepaths.local_media_filepath(media_id) + + yield self._respond_with_file(request, media_type, file_path) + + @defer.inlineCallbacks + def _respond_remote_file(self, request, server_name, media_id): + media_info = yield self._get_remote_media(server_name, media_id) + + media_type = media_info["media_type"] + filesystem_id = media_info["filesystem_id"] + + file_path = self.filepaths.remote_media_filepath( + server_name, filesystem_id + ) + + yield self._respond_with_file(request, media_type, file_path) diff --git a/synapse/media/v1/filepath.py b/synapse/media/v1/filepath.py new file mode 100644 index 0000000000..0078bc3d40 --- /dev/null +++ b/synapse/media/v1/filepath.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + +class MediaFilePaths(object): + + def __init__(self, base_path): + self.base_path = base_path + + def default_thumbnail(self, default_top_level, default_sub_type, width, + height, content_type, method): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % ( + width, height, top_level_type, sub_type, method + ) + return os.path.join( + self.base_path, "default_thumbnails", default_top_level, + default_sub_type, file_name + ) + + def local_media_filepath(self, media_id): + return os.path.join( + self.base_path, "local_content", + media_id[0:2], media_id[2:4], media_id[4:] + ) + + def local_media_thumbnail(self, media_id, width, height, content_type, + method): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s-%s" % ( + width, height, top_level_type, sub_type, method + ) + return os.path.join( + self.base_path, "local_thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + file_name + ) + + def remote_media_filepath(self, server_name, file_id): + return os.path.join( + self.base_path, "remote_content", server_name, + file_id[0:2], file_id[2:4], file_id[4:] + ) + + def remote_media_thumbnail(self, server_name, file_id, width, height, + content_type, method): + top_level_type, sub_type = content_type.split("/") + file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) + return os.path.join( + self.base_path, "remote_thumbnail", server_name, + file_id[0:2], file_id[2:4], file_id[4:], + file_name + ) diff --git a/synapse/media/v1/media_repository.py b/synapse/media/v1/media_repository.py new file mode 100644 index 0000000000..7e446fd827 --- /dev/null +++ b/synapse/media/v1/media_repository.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .upload_resource import UploadResource +from .download_resource import DownloadResource +from .thumbnail_resource import ThumbnailResource +from .filepath import MediaFilePaths + +from twisted.web.resource import Resource + +import logging + +logger = logging.getLogger(__name__) + + +class MediaRepositoryResource(Resource): + """File uploading and downloading. + + Uploads are POSTed to a resource which returns a token which is used to GET + the download:: + + => POST /_matrix/media/v1/upload HTTP/1.1 + Content-Type: <media-type> + + <media> + + <= HTTP/1.1 200 OK + Content-Type: application/json + + { "token": <media-id> } + + => GET /_matrix/media/v1/download/<server_name>/<media-id> HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: <media-type> + Content-Disposition: attachment;filename=<upload-filename> + + <media> + + Clients can get thumbnails by supplying a desired width and height and + thumbnailing method:: + + => GET /_matrix/media/v1/thumbnail/<server_name> + /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1 + + <= HTTP/1.1 200 OK + Content-Type: image/jpeg or image/png + + <thumbnail> + + The thumbnail methods are "crop" and "scale". "scale" trys to return an + image where either the width or the height is smaller than the requested + size. The client should then scale and letterbox the image if it needs to + fit within a given rectangle. "crop" trys to return an image where the + width and height are close to the requested size and the aspect matches + the requested size. The client should scale the image if it needs to fit + within a given rectangle. + """ + + def __init__(self, hs): + Resource.__init__(self) + filepaths = MediaFilePaths(hs.config.media_store_path) + self.putChild("upload", UploadResource(hs, filepaths)) + self.putChild("download", DownloadResource(hs, filepaths)) + self.putChild("thumbnail", ThumbnailResource(hs, filepaths)) diff --git a/synapse/media/v1/thumbnail_resource.py b/synapse/media/v1/thumbnail_resource.py new file mode 100644 index 0000000000..e19620d456 --- /dev/null +++ b/synapse/media/v1/thumbnail_resource.py @@ -0,0 +1,182 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .base_resource import BaseMediaResource + +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer + +import logging + +logger = logging.getLogger(__name__) + + +class ThumbnailResource(BaseMediaResource): + isLeaf = True + + def render_GET(self, request): + self._async_render_GET(request) + return NOT_DONE_YET + + @BaseMediaResource.catch_errors + @defer.inlineCallbacks + def _async_render_GET(self, request): + server_name, media_id = self._parse_media_id(request) + width = self._parse_integer(request, "width") + height = self._parse_integer(request, "height") + method = self._parse_string(request, "method", "scale") + m_type = self._parse_string(request, "type", "image/png") + + if server_name == self.server_name: + yield self._respond_local_thumbnail( + request, media_id, width, height, method, m_type + ) + else: + yield self._respond_remote_thumbnail( + request, server_name, media_id, + width, height, method, m_type + ) + + @defer.inlineCallbacks + def _respond_local_thumbnail(self, request, media_id, width, height, + method, m_type): + media_info = yield self.store.get_local_media(media_id) + + if not media_info: + self._respond_404(request) + return + + thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) + + if thumbnail_infos: + thumbnail_info = self._select_thumbnail( + width, height, method, m_type, thumbnail_infos + ) + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] + + file_path = self.filepaths.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method, + ) + yield self._respond_with_file(request, t_type, file_path) + + else: + yield self._respond_default_thumbnail( + request, media_info, width, height, method, m_type, + ) + + @defer.inlineCallbacks + def _respond_remote_thumbnail(self, request, server_name, media_id, width, + height, method, m_type): + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead. + media_info = yield self._get_remote_media(server_name, media_id) + + thumbnail_infos = yield self.store.get_remote_media_thumbnails( + server_name, media_id, + ) + + if thumbnail_infos: + thumbnail_info = self._select_thumbnail( + width, height, method, m_type, thumbnail_infos + ) + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] + file_id = thumbnail_info["filesystem_id"] + + file_path = self.filepaths.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method, + ) + yield self._respond_with_file(request, t_type, file_path) + else: + yield self._respond_default_thumbnail( + request, media_info, width, height, method, m_type, + ) + + @defer.inlineCallbacks + def _respond_default_thumbnail(self, request, media_info, width, height, + method, m_type): + media_type = media_info["media_type"] + top_level_type = media_type.split("/")[0] + sub_type = media_type.split("/")[-1].split(";")[0] + thumbnail_infos = yield self.store.get_default_thumbnails( + top_level_type, sub_type, + ) + if not thumbnail_infos: + thumbnail_infos = yield self.store.get_default_thumbnails( + top_level_type, "_default", + ) + if not thumbnail_infos: + thumbnail_infos = yield self.store.get_default_thumbnails( + "_default", "_default", + ) + if not thumbnail_infos: + self._respond_404(request) + return + + thumbnail_info = self._select_thumbnail( + width, height, "crop", m_type, thumbnail_infos + ) + + t_width = thumbnail_info["thumbnail_width"] + t_height = thumbnail_info["thumbnail_height"] + t_type = thumbnail_info["thumbnail_type"] + t_method = thumbnail_info["thumbnail_method"] + + file_path = self.filepaths.default_thumbnail( + top_level_type, sub_type, t_width, t_height, t_type, t_method, + ) + yield self.respond_with_file(request, t_type, file_path) + + def _select_thumbnail(self, desired_width, desired_height, desired_method, + desired_type, thumbnail_infos): + d_w = desired_width + d_h = desired_height + + if desired_method.lower() == "crop": + info_list = [] + for info in thumbnail_infos: + t_w = info["thumbnail_width"] + t_h = info["thumbnail_height"] + t_method = info["thumbnail_method"] + if t_method == "scale" or t_method == "crop": + aspect_quality = abs(d_w * t_h - d_h * t_w) + size_quality = abs((d_w - t_w) * (d_h - t_h)) + type_quality = desired_type != info["thumbnail_type"] + length_quality = info["thumbnail_length"] + info_list.append(( + aspect_quality, size_quality, type_quality, + length_quality, info + )) + return min(info_list)[-1] + else: + info_list = [] + for info in thumbnail_infos: + t_w = info["thumbnail_width"] + t_h = info["thumbnail_height"] + t_method = info["thumbnail_method"] + if t_method == "scale" and (t_w >= d_w or t_h >= d_h): + size_quality = abs((d_w - t_w) * (d_h - t_h)) + type_quality = desired_type != info["thumbnail_type"] + length_quality = info["thumbnail_length"] + info_list.append(( + size_quality, type_quality, length_quality, info + )) + return min(info_list)[-1] diff --git a/synapse/media/v1/thumbnailer.py b/synapse/media/v1/thumbnailer.py new file mode 100644 index 0000000000..774ae4538f --- /dev/null +++ b/synapse/media/v1/thumbnailer.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import PIL.Image as Image +from io import BytesIO + + +class Thumbnailer(object): + + FORMATS = { + "image/jpeg": "JPEG", + "image/png": "PNG", + } + + def __init__(self, input_path): + self.image = Image.open(input_path) + self.width, self.height = self.image.size + + def aspect(self, max_width, max_height): + """Calculate the largest size that preserves aspect ratio which + fits within the given rectangle:: + + (w_in / h_in) = (w_out / h_out) + w_out = min(w_max, h_max * (w_in / h_in)) + h_out = min(h_max, w_max * (h_in / w_in)) + + Args: + max_width: The largest possible width. + max_height: The larget possible height. + """ + + if max_width * self.height < max_height * self.width: + return (max_width, (max_width * self.height) // self.width) + else: + return ((max_height * self.width) // self.height, max_height) + + def scale(self, output_path, width, height, output_type): + """Rescales the image to the given dimensions""" + scaled = self.image.resize((width, height), Image.BILINEAR) + return self.save_image(scaled, output_type, output_path) + + def crop(self, output_path, width, height, output_type): + """Rescales and crops the image to the given dimensions preserving + aspect:: + (w_in / h_in) = (w_scaled / h_scaled) + w_scaled = max(w_out, h_out * (w_in / h_in)) + h_scaled = max(h_out, w_out * (h_in / w_in)) + + Args: + max_width: The largest possible width. + max_height: The larget possible height. + """ + if width * self.height > height * self.width: + scaled_height = (width * self.height) // self.width + scaled_image = self.image.resize( + (width, scaled_height), Image.BILINEAR + ) + crop_top = (scaled_height - height) // 2 + crop_bottom = height + crop_top + cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) + else: + scaled_width = (height * self.width) // self.height + scaled_image = self.image.resize( + (scaled_width, height), Image.BILINEAR + ) + crop_left = (scaled_width - width) // 2 + crop_right = width + crop_left + cropped = scaled_image.crop((crop_left, 0, crop_right, height)) + return self.save_image(cropped, output_type, output_path) + + def save_image(self, output_image, output_type, output_path): + output_bytes_io = BytesIO() + output_image.save(output_bytes_io, self.FORMATS[output_type]) + output_bytes = output_bytes_io.getvalue() + with open(output_path, "wb") as output_file: + output_file.write(output_bytes) + return len(output_bytes) diff --git a/synapse/media/v1/upload_resource.py b/synapse/media/v1/upload_resource.py new file mode 100644 index 0000000000..b2449ff03d --- /dev/null +++ b/synapse/media/v1/upload_resource.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import respond_with_json + +from synapse.util.stringutils import random_string +from synapse.api.errors import ( + cs_exception, SynapseError, CodeMessageException +) + +from twisted.web.server import NOT_DONE_YET +from twisted.internet import defer + +from .base_resource import BaseMediaResource + +import logging + +logger = logging.getLogger(__name__) + + +class UploadResource(BaseMediaResource): + def render_POST(self, request): + self._async_render_POST(request) + return NOT_DONE_YET + + def render_OPTIONS(self, request): + respond_with_json(request, 200, {}, send_cors=True) + return NOT_DONE_YET + + @defer.inlineCallbacks + def _async_render_POST(self, request): + try: + auth_user = yield self.auth.get_user_by_req(request) + # TODO: The checks here are a bit late. The content will have + # already been uploaded to a tmp file at this point + content_length = request.getHeader("Content-Length") + if content_length is None: + raise SynapseError( + msg="Request must specify a Content-Length", code=400 + ) + if int(content_length) > self.max_upload_size: + raise SynapseError( + msg="Upload request body is too large", + code=413, + ) + + headers = request.requestHeaders + + if headers.hasHeader("Content-Type"): + media_type = headers.getRawHeaders("Content-Type")[0] + else: + raise SynapseError( + msg="Upload request missing 'Content-Type'", + code=400, + ) + + #if headers.hasHeader("Content-Disposition"): + # disposition = headers.getRawHeaders("Content-Disposition")[0] + # TODO(markjh): parse content-dispostion + + media_id = random_string(24) + + fname = self.filepaths.local_media_filepath(media_id) + self._makedirs(fname) + + # This shouldn't block for very long because the content will have + # already been uploaded at this point. + with open(fname, "wb") as f: + f.write(request.content.read()) + + yield self.store.store_local_media( + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=None, + media_length=content_length, + user_id=auth_user, + ) + media_info = { + "media_type": media_type, + "media_length": content_length, + } + + yield self._generate_local_thumbnails(media_id, media_info) + + respond_with_json( + request, 200, {"content_token": media_id}, send_cors=True + ) + except CodeMessageException as e: + logger.exception(e) + respond_with_json(request, e.code, cs_exception(e), send_cors=True) + except: + logger.exception("Failed to store file") + respond_with_json( + request, + 500, + {"error": "Internal server error"}, + send_cors=True + ) diff --git a/synapse/rest/transactions.py b/synapse/rest/transactions.py index 93c0122f30..8c41ab4edb 100644 --- a/synapse/rest/transactions.py +++ b/synapse/rest/transactions.py @@ -19,7 +19,7 @@ import logging logger = logging.getLogger(__name__) - +# FIXME: elsewhere we use FooStore to indicate something in the storage layer... class HttpTransactionStore(object): def __init__(self): diff --git a/synapse/server.py b/synapse/server.py index da0a44433a..7eb15270fc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -78,6 +78,7 @@ class BaseHomeServer(object): 'resource_for_web_client', 'resource_for_content_repo', 'resource_for_server_key', + 'resource_for_media_repository', 'event_sources', 'ratelimiter', 'keyring', diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f15e3dfe62..c9ab434b4e 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -33,6 +33,7 @@ from .stream import StreamStore from .transactions import TransactionStore from .keys import KeyStore from .event_federation import EventFederationStore +from .media_repository import MediaRepositoryStore from .state import StateStore from .signatures import SignatureStore @@ -62,12 +63,13 @@ SCHEMAS = [ "state", "event_edges", "event_signatures", + "media_repository", ] # Remember to update this number every time an incompatible change is made to # database schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 8 +SCHEMA_VERSION = 9 class _RollbackButIsFineException(Exception): @@ -81,7 +83,9 @@ class DataStore(RoomMemberStore, RoomStore, RegistrationStore, StreamStore, ProfileStore, FeedbackStore, PresenceStore, TransactionStore, DirectoryStore, KeyStore, StateStore, SignatureStore, - EventFederationStore, ): + EventFederationStore, + MediaRepositoryStore, + ): def __init__(self, hs): super(DataStore, self).__init__(hs) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 4881f03368..e72200e2f7 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -650,7 +650,7 @@ class JoinHelper(object): to dump the results into. Attributes: - taples (list): List of `Table` classes + tables (list): List of `Table` classes EntryType (type) """ diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py new file mode 100644 index 0000000000..18c068d3d9 --- /dev/null +++ b/synapse/storage/media_repository.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright 2014 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from _base import SQLBaseStore + + +class MediaRepositoryStore(SQLBaseStore): + """Persistence for attachments and avatars""" + + def get_default_thumbnails(self, top_level_type, sub_type): + return [] + + def get_local_media(self, media_id): + """Get the metadata for a local piece of media + Returns: + None if the meia_id doesn't exist. + """ + return self._simple_select_one( + "local_media_repository", + {"media_id": media_id}, + ("media_type", "media_length", "upload_name", "created_ts"), + allow_none=True, + ) + + def store_local_media(self, media_id, media_type, time_now_ms, upload_name, + media_length, user_id): + return self._simple_insert( + "local_media_repository", + { + "media_id": media_id, + "media_type": media_type, + "created_ts": time_now_ms, + "upload_name": upload_name, + "media_length": media_length, + "user_id": user_id.to_string(), + } + ) + + def get_local_media_thumbnails(self, media_id): + return self._simple_select_list( + "local_media_repository_thumbnails", + {"media_id": media_id}, + ( + "thumbnail_width", "thumbnail_height", "thumbnail_method", + "thumbnail_type", "thumbnail_length", + ) + ) + + def store_local_thumbnail(self, media_id, thumbnail_width, + thumbnail_height, thumbnail_type, + thumbnail_method, thumbnail_length): + return self._simple_insert( + "local_media_repository_thumbnails", + { + "media_id": media_id, + "thumbnail_width": thumbnail_width, + "thumbnail_height": thumbnail_height, + "thumbnail_method": thumbnail_method, + "thumbnail_type": thumbnail_type, + "thumbnail_length": thumbnail_length, + } + ) + + def get_cached_remote_media(self, origin, media_id): + return self._simple_select_one( + "remote_media_cache", + {"media_origin": origin, "media_id": media_id}, + ( + "media_type", "media_length", "upload_name", "created_ts", + "filesystem_id", + ), + allow_none=True, + ) + + def store_cached_remote_media(self, origin, media_id, media_type, + media_length, time_now_ms, upload_name, + filesystem_id): + return self._simple_insert( + "remote_media_cache", + { + "media_origin": origin, + "media_id": media_id, + "media_type": media_type, + "media_length": media_length, + "created_ts": time_now_ms, + "upload_name": upload_name, + "filesystem_id": filesystem_id, + } + ) + + def get_remote_media_thumbnails(self, origin, media_id): + return self._simple_select_list( + "remote_media_cache_thumbnails", + {"media_origin": origin, "media_id": media_id}, + ( + "thumbnail_width", "thumbnail_height", "thumbnail_method", + "thumbnail_type", "thumbnail_length", "filesystem_id", + ) + ) + + def store_remote_media_thumbnail(self, origin, media_id, filesystem_id, + thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_method, + thumbnail_length): + return self._simple_insert( + "remote_media_cache_thumbnails", + { + "media_origin": origin, + "media_id": media_id, + "thumbnail_width": thumbnail_width, + "thumbnail_height": thumbnail_height, + "thumbnail_method": thumbnail_method, + "thumbnail_type": thumbnail_type, + "thumbnail_length": thumbnail_length, + "filesystem_id": filesystem_id, + } + ) diff --git a/synapse/storage/schema/delta/v9.sql b/synapse/storage/schema/delta/v9.sql new file mode 100644 index 0000000000..ad680c64da --- /dev/null +++ b/synapse/storage/schema/delta/v9.sql @@ -0,0 +1,23 @@ +/* Copyright 2014 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- To track destination health +CREATE TABLE IF NOT EXISTS destinations( + destination TEXT PRIMARY KEY, + retry_last_ts INTEGER, + retry_interval INTEGER +); + +PRAGMA user_version = 9; \ No newline at end of file diff --git a/synapse/storage/schema/media_repository.sql b/synapse/storage/schema/media_repository.sql new file mode 100644 index 0000000000..b785fa0208 --- /dev/null +++ b/synapse/storage/schema/media_repository.sql @@ -0,0 +1,68 @@ +/* Copyright 2014 OpenMarket Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE TABLE IF NOT EXISTS local_media_repository ( + media_id TEXT, -- The id used to refer to the media. + media_type TEXT, -- The MIME-type of the media. + media_length INTEGER, -- Length of the media in bytes. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + user_id TEXT, -- The user who uploaded the file. + CONSTRAINT uniqueness UNIQUE (media_id) +); + +CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails ( + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_method TEXT, -- The method used to make the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + CONSTRAINT uniqueness UNIQUE ( + media_id, thumbnail_width, thumbnail_height, thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS local_media_repository_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); + +CREATE TABLE IF NOT EXISTS remote_media_cache ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media on that server. + media_type TEXT, -- The MIME-type of the media. + created_ts INTEGER, -- When the content was uploaded in ms. + upload_name TEXT, -- The name the media was uploaded with. + media_length INTEGER, -- Length of the media in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE (media_origin, media_id) +); + +CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails ( + media_origin TEXT, -- The remote HS the media came from. + media_id TEXT, -- The id used to refer to the media. + thumbnail_width INTEGER, -- The width of the thumbnail in pixels. + thumbnail_height INTEGER, -- The height of the thumbnail in pixels. + thumbnail_method TEXT, -- The method used to make the thumbnail + thumbnail_type TEXT, -- The MIME-type of the thumbnail. + thumbnail_length INTEGER, -- The length of the thumbnail in bytes. + filesystem_id TEXT, -- The name used to store the media on disk. + CONSTRAINT uniqueness UNIQUE ( + media_origin, media_id, thumbnail_width, thumbnail_height, + thumbnail_type, thumbnail_type + ) +); + +CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id + ON local_media_repository_thumbnails (media_id); diff --git a/synapse/storage/schema/transactions.sql b/synapse/storage/schema/transactions.sql index 88e3e4e04d..de461bfa15 100644 --- a/synapse/storage/schema/transactions.sql +++ b/synapse/storage/schema/transactions.sql @@ -59,3 +59,9 @@ CREATE INDEX IF NOT EXISTS transaction_id_to_pdu_tx ON transaction_id_to_pdu(tra CREATE INDEX IF NOT EXISTS transaction_id_to_pdu_dest ON transaction_id_to_pdu(destination); CREATE INDEX IF NOT EXISTS transaction_id_to_pdu_index ON transaction_id_to_pdu(transaction_id, destination); +-- To track destination health +CREATE TABLE IF NOT EXISTS destinations( + destination TEXT PRIMARY KEY, + retry_last_ts INTEGER, + retry_interval INTEGER +); diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index 00d0f48082..423cc3f02a 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -17,6 +17,8 @@ from ._base import SQLBaseStore, Table from collections import namedtuple +from twisted.internet import defer + import logging logger = logging.getLogger(__name__) @@ -26,6 +28,10 @@ class TransactionStore(SQLBaseStore): """A collection of queries for handling PDUs. """ + # a write-through cache of DestinationsTable.EntryType indexed by + # destination string + destination_retry_cache = {} + def get_received_txn_response(self, transaction_id, origin): """For an incoming transaction from a given origin, check if we have already responded to it. If so, return the response code and response @@ -114,7 +120,7 @@ class TransactionStore(SQLBaseStore): def _prep_send_transaction(self, txn, transaction_id, destination, origin_server_ts): - # First we find out what the prev_txs should be. + # First we find out what the prev_txns should be. # Since we know that we are only sending one transaction at a time, # we can simply take the last one. query = "%s ORDER BY id DESC LIMIT 1" % ( @@ -205,6 +211,92 @@ class TransactionStore(SQLBaseStore): return ReceivedTransactionsTable.decode_results(txn.fetchall()) + def get_destination_retry_timings(self, destination): + """Gets the current retry timings (if any) for a given destination. + + Args: + destination (str) + + Returns: + None if not retrying + Otherwise a DestinationsTable.EntryType for the retry scheme + """ + if destination in self.destination_retry_cache: + return defer.succeed(self.destination_retry_cache[destination]) + + return self.runInteraction( + "get_destination_retry_timings", + self._get_destination_retry_timings, destination) + + def _get_destination_retry_timings(cls, txn, destination): + query = DestinationsTable.select_statement("destination = ?") + txn.execute(query, (destination,)) + result = txn.fetchall() + if result: + result = DestinationsTable.decode_single_result(result) + if result.retry_last_ts > 0: + return result + else: + return None + + def set_destination_retry_timings(self, destination, + retry_last_ts, retry_interval): + """Sets the current retry timings for a given destination. + Both timings should be zero if retrying is no longer occuring. + + Args: + destination (str) + retry_last_ts (int) - time of last retry attempt in unix epoch ms + retry_interval (int) - how long until next retry in ms + """ + + self.destination_retry_cache[destination] = ( + DestinationsTable.EntryType( + destination, + retry_last_ts, + retry_interval + ) + ) + + # XXX: we could chose to not bother persisting this if our cache thinks + # this is a NOOP + return self.runInteraction( + "set_destination_retry_timings", + self._set_destination_retry_timings, + destination, + retry_last_ts, + retry_interval, + ) + + def _set_destination_retry_timings(cls, txn, destination, + retry_last_ts, retry_interval): + + query = ( + "INSERT OR REPLACE INTO %s " + "(destination, retry_last_ts, retry_interval) " + "VALUES (?, ?, ?) " + ) % DestinationsTable.table_name + + txn.execute(query, (destination, retry_last_ts, retry_interval)) + + def get_destinations_needing_retry(self): + """Get all destinations which are due a retry for sending a transaction. + + Returns: + list: A list of `DestinationsTable.EntryType` + """ + + return self.runInteraction( + "get_destinations_needing_retry", + self._get_destinations_needing_retry + ) + + def _get_destinations_needing_retry(cls, txn): + where = "retry_last_ts > 0 and retry_next_ts < now()" + query = DestinationsTable.select_statement(where) + txn.execute(query) + return DestinationsTable.decode_results(txn.fetchall()) + class ReceivedTransactionsTable(Table): table_name = "received_transactions" @@ -247,3 +339,15 @@ class TransactionsToPduTable(Table): ] EntryType = namedtuple("TransactionsToPduEntry", fields) + + +class DestinationsTable(Table): + table_name = "destinations" + + fields = [ + "destination", + "retry_last_ts", + "retry_interval", + ] + + EntryType = namedtuple("DestinationsEntry", fields) |