From b30cd5b107acafce43fb63c471c086b8df4d981a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:31:00 +0000 Subject: Remove dead code related to default thumbnails --- synapse/rest/media/v1/thumbnail_resource.py | 76 ++--------------------------- 1 file changed, 3 insertions(+), 73 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 68d56b2b10..779fd3e9bc 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,11 +85,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: @@ -114,9 +109,7 @@ class ThumbnailResource(Resource): yield respond_with_file(request, t_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_local_thumbnail(self, request, media_id, desired_width, @@ -128,11 +121,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) for info in thumbnail_infos: t_w = info["thumbnail_width"] == desired_width @@ -166,10 +154,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, @@ -177,11 +162,6 @@ class ThumbnailResource(Resource): desired_method, desired_type): media_info = yield self.media_repo.get_remote_media(server_name, media_id) - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -213,23 +193,13 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead. - media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -250,47 +220,7 @@ class ThumbnailResource(Resource): ) yield respond_with_file(request, t_type, file_path, t_length) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) - - @defer.inlineCallbacks - def _respond_default_thumbnail(self, request, media_info, width, height, - method, m_type): - # XXX: how is this meant to work? store.get_default_thumbnails - # appears to always return [] so won't this always 404? - media_type = media_info["media_type"] - top_level_type = media_type.split("/")[0] - sub_type = media_type.split("/")[-1].split(";")[0] - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, sub_type, - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, "_default", - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - "_default", "_default", - ) - if not thumbnail_infos: respond_404(request) - return - - thumbnail_info = self._select_thumbnail( - width, height, "crop", m_type, thumbnail_infos - ) - - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - t_length = thumbnail_info["thumbnail_length"] - - file_path = self.filepaths.default_thumbnail( - top_level_type, sub_type, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): -- cgit 1.4.1 From b6c9deffda94f230085d9974379643497749e7f5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 15:53:23 +0000 Subject: Remove dead TODO --- synapse/rest/media/v1/thumbnail_resource.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 779fd3e9bc..70dbf7f5c9 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,8 +198,6 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): - # TODO: Don't download the whole remote file - # We should proxy the thumbnail from the remote server instead. thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) -- cgit 1.4.1 From 47ca5eb8822ddc376b098a2747e1dbb85b2ce32b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jan 2018 15:09:43 +0000 Subject: Split out add_file_headers --- synapse/rest/media/v1/_base.py | 70 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 28 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 95fa95fce3..57a4509816 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -70,38 +70,11 @@ def respond_with_file(request, media_type, file_path, logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - if upload_name: - if is_ascii(upload_name): - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - else: - request.setHeader( - b"Content-Disposition", - b"inline; filename*=utf-8''%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) if file_size is None: stat = os.stat(file_path) file_size = stat.st_size - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + add_file_headers(request, media_type, file_size, upload_name) with open(file_path, "rb") as f: yield logcontext.make_deferred_yieldable( @@ -111,3 +84,44 @@ def respond_with_file(request, media_type, file_path, finish_request(request) else: respond_404(request) + + +def add_file_headers(request, media_type, file_size, upload_name): + """Adds the correct response headers in preparation for responding with the + media. + + Args: + request (twisted.web.http.Request) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + if upload_name: + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + request.setHeader( + b"Content-Length", b"%d" % (file_size,) + ) -- cgit 1.4.1 From 1ee787912b1fda1f9255627b0ea5f69cf021679d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 16:58:09 +0000 Subject: Add some helper classes --- synapse/rest/media/v1/_base.py | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 57a4509816..1310820486 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -125,3 +125,76 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader( b"Content-Length", b"%d" % (file_size,) ) + + +@defer.inlineCallbacks +def respond_with_responder(request, responder, media_type, file_size, upload_name=None): + """Responds to the request with given responder. If responder is None then + returns 404. + + Args: + request (twisted.web.http.Request) + responder (Responder) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + if not responder: + respond_404(request) + return + + add_file_headers(request, media_type, file_size, upload_name) + yield responder.write_to_consumer(request) + finish_request(request) + + +class Responder(object): + """Represents a response that can be streamed to the requester. + + Either `write_to_consumer` or `cancel` must be called to clean up any open + resources. + """ + def write_to_consumer(self, consumer): + """Stream response into consumer + + Args: + consumer (IConsumer) + + Returns: + Deferred: Resolves once the response has finished being written + """ + pass + + def cancel(self): + """Called when the responder is not going to be used after all. + """ + pass + + +class FileInfo(object): + """Details about a requested/uploaded file. + + Attributes: + server_name (str): The server name where the media originated from, + or None if local. + file_id (str): The local ID of the file. For local files this is the + same as the media_id + media_type (str): Type of the file + url_cache (bool): If the file is for the url preview cache + thumbnail (bool): Whether the file is a thumbnail or not. + thumbnail_width (int) + thumbnail_height (int) + thumbnail_method (int) + thumbnail_type (str) + """ + def __init__(self, server_name, file_id, url_cache=False, + thumbnail=False, thumbnail_width=None, thumbnail_height=None, + thumbnail_method=None, thumbnail_type=None): + self.server_name = server_name + self.file_id = file_id + self.url_cache = url_cache + self.thumbnail = thumbnail + self.thumbnail_width = thumbnail_width + self.thumbnail_height = thumbnail_height + self.thumbnail_method = thumbnail_method + self.thumbnail_type = thumbnail_type -- cgit 1.4.1 From ada470bccbf08486ae9e48afbcc4ebfd8161e93b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:07:30 +0000 Subject: Add MediaStorage class --- synapse/rest/media/v1/media_storage.py | 198 +++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 synapse/rest/media/v1/media_storage.py (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py new file mode 100644 index 0000000000..052745e6f6 --- /dev/null +++ b/synapse/rest/media/v1/media_storage.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads +from twisted.protocols.basic import FileSender + +from ._base import Responder + +from synapse.util.logcontext import make_deferred_yieldable + +import contextlib +import os +import logging +import shutil + +logger = logging.getLogger(__name__) + + +class MediaStorage(object): + """Responsible for storing/fetching files from local sources. + """ + + def __init__(self, local_media_directory, filepaths): + self.local_media_directory = local_media_directory + self.filepaths = filepaths + + @defer.inlineCallbacks + def store_file(self, source, file_info): + """Write `source` to the on disk media store, and also any other + configured storage providers + + Args: + source: A file like object that should be written + file_info (FileInfo): Info about the file to store + + Returns: + Deferred[str]: the file path written to in the primary media store + """ + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, fname, + )) + + defer.returnValue(fname) + + @contextlib.contextmanager + def store_into_file(self, file_info): + """Context manager used to get a file like object to write into, as + described by file_info. + + Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a + function that returns a Deferred that must be waited on after the file + has been successfully written to. + + Args: + file_info (FileInfo): Info about the file to store + + Example: + + with media_storage.store_into_file(info) as (f, fname, finish_cb): + # .. write into f ... + yield finish_cb() + """ + + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + finished_called = [False] + + def finish(): + # This will be used later when we want to hit out to other storage + # places + finished_called[0] = True + return defer.succeed(None) + + try: + with open(fname, "wb") as f: + yield f, fname, finish + except Exception as e: + try: + os.remove(fname) + except Exception: + pass + raise e + + if not finished_called: + raise Exception("Fnished callback not called") + + @defer.inlineCallbacks + def fetch_media(self, file_info): + """Attempts to fetch media described by file_info from the local cache + and configured storage providers. + + Args: + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the file was found, + otherwise None. + """ + + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(FileResponder(open(local_path, "rb"))) + + defer.returnValue(None) + + def _file_info_to_path(self, file_info): + """Converts file_info into a relative path. + """ + if file_info.url_cache: + return self.filepaths.url_cache_filepath_rel(file_info.file_id) + + if file_info.server_name: + if file_info.thumbnail: + return self.filepaths.remote_media_thumbnail_rel( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.remote_media_filepath_rel( + file_info.server_name, file_info.file_id, + ) + + if file_info.thumbnail: + return self.filepaths.local_media_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.local_media_filepath_rel( + file_info.file_id, + ) + + +def _write_file_synchronously(source, fname): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + """ + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) + + +class FileResponder(Responder): + """Wraps an open file that can be sent to a request. + + Args: + open_file (file): A file like object to be streamed ot the client, + is closed when finished streaming. + """ + def __init__(self, open_file): + self.open_file = open_file + + @defer.inlineCallbacks + def write_to_consumer(self, consumer): + with self.open_file: + yield FileSender().beginFileTransfer(self.open_file, consumer) + + def cancel(self): + self.open_file.close() -- cgit 1.4.1 From dd3092c3a357be2453e25293b5590d627f3cfc48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:45:11 +0000 Subject: Use MediaStorage for local files --- synapse/rest/media/v1/download_resource.py | 24 +---- synapse/rest/media/v1/media_repository.py | 168 +++++++++++++---------------- 2 files changed, 73 insertions(+), 119 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6879249c8a..2a5afa9ff4 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -57,34 +57,12 @@ class DownloadResource(Resource): ) server_name, media_id, name = parse_media_id(request) if server_name == self.server_name: - yield self._respond_local_file(request, media_id, name) + yield self.media_repo.get_local_media(request, media_id, name) else: yield self._respond_remote_file( request, server_name, media_id, name ) - @defer.inlineCallbacks - def _respond_local_file(self, request, media_id, name): - media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - respond_404(request) - return - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - upload_name = name if name else media_info["upload_name"] - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_filepath(media_id) - else: - file_path = self.filepaths.local_media_filepath(media_id) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) - @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): # don't forward requests for remote media if allow_remote is false diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index eed9056a2f..6ad9320b69 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vecotr Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +19,7 @@ import twisted.internet.error import twisted.web.http from twisted.web.resource import Resource +from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -25,6 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string @@ -33,7 +36,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination import os @@ -74,6 +77,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() + self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + self.clock.looping_call( self._update_recently_accessed_remotes, UPDATE_RECENTLY_ACCESSED_REMOTES_TS @@ -88,72 +93,6 @@ class MediaRepository(object): media, self.clock.time_msec() ) - @staticmethod - def _makedirs(filepath): - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - - @staticmethod - def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called - from a thread. - - Args: - source: A file like object to be written - fname (str): Path to write to - """ - MediaRepository._makedirs(fname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) - - @defer.inlineCallbacks - def write_to_file_and_backup(self, source, path): - """Write `source` to the on disk media store, and also the backup store - if configured. - - Args: - source: A file like object that should be written - path (str): Relative path to write file to - - Returns: - Deferred[str]: the file path written to in the primary media store - """ - fname = os.path.join(self.primary_base_path, path) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, fname, - )) - - # Write to backup repository - yield self.copy_to_backup(path) - - defer.returnValue(fname) - - @defer.inlineCallbacks - def copy_to_backup(self, path): - """Copy a file from the primary to backup media store, if configured. - - Args: - path(str): Relative path to write file to - """ - if self.backup_base_path: - primary_fname = os.path.join(self.primary_base_path, path) - backup_fname = os.path.join(self.backup_base_path, path) - - # We can either wait for successful writing to the backup repository - # or write in the background and immediately return - if self.synchronous_backup_media_store: - yield make_deferred_yieldable(threads.deferToThread( - shutil.copyfile, primary_fname, backup_fname, - )) - else: - preserve_fn(threads.deferToThread)( - shutil.copyfile, primary_fname, backup_fname, - ) - @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -171,10 +110,13 @@ class MediaRepository(object): """ media_id = random_string(24) - fname = yield self.write_to_file_and_backup( - content, self.filepaths.local_media_filepath_rel(media_id) + file_info = FileInfo( + server_name=None, + file_id=media_id, ) + fname = yield self.media_storage.store_file(content, file_info) + logger.info("Stored local media in file %r", fname) yield self.store.store_local_media( @@ -194,6 +136,30 @@ class MediaRepository(object): defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) + @defer.inlineCallbacks + def get_local_media(self, request, media_id, name): + """Responds to reqests for local media, if exists, or returns 404. + """ + media_info = yield self.store.get_local_media(media_id) + if not media_info or media_info["quarantined_by"]: + respond_404(request) + return + + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + url_cache = media_info["url_cache"] + + file_info = FileInfo( + None, media_id, + url_cache=url_cache, + ) + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + @defer.inlineCallbacks def get_remote_media(self, server_name, media_id): key = (server_name, media_id) @@ -368,11 +334,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=None, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -400,11 +373,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -472,20 +452,6 @@ class MediaRepository(object): # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Work out the correct file name for thumbnail - if server_name: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - elif url_cache: - file_path = self.filepaths.url_cache_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - else: - file_path = self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - # Generate the thumbnail if t_method == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -505,9 +471,19 @@ class MediaRepository(object): continue try: - # Write to disk - output_path = yield self.write_to_file_and_backup( - t_byte_source, file_path, + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + url_cache=url_cache, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() -- cgit 1.4.1 From 9e20840e0296eb9b814bb0e7130342c5c9a19e3d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:52:06 +0000 Subject: Use MediaStorage for remote media --- synapse/rest/media/v1/download_resource.py | 43 ++--- synapse/rest/media/v1/media_repository.py | 249 +++++++++++++++++------------ 2 files changed, 156 insertions(+), 136 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 2a5afa9ff4..5dc92972cf 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -14,7 +14,7 @@ # limitations under the License. import synapse.http.servlet -from ._base import parse_media_id, respond_with_file, respond_404 +from ._base import parse_media_id, respond_404 from twisted.web.resource import Resource from synapse.http.server import request_handler, set_cors_headers @@ -59,35 +59,14 @@ class DownloadResource(Resource): if server_name == self.server_name: yield self.media_repo.get_local_media(request, media_id, name) else: - yield self._respond_remote_file( - request, server_name, media_id, name - ) + allow_remote = synapse.http.servlet.parse_boolean( + request, "allow_remote", default=True) + if not allow_remote: + logger.info( + "Rejecting request for remote media %s/%s due to allow_remote", + server_name, media_id, + ) + respond_404(request) + return - @defer.inlineCallbacks - def _respond_remote_file(self, request, server_name, media_id, name): - # don't forward requests for remote media if allow_remote is false - allow_remote = synapse.http.servlet.parse_boolean( - request, "allow_remote", default=True) - if not allow_remote: - logger.info( - "Rejecting request for remote media %s/%s due to allow_remote", - server_name, media_id, - ) - respond_404(request) - return - - media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - filesystem_id = media_info["filesystem_id"] - upload_name = name if name else media_info["upload_name"] - - file_path = self.filepaths.remote_media_filepath( - server_name, filesystem_id - ) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) + yield self.media_repo.get_remote_media(request, server_name, media_id, name) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6ad9320b69..07820fab62 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -19,7 +19,7 @@ import twisted.internet.error import twisted.web.http from twisted.web.resource import Resource -from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder +from ._base import respond_404, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -161,124 +161,165 @@ class MediaRepository(object): ) @defer.inlineCallbacks - def get_remote_media(self, server_name, media_id): + def get_remote_media(self, request, server_name, media_id, name): + """Respond to requests for remote media. + """ + self.recently_accessed_remotes.add((server_name, media_id)) + + # We linearize here to ensure that we don't try and download remote + # media mutliple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): - media_info = yield self._get_remote_media_impl(server_name, media_id) - defer.returnValue(media_info) + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # We purposefully stream the file outside the lock + if responder: + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + else: + respond_404(request) @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): + """Looks for media in local cache, if not there then attempt to + download from remote server. + + Returns: + Deferred((Respodner, media_info)) + """ media_info = yield self.store.get_cached_remote_media( server_name, media_id ) - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) - elif media_info["quarantined_by"]: - raise NotFoundError() + + # file_id is the ID we use to track the file locally. If we've already + # seen the file then reuse the existing ID, otherwise genereate a new + # one. + if media_info: + file_id = media_info["filesystem_id"] else: - self.recently_accessed_remotes.add((server_name, media_id)) - yield self.store.update_cached_last_access_time( - [(server_name, media_id)], self.clock.time_msec() - ) - defer.returnValue(media_info) + file_id = random_string(24) + + file_info = FileInfo(server_name, file_id) + + # If we have an entry in the DB, try and look for it + if media_info: + if media_info["quarantined_by"]: + raise NotFoundError() + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + # Failed to find the file anywhere, lets download it. + + media_info = yield self._download_remote_file( + server_name, media_id, file_id + ) + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + defer.returnValue((None, media_info)) @defer.inlineCallbacks - def _download_remote_file(self, server_name, media_id): - file_id = random_string(24) + def _download_remote_file(self, server_name, media_id, file_id): + """Attempt to download the remote file from the given server name, + using the given file_id as the local id. + """ - fpath = self.filepaths.remote_media_filepath_rel( - server_name, file_id + file_info = FileInfo( + server_name=server_name, + file_id=file_id, ) - fname = os.path.join(self.primary_base_path, fpath) - self._makedirs(fname) - try: - with open(fname, "wb") as f: - request_path = "/".join(( - "/_matrix/media/v1/download", server_name, media_id, - )) + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )) + try: + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + max_size=self.max_upload_size, args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + } + ) + except twisted.internet.error.DNSLookupError as e: + logger.warn("HTTP error fetching remote media %s/%s: %r", + server_name, media_id, e) + raise NotFoundError() + + except HttpResponseException as e: + logger.warn("HTTP error fetching remote media %s/%s: %s", + server_name, media_id, e.response) + if e.code == twisted.web.http.NOT_FOUND: + raise SynapseError.from_http_response_exception(e) + raise SynapseError(502, "Failed to fetch remote media") + + except SynapseError: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise + except NotRetryingDestination: + logger.warn("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") + except Exception: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise SynapseError(502, "Failed to fetch remote media") + + yield finish() + + media_type = headers["Content-Type"][0] + + time_now_ms = self.clock.time_msec() + + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii + + if upload_name: + upload_name = urlparse.unquote(upload_name) try: - length, headers = yield self.client.get_file( - server_name, request_path, output_stream=f, - max_size=self.max_upload_size, args={ - # tell the remote server to 404 if it doesn't - # recognise the server_name, to make sure we don't - # end up with a routing loop. - "allow_remote": "false", - } - ) - except twisted.internet.error.DNSLookupError as e: - logger.warn("HTTP error fetching remote media %s/%s: %r", - server_name, media_id, e) - raise NotFoundError() - - except HttpResponseException as e: - logger.warn("HTTP error fetching remote media %s/%s: %s", - server_name, media_id, e.response) - if e.code == twisted.web.http.NOT_FOUND: - raise SynapseError.from_http_response_exception(e) - raise SynapseError(502, "Failed to fetch remote media") - - except SynapseError: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise - except NotRetryingDestination: - logger.warn("Not retrying destination %r", server_name) - raise SynapseError(502, "Failed to fetch remote media") - except Exception: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise SynapseError(502, "Failed to fetch remote media") - - yield self.copy_to_backup(fpath) - - media_type = headers["Content-Type"][0] - time_now_ms = self.clock.time_msec() - - content_disposition = headers.get("Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - upload_name = urlparse.unquote(upload_name) - try: - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - - logger.info("Stored remote media in file %r", fname) - - yield self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=length, - filesystem_id=file_id, - ) - except Exception: - os.remove(fname) - raise + upload_name = upload_name.decode("utf-8") + except UnicodeDecodeError: + upload_name = None + else: + upload_name = None + + logger.info("Stored remote media in file %r", fname) + + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) media_info = { "media_type": media_type, -- cgit 1.4.1 From 9d30a7691c7a54bf2d299ea3cdbdf4af74dd0af5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:08:46 +0000 Subject: Make ThumbnailResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +- synapse/rest/media/v1/thumbnail_resource.py | 112 ++++++++++++++++------------ 2 files changed, 68 insertions(+), 48 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 07820fab62..0c84f1be07 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -637,7 +637,9 @@ class MediaRepositoryResource(Resource): self.putChild("upload", UploadResource(hs, media_repo)) self.putChild("download", DownloadResource(hs, media_repo)) - self.putChild("thumbnail", ThumbnailResource(hs, media_repo)) + self.putChild("thumbnail", ThumbnailResource( + hs, media_repo, media_repo.media_storage, + )) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c9..f59f300665 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -14,7 +14,10 @@ # limitations under the License. -from ._base import parse_media_id, respond_404, respond_with_file +from ._base import ( + parse_media_id, respond_404, respond_with_file, FileInfo, + respond_with_responder, +) from twisted.web.resource import Resource from synapse.http.servlet import parse_string, parse_integer from synapse.http.server import request_handler, set_cors_headers @@ -30,12 +33,12 @@ logger = logging.getLogger(__name__) class ThumbnailResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.store = hs.get_datastore() - self.filepaths = media_repo.filepaths self.media_repo = media_repo + self.media_storage = media_storage self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.server_name = hs.hostname self.version_string = hs.version_string @@ -91,23 +94,22 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path) + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type + t_length = thumbnail_info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) @@ -129,20 +131,23 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - yield respond_with_file(request, desired_type, file_path) - return + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -175,12 +180,22 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, desired_width, desired_height, - desired_type, desired_method, + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], ) - yield respond_with_file(request, desired_type, file_path) - return + + t_type = file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -206,17 +221,20 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - file_id = thumbnail_info["filesystem_id"] + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type t_length = thumbnail_info["thumbnail_length"] - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) -- cgit 1.4.1 From 2442e9876c2622e345ab62a414e509d4f5cecb4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 14:36:07 +0000 Subject: Make PreviewUrlResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +++- synapse/rest/media/v1/preview_url_resource.py | 18 +++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0c84f1be07..9aba9f13f0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -642,4 +642,6 @@ class MediaRepositoryResource(Resource): )) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: - self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) + self.putChild("preview_url", PreviewUrlResource( + hs, media_repo, media_repo.media_storage, + )) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 40d2e664eb..f3dbbb3fec 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -17,6 +17,8 @@ from twisted.web.server import NOT_DONE_YET from twisted.internet import defer from twisted.web.resource import Resource +from ._base import FileInfo + from synapse.api.errors import ( SynapseError, Codes, ) @@ -49,7 +51,7 @@ logger = logging.getLogger(__name__) class PreviewUrlResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.auth = hs.get_auth() @@ -62,6 +64,7 @@ class PreviewUrlResource(Resource): self.client = SpiderHttpClient(hs) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path + self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -273,19 +276,21 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fpath = self.filepaths.url_cache_filepath_rel(file_id) - fname = os.path.join(self.primary_base_path, fpath) - self.media_repo._makedirs(fname) + file_info = FileInfo( + server_name=None, + file_id=file_id, + url_cache=True, + ) try: - with open(fname, "wb") as f: + with self.media_storage.store_into_file(file_info) as (f, fname, finish): logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) # FIXME: pass through 404s and other error messages nicely - yield self.media_repo.copy_to_backup(fpath) + yield finish() media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -327,7 +332,6 @@ class PreviewUrlResource(Resource): ) except Exception as e: - os.remove(fname) raise SynapseError( 500, ("Failed to download content: %s" % e), Codes.UNKNOWN -- cgit 1.4.1 From 8f03aa9f61e1d99dfcde972f4e5f0f52919db49f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:19:55 +0000 Subject: Add StorageProvider concept --- synapse/rest/media/v1/media_repository.py | 29 +++++-- synapse/rest/media/v1/media_storage.py | 15 +++- synapse/rest/media/v1/storage_provider.py | 127 ++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 9 deletions(-) create mode 100644 synapse/rest/media/v1/storage_provider.py (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 9aba9f13f0..7938fe7bc8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,6 +27,9 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .storage_provider import ( + StorageProviderWrapper, FileStorageProviderBackend, +) from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -66,10 +69,6 @@ class MediaRepository(object): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = hs.config.backup_media_store_path - - self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store - self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -77,7 +76,27 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + # List of StorageProvider's where we should search for media and + # potentially upload to. + self.storage_providers = [] + + # TODO: Move this into config and allow other storage providers to be + # defined. + if hs.config.backup_media_store_path: + backend = FileStorageProviderBackend( + self.primary_base_path, hs.config.backup_media_store_path, + ) + provider = StorageProviderWrapper( + backend, + store=True, + store_synchronous=hs.config.synchronous_backup_media_store, + store_remote=True, + ) + self.storage_providers.append(provider) + + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, self.storage_providers, + ) self.clock.looping_call( self._update_recently_accessed_remotes, diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 052745e6f6..a1ec6cadb1 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -32,9 +32,10 @@ class MediaStorage(object): """Responsible for storing/fetching files from local sources. """ - def __init__(self, local_media_directory, filepaths): + def __init__(self, local_media_directory, filepaths, storage_providers): self.local_media_directory = local_media_directory self.filepaths = filepaths + self.storage_providers = storage_providers @defer.inlineCallbacks def store_file(self, source, file_info): @@ -90,11 +91,12 @@ class MediaStorage(object): finished_called = [False] + @defer.inlineCallbacks def finish(): - # This will be used later when we want to hit out to other storage - # places + for provider in self.storage_providers: + yield provider.store_file(path, file_info) + finished_called[0] = True - return defer.succeed(None) try: with open(fname, "wb") as f: @@ -127,6 +129,11 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(FileResponder(open(local_path, "rb"))) + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + defer.returnValue(res) + defer.returnValue(None) def _file_info_to_path(self, file_info): diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py new file mode 100644 index 0000000000..2ad602e101 --- /dev/null +++ b/synapse/rest/media/v1/storage_provider.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads + +from .media_storage import FileResponder + +from synapse.util.logcontext import preserve_fn + +import logging +import os +import shutil + + +logger = logging.getLogger(__name__) + + +class StorageProvider(object): + """A storage provider is a service that can store uploaded media and + retrieve them. + """ + def store_file(self, path, file_info): + """Store the file described by file_info. The actual contents can be + retrieved by reading the file in file_info.upload_path. + + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred + """ + pass + + def fetch(self, path, file_info): + """Attempt to fetch the file described by file_info and stream it + into writer. + + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the provider has the file, + otherwise returns None. + """ + pass + + +class StorageProviderWrapper(StorageProvider): + """Wraps a storage provider and provides various config options + + Args: + backend (StorageProvider) + store (bool): Whether to store new files or not. + store_synchronous (bool): Whether to wait for file to be successfully + uploaded, or todo the upload in the backgroud. + store_remote (bool): Whether remote media should be uploaded + """ + def __init__(self, backend, store, store_synchronous, store_remote): + self.backend = backend + self.store = store + self.store_synchronous = store_synchronous + self.store_remote = store_remote + + def store_file(self, path, file_info): + if not self.store: + return defer.succeed(None) + + if file_info.server_name and not self.store_remote: + return defer.succeed(None) + + if self.store_synchronous: + return self.backend.store_file(path, file_info) + else: + # TODO: Handle errors. + preserve_fn(self.backend.store_file)(path, file_info) + return defer.succeed(None) + + def fetch(self, path, file_info): + return self.backend.fetch(path, file_info) + + +class FileStorageProviderBackend(StorageProvider): + """A storage provider that stores files in a directory on a filesystem. + + Args: + cache_directory (str): Base path of the local media repository + base_directory (str): Base path to store new files + """ + + def __init__(self, cache_directory, base_directory): + self.cache_directory = cache_directory + self.base_directory = base_directory + + def store_file(self, path, file_info): + """See StorageProvider.store_file""" + + primary_fname = os.path.join(self.cache_directory, path) + backup_fname = os.path.join(self.base_directory, path) + + dirname = os.path.dirname(backup_fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + return threads.deferToThread( + shutil.copyfile, primary_fname, backup_fname, + ) + + def fetch(self, path, file_info): + """See StorageProvider.fetch""" + + backup_fname = os.path.join(self.base_directory, path) + if os.path.isfile(backup_fname): + return FileResponder(open(backup_fname, "rb")) -- cgit 1.4.1 From 227c491510e09bc201b835a7dfa84aaeafb3cdc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:15:31 +0000 Subject: Comments --- synapse/rest/media/v1/_base.py | 9 ++++--- synapse/rest/media/v1/media_repository.py | 39 +++++++++++++++++++++++++++---- synapse/rest/media/v1/media_storage.py | 32 +++++++++++++++++++++---- 3 files changed, 65 insertions(+), 15 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 1310820486..03df875b44 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -134,9 +134,9 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam Args: request (twisted.web.http.Request) - responder (Responder) + responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media, if known. + file_size (int): Size in bytes of the media. If not known it should be None upload_name (str): The name of the requested file, if any. """ if not responder: @@ -179,13 +179,12 @@ class FileInfo(object): or None if local. file_id (str): The local ID of the file. For local files this is the same as the media_id - media_type (str): Type of the file url_cache (bool): If the file is for the url preview cache thumbnail (bool): Whether the file is a thumbnail or not. thumbnail_width (int) thumbnail_height (int) - thumbnail_method (int) - thumbnail_type (str) + thumbnail_method (str) + thumbnail_type (str): Content type of thumbnail, e.g. image/png """ def __init__(self, server_name, file_id, url_cache=False, thumbnail=False, thumbnail_width=None, thumbnail_height=None, diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 7938fe7bc8..6508dbf178 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - # List of StorageProvider's where we should search for media and + # List of StorageProviders where we should search for media and # potentially upload to. self.storage_providers = [] @@ -158,6 +158,16 @@ class MediaRepository(object): @defer.inlineCallbacks def get_local_media(self, request, media_id, name): """Responds to reqests for local media, if exists, or returns 404. + + Args: + request(twisted.web.http.Request) + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: @@ -182,18 +192,29 @@ class MediaRepository(object): @defer.inlineCallbacks def get_remote_media(self, request, server_name, media_id, name): """Respond to requests for remote media. + + Args: + request(twisted.web.http.Request) + server_name (str): Remote server_name where the media originated. + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ self.recently_accessed_remotes.add((server_name, media_id)) # We linearize here to ensure that we don't try and download remote - # media mutliple times concurrently + # media multiple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): responder, media_info = yield self._get_remote_media_impl( server_name, media_id, ) - # We purposefully stream the file outside the lock + # We deliberately stream the file outside the lock if responder: media_type = media_info["media_type"] media_length = media_info["media_length"] @@ -210,7 +231,7 @@ class MediaRepository(object): download from remote server. Returns: - Deferred((Respodner, media_info)) + Deferred[(Responder, media_info)] """ media_info = yield self.store.get_cached_remote_media( server_name, media_id @@ -251,6 +272,14 @@ class MediaRepository(object): def _download_remote_file(self, server_name, media_id, file_id): """Attempt to download the remote file from the given server name, using the given file_id as the local id. + + Args: + server_name (str): Originating server + media_id (str) + file_id (str): Local file ID + + Returns: + Deferred[MediaInfo] """ file_info = FileInfo( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index a1ec6cadb1..49d2b7cd45 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -30,6 +30,12 @@ logger = logging.getLogger(__name__) class MediaStorage(object): """Responsible for storing/fetching files from local sources. + + Args: + local_media_directory (str): Base path where we store media on disk + filepaths (MediaFilePaths) + storage_providers ([StorageProvider]): List of StorageProvider that are + used to fetch and store files. """ def __init__(self, local_media_directory, filepaths, storage_providers): @@ -68,9 +74,16 @@ class MediaStorage(object): """Context manager used to get a file like object to write into, as described by file_info. - Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a - function that returns a Deferred that must be waited on after the file - has been successfully written to. + Actually yields a 3-tuple (file, fname, finish_cb), where file is a file + like object that can be written to, fname is the absolute path of file + on disk, and finish_cb is a function that returns a Deferred. + + fname can be used to read the contents from after upload, e.g. to + generate thumbnails. + + finish_cb must be called and waited on after the file has been + successfully been written to. Should not be called if there was an + error. Args: file_info (FileInfo): Info about the file to store @@ -109,7 +122,7 @@ class MediaStorage(object): raise e if not finished_called: - raise Exception("Fnished callback not called") + raise Exception("Finished callback not called") @defer.inlineCallbacks def fetch_media(self, file_info): @@ -120,7 +133,7 @@ class MediaStorage(object): file_info (FileInfo) Returns: - Deferred(Responder): Returns a Responder if the file was found, + Deferred[Responder|None]: Returns a Responder if the file was found, otherwise None. """ @@ -138,6 +151,15 @@ class MediaStorage(object): def _file_info_to_path(self, file_info): """Converts file_info into a relative path. + + The path is suitable for storing files under a directory, e.g. used to + store files on local FS under the base media repository directory. + + Args: + file_info (FileInfo) + + Returns: + str """ if file_info.url_cache: return self.filepaths.url_cache_filepath_rel(file_info.file_id) -- cgit 1.4.1 From 4d88958cf6f9ed28ecd78990a8f51119eb294279 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:23:54 +0000 Subject: Make class var local --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6508dbf178..65b16ce4cb 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -78,7 +78,7 @@ class MediaRepository(object): # List of StorageProviders where we should search for media and # potentially upload to. - self.storage_providers = [] + storage_providers = [] # TODO: Move this into config and allow other storage providers to be # defined. @@ -92,10 +92,10 @@ class MediaRepository(object): store_synchronous=hs.config.synchronous_backup_media_store, store_remote=True, ) - self.storage_providers.append(provider) + storage_providers.append(provider) self.media_storage = MediaStorage( - self.primary_base_path, self.filepaths, self.storage_providers, + self.primary_base_path, self.filepaths, storage_providers, ) self.clock.looping_call( -- cgit 1.4.1 From c6c009603cd72c2dfd777658b871dc66b08f7aa4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:24:05 +0000 Subject: Remove unused variables --- synapse/rest/media/v1/download_resource.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 5dc92972cf..3443db91ca 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -32,11 +32,8 @@ class DownloadResource(Resource): def __init__(self, hs, media_repo): Resource.__init__(self) - self.filepaths = media_repo.filepaths self.media_repo = media_repo self.server_name = hs.hostname - self.store = hs.get_datastore() - self.version_string = hs.version_string self.clock = hs.get_clock() def render_GET(self, request): -- cgit 1.4.1 From 1e4edd1717d1a3ef5ada210882d6798c520626eb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:28:32 +0000 Subject: Remove unnecessary condition --- synapse/rest/media/v1/media_repository.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 65b16ce4cb..5c50646bc7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -263,10 +263,7 @@ class MediaRepository(object): ) responder = yield self.media_storage.fetch_media(file_info) - if responder: - defer.returnValue((responder, media_info)) - - defer.returnValue((None, media_info)) + defer.returnValue((responder, media_info)) @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id, file_id): -- cgit 1.4.1 From dcc8eded4172670137b5dd9be1e4c70328cf85f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:16:27 +0000 Subject: Add missing class var --- synapse/rest/media/v1/download_resource.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 3443db91ca..fe7e17596f 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -34,7 +34,10 @@ class DownloadResource(Resource): self.media_repo = media_repo self.server_name = hs.hostname + + # Both of these are expected by @request_handler() self.clock = hs.get_clock() + self.version_string = hs.version_string def render_GET(self, request): self._async_render_GET(request) -- cgit 1.4.1 From 85a4d78213f6987c920043532bca428bb582a46b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:32:03 +0000 Subject: Make Responder a context manager --- synapse/rest/media/v1/_base.py | 14 ++++++++------ synapse/rest/media/v1/media_storage.py | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 03df875b44..1145904aeb 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -144,15 +144,16 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam return add_file_headers(request, media_type, file_size, upload_name) - yield responder.write_to_consumer(request) + with responder: + yield responder.write_to_consumer(request) finish_request(request) class Responder(object): """Represents a response that can be streamed to the requester. - Either `write_to_consumer` or `cancel` must be called to clean up any open - resources. + Responder is a context manager which *must* be used, so that any resources + held can be cleaned up. """ def write_to_consumer(self, consumer): """Stream response into consumer @@ -165,9 +166,10 @@ class Responder(object): """ pass - def cancel(self): - """Called when the responder is not going to be used after all. - """ + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): pass diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 49d2b7cd45..b6e7a19e12 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -220,8 +220,7 @@ class FileResponder(Responder): @defer.inlineCallbacks def write_to_consumer(self, consumer): - with self.open_file: - yield FileSender().beginFileTransfer(self.open_file, consumer) + yield FileSender().beginFileTransfer(self.open_file, consumer) - def cancel(self): + def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() -- cgit 1.4.1 From e21370ba54607c9eb78869bc7ce5ab3d6f896fdd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 14:44:02 +0000 Subject: Correctly reraise exception --- synapse/rest/media/v1/media_storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index b6e7a19e12..001e84578e 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -24,6 +24,7 @@ import contextlib import os import logging import shutil +import sys logger = logging.getLogger(__name__) @@ -114,12 +115,13 @@ class MediaStorage(object): try: with open(fname, "wb") as f: yield f, fname, finish - except Exception as e: + except Exception: + t, v, tb = sys.exc_info() try: os.remove(fname) except Exception: pass - raise e + raise t, v, tb if not finished_called: raise Exception("Finished callback not called") -- cgit 1.4.1 From 694f1c1b185a8431679d39a80b7567ae68605e17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 15:02:46 +0000 Subject: Fix up comments --- synapse/rest/media/v1/_base.py | 4 ++-- synapse/rest/media/v1/media_repository.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 1145904aeb..e7ac01da01 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -136,8 +136,8 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam request (twisted.web.http.Request) responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media. If not known it should be None - upload_name (str): The name of the requested file, if any. + file_size (int|None): Size in bytes of the media. If not known it should be None + upload_name (str|None): The name of the requested file, if any. """ if not responder: respond_404(request) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c50646bc7..45bc534200 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -161,11 +161,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) - media_id (str) + media_id (str): The media ID of the content. (This is the same as + the file_id for local content.) name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -196,11 +197,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) server_name (str): Remote server_name where the media originated. - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -230,6 +232,11 @@ class MediaRepository(object): """Looks for media in local cache, if not there then attempt to download from remote server. + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + Returns: Deferred[(Responder, media_info)] """ @@ -272,7 +279,9 @@ class MediaRepository(object): Args: server_name (str): Originating server - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). This is different than the file_id, which is + locally generated. file_id (str): Local file ID Returns: -- cgit 1.4.1 From 21bf87a146e54d9c111abb6f39a1bcbdc0563df2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 15:38:06 +0000 Subject: Reinstate media download on thumbnail request We need to actually download the remote media when we get a request for a thumbnail. --- synapse/rest/media/v1/thumbnail_resource.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c9..53e48aba22 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,6 +198,11 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead of + # downloading the remote file and generating our own thumbnails. + yield self.media_repo.get_remote_media(server_name, media_id) + thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) -- cgit 1.4.1 From a4c5e4a6451dfc7b378c10b916635de6bdafa80a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 11:06:42 +0000 Subject: Fix thumbnailing remote files --- synapse/rest/media/v1/media_repository.py | 28 ++++++++++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 4 ++-- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 45bc534200..2608fab5de 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -227,6 +227,34 @@ class MediaRepository(object): else: respond_404(request) + @defer.inlineCallbacks + def get_remote_media_info(self, server_name, media_id): + """Gets the media info associated with the remote file, downloading + if necessary. + + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + + Returns: + Deferred[dict]: The media_info of the file + """ + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + with (yield self.remote_media_linearizer.queue(key)): + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # Ensure we actually use the responder so that it releases resources + if responder: + with responder: + pass + + defer.returnValue(media_info) + @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): """Looks for media in local cache, if not there then attempt to diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 835540c3dd..70cea7782e 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -165,7 +165,7 @@ class ThumbnailResource(Resource): def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, desired_width, desired_height, desired_method, desired_type): - media_info = yield self.media_repo.get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -216,7 +216,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media(server_name, media_id) + yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, -- cgit 1.4.1 From c5b589f2e8205ca0253534cf5826b807253bb8ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:01:40 +0000 Subject: Log when we respond with 404 --- synapse/rest/media/v1/media_repository.py | 1 + synapse/rest/media/v1/thumbnail_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 2608fab5de..b12fabd943 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,6 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: + logger.info("Media is quarentined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70cea7782e..b8c38eb319 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,6 +85,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -111,6 +112,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Couldn't find any generated thumbnails") respond_404(request) @defer.inlineCallbacks @@ -120,6 +122,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -159,6 +162,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate local thumbnail") respond_404(request) @defer.inlineCallbacks @@ -197,7 +201,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a remote thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -208,6 +212,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate remote thumbnail") respond_404(request) @defer.inlineCallbacks @@ -241,6 +246,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Failed to find any generated thumbnails") respond_404(request) def _select_thumbnail(self, desired_width, desired_height, desired_method, -- cgit 1.4.1 From 9795b9ebb11953747e6362fe123725326885667b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:02:06 +0000 Subject: Correctly use server_name/file_id when generating/fetching remote thumbnails --- synapse/rest/media/v1/media_repository.py | 7 +++++-- synapse/rest/media/v1/thumbnail_resource.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index b12fabd943..e82dcfdc2c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -539,7 +539,10 @@ class MediaRepository(object): Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - file_id = media_info.get("filesystem_id") + if server_name: + file_id = media_info["filesystem_id"] + else: + file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return @@ -597,7 +600,7 @@ class MediaRepository(object): try: file_info = FileInfo( server_name=server_name, - file_id=media_id, + file_id=file_id, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index b8c38eb319..e20d6f10b0 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -185,7 +185,7 @@ class ThumbnailResource(Resource): if t_w and t_h and t_method and t_type: file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=info["thumbnail_width"], thumbnail_height=info["thumbnail_height"], @@ -221,7 +221,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media_info(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -232,7 +232,7 @@ class ThumbnailResource(Resource): width, height, method, m_type, thumbnail_infos ) file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=thumbnail_info["thumbnail_width"], thumbnail_height=thumbnail_info["thumbnail_height"], -- cgit 1.4.1 From 307f88dfb6c157ee2bda6f9b8b3dee82cad490aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 13:53:43 +0000 Subject: Fix up log lines --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e82dcfdc2c..d191f9d2f5 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,7 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: - logger.info("Media is quarentined") + logger.info("Media is quarantined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index e20d6f10b0..1451f6f106 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,7 +85,9 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info: + logger.info("Media is quarantined") + respond_404(request) return @@ -122,7 +124,8 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -152,7 +155,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( @@ -162,7 +165,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate local thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks @@ -201,7 +204,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a remote thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -212,7 +215,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate remote thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks -- cgit 1.4.1 From 5dfc83704b9f338c975a03bad7854218658b3a80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 14:32:56 +0000 Subject: Fix typo --- synapse/rest/media/v1/thumbnail_resource.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 1451f6f106..8c9653843b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -84,10 +84,11 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info: - logger.info("Media is quarantined") - + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -123,9 +124,11 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info["quarantined_by"]: - logger.info("Media is quarantined") + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return -- cgit 1.4.1 From 0a90d9ede4a39d720afd131866f98b51aa591cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:03:05 +0000 Subject: Move setting of file_id up to caller --- synapse/rest/media/v1/media_repository.py | 21 ++++++++++----------- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d191f9d2f5..35c1dcbc97 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +413,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, media_info + server_name, media_id, file_id, media_info, ) defer.returnValue(media_info) @@ -525,24 +525,23 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + url_cache=False): """Generate and store thumbnails for an image. Args: - server_name(str|None): The server name if remote media, else None if local - media_id(str) - media_info(dict) - url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + server_name (str|None): The server name if remote media, else None if local + media_id (str): The media ID of the content. (This is the same as + the file_id for local content) + file_id (str): Local file ID + media_info (dict) + url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - if server_name: - file_id = media_info["filesystem_id"] - else: - file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f3dbbb3fec..5ddf581bd2 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -185,8 +185,9 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): + file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, media_info['filesystem_id'], media_info, url_cache=True, + None, file_id, file_id, media_info, url_cache=True, ) og = { @@ -231,8 +232,9 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, image_info['filesystem_id'], image_info, url_cache=True, + None, file_id, file_id, image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] -- cgit 1.4.1 From 6368e5c0ab39e43ee6950ba94a826c44db7d43f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:17:38 +0000 Subject: Change _generate_thumbnails to take media_type --- synapse/rest/media/v1/media_repository.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 35c1dcbc97..e08cf8af46 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,9 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_id, media_info) + yield self._generate_thumbnails( + None, media_id, media_id, media_info["media_type"], + ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +415,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info, + server_name, media_id, file_id, media_info["media_type"], ) defer.returnValue(media_info) @@ -525,7 +527,7 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + def _generate_thumbnails(self, server_name, media_id, file_id, media_type, url_cache=False): """Generate and store thumbnails for an image. @@ -534,14 +536,13 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_info (dict) + media_type (str) url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ - media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5ddf581bd2..981f01e417 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -187,7 +187,8 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info, url_cache=True, + None, file_id, file_id, media_info["media_type"], + url_cache=True, ) og = { @@ -234,7 +235,8 @@ class PreviewUrlResource(Resource): # TODO: make sure we don't choke on white-on-transparent images file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info, url_cache=True, + None, file_id, file_id, image_info["media_type"], + url_cache=True, ) if dims: og["og:image:width"] = dims['width'] -- cgit 1.4.1 From d863f68cab9665335b1657feeab5d00724dddd95 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:24:15 +0000 Subject: Use local vars --- synapse/rest/media/v1/media_repository.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e08cf8af46..22f86781f7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -146,13 +146,9 @@ class MediaRepository(object): media_length=content_length, user_id=auth_user, ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } yield self._generate_thumbnails( - None, media_id, media_id, media_info["media_type"], + None, media_id, media_id, media_type, ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -415,7 +411,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info["media_type"], + server_name, media_id, file_id, media_type, ) defer.returnValue(media_info) -- cgit 1.4.1 From d728c47142019da2896bba7a84ac92e9959fd7af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 10:06:14 +0000 Subject: Add docstring --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 22f86781f7..97c82c150e 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -532,7 +532,7 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_type (str) + media_type (str): The content type of the file url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer -- cgit 1.4.1 From 05f98a22249974ce40a461d12da93af0bc624319 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 16:42:43 +0000 Subject: Keep track of last access time for local media --- synapse/rest/media/v1/media_repository.py | 32 +++++++++++++++++----- synapse/storage/media_repository.py | 23 ++++++++++++++-- synapse/storage/prepare_database.py | 2 +- .../storage/schema/delta/47/last_access_media.sql | 19 +++++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 synapse/storage/schema/delta/47/last_access_media.sql (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..b2c76440b7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -53,7 +53,7 @@ import urlparse logger = logging.getLogger(__name__) -UPDATE_RECENTLY_ACCESSED_REMOTES_TS = 60 * 1000 +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 class MediaRepository(object): @@ -75,6 +75,7 @@ class MediaRepository(object): self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes = set() + self.recently_accessed_locals = set() # List of StorageProviders where we should search for media and # potentially upload to. @@ -99,19 +100,34 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed_remotes, - UPDATE_RECENTLY_ACCESSED_REMOTES_TS + self._update_recently_accessed, + UPDATE_RECENTLY_ACCESSED_TS, ) @defer.inlineCallbacks - def _update_recently_accessed_remotes(self): - media = self.recently_accessed_remotes + def _update_recently_accessed(self): + remote_media = self.recently_accessed_remotes self.recently_accessed_remotes = set() + local_media = self.recently_accessed_locals + self.recently_accessed_locals = set() + yield self.store.update_cached_last_access_time( - media, self.clock.time_msec() + local_media, remote_media, self.clock.time_msec() ) + def mark_recently_accessed(self, server_name, media_id): + """Mark the given media as recently accessed. + + Args: + server_name (str|None): Origin server of media, or None if local + media_id (str): The media ID of the content + """ + if server_name: + self.recently_accessed_remotes.add((server_name, media_id)) + else: + self.recently_accessed_locals.add(media_id) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -173,6 +189,8 @@ class MediaRepository(object): respond_404(request) return + self.mark_recently_accessed(None, media_id) + media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] @@ -204,7 +222,7 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ - self.recently_accessed_remotes.add((server_name, media_id)) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote # media multiple times concurrently diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6ebc372498..e6cdbb0545 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -173,7 +173,14 @@ class MediaRepositoryStore(BackgroundUpdateStore): desc="store_cached_remote_media", ) - def update_cached_last_access_time(self, origin_id_tuples, time_ts): + def update_cached_last_access_time(self, local_media, remote_media, time_ms): + """Updates the last access time of the given media + + Args: + local_media (iterable[str]): Set of media_ids + remote_media (iterable[(str, str)]): Set of (server_name, media_id) + time_ms: Current time in milliseconds + """ def update_cache_txn(txn): sql = ( "UPDATE remote_media_cache SET last_access_ts = ?" @@ -181,8 +188,18 @@ class MediaRepositoryStore(BackgroundUpdateStore): ) txn.executemany(sql, ( - (time_ts, media_origin, media_id) - for media_origin, media_id in origin_id_tuples + (time_ms, media_origin, media_id) + for media_origin, media_id in remote_media + )) + + sql = ( + "UPDATE local_media_repository SET last_access_ts = ?" + " WHERE media_id = ?" + ) + + txn.executemany(sql, ( + (time_ms, media_id) + for media_id in local_media )) return self.runInteraction("update_cached_last_access_time", update_cache_txn) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d1691bbac2..c845a0cec5 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 46 +SCHEMA_VERSION = 47 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql new file mode 100644 index 0000000000..bc754ac861 --- /dev/null +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- drop the unique constraint on deleted_pushers so that we can just insert +-- into it rather than upserting. + +ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; -- cgit 1.4.1 From 300edc23482fbc637a64a9e1cc1235a1fa7f9562 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 11:53:04 +0000 Subject: Update last access time when thumbnails are viewed --- synapse/rest/media/v1/thumbnail_resource.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843b..c09f2dec41 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,6 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(server_name, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -78,6 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(None, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, -- cgit 1.4.1 From 0af5dc63a8d180a2b610c14ce415fdf9be96e2ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 15:44:08 +0000 Subject: Make storage providers more configurable --- synapse/config/repository.py | 83 +++++++++++++++++++++++++++---- synapse/rest/media/v1/media_repository.py | 18 +++---- synapse/rest/media/v1/storage_provider.py | 28 ++++++++--- 3 files changed, 98 insertions(+), 31 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 6baa474931..81db0193fb 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -16,6 +16,8 @@ from ._base import Config, ConfigError from collections import namedtuple +from synapse.util.module_loader import load_module + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." @@ -36,6 +38,10 @@ ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) +MediaStorageProviderConfig = namedtuple( + "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) +) + def parse_thumbnail_requirements(thumbnail_sizes): """ Takes a list of dictionaries with "width", "height", and "method" keys @@ -73,16 +79,65 @@ class ContentRepositoryConfig(Config): self.media_store_path = self.ensure_directory(config["media_store_path"]) - self.backup_media_store_path = config.get("backup_media_store_path") - if self.backup_media_store_path: - self.backup_media_store_path = self.ensure_directory( + backup_media_store_path = config.get("backup_media_store_path") + if backup_media_store_path: + backup_media_store_path = self.ensure_directory( self.backup_media_store_path ) - self.synchronous_backup_media_store = config.get( + synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False ) + storage_providers = config.get("media_storage_providers", []) + + if backup_media_store_path: + if storage_providers: + raise ConfigError( + "Cannot use both 'backup_media_store_path' and 'storage_providers'" + ) + + storage_providers = [{ + "module": "file_system", + "store_local": True, + "store_synchronous": synchronous_backup_media_store, + "store_remote": True, + "config": { + "directory": backup_media_store_path, + } + }] + + # This is a list of config that can be used to create the storage + # providers. The entries are tuples of (Class, class_config, + # MediaStorageProviderConfig), where Class is the class of the provider, + # the class_config the config to pass to it, and + # MediaStorageProviderConfig are options for StorageProviderWrapper. + # + # We don't create the storage providers here as not all workers need + # them to be started. + self.media_storage_providers = [] + + for provider_config in storage_providers: + # We special case the module "file_system" so as not to need to + # expose FileStorageProviderBackend + if provider_config["module"] == "file_system": + provider_config["module"] = ( + "synapse.rest.media.v1.storage_provider" + ".FileStorageProviderBackend" + ) + + provider_class, parsed_config = load_module(provider_config) + + wrapper_config = MediaStorageProviderConfig( + provider_config.get("store_local", False), + provider_config.get("store_remote", False), + provider_config.get("store_synchronous", False), + ) + + self.media_storage_providers.append( + (provider_class, provider_config, wrapper_config,) + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -127,13 +182,19 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" - # A secondary directory where uploaded images and attachments are - # stored as a backup. - # backup_media_store_path: "%(media_store)s" - - # Whether to wait for successful write to backup media store before - # returning successfully. - # synchronous_backup_media_store: false + # Media storage providers allow media to be stored in different + # locations. + # media_storage_providers: + # - module: file_system + # # Whether to write new local files. + # store_local: false + # # Whether to write new remote media + # store_remote: false + # # Whether to block upload requests waiting for write to this + # # provider to complete + # store_synchronous: false + # config: + # directory: /mnt/some/other/directory # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..4163c9e416 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,9 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer -from .storage_provider import ( - StorageProviderWrapper, FileStorageProviderBackend, -) +from .storage_provider import StorageProviderWrapper from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -80,17 +78,13 @@ class MediaRepository(object): # potentially upload to. storage_providers = [] - # TODO: Move this into config and allow other storage providers to be - # defined. - if hs.config.backup_media_store_path: - backend = FileStorageProviderBackend( - self.primary_base_path, hs.config.backup_media_store_path, - ) + for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, - store=True, - store_synchronous=hs.config.synchronous_backup_media_store, - store_remote=True, + store_local=wrapper_config.store_local, + store_remote=wrapper_config.store_remote, + store_synchronous=wrapper_config.store_synchronous, ) storage_providers.append(provider) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 2ad602e101..0074d2d426 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -17,6 +17,7 @@ from twisted.internet import defer, threads from .media_storage import FileResponder +from synapse.config._base import Config from synapse.util.logcontext import preserve_fn import logging @@ -64,14 +65,14 @@ class StorageProviderWrapper(StorageProvider): Args: backend (StorageProvider) - store (bool): Whether to store new files or not. + store_local (bool): Whether to store new local files or not. store_synchronous (bool): Whether to wait for file to be successfully uploaded, or todo the upload in the backgroud. store_remote (bool): Whether remote media should be uploaded """ - def __init__(self, backend, store, store_synchronous, store_remote): + def __init__(self, backend, store_local, store_synchronous, store_remote): self.backend = backend - self.store = store + self.store_local = store_local self.store_synchronous = store_synchronous self.store_remote = store_remote @@ -97,13 +98,13 @@ class FileStorageProviderBackend(StorageProvider): """A storage provider that stores files in a directory on a filesystem. Args: - cache_directory (str): Base path of the local media repository - base_directory (str): Base path to store new files + hs (HomeServer) + config: The config returned by `parse_config`, i """ - def __init__(self, cache_directory, base_directory): - self.cache_directory = cache_directory - self.base_directory = base_directory + def __init__(self, hs, config): + self.cache_directory = hs.config.media_store_path + self.base_directory = config def store_file(self, path, file_info): """See StorageProvider.store_file""" @@ -125,3 +126,14 @@ class FileStorageProviderBackend(StorageProvider): backup_fname = os.path.join(self.base_directory, path) if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + + def parse_config(config): + """Called on startup to parse config supplied. This should parse + the config and raise if there is a problem. + + The returned value is passed into the constructor. + + In this case we only care about a single param, the directory, so lets + just pull that out. + """ + return Config.ensure_directory(config["directory"]) -- cgit 1.4.1 From 9a89dae8c53140c74f968538f72a4045b6aca90d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 15:06:24 +0000 Subject: Fix typo in thumbnail resource causing access times to be incorrect --- synapse/rest/media/v1/thumbnail_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index c09f2dec41..12e84a2b7c 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,7 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(server_name, media_id) + self.media_repo.mark_recently_accessed(None, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -79,7 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(None, media_id) + self.media_repo.mark_recently_accessed(server_name, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, -- cgit 1.4.1 From aae77da73ffc89c31d0b17fa8ce5d8b58605de63 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:20 +0000 Subject: Fixup comments --- synapse/config/repository.py | 6 +++++- synapse/rest/media/v1/storage_provider.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 81db0193fb..8bbc16ba40 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -39,7 +39,11 @@ ThumbnailRequirement = namedtuple( ) MediaStorageProviderConfig = namedtuple( - "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) + "MediaStorageProviderConfig", ( + "store_local", # Whether to store newly uploaded local files + "store_remote", # Whether to store newly downloaded remote files + "store_synchronous", # Whether to wait for successful storage for local uploads + ), ) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0074d2d426..9bf88f01f9 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -99,7 +99,7 @@ class FileStorageProviderBackend(StorageProvider): Args: hs (HomeServer) - config: The config returned by `parse_config`, i + config: The config returned by `parse_config`. """ def __init__(self, hs, config): @@ -133,7 +133,7 @@ class FileStorageProviderBackend(StorageProvider): The returned value is passed into the constructor. - In this case we only care about a single param, the directory, so lets + In this case we only care about a single param, the directory, so let's just pull that out. """ return Config.ensure_directory(config["directory"]) -- cgit 1.4.1 From 3fe2bae857cda58055c32329e15d3fe9828cf8f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:45 +0000 Subject: Missing staticmethod --- synapse/rest/media/v1/storage_provider.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 9bf88f01f9..0a84aba861 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -127,6 +127,7 @@ class FileStorageProviderBackend(StorageProvider): if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + @staticmethod def parse_config(config): """Called on startup to parse config supplied. This should parse the config and raise if there is a problem. -- cgit 1.4.1 From cd871a305708bca1acb5289f93cb62c15ba438da Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 18:37:59 +0000 Subject: Fix storage provider bug introduced when renamed to store_local --- synapse/rest/media/v1/storage_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0a84aba861..c188192f2b 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -77,7 +77,7 @@ class StorageProviderWrapper(StorageProvider): self.store_remote = store_remote def store_file(self, path, file_info): - if not self.store: + if not file_info.server_name and not self.store_local: return defer.succeed(None) if file_info.server_name and not self.store_remote: -- cgit 1.4.1 From ad7ec63d08c9d814766ea4764e187bc7ba5589d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 18:29:39 +0000 Subject: Use the right path for url_preview thumbnails This was introduced by #2627: we were overwriting the original media for url previews with the thumbnails :/ (fixes https://github.com/vector-im/riot-web/issues/6012, hopefully) --- synapse/rest/media/v1/media_storage.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578e..041ae396cd 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -164,6 +164,14 @@ class MediaStorage(object): str """ if file_info.url_cache: + if file_info.thumbnail: + return self.filepaths.url_cache_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method, + ) return self.filepaths.url_cache_filepath_rel(file_info.file_id) if file_info.server_name: -- cgit 1.4.1 From ab9f844aaf3662a64dbc4c56077e9fa37bc7d5d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 22 Jan 2018 19:11:18 +0100 Subject: Add federation_domain_whitelist option (#2820) Add federation_domain_whitelist gives a way to restrict which domains your HS is allowed to federate with. useful mainly for gracefully preventing a private but internet-connected HS from trying to federate to the wider public Matrix network --- synapse/api/errors.py | 26 ++++++++++++++++++++++++++ synapse/config/server.py | 22 ++++++++++++++++++++++ synapse/federation/federation_client.py | 5 ++++- synapse/federation/transaction_queue.py | 4 +++- synapse/federation/transport/client.py | 3 +++ synapse/federation/transport/server.py | 9 ++++++++- synapse/handlers/device.py | 4 ++++ synapse/handlers/e2e_keys.py | 8 +++++++- synapse/handlers/federation.py | 4 ++++ synapse/http/matrixfederationclient.py | 28 +++++++++++++++++++++++++++- synapse/rest/key/v2/remote_key_resource.py | 8 ++++++++ synapse/rest/media/v1/media_repository.py | 19 +++++++++++++++++-- synapse/util/retryutils.py | 12 ++++++++++++ tests/utils.py | 1 + 14 files changed, 146 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 46b0d7b34c..aa15f73f36 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -141,6 +141,32 @@ class RegistrationError(SynapseError): pass +class FederationDeniedError(SynapseError): + """An error raised when the server tries to federate with a server which + is not on its federation whitelist. + + Attributes: + destination (str): The destination which has been denied + """ + + def __init__(self, destination): + """Raised by federation client or server to indicate that we are + are deliberately not attempting to contact a given server because it is + not on our federation whitelist. + + Args: + destination (str): the domain in question + """ + + self.destination = destination + + super(FederationDeniedError, self).__init__( + code=403, + msg="Federation denied with %s." % (self.destination,), + errcode=Codes.FORBIDDEN, + ) + + class InteractiveAuthIncompleteError(Exception): """An error raised when UI auth is not yet complete diff --git a/synapse/config/server.py b/synapse/config/server.py index 436dd8a6fe..8f0b6d1f28 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -55,6 +55,17 @@ class ServerConfig(Config): "block_non_admin_invites", False, ) + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None + federation_domain_whitelist = config.get( + "federation_domain_whitelist", None + ) + # turn the whitelist into a hash for speed of lookup + if federation_domain_whitelist is not None: + self.federation_domain_whitelist = {} + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + if self.public_baseurl is not None: if self.public_baseurl[-1] != '/': self.public_baseurl += '/' @@ -210,6 +221,17 @@ class ServerConfig(Config): # (except those sent by local server admins). The default is False. # block_non_admin_invites: True + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + # federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + # List of ports that Synapse should listen on, their purpose and their # configuration. listeners: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b1fe03f702..813907f7f2 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from synapse.api.errors import ( - CodeMessageException, HttpResponseException, SynapseError, + CodeMessageException, HttpResponseException, SynapseError, FederationDeniedError ) from synapse.events import builder from synapse.federation.federation_base import ( @@ -266,6 +266,9 @@ class FederationClient(FederationBase): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e.message) + continue except Exception as e: pdu_attempts[destination] = now diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 9d39f46583..a141ec9953 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -19,7 +19,7 @@ from twisted.internet import defer from .persistence import TransactionActions from .units import Transaction, Edu -from synapse.api.errors import HttpResponseException +from synapse.api.errors import HttpResponseException, FederationDeniedError from synapse.util import logcontext, PreserveLoggingContext from synapse.util.async import run_on_reactor from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter @@ -490,6 +490,8 @@ class TransactionQueue(object): (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) + except FederationDeniedError as e: + logger.info(e) except Exception as e: logger.warn( "TX [%s] Failed to send transaction: %s", diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 1f3ce238f6..5488e82985 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -212,6 +212,9 @@ class TransportLayerClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if the remote destination + is not in our federation whitelist """ valid_memberships = {Membership.JOIN, Membership.LEAVE} if membership not in valid_memberships: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2b02b021ec..06c16ba4fa 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, SynapseError, FederationDeniedError from synapse.http.server import JsonResource from synapse.http.servlet import ( parse_json_object_from_request, parse_integer_from_args, parse_string_from_args, @@ -81,6 +81,7 @@ class Authenticator(object): self.keyring = hs.get_keyring() self.server_name = hs.hostname self.store = hs.get_datastore() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist # A method just so we can pass 'self' as the authenticator to the Servlets @defer.inlineCallbacks @@ -92,6 +93,12 @@ class Authenticator(object): "signatures": {}, } + if ( + self.federation_domain_whitelist is not None and + self.server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(self.server_name) + if content is not None: json_request["content"] = content diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 2152efc692..0e83453851 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.api import errors from synapse.api.constants import EventTypes +from synapse.api.errors import FederationDeniedError from synapse.util import stringutils from synapse.util.async import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -513,6 +514,9 @@ class DeviceListEduUpdater(object): # This makes it more likely that the device lists will # eventually become consistent. return + except FederationDeniedError as e: + logger.info(e) + return except Exception: # TODO: Remember that we are now out of sync and try again # later diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 5af8abf66b..9aa95f89e6 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -19,7 +19,9 @@ import logging from canonicaljson import encode_canonical_json from twisted.internet import defer -from synapse.api.errors import SynapseError, CodeMessageException +from synapse.api.errors import ( + SynapseError, CodeMessageException, FederationDeniedError, +) from synapse.types import get_domain_from_id, UserID from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -140,6 +142,10 @@ class E2eKeysHandler(object): failures[destination] = { "status": 503, "message": "Not ready for retry", } + except FederationDeniedError as e: + failures[destination] = { + "status": 403, "message": "Federation Denied", + } except Exception as e: # include ConnectionRefused and other errors failures[destination] = { diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ac70730885..677532c87b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -22,6 +22,7 @@ from ._base import BaseHandler from synapse.api.errors import ( AuthError, FederationError, StoreError, CodeMessageException, SynapseError, + FederationDeniedError, ) from synapse.api.constants import EventTypes, Membership, RejectedReason from synapse.events.validator import EventValidator @@ -782,6 +783,9 @@ class FederationHandler(BaseHandler): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e) + continue except Exception as e: logger.exception( "Failed to backfill from %s because %s", diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 833496b72d..9145405cb0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -27,7 +27,7 @@ import synapse.metrics from canonicaljson import encode_canonical_json from synapse.api.errors import ( - SynapseError, Codes, HttpResponseException, + SynapseError, Codes, HttpResponseException, FederationDeniedError, ) from signedjson.sign import sign_json @@ -123,11 +123,22 @@ class MatrixFederationHttpClient(object): Fails with ``HTTPRequestException``: if we get an HTTP response code >= 300. + Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist + (May also fail with plenty of other Exceptions for things like DNS failures, connection failures, SSL failures.) """ + if ( + self.hs.config.federation_domain_whitelist and + destination not in self.hs.config.federation_domain_whitelist + ): + raise FederationDeniedError(destination) + limiter = yield synapse.util.retryutils.get_retry_limiter( destination, self.clock, @@ -308,6 +319,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ if not json_data_callback: @@ -368,6 +382,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ def body_callback(method, url_bytes, headers_dict): @@ -422,6 +439,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ logger.debug("get_json args: %s", args) @@ -475,6 +495,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ response = yield self._request( @@ -518,6 +541,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ encoded_args = {} diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index cc2842aa72..17e6079cba 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -93,6 +93,7 @@ class RemoteKey(Resource): self.store = hs.get_datastore() self.version_string = hs.version_string self.clock = hs.get_clock() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist def render_GET(self, request): self.async_render_GET(request) @@ -137,6 +138,13 @@ class RemoteKey(Resource): logger.info("Handling query for keys %r", query) store_queries = [] for server_name, key_ids in query.items(): + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + logger.debug("Federation denied with %s", server_name) + continue + if not key_ids: key_ids = (None,) for key_id in key_ids: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 4f56bcf577..485db8577a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -32,8 +32,9 @@ from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string -from synapse.api.errors import SynapseError, HttpResponseException, \ - NotFoundError +from synapse.api.errors import ( + SynapseError, HttpResponseException, NotFoundError, FederationDeniedError, +) from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii @@ -75,6 +76,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() self.recently_accessed_locals = set() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist + # List of StorageProviders where we should search for media and # potentially upload to. storage_providers = [] @@ -216,6 +219,12 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote @@ -250,6 +259,12 @@ class MediaRepository(object): Returns: Deferred[dict]: The media_info of the file """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + # We linearize here to ensure that we don't try and download remote # media multiple times concurrently key = (server_name, media_id) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 1adedbb361..47b0bb5eb3 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -26,6 +26,18 @@ logger = logging.getLogger(__name__) class NotRetryingDestination(Exception): def __init__(self, retry_last_ts, retry_interval, destination): + """Raised by the limiter (and federation client) to indicate that we are + are deliberately not attempting to contact a given server. + + Args: + retry_last_ts (int): the unix ts in milliseconds of our last attempt + to contact the server. 0 indicates that the last attempt was + successful or that we've never actually attempted to connect. + retry_interval (int): the time in milliseconds to wait until the next + attempt. + destination (str): the domain in question + """ + msg = "Not retrying server %s." % (destination,) super(NotRetryingDestination, self).__init__(msg) diff --git a/tests/utils.py b/tests/utils.py index 44e5f75093..3116047892 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -57,6 +57,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.worker_app = None config.email_enable_notifs = False config.block_non_admin_invites = False + config.federation_domain_whitelist = None # disable user directory updates, because they get done in the # background, which upsets the test runner. -- cgit 1.4.1