From bf4fb1fb400daad23702bc0b3231ec069d68e87e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:20:59 +0100 Subject: Basic implementation of backup media store --- synapse/config/repository.py | 18 +++ synapse/rest/media/v1/media_repository.py | 221 ++++++++++++++---------------- synapse/rest/media/v1/thumbnailer.py | 16 +-- synapse/rest/media/v1/upload_resource.py | 2 +- 4 files changed, 131 insertions(+), 126 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2c6f57168e..e3c83d56fa 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -70,7 +70,17 @@ class ContentRepositoryConfig(Config): self.max_upload_size = self.parse_size(config["max_upload_size"]) self.max_image_pixels = self.parse_size(config["max_image_pixels"]) self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.media_store_path = self.ensure_directory(config["media_store_path"]) + + self.backup_media_store_path = config.get("backup_media_store_path") + if self.backup_media_store_path: + self.ensure_directory(self.backup_media_store_path) + + self.synchronous_backup_media_store = config.get( + "synchronous_backup_media_store", False + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -115,6 +125,14 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" + # A secondary directory where uploaded images and attachments are + # stored as a backup. + # backup_media_store_path: "%(media_store)s" + + # Whether to wait for successful write to backup media store before + # returning successfully. + # synchronous_backup_media_store: false + # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0ea1248ce6..3b442cc16b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.logcontext import preserve_context_over_fn, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -59,7 +59,12 @@ class MediaRepository(object): self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.backup_filepaths = None + if hs.config.backup_media_store_path: + self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -87,18 +92,43 @@ class MediaRepository(object): if not os.path.exists(dirname): os.makedirs(dirname) + @defer.inlineCallbacks + def _write_to_file(self, source, file_name_func): + def write_file_thread(file_name): + source.seek(0) # Ensure we read from the start of the file + with open(file_name, "wb") as f: + shutil.copyfileobj(source, f) + + fname = file_name_func(self.filepaths) + self._makedirs(fname) + + # Write to the main repository + yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + + # Write to backup repository + if self.backup_filepaths: + backup_fname = file_name_func(backup_filepaths) + self._makedirs(backup_fname) + + # We can either wait for successful writing to the backup repository + # or write in the background and immediately return + if hs.config.synchronous_backup_media_store: + yield preserve_context_over_fn( + threads.deferToThread, write_file_thread, backup_fname, + ) + else: + preserve_fn(threads.deferToThread)(write_file, backup_fname) + + defer.returnValue(fname) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(content) + fname = yield self._write_to_file( + content, lambda f: f.local_media_filepath(media_id) + ) logger.info("Stored local media in file %r", fname) @@ -253,9 +283,8 @@ class MediaRepository(object): def _get_thumbnail_requirements(self, media_type): return self.thumbnail_requirements.get(media_type, ()) - def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type): - thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -267,36 +296,40 @@ class MediaRepository(object): return if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) elif t_method == "scale": t_width, t_height = thumbnailer.aspect(t_width, t_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.scale(t_width, t_height, t_type) else: - t_len = None + t_byte_source = None - return t_len + return t_byte_source @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.local_media_filepath(media_id) - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -306,21 +339,25 @@ class MediaRepository(object): t_width, t_height, t_method, t_type): input_path = self.filepaths.remote_media_filepath(server_name, file_id) - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -351,59 +388,32 @@ class MediaRepository(object): local_thumbnails = [] def generate_thumbnails(): - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, + ) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len + r_width, r_height, r_method, r_type, t_byte_source )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for l in local_thumbnails: - yield self.store.store_local_thumbnail(*l) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + if url_cache: + path_name_func = lambda f: f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + path_name_func = lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) defer.returnValue({ "width": m_width, @@ -433,51 +443,32 @@ class MediaRepository(object): ) return - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) + + remote_thumbnails.append(( + r_width, r_height, r_method, r_type, t_byte_source + )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + path_name_func = lambda f: f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) + defer.returnValue({ "width": m_width, "height": m_height, diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 3868d4f65f..60498b08aa 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -50,12 +50,12 @@ class Thumbnailer(object): else: return ((max_height * self.width) // self.height, max_height) - def scale(self, output_path, width, height, output_type): + def scale(self, width, height, output_type): """Rescales the image to the given dimensions""" scaled = self.image.resize((width, height), Image.ANTIALIAS) - return self.save_image(scaled, output_type, output_path) + return self._encode_image(scaled, output_type) - def crop(self, output_path, width, height, output_type): + def crop(self, width, height, output_type): """Rescales and crops the image to the given dimensions preserving aspect:: (w_in / h_in) = (w_scaled / h_scaled) @@ -82,13 +82,9 @@ class Thumbnailer(object): crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self.save_image(cropped, output_type, output_path) + return self._encode_image(cropped, output_type) - def save_image(self, output_image, output_type, output_path): + def _encode_image(self, output_image, output_type): output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_bytes = output_bytes_io.getvalue() - with open(output_path, "wb") as output_file: - output_file.write(output_bytes) - logger.info("Stored thumbnail in file %r", output_path) - return len(output_bytes) + return output_bytes_io diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 4ab33f73bf..f6f498cdc5 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -93,7 +93,7 @@ class UploadResource(Resource): # TODO(markjh): parse content-dispostion content_uri = yield self.media_repo.create_content( - media_type, upload_name, request.content.read(), + media_type, upload_name, request.content, content_length, requester.user ) -- cgit 1.4.1