From 9ccb4226ba0824a1468c4e8f0abe91aca3381862 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:18:06 +0100 Subject: Delete expired url cache data --- synapse/rest/media/v1/filepath.py | 43 ++++++++++++- synapse/rest/media/v1/preview_url_resource.py | 90 ++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 4 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index d92b7ff337..c5d43209f9 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -73,19 +73,58 @@ class MediaFilePaths(object): ) def url_cache_filepath(self, media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf return os.path.join( self.base_path, "url_cache", - media_id[0:2], media_id[2:4], media_id[4:] + media_id[:10], media_id[11:] ) + def url_cache_filepath_dirs_to_delete(self, media_id): + "The dirs to try and remove if we delete the media_id file" + return [ + os.path.join( + self.base_path, "url_cache", + media_id[:10], + ), + ] + def url_cache_thumbnail(self, media_id, width, height, content_type, method): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) + return os.path.join( self.base_path, "url_cache_thumbnails", - media_id[0:2], media_id[2:4], media_id[4:], + media_id[:10], media_id[11:], file_name ) + + def url_cache_thumbnail_directory(self, media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ) + + def url_cache_thumbnail_dirs_to_delete(self, media_id): + "The dirs to try and remove if we delete the media_id thumbnails" + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + return [ + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ), + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], + ), + ] diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b81a336c5d..c5ba83ddfd 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -36,6 +36,9 @@ import cgi import ujson as json import urlparse import itertools +import datetime +import errno +import shutil import logging logger = logging.getLogger(__name__) @@ -70,6 +73,10 @@ class PreviewUrlResource(Resource): self.downloads = {} + self._cleaner_loop = self.clock.looping_call( + self._expire_url_cache_data, 30 * 10000 + ) + def render_GET(self, request): self._async_render_GET(request) return NOT_DONE_YET @@ -253,8 +260,7 @@ class PreviewUrlResource(Resource): # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? - # XXX: horrible duplication with base_resource's _download_remote_file() - file_id = random_string(24) + file_id = datetime.date.today().isoformat() + '_' + random_string(16) fname = self.filepaths.url_cache_filepath(file_id) self.media_repo._makedirs(fname) @@ -328,6 +334,86 @@ class PreviewUrlResource(Resource): "etag": headers["ETag"][0] if "ETag" in headers else None, }) + @defer.inlineCallbacks + def _expire_url_cache_data(self): + """Clean up expired url cache content, media and thumbnails. + """ + now = self.clock.time_msec() + + # First we delete expired url cache entries + media_ids = yield self.store.get_expired_url_cache(now) + + removed_media = [] + for media_id in media_ids: + fname = self.filepaths.url_cache_filepath(media_id) + try: + os.remove(fname) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + removed_media.append(media_id) + + try: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + yield self.store.delete_url_cache(removed_media) + + logger.info("Deleted %d entries from url cache", len(removed_media)) + + # Now we delete old images associated with the url cache. + # These may be cached for a bit on the client (i.e., they + # may have a room open with a preview url thing open). + # So we wait a couple of days before deleting, just in case. + expire_before = now - 2 * 24 * 60 * 60 * 1000 + yield self.store.get_url_cache_media_before(expire_before) + + removed_media = [] + for media_id in media_ids: + fname = self.filepaths.url_cache_filepath(media_id) + try: + os.remove(fname) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + try: + dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id) + try: + shutil.rmtree(thumbnail_dir) + except OSError as e: + # If the path doesn't exist, meh + if e.errno != errno.ENOENT: + logger.warn("Failed to remove media: %r: %s", media_id, e) + continue + + removed_media.append(media_id) + + try: + dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id) + for dir in dirs: + os.rmdir(dir) + except: + pass + + yield self.store.delete_url_cache_media(removed_media) + + logger.info("Deleted %d media from url cache", len(removed_media)) + def decode_and_calc_og(body, media_uri, request_encoding=None): from lxml import etree -- cgit 1.4.1 From ae79764fe55ab15156b4f28658326bd2c9c0b937 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:37:53 +0100 Subject: Change expires column to expires_ts --- synapse/rest/media/v1/preview_url_resource.py | 4 ++-- synapse/storage/media_repository.py | 14 +++++++------- .../storage/schema/delta/44/expire_url_cache.sql | 21 ++++++++++++++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c5ba83ddfd..6f896ffb53 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -137,7 +137,7 @@ class PreviewUrlResource(Resource): cache_result = yield self.store.get_url_cache(url, ts) if ( cache_result and - cache_result["download_ts"] + cache_result["expires"] > ts and + cache_result["expires_ts"] > ts and cache_result["response_code"] / 100 == 2 ): respond_with_json_bytes( @@ -246,7 +246,7 @@ class PreviewUrlResource(Resource): url, media_info["response_code"], media_info["etag"], - media_info["expires"], + media_info["expires"] + media_info["created_ts"], json.dumps(og), media_info["filesystem_id"], media_info["created_ts"], diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 5cca14ccb2..b8a0dd0762 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -62,7 +62,7 @@ class MediaRepositoryStore(SQLBaseStore): def get_url_cache_txn(txn): # get the most recently cached result (relative to the given ts) sql = ( - "SELECT response_code, etag, expires, og, media_id, download_ts" + "SELECT response_code, etag, expires_ts, og, media_id, download_ts" " FROM local_media_repository_url_cache" " WHERE url = ? AND download_ts <= ?" " ORDER BY download_ts DESC LIMIT 1" @@ -74,7 +74,7 @@ class MediaRepositoryStore(SQLBaseStore): # ...or if we've requested a timestamp older than the oldest # copy in the cache, return the oldest copy (if any) sql = ( - "SELECT response_code, etag, expires, og, media_id, download_ts" + "SELECT response_code, etag, expires_ts, og, media_id, download_ts" " FROM local_media_repository_url_cache" " WHERE url = ? AND download_ts > ?" " ORDER BY download_ts ASC LIMIT 1" @@ -86,14 +86,14 @@ class MediaRepositoryStore(SQLBaseStore): return None return dict(zip(( - 'response_code', 'etag', 'expires', 'og', 'media_id', 'download_ts' + 'response_code', 'etag', 'expires_ts', 'og', 'media_id', 'download_ts' ), row)) return self.runInteraction( "get_url_cache", get_url_cache_txn ) - def store_url_cache(self, url, response_code, etag, expires, og, media_id, + def store_url_cache(self, url, response_code, etag, expires_ts, og, media_id, download_ts): return self._simple_insert( "local_media_repository_url_cache", @@ -101,7 +101,7 @@ class MediaRepositoryStore(SQLBaseStore): "url": url, "response_code": response_code, "etag": etag, - "expires": expires, + "expires_ts": expires_ts, "og": og, "media_id": media_id, "download_ts": download_ts, @@ -242,8 +242,8 @@ class MediaRepositoryStore(SQLBaseStore): def get_expired_url_cache(self, now_ts): sql = ( "SELECT media_id FROM local_media_repository_url_cache" - " WHERE download_ts + expires < ?" - " ORDER BY download_ts + expires ASC" + " WHERE expires_ts < ?" + " ORDER BY expires_ts ASC" " LIMIT 100" ) diff --git a/synapse/storage/schema/delta/44/expire_url_cache.sql b/synapse/storage/schema/delta/44/expire_url_cache.sql index 997e790b6d..9475d53e84 100644 --- a/synapse/storage/schema/delta/44/expire_url_cache.sql +++ b/synapse/storage/schema/delta/44/expire_url_cache.sql @@ -14,4 +14,23 @@ */ CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL; -CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache((download_ts + expires)); + +-- we need to change `expires` to `expires_ts` so that we can index on it. SQLite doesn't support +-- indices on expressions until 3.9. +CREATE TABLE local_media_repository_url_cache_new( + url TEXT, + response_code INTEGER, + etag TEXT, + expires_ts BIGINT, + og TEXT, + media_id TEXT, + download_ts BIGINT +); + +INSERT INTO local_media_repository_url_cache_new + SELECT url, response_code, etag, expires + download_ts, og, media_id, download_ts FROM local_media_repository_url_cache; + +DROP TABLE local_media_repository_url_cache; +ALTER TABLE local_media_repository_url_cache_new RENAME TO local_media_repository_url_cache; + +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); -- cgit 1.4.1 From ace807908602cb955fc7a2cae63dc6e64bf90cc5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:52:51 +0100 Subject: Support new and old style media id formats --- synapse/rest/media/v1/filepath.py | 112 +++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 31 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index c5d43209f9..d5cec10127 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -14,6 +14,9 @@ # limitations under the License. import os +import re + +NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") class MediaFilePaths(object): @@ -73,21 +76,39 @@ class MediaFilePaths(object): ) def url_cache_filepath(self, media_id): - # Media id is of the form - # E.g.: 2017-09-28-fsdRDt24DS234dsf - return os.path.join( - self.base_path, "url_cache", - media_id[:10], media_id[11:] - ) + if NEW_FORMAT_ID_RE.match(media_id): + # Media id is of the form + # E.g.: 2017-09-28-fsdRDt24DS234dsf + return os.path.join( + self.base_path, "url_cache", + media_id[:10], media_id[11:] + ) + else: + return os.path.join( + self.base_path, "url_cache", + media_id[0:2], media_id[2:4], media_id[4:], + ) def url_cache_filepath_dirs_to_delete(self, media_id): "The dirs to try and remove if we delete the media_id file" - return [ - os.path.join( - self.base_path, "url_cache", - media_id[:10], - ), - ] + if NEW_FORMAT_ID_RE.match(media_id): + return [ + os.path.join( + self.base_path, "url_cache", + media_id[:10], + ), + ] + else: + return [ + os.path.join( + self.base_path, "url_cache", + media_id[0:2], media_id[2:4], + ), + os.path.join( + self.base_path, "url_cache", + media_id[0:2], + ), + ] def url_cache_thumbnail(self, media_id, width, height, content_type, method): @@ -99,32 +120,61 @@ class MediaFilePaths(object): width, height, top_level_type, sub_type, method ) - return os.path.join( - self.base_path, "url_cache_thumbnails", - media_id[:10], media_id[11:], - file_name - ) + if NEW_FORMAT_ID_RE.match(media_id): + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + file_name + ) + else: + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + file_name + ) def url_cache_thumbnail_directory(self, media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf - return os.path.join( - self.base_path, "url_cache_thumbnails", - media_id[:10], media_id[11:], - ) + if NEW_FORMAT_ID_RE.match(media_id): + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ) + else: + return os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + ) def url_cache_thumbnail_dirs_to_delete(self, media_id): "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf - return [ - os.path.join( - self.base_path, "url_cache_thumbnails", - media_id[:10], media_id[11:], - ), - os.path.join( - self.base_path, "url_cache_thumbnails", - media_id[:10], - ), - ] + if NEW_FORMAT_ID_RE.match(media_id): + return [ + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], media_id[11:], + ), + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[:10], + ), + ] + else: + return [ + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], media_id[2:4], media_id[4:], + ), + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], media_id[2:4], + ), + os.path.join( + self.base_path, "url_cache_thumbnails", + media_id[0:2], + ), + ] -- cgit 1.4.1 From 5f501ec7e2645abe232bd6bab407ac863e3250c2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 12:59:01 +0100 Subject: Fix typo in url cache expiry timer --- synapse/rest/media/v1/preview_url_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6f896ffb53..1616809e8f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -74,7 +74,7 @@ class PreviewUrlResource(Resource): self.downloads = {} self._cleaner_loop = self.clock.looping_call( - self._expire_url_cache_data, 30 * 10000 + self._expire_url_cache_data, 30 * 1000 ) def render_GET(self, request): -- cgit 1.4.1 From e1e7d76cf16858d998884f19b141f90a0415d297 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 13:55:29 +0100 Subject: Actually assign result to variable --- synapse/rest/media/v1/preview_url_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1616809e8f..0123369a7f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -372,7 +372,7 @@ class PreviewUrlResource(Resource): # may have a room open with a preview url thing open). # So we wait a couple of days before deleting, just in case. expire_before = now - 2 * 24 * 60 * 60 * 1000 - yield self.store.get_url_cache_media_before(expire_before) + media_ids = yield self.store.get_url_cache_media_before(expire_before) removed_media = [] for media_id in media_ids: -- cgit 1.4.1 From 7cc483aa0ef9e51bd3839768e44b449cf6d24136 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 13:56:53 +0100 Subject: Clear up expired url cache every 10s --- synapse/rest/media/v1/preview_url_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0123369a7f..2300c263e0 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -74,7 +74,7 @@ class PreviewUrlResource(Resource): self.downloads = {} self._cleaner_loop = self.clock.looping_call( - self._expire_url_cache_data, 30 * 1000 + self._expire_url_cache_data, 10 * 1000 ) def render_GET(self, request): -- cgit 1.4.1 From d5694ac5fa3266a777fa171f33bebc0d7477c12a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 28 Sep 2017 16:08:08 +0100 Subject: Only log if we've removed media --- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 2300c263e0..895b480d5c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -365,7 +365,8 @@ class PreviewUrlResource(Resource): yield self.store.delete_url_cache(removed_media) - logger.info("Deleted %d entries from url cache", len(removed_media)) + if removed_media: + logger.info("Deleted %d entries from url cache", len(removed_media)) # Now we delete old images associated with the url cache. # These may be cached for a bit on the client (i.e., they @@ -412,7 +413,8 @@ class PreviewUrlResource(Resource): yield self.store.delete_url_cache_media(removed_media) - logger.info("Deleted %d media from url cache", len(removed_media)) + if removed_media: + logger.info("Deleted %d media from url cache", len(removed_media)) def decode_and_calc_og(body, media_uri, request_encoding=None): -- cgit 1.4.1 From bf4fb1fb400daad23702bc0b3231ec069d68e87e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:20:59 +0100 Subject: Basic implementation of backup media store --- synapse/config/repository.py | 18 +++ synapse/rest/media/v1/media_repository.py | 221 ++++++++++++++---------------- synapse/rest/media/v1/thumbnailer.py | 16 +-- synapse/rest/media/v1/upload_resource.py | 2 +- 4 files changed, 131 insertions(+), 126 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2c6f57168e..e3c83d56fa 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -70,7 +70,17 @@ class ContentRepositoryConfig(Config): self.max_upload_size = self.parse_size(config["max_upload_size"]) self.max_image_pixels = self.parse_size(config["max_image_pixels"]) self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.media_store_path = self.ensure_directory(config["media_store_path"]) + + self.backup_media_store_path = config.get("backup_media_store_path") + if self.backup_media_store_path: + self.ensure_directory(self.backup_media_store_path) + + self.synchronous_backup_media_store = config.get( + "synchronous_backup_media_store", False + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -115,6 +125,14 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" + # A secondary directory where uploaded images and attachments are + # stored as a backup. + # backup_media_store_path: "%(media_store)s" + + # Whether to wait for successful write to backup media store before + # returning successfully. + # synchronous_backup_media_store: false + # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0ea1248ce6..3b442cc16b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.logcontext import preserve_context_over_fn, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -59,7 +59,12 @@ class MediaRepository(object): self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.backup_filepaths = None + if hs.config.backup_media_store_path: + self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -87,18 +92,43 @@ class MediaRepository(object): if not os.path.exists(dirname): os.makedirs(dirname) + @defer.inlineCallbacks + def _write_to_file(self, source, file_name_func): + def write_file_thread(file_name): + source.seek(0) # Ensure we read from the start of the file + with open(file_name, "wb") as f: + shutil.copyfileobj(source, f) + + fname = file_name_func(self.filepaths) + self._makedirs(fname) + + # Write to the main repository + yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + + # Write to backup repository + if self.backup_filepaths: + backup_fname = file_name_func(backup_filepaths) + self._makedirs(backup_fname) + + # We can either wait for successful writing to the backup repository + # or write in the background and immediately return + if hs.config.synchronous_backup_media_store: + yield preserve_context_over_fn( + threads.deferToThread, write_file_thread, backup_fname, + ) + else: + preserve_fn(threads.deferToThread)(write_file, backup_fname) + + defer.returnValue(fname) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(content) + fname = yield self._write_to_file( + content, lambda f: f.local_media_filepath(media_id) + ) logger.info("Stored local media in file %r", fname) @@ -253,9 +283,8 @@ class MediaRepository(object): def _get_thumbnail_requirements(self, media_type): return self.thumbnail_requirements.get(media_type, ()) - def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type): - thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -267,36 +296,40 @@ class MediaRepository(object): return if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) elif t_method == "scale": t_width, t_height = thumbnailer.aspect(t_width, t_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.scale(t_width, t_height, t_type) else: - t_len = None + t_byte_source = None - return t_len + return t_byte_source @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.local_media_filepath(media_id) - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -306,21 +339,25 @@ class MediaRepository(object): t_width, t_height, t_method, t_type): input_path = self.filepaths.remote_media_filepath(server_name, file_id) - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -351,59 +388,32 @@ class MediaRepository(object): local_thumbnails = [] def generate_thumbnails(): - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, + ) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len + r_width, r_height, r_method, r_type, t_byte_source )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for l in local_thumbnails: - yield self.store.store_local_thumbnail(*l) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + if url_cache: + path_name_func = lambda f: f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + path_name_func = lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) defer.returnValue({ "width": m_width, @@ -433,51 +443,32 @@ class MediaRepository(object): ) return - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) + + remote_thumbnails.append(( + r_width, r_height, r_method, r_type, t_byte_source + )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + path_name_func = lambda f: f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) + defer.returnValue({ "width": m_width, "height": m_height, diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 3868d4f65f..60498b08aa 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -50,12 +50,12 @@ class Thumbnailer(object): else: return ((max_height * self.width) // self.height, max_height) - def scale(self, output_path, width, height, output_type): + def scale(self, width, height, output_type): """Rescales the image to the given dimensions""" scaled = self.image.resize((width, height), Image.ANTIALIAS) - return self.save_image(scaled, output_type, output_path) + return self._encode_image(scaled, output_type) - def crop(self, output_path, width, height, output_type): + def crop(self, width, height, output_type): """Rescales and crops the image to the given dimensions preserving aspect:: (w_in / h_in) = (w_scaled / h_scaled) @@ -82,13 +82,9 @@ class Thumbnailer(object): crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self.save_image(cropped, output_type, output_path) + return self._encode_image(cropped, output_type) - def save_image(self, output_image, output_type, output_path): + def _encode_image(self, output_image, output_type): output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_bytes = output_bytes_io.getvalue() - with open(output_path, "wb") as output_file: - output_file.write(output_bytes) - logger.info("Stored thumbnail in file %r", output_path) - return len(output_bytes) + return output_bytes_io diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 4ab33f73bf..f6f498cdc5 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -93,7 +93,7 @@ class UploadResource(Resource): # TODO(markjh): parse content-dispostion content_uri = yield self.media_repo.create_content( - media_type, upload_name, request.content.read(), + media_type, upload_name, request.content, content_length, requester.user ) -- cgit 1.4.1 From 67cb89fbdf62dfb2ff65f6f7f0ca23445cdac0ac Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:23:41 +0100 Subject: Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b442cc16b..f26f793bed 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -107,7 +107,7 @@ class MediaRepository(object): # Write to backup repository if self.backup_filepaths: - backup_fname = file_name_func(backup_filepaths) + backup_fname = file_name_func(self.backup_filepaths) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository -- cgit 1.4.1 From c8eeef6947af762c3eabef6ecca0f69833fbf8ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:28:24 +0100 Subject: Fix typos --- synapse/rest/media/v1/media_repository.py | 46 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f26f793bed..a16034fd67 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -65,6 +65,8 @@ class MediaRepository(object): if hs.config.backup_media_store_path: self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -112,12 +114,12 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return - if hs.config.synchronous_backup_media_store: + if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, write_file_thread, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file, backup_fname) + preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) defer.returnValue(fname) @@ -321,7 +323,7 @@ class MediaRepository(object): if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) @@ -329,10 +331,11 @@ class MediaRepository(object): logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, @@ -348,7 +351,7 @@ class MediaRepository(object): if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.remote_media_thumbnail( server_name, file_id, t_width, t_height, t_type, t_method ) @@ -360,7 +363,7 @@ class MediaRepository(object): t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): @@ -400,19 +403,21 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: - path_name_func = lambda f: f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - path_name_func = lambda f: f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + def path_name_func(f): + if url_cache: + return f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + return f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) defer.returnValue({ @@ -457,10 +462,11 @@ class MediaRepository(object): for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - path_name_func = lambda f: f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method - ) + for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: + def path_name_func(f): + return f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) -- cgit 1.4.1 From 6dfde6d4856695890271232f8a2e4c5f32615dd1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:30:26 +0100 Subject: Remove dead code --- synapse/rest/media/v1/media_repository.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a16034fd67..1eeb128d2a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -459,9 +459,6 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for r in remote_thumbnails: - yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( -- cgit 1.4.1 From b77a13812c38b2e79b2ebfddb52ce88a2ac8e9b9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:32:32 +0100 Subject: Typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 1eeb128d2a..93b35af9cf 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -462,7 +462,7 @@ class MediaRepository(object): for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) yield self._write_to_file(t_byte_source, path_name_func) -- cgit 1.4.1 From e283b555b1f20de4fd393fd947e82eb3c635b7e9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:31:24 +0100 Subject: Copy everything to backup --- synapse/config/repository.py | 4 +- synapse/rest/media/v1/filepath.py | 99 +++++++++++++++-------- synapse/rest/media/v1/media_repository.py | 109 ++++++++++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 7 +- synapse/rest/media/v1/thumbnailer.py | 9 ++- 5 files changed, 151 insertions(+), 77 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/config/repository.py b/synapse/config/repository.py index e3c83d56fa..6baa474931 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -75,7 +75,9 @@ class ContentRepositoryConfig(Config): self.backup_media_store_path = config.get("backup_media_store_path") if self.backup_media_store_path: - self.ensure_directory(self.backup_media_store_path) + self.backup_media_store_path = self.ensure_directory( + self.backup_media_store_path + ) self.synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index d5cec10127..43d0eea00d 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -15,103 +15,134 @@ import os import re +import functools NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") +def _wrap_in_base_path(func): + """Takes a function that returns a relative path and turns it into an + absolute path based on the location of the primary media store + """ + @functools.wraps(func) + def _wrapped(self, *args, **kwargs): + path = func(self, *args, **kwargs) + return os.path.join(self.primary_base_path, path) + + return _wrapped + + class MediaFilePaths(object): + """Describes where files are stored on disk. - def __init__(self, base_path): - self.base_path = base_path + Most of the function have a `*_rel` variant which returns a file path that + is relative to the base media store path. This is mainly used when we want + to write to the backup media store (when one is configured) + """ - def default_thumbnail(self, default_top_level, default_sub_type, width, - height, content_type, method): + def __init__(self, primary_base_path): + self.primary_base_path = primary_base_path + + def default_thumbnail_rel(self, default_top_level, default_sub_type, width, + height, content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "default_thumbnails", default_top_level, + "default_thumbnails", default_top_level, default_sub_type, file_name ) - def local_media_filepath(self, media_id): + default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) + + def local_media_filepath_rel(self, media_id): return os.path.join( - self.base_path, "local_content", + "local_content", media_id[0:2], media_id[2:4], media_id[4:] ) - def local_media_thumbnail(self, media_id, width, height, content_type, - method): + local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + + def local_media_thumbnail_rel(self, media_id, width, height, content_type, + method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "local_thumbnails", + "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) - def remote_media_filepath(self, server_name, file_id): + local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + + def remote_media_filepath_rel(self, server_name, file_id): return os.path.join( - self.base_path, "remote_content", server_name, + "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] ) - def remote_media_thumbnail(self, server_name, file_id, width, height, - content_type, method): + remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + + def remote_media_thumbnail_rel(self, server_name, file_id, width, height, + content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], file_name ) + remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) + def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) - def url_cache_filepath(self, media_id): + def url_cache_filepath_rel(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], media_id[11:] ) else: return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + def url_cache_filepath_dirs_to_delete(self, media_id): "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], ), ] - def url_cache_thumbnail(self, media_id, width, height, content_type, - method): + def url_cache_thumbnail_rel(self, media_id, width, height, content_type, + method): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf @@ -122,29 +153,31 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], file_name ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) + url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) + def url_cache_thumbnail_directory(self, media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -155,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], ), ] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 93b35af9cf..398e973ca9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -60,10 +60,12 @@ class MediaRepository(object): self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels - self.filepaths = MediaFilePaths(hs.config.media_store_path) - self.backup_filepaths = None + self.primary_base_path = hs.config.media_store_path + self.filepaths = MediaFilePaths(self.primary_base_path) + + self.backup_base_path = None if hs.config.backup_media_store_path: - self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store @@ -94,42 +96,63 @@ class MediaRepository(object): if not os.path.exists(dirname): os.makedirs(dirname) - @defer.inlineCallbacks - def _write_to_file(self, source, file_name_func): - def write_file_thread(file_name): - source.seek(0) # Ensure we read from the start of the file - with open(file_name, "wb") as f: - shutil.copyfileobj(source, f) + @staticmethod + def write_file_synchronously(source, fname): + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) - fname = file_name_func(self.filepaths) + @defer.inlineCallbacks + def write_to_file(self, source, path): + """Write `source` to the on disk media store, and also the backup store + if configured. + + Args: + source: A file like object that should be written + path: Relative path to write file to + + Returns: + string: the file path written to in the primary media store + """ + fname = os.path.join(self.primary_base_path, path) self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + yield preserve_context_over_fn( + threads.deferToThread, + self.write_file_synchronously, source, fname, + ) # Write to backup repository - if self.backup_filepaths: - backup_fname = file_name_func(self.backup_filepaths) + yield self.copy_to_backup(source, path) + + defer.returnValue(fname) + + @defer.inlineCallbacks + def copy_to_backup(self, source, path): + if self.backup_base_path: + backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield preserve_context_over_fn( - threads.deferToThread, write_file_thread, backup_fname, + threads.deferToThread, + self.write_file_synchronously, source, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) - - defer.returnValue(fname) + preserve_fn(threads.deferToThread)( + self.write_file_synchronously, source, backup_fname, + ) @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = yield self._write_to_file( - content, lambda f: f.local_media_filepath(media_id) + fname = yield self.write_to_file( + content, self.filepaths.local_media_filepath_rel(media_id) ) logger.info("Stored local media in file %r", fname) @@ -180,9 +203,10 @@ class MediaRepository(object): def _download_remote_file(self, server_name, media_id): file_id = random_string(24) - fname = self.filepaths.remote_media_filepath( + fpath = self.filepaths.remote_media_filepath_rel( server_name, file_id ) + fname = os.path.join(self.primary_base_path, fpath) self._makedirs(fname) try: @@ -224,6 +248,9 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") + with open(fname) as f: + yield self.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -322,15 +349,15 @@ class MediaRepository(object): ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.local_media_thumbnail( + self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_local_thumbnail( + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -350,15 +377,15 @@ class MediaRepository(object): ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.remote_media_thumbnail( + self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail( + yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -403,17 +430,16 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - def path_name_func(f): - if url_cache: - return f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - return f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + if url_cache: + file_path = self.filepaths.url_cache_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) + else: + file_path = self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, @@ -460,12 +486,11 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - def path_name_func(f): - return f.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, @@ -491,6 +516,8 @@ class MediaRepository(object): logger.info("Deleting: %r", key) + # TODO: Should we delete from the backup store + with (yield self.remote_media_linearizer.queue(key)): full_path = self.filepaths.remote_media_filepath(origin, file_id) try: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 895b480d5c..f82b8fbc51 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -59,6 +59,7 @@ class PreviewUrlResource(Resource): self.store = hs.get_datastore() self.client = SpiderHttpClient(hs) self.media_repo = media_repo + self.primary_base_path = media_repo.primary_base_path self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -262,7 +263,8 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fname = self.filepaths.url_cache_filepath(file_id) + fpath = self.filepaths.url_cache_filepath_rel(file_id) + fname = os.path.join(self.primary_base_path, fpath) self.media_repo._makedirs(fname) try: @@ -273,6 +275,9 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely + with open(fname) as f: + yield self.media_repo.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 60498b08aa..e1ee535b9a 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -51,7 +51,11 @@ class Thumbnailer(object): return ((max_height * self.width) // self.height, max_height) def scale(self, width, height, output_type): - """Rescales the image to the given dimensions""" + """Rescales the image to the given dimensions. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk + """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -65,6 +69,9 @@ class Thumbnailer(object): Args: max_width: The largest possible width. max_height: The larget possible height. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width -- cgit 1.4.1 From 802ca12d0551ff761e01d9af8348df1dc96fc372 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:37:21 +0100 Subject: Don't close file prematurely --- synapse/rest/media/v1/media_repository.py | 22 ++++++++++++++++------ synapse/rest/media/v1/preview_url_resource.py | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 398e973ca9..63ed1c4268 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,16 +97,20 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) + source.close() + @defer.inlineCallbacks def write_to_file(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. + Will close source once finished. + Args: source: A file like object that should be written path: Relative path to write file to @@ -120,7 +124,7 @@ class MediaRepository(object): # Write to the main repository yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, fname, + self._write_file_synchronously, source, fname, ) # Write to backup repository @@ -130,6 +134,10 @@ class MediaRepository(object): @defer.inlineCallbacks def copy_to_backup(self, source, path): + """Copy file like object source to the backup media store, if configured. + + Will close source after its done. + """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) @@ -139,12 +147,14 @@ class MediaRepository(object): if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) else: preserve_fn(threads.deferToThread)( - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) + else: + source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -248,8 +258,8 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - with open(fname) as f: - yield self.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f82b8fbc51..a3288c9cc6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,8 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely - with open(fname) as f: - yield self.media_repo.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.media_repo.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() -- cgit 1.4.1 From 1259a76047a0a718ce0c9fb26513c9127f8ea919 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:39:23 +0100 Subject: Get len before close --- synapse/rest/media/v1/media_repository.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 63ed1c4268..d25b98db45 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -359,6 +359,8 @@ class MediaRepository(object): ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -368,8 +370,7 @@ class MediaRepository(object): logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -387,6 +388,7 @@ class MediaRepository(object): ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +399,7 @@ class MediaRepository(object): yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -449,11 +451,12 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ @@ -500,11 +503,13 @@ class MediaRepository(object): server_name, file_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ -- cgit 1.4.1 From cc505b4b5e98ba70d8576a562fc36b03d6aa5150 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:52:30 +0100 Subject: getvalue closes buffer --- synapse/rest/media/v1/media_repository.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d25b98db45..ff2ddd2f18 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -258,7 +258,7 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done + # Will close the file after its done yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] @@ -359,8 +359,6 @@ class MediaRepository(object): ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) - output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -369,6 +367,8 @@ class MediaRepository(object): ) logger.info("Stored thumbnail in file %r", output_path) + t_len = os.path.getsize(output_path) + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, t_len ) @@ -388,7 +388,6 @@ class MediaRepository(object): ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +396,9 @@ class MediaRepository(object): ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail_rel( + t_len = os.path.getsize(output_path) + + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) @@ -451,9 +452,8 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -503,9 +503,8 @@ class MediaRepository(object): server_name, file_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, -- cgit 1.4.1 From 4ae85ae12190015595f979f1a302ee608de6fd65 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:57:31 +0100 Subject: Don't close prematurely.. --- synapse/rest/media/v1/media_repository.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index ff2ddd2f18..80b14a6739 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,12 +97,13 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname, close_source=False): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) - source.close() + if close_source: + source.close() @defer.inlineCallbacks def write_to_file(self, source, path): @@ -148,10 +149,12 @@ class MediaRepository(object): yield preserve_context_over_fn( threads.deferToThread, self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: source.close() -- cgit 1.4.1 From d76621a47b7b4b778055760d43df9d02614dac19 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 18:16:25 +0100 Subject: Fix comments --- synapse/rest/media/v1/filepath.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 43d0eea00d..6923a3fbd3 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -35,7 +35,7 @@ def _wrap_in_base_path(func): class MediaFilePaths(object): """Describes where files are stored on disk. - Most of the function have a `*_rel` variant which returns a file path that + Most of the functions have a `*_rel` variant which returns a file path that is relative to the base media store path. This is mainly used when we want to write to the backup media store (when one is configured) """ diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a3288c9cc6..e986e855a7 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -343,6 +343,9 @@ class PreviewUrlResource(Resource): def _expire_url_cache_data(self): """Clean up expired url cache content, media and thumbnails. """ + + # TODO: Delete from backup media store + now = self.clock.time_msec() # First we delete expired url cache entries -- cgit 1.4.1 From b60859d6cc429ea1934b94a8749caadd9a96ee21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:24:19 +0100 Subject: Use make_deferred_yieldable --- synapse/rest/media/v1/media_repository.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 80b14a6739..5c5020fe9d 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -123,7 +123,7 @@ class MediaRepository(object): self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, fname, ) @@ -146,7 +146,7 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, backup_fname, close_source=True, @@ -355,7 +355,7 @@ class MediaRepository(object): input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -384,7 +384,7 @@ class MediaRepository(object): input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -443,7 +443,7 @@ class MediaRepository(object): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: if url_cache: @@ -499,7 +499,7 @@ class MediaRepository(object): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: file_path = self.filepaths.remote_media_thumbnail_rel( -- cgit 1.4.1 From 64db043a71238db3f65f575c40f29260b83145be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:25:01 +0100 Subject: Move makedirs to thread --- synapse/rest/media/v1/media_repository.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c5020fe9d..72aad221a8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,7 @@ class MediaRepository(object): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) @@ -120,7 +121,6 @@ class MediaRepository(object): string: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) - self._makedirs(fname) # Write to the main repository yield make_deferred_yieldable( @@ -141,7 +141,6 @@ class MediaRepository(object): """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) - self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return -- cgit 1.4.1 From 35332298ef6a9828aa1fdb10f59230f47763084e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:32 +0100 Subject: Fix up comments --- synapse/rest/media/v1/media_repository.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 72aad221a8..f3a5b19a80 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,14 @@ class MediaRepository(object): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + close_source (bool): Whether to close source after writing + """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: @@ -115,10 +123,10 @@ class MediaRepository(object): Args: source: A file like object that should be written - path: Relative path to write file to + path(str): Relative path to write file to Returns: - string: the file path written to in the primary media store + Deferred[str]: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) @@ -138,6 +146,10 @@ class MediaRepository(object): """Copy file like object source to the backup media store, if configured. Will close source after its done. + + Args: + source: A file like object that should be written + path(str): Relative path to write file to """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) @@ -161,6 +173,18 @@ class MediaRepository(object): @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): + """Store uploaded content for a local user and return the mxc URL + + Args: + media_type(str): The content type of the file + upload_name(str): The name of the file + content: A file like object that is the content to store + content_length(int): The length of the content + auth_user(str): The user_id of the uploader + + Returns: + Deferred[str]: The mxc url of the stored content + """ media_id = random_string(24) fname = yield self.write_to_file( -- cgit 1.4.1 From e3428d26ca5a23a3dac6d106aff8ac19f9839f32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:59 +0100 Subject: Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f3a5b19a80..76220a5531 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -395,7 +395,7 @@ class MediaRepository(object): t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail_rel( + yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len ) -- cgit 1.4.1 From 505371414f6ba9aeaa95eb8d34f7893c4cc2b07e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:23:53 +0100 Subject: Fix up thumbnailing function --- synapse/rest/media/v1/media_repository.py | 127 ++++++++++++-------------- synapse/rest/media/v1/preview_url_resource.py | 8 +- synapse/rest/media/v1/thumbnailer.py | 13 ++- 3 files changed, 73 insertions(+), 75 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 76220a5531..36f42c73be 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -206,7 +206,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_local_thumbnails(media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -339,7 +339,7 @@ class MediaRepository(object): "filesystem_id": file_id, } - yield self._generate_remote_thumbnails( + yield self._generate_thumbnails( server_name, media_id, media_info ) @@ -385,6 +385,8 @@ class MediaRepository(object): ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -414,6 +416,8 @@ class MediaRepository(object): ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -432,13 +436,28 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + """Generate and store thumbnails for an image. + + Args: + server_name(str|None): The server name if remote media, else None if local + media_id(str) + media_info(dict) + url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + used exclusively by the url previewer + + Returns: + Deferred[dict]: Dict with "width" and "height" keys of original image + """ media_type = media_info["media_type"] + file_id = media_info.get("filesystem_id") requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - if url_cache: + if server_name: + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + elif url_cache: input_path = self.filepaths.url_cache_filepath(media_id) else: input_path = self.filepaths.local_media_filepath(media_id) @@ -454,22 +473,40 @@ class MediaRepository(object): ) return - local_thumbnails = [] + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails = {} + for r_width, r_height, r_method, r_type in requirements: + if r_method == "crop": + thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + elif r_method == "scale": + t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[(t_width, t_height)] = (r_method, r_type) + + # Now we generate the thumbnails for each dimension, store it + for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) - def generate_thumbnails(): - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, + if r_type == "crop": + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.crop, + r_width, r_height, r_type, + ) + else: + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.scale, + r_width, r_height, r_type, ) - local_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) - - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) + t_width, t_height = t_byte_source.dimensions - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: + if server_name: + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) + elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) @@ -481,61 +518,15 @@ class MediaRepository(object): output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) - - defer.returnValue({ - "width": m_width, - "height": m_height, - }) - - @defer.inlineCallbacks - def _generate_remote_thumbnails(self, server_name, media_id, media_info): - media_type = media_info["media_type"] - file_id = media_info["filesystem_id"] - requirements = self._get_thumbnail_requirements(media_type) - if not requirements: - return - - remote_thumbnails = [] - - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - def generate_thumbnails(): - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, - ) - - remote_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) - - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) - - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - - output_path = yield self.write_to_file(t_byte_source, file_path) - t_len = os.path.getsize(output_path) - - yield self.store.store_remote_media_thumbnail( + if server_name: + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) + else: + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) defer.returnValue({ "width": m_width, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e986e855a7..c734f6b7cd 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -171,8 +171,8 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): - dims = yield self.media_repo._generate_local_thumbnails( - media_info['filesystem_id'], media_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, media_info['filesystem_id'], media_info, url_cache=True, ) og = { @@ -217,8 +217,8 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images - dims = yield self.media_repo._generate_local_thumbnails( - image_info['filesystem_id'], image_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, image_info['filesystem_id'], image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index e1ee535b9a..09650bd527 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ class Thumbnailer(object): """Rescales the image to the given dimensions. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ class Thumbnailer(object): max_height: The larget possible height. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,6 +92,13 @@ class Thumbnailer(object): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = BytesIO() + output_bytes_io = ImageIO(output_image.size) output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) + output_image.close() return output_bytes_io + + +class ImageIO(BytesIO): + def __init__(self, dimensions): + super(ImageIO, self).__init__() + self.dimensions = dimensions -- cgit 1.4.1 From 0e28281a021101ac199cbf2d0d130190110921bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:33:49 +0100 Subject: Fix up --- synapse/rest/media/v1/media_repository.py | 62 +++++++++++++++---------------- synapse/rest/media/v1/thumbnailer.py | 13 ++----- 2 files changed, 32 insertions(+), 43 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 36f42c73be..a310d08f5f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -131,10 +131,9 @@ class MediaRepository(object): fname = os.path.join(self.primary_base_path, path) # Write to the main repository - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, fname, - ) + )) # Write to backup repository yield self.copy_to_backup(source, path) @@ -157,11 +156,10 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, backup_fname, close_source=True, - ) + )) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, @@ -378,11 +376,10 @@ class MediaRepository(object): input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -409,11 +406,10 @@ class MediaRepository(object): input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -478,34 +474,32 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + thumbnails.setdefault((r_width, r_height), (r_method, r_type)) elif r_method == "scale": - t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) thumbnails[(t_width, t_height)] = (r_method, r_type) # Now we generate the thumbnails for each dimension, store it - for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): - t_byte_source = thumbnailer.crop(t_width, t_height, t_type) - - if r_type == "crop": - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.crop, - r_width, r_height, r_type, - ) + for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) else: - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.scale, - r_width, r_height, r_type, - ) - - t_width, t_height = t_byte_source.dimensions + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + server_name, file_id, t_width, t_height, t_type, t_method + ) elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -515,14 +509,16 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) + # Write to disk output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) + # Write to database if server_name: yield self.store.store_remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ) + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) else: yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 09650bd527..e1ee535b9a 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ class Thumbnailer(object): """Rescales the image to the given dimensions. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ class Thumbnailer(object): max_height: The larget possible height. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,13 +92,6 @@ class Thumbnailer(object): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = ImageIO(output_image.size) + output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_image.close() return output_bytes_io - - -class ImageIO(BytesIO): - def __init__(self, dimensions): - super(ImageIO, self).__init__() - self.dimensions = dimensions -- cgit 1.4.1 From 9732ec6797339dd33bc472cef5081a858ddccb30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:34:41 +0100 Subject: s/write_to_file/write_to_file_and_backup/ --- synapse/rest/media/v1/media_repository.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a310d08f5f..c9753ebb52 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -115,7 +115,7 @@ class MediaRepository(object): source.close() @defer.inlineCallbacks - def write_to_file(self, source, path): + def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. @@ -185,7 +185,7 @@ class MediaRepository(object): """ media_id = random_string(24) - fname = yield self.write_to_file( + fname = yield self.write_to_file_and_backup( content, self.filepaths.local_media_filepath_rel(media_id) ) @@ -384,7 +384,7 @@ class MediaRepository(object): if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -414,7 +414,7 @@ class MediaRepository(object): if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method @@ -510,7 +510,7 @@ class MediaRepository(object): ) # Write to disk - output_path = yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) t_len = os.path.getsize(output_path) # Write to database -- cgit 1.4.1 From ae5d18617afd98bb5d51b43c2a12a99e9d96da39 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:35:44 +0100 Subject: Make things be absolute paths again --- synapse/rest/media/v1/filepath.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 6923a3fbd3..a3a15ac302 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -172,12 +172,12 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], ), ] -- cgit 1.4.1 From 4d7e1dde70e6f2300ab83fb3208152f3d73bde71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:36:32 +0100 Subject: Remove unnecessary diff --- synapse/rest/media/v1/media_repository.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index c9753ebb52..f06813c48c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -63,9 +63,7 @@ class MediaRepository(object): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = None - if hs.config.backup_media_store_path: - self.backup_base_path = hs.config.backup_media_store_path + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store -- cgit 1.4.1 From a675bd08bd1a016a16bd0e10547e8c26be391ee0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:41:06 +0100 Subject: Add paths back in... --- synapse/rest/media/v1/filepath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index a3a15ac302..fec0bbf572 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -100,7 +100,7 @@ class MediaFilePaths(object): def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - "remote_thumbnail", server_name, + self.primary_base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], ), ] -- cgit 1.4.1 From 1f43d2239757db0b376a3582066190221942cddc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:42:07 +0100 Subject: Don't needlessly rename variable --- synapse/rest/media/v1/filepath.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index fec0bbf572..d5164e47e0 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -27,7 +27,7 @@ def _wrap_in_base_path(func): @functools.wraps(func) def _wrapped(self, *args, **kwargs): path = func(self, *args, **kwargs) - return os.path.join(self.primary_base_path, path) + return os.path.join(self.base_path, path) return _wrapped @@ -41,7 +41,7 @@ class MediaFilePaths(object): """ def __init__(self, primary_base_path): - self.primary_base_path = primary_base_path + self.base_path = primary_base_path def default_thumbnail_rel(self, default_top_level, default_sub_type, width, height, content_type, method): @@ -100,7 +100,7 @@ class MediaFilePaths(object): def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.primary_base_path, "remote_thumbnail", server_name, + self.base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], ), ] @@ -172,12 +172,12 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], ), ] -- cgit 1.4.1 From c021c39cbd0bf1f0a85c9699275600ac35aa9ec4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:46:53 +0100 Subject: Remove spurious addition --- synapse/rest/media/v1/media_repository.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f06813c48c..3b8fe5ddb4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -380,8 +380,6 @@ class MediaRepository(object): )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -410,8 +408,6 @@ class MediaRepository(object): )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( -- cgit 1.4.1 From ad1911bbf46f658ee343ee26e3011b3f1bcbd572 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:05 +0100 Subject: Comment --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b8fe5ddb4..700fd0dd24 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -121,7 +121,7 @@ class MediaRepository(object): Args: source: A file like object that should be written - path(str): Relative path to write file to + path (str): Relative path to write file to Returns: Deferred[str]: the file path written to in the primary media store -- cgit 1.4.1 From 31aa7bd8d1748548b2523f58b348bb6787dcc019 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:38 +0100 Subject: Move type into key --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 700fd0dd24..dee834389f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,15 +468,15 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height), (r_method, r_type)) + thumbnails.setdefault((r_width, r_height,r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - thumbnails[(t_width, t_height)] = (r_method, r_type) + thumbnails[(t_width, t_height, r_type)] = r_method # Now we generate the thumbnails for each dimension, store it - for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): # Generate the thumbnail if t_type == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( -- cgit 1.4.1 From b92a8e6e4aa2283daaa4e6050f1dbd503ddc9434 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:58:57 +0100 Subject: PEP8 --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index dee834389f..d2ac0175d7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,7 +468,7 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height,r_type), r_method) + thumbnails.setdefault((r_width, r_height, r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) -- cgit 1.4.1 From 2b24416e90b0bf1ee6d29cfc384670f4eeca0ced Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 14:11:34 +0100 Subject: Don't reuse source but instead copy from primary media store to backup --- synapse/rest/media/v1/media_repository.py | 28 ++++++++------------------- synapse/rest/media/v1/preview_url_resource.py | 3 +-- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d2ac0175d7..e32a67e16a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -95,7 +95,7 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname, close_source=False): + def _write_file_synchronously(source, fname): """Write `source` to the path `fname` synchronously. Should be called from a thread. @@ -109,16 +109,11 @@ class MediaRepository(object): with open(fname, "wb") as f: shutil.copyfileobj(source, f) - if close_source: - source.close() - @defer.inlineCallbacks def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. - Will close source once finished. - Args: source: A file like object that should be written path (str): Relative path to write file to @@ -134,37 +129,31 @@ class MediaRepository(object): )) # Write to backup repository - yield self.copy_to_backup(source, path) + yield self.copy_to_backup(path) defer.returnValue(fname) @defer.inlineCallbacks - def copy_to_backup(self, source, path): - """Copy file like object source to the backup media store, if configured. - - Will close source after its done. + def copy_to_backup(self, path): + """Copy a file from the primary to backup media store, if configured. Args: - source: A file like object that should be written path(str): Relative path to write file to """ if self.backup_base_path: + primary_fname = os.path.join(self.primary_base_path, path) backup_fname = os.path.join(self.backup_base_path, path) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, )) else: preserve_fn(threads.deferToThread)( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, ) - else: - source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -280,8 +269,7 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done - yield self.copy_to_backup(open(fname), fpath) + yield self.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c734f6b7cd..2a3e37fdf4 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,7 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely - # Will close the file after its done - yield self.media_repo.copy_to_backup(open(fname), fpath) + yield self.media_repo.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() -- cgit 1.4.1 From 6b725cf56aaf090f9cc9d5409dec7912feae8869 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:23:41 +0100 Subject: Remove old comment --- synapse/rest/media/v1/media_repository.py | 1 - 1 file changed, 1 deletion(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e32a67e16a..cc267d0c16 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -102,7 +102,6 @@ class MediaRepository(object): Args: source: A file like object to be written fname (str): Path to write to - close_source (bool): Whether to close source after writing """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file -- cgit 1.4.1 From 1b6b0b1e66ab1cd5682ad1fa99020474afd6db7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:34:08 +0100 Subject: Add try/finally block to close t_byte_source --- synapse/rest/media/v1/media_repository.py | 65 +++++++++++++++++++------------ 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index cc267d0c16..515b3d3e74 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -367,12 +367,16 @@ class MediaRepository(object): )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -395,12 +399,16 @@ class MediaRepository(object): )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -464,18 +472,6 @@ class MediaRepository(object): # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Generate the thumbnail - if t_type == "crop": - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.crop, - r_width, r_height, t_type, - )) - else: - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.scale, - r_width, r_height, t_type, - )) - # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( @@ -490,8 +486,29 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) - # Write to disk - output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) + else: + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + + if not t_byte_source: + continue + + try: + # Write to disk + output_path = yield self.write_to_file_and_backup( + t_byte_source, file_path, + ) + finally: + t_byte_source.close() + t_len = os.path.getsize(output_path) # Write to database -- cgit 1.4.1 From a6245478c82d8bd2c9abea34b9ec4a94ccc5ed09 Mon Sep 17 00:00:00 2001 From: Krombel Date: Tue, 17 Oct 2017 12:45:33 +0200 Subject: fix thumbnailing (#2548) in commit 0e28281a the code for thumbnailing got refactored and the renaming of this variables was not done correctly. Signed-Off-by: Matthias Kesler --- synapse/rest/media/v1/media_repository.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 515b3d3e74..057c925b7b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -490,12 +490,12 @@ class MediaRepository(object): if t_type == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( thumbnailer.crop, - r_width, r_height, t_type, + t_width, t_height, t_type, )) else: t_byte_source = yield make_deferred_yieldable(threads.deferToThread( thumbnailer.scale, - r_width, r_height, t_type, + t_width, t_height, t_type, )) if not t_byte_source: -- cgit 1.4.1 From bd5718d0ad2a9380ae292507a1022a230f8b2011 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 19 Oct 2017 10:27:18 +0100 Subject: Fix typo in thumbnail generation --- synapse/rest/media/v1/media_repository.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 057c925b7b..6b50b45b1f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -487,16 +487,19 @@ class MediaRepository(object): ) # Generate the thumbnail - if t_type == "crop": + if t_method == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( thumbnailer.crop, t_width, t_height, t_type, )) - else: + elif t_method == "scale": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( thumbnailer.scale, t_width, t_height, t_type, )) + else: + logger.error("Unrecognized method: %r", t_method) + continue if not t_byte_source: continue -- cgit 1.4.1