diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index d92b7ff337..d5164e47e0 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -14,78 +14,200 @@
# limitations under the License.
import os
+import re
+import functools
+
+NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
+
+
+def _wrap_in_base_path(func):
+    """Build the absolute-path twin of a `*_rel` path method.
+
+    `func` is a method returning a path relative to the media store. The
+    returned method accepts the same arguments and joins that relative path
+    onto `self.base_path`, the location of the primary media store.
+    """
+    def _absolute(self, *args, **kwargs):
+        relative = func(self, *args, **kwargs)
+        return os.path.join(self.base_path, relative)
+
+    # Carry over the wrapped method's name and docstring
+    return functools.wraps(func)(_absolute)
class MediaFilePaths(object):
+ """Describes where files are stored on disk.
- def __init__(self, base_path):
- self.base_path = base_path
+ Most of the functions have a `*_rel` variant which returns a file path that
+ is relative to the base media store path. This is mainly used when we want
+ to write to the backup media store (when one is configured)
+ """
- def default_thumbnail(self, default_top_level, default_sub_type, width,
- height, content_type, method):
+ def __init__(self, primary_base_path):
+ self.base_path = primary_base_path
+
+ def default_thumbnail_rel(self, default_top_level, default_sub_type, width,
+ height, content_type, method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (
width, height, top_level_type, sub_type, method
)
return os.path.join(
- self.base_path, "default_thumbnails", default_top_level,
+ "default_thumbnails", default_top_level,
default_sub_type, file_name
)
- def local_media_filepath(self, media_id):
+ default_thumbnail = _wrap_in_base_path(default_thumbnail_rel)
+
+ def local_media_filepath_rel(self, media_id):
return os.path.join(
- self.base_path, "local_content",
+ "local_content",
media_id[0:2], media_id[2:4], media_id[4:]
)
- def local_media_thumbnail(self, media_id, width, height, content_type,
- method):
+ local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
+
+ def local_media_thumbnail_rel(self, media_id, width, height, content_type,
+ method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (
width, height, top_level_type, sub_type, method
)
return os.path.join(
- self.base_path, "local_thumbnails",
+ "local_thumbnails",
media_id[0:2], media_id[2:4], media_id[4:],
file_name
)
- def remote_media_filepath(self, server_name, file_id):
+ local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
+
+ def remote_media_filepath_rel(self, server_name, file_id):
return os.path.join(
- self.base_path, "remote_content", server_name,
+ "remote_content", server_name,
file_id[0:2], file_id[2:4], file_id[4:]
)
- def remote_media_thumbnail(self, server_name, file_id, width, height,
- content_type, method):
+ remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
+
+ def remote_media_thumbnail_rel(self, server_name, file_id, width, height,
+ content_type, method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
return os.path.join(
- self.base_path, "remote_thumbnail", server_name,
+ "remote_thumbnail", server_name,
file_id[0:2], file_id[2:4], file_id[4:],
file_name
)
+ remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
+
def remote_media_thumbnail_dir(self, server_name, file_id):
return os.path.join(
self.base_path, "remote_thumbnail", server_name,
file_id[0:2], file_id[2:4], file_id[4:],
)
- def url_cache_filepath(self, media_id):
- return os.path.join(
- self.base_path, "url_cache",
- media_id[0:2], media_id[2:4], media_id[4:]
- )
+    def url_cache_filepath_rel(self, media_id):
+        """Relative path of a url cache file.
+
+        Ids that start with an ISO date (new format) are stored as
+        url_cache/<DATE>/<RANDOM_STRING>; the single character at index 10,
+        which separates the date from the random string, is dropped.
+        E.g. 2017-09-28-fsdRDt24DS234dsf -> url_cache/2017-09-28/fsdRDt24DS234dsf
+
+        Any other id is sharded on its first two pairs of characters.
+        """
+        if NEW_FORMAT_ID_RE.match(media_id):
+            shards = (media_id[:10], media_id[11:])
+        else:
+            shards = (media_id[0:2], media_id[2:4], media_id[4:])
+        return os.path.join("url_cache", *shards)
+
+ url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
+
+    def url_cache_filepath_dirs_to_delete(self, media_id):
+        """The dirs to try and remove if we delete the media_id file.
+
+        Returns:
+            list[str]: absolute directory paths, deepest first
+        """
+        cache_root = os.path.join(self.base_path, "url_cache")
+
+        if NEW_FORMAT_ID_RE.match(media_id):
+            # New format ids sit directly inside a single per-date directory
+            return [os.path.join(cache_root, media_id[:10])]
+
+        # Old format ids are nested two directory levels deep
+        top_shard = os.path.join(cache_root, media_id[0:2])
+        return [
+            os.path.join(top_shard, media_id[2:4]),
+            top_shard,
+        ]
+
+ def url_cache_thumbnail_rel(self, media_id, width, height, content_type,
+ method):
+ # Media id is of the form <DATE><RANDOM_STRING>
+ # E.g.: 2017-09-28-fsdRDt24DS234dsf
- def url_cache_thumbnail(self, media_id, width, height, content_type,
- method):
top_level_type, sub_type = content_type.split("/")
file_name = "%i-%i-%s-%s-%s" % (
width, height, top_level_type, sub_type, method
)
- return os.path.join(
- self.base_path, "url_cache_thumbnails",
- media_id[0:2], media_id[2:4], media_id[4:],
- file_name
- )
+
+ if NEW_FORMAT_ID_RE.match(media_id):
+ return os.path.join(
+ "url_cache_thumbnails",
+ media_id[:10], media_id[11:],
+ file_name
+ )
+ else:
+ return os.path.join(
+ "url_cache_thumbnails",
+ media_id[0:2], media_id[2:4], media_id[4:],
+ file_name
+ )
+
+ url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
+
+    def url_cache_thumbnail_directory(self, media_id):
+        """Absolute path of the directory holding the thumbnails of `media_id`.
+
+        Ids that start with an ISO date map to <DATE>/<REST>, where the
+        character at index 10 is dropped; any other id is sharded on its
+        first two pairs of characters.
+        """
+        if NEW_FORMAT_ID_RE.match(media_id):
+            shards = [media_id[:10], media_id[11:]]
+        else:
+            shards = [media_id[0:2], media_id[2:4], media_id[4:]]
+
+        return os.path.join(self.base_path, "url_cache_thumbnails", *shards)
+
+    def url_cache_thumbnail_dirs_to_delete(self, media_id):
+        """The dirs to try and remove if we delete the media_id thumbnails.
+
+        Returns:
+            list[str]: absolute directory paths, deepest first: the media's
+            own thumbnail directory followed by each of its parents below
+            "url_cache_thumbnails".
+        """
+        if NEW_FORMAT_ID_RE.match(media_id):
+            # <DATE>/<REST>, skipping the character at index 10
+            shards = [media_id[:10], media_id[11:]]
+        else:
+            shards = [media_id[0:2], media_id[2:4], media_id[4:]]
+
+        thumbnails_root = os.path.join(self.base_path, "url_cache_thumbnails")
+
+        # Walk from the full shard path up to the first shard only
+        return [
+            os.path.join(thumbnails_root, *shards[:depth])
+            for depth in range(len(shards), 0, -1)
+        ]
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 0ea1248ce6..6b50b45b1f 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \
from synapse.util.async import Linearizer
from synapse.util.stringutils import is_ascii
-from synapse.util.logcontext import preserve_context_over_fn
+from synapse.util.logcontext import make_deferred_yieldable, preserve_fn
from synapse.util.retryutils import NotRetryingDestination
import os
@@ -59,7 +59,14 @@ class MediaRepository(object):
self.store = hs.get_datastore()
self.max_upload_size = hs.config.max_upload_size
self.max_image_pixels = hs.config.max_image_pixels
- self.filepaths = MediaFilePaths(hs.config.media_store_path)
+
+ self.primary_base_path = hs.config.media_store_path
+ self.filepaths = MediaFilePaths(self.primary_base_path)
+
+ self.backup_base_path = hs.config.backup_media_store_path
+
+ self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store
+
self.dynamic_thumbnails = hs.config.dynamic_thumbnails
self.thumbnail_requirements = hs.config.thumbnail_requirements
@@ -87,18 +94,86 @@ class MediaRepository(object):
if not os.path.exists(dirname):
os.makedirs(dirname)
+    @staticmethod
+    def _write_file_synchronously(source, fname):
+        """Blocking copy of the whole of `source` into the file at `fname`.
+
+        Should be called from a thread. Directories are created through
+        `MediaRepository._makedirs` before the file is opened.
+
+        Args:
+            source: A file like object to be written
+            fname (str): Path to write to
+        """
+        MediaRepository._makedirs(fname)
+
+        # Rewind first so that we copy from the start of the file
+        source.seek(0)
+        with open(fname, "wb") as destination:
+            shutil.copyfileobj(source, destination)
+
+    @defer.inlineCallbacks
+    def write_to_file_and_backup(self, source, path):
+        """Store `source` in the primary media store, then mirror it to the
+        backup store if one is configured.
+
+        Args:
+            source: A file like object that should be written
+            path (str): Relative path to write file to
+
+        Returns:
+            Deferred[str]: the file path written to in the primary media store
+        """
+        primary_fname = os.path.join(self.primary_base_path, path)
+
+        # The blocking write to the main repository is run in a thread
+        write_deferred = threads.deferToThread(
+            self._write_file_synchronously, source, primary_fname,
+        )
+        yield make_deferred_yieldable(write_deferred)
+
+        # The backup is copied from the primary file, so it has to come second
+        yield self.copy_to_backup(path)
+
+        defer.returnValue(primary_fname)
+
+    @defer.inlineCallbacks
+    def copy_to_backup(self, path):
+        """Copy a file from the primary to backup media store, if configured.
+
+        Does nothing when no backup store is configured. Otherwise the copy is
+        run in a thread and, depending on `synchronous_backup_media_store`, we
+        either wait for it to finish or leave it running in the background.
+
+        Args:
+            path(str): Relative path of the file, the same in both stores
+        """
+        if not self.backup_base_path:
+            return
+
+        primary_fname = os.path.join(self.primary_base_path, path)
+        backup_fname = os.path.join(self.backup_base_path, path)
+
+        def _copy_file_synchronously():
+            # shutil.copyfile does not create missing parent directories, so
+            # make sure they exist in the backup store before copying.
+            self._makedirs(backup_fname)
+            shutil.copyfile(primary_fname, backup_fname)
+
+        # We can either wait for successful writing to the backup repository
+        # or write in the background and immediately return
+        if self.synchronous_backup_media_store:
+            yield make_deferred_yieldable(threads.deferToThread(
+                _copy_file_synchronously,
+            ))
+        else:
+            preserve_fn(threads.deferToThread)(
+                _copy_file_synchronously,
+            )
+
@defer.inlineCallbacks
def create_content(self, media_type, upload_name, content, content_length,
auth_user):
+ """Store uploaded content for a local user and return the mxc URL
+
+ Args:
+ media_type(str): The content type of the file
+ upload_name(str): The name of the file
+ content: A file like object that is the content to store
+ content_length(int): The length of the content
+ auth_user(str): The user_id of the uploader
+
+ Returns:
+ Deferred[str]: The mxc url of the stored content
+ """
media_id = random_string(24)
- fname = self.filepaths.local_media_filepath(media_id)
- self._makedirs(fname)
-
- # This shouldn't block for very long because the content will have
- # already been uploaded at this point.
- with open(fname, "wb") as f:
- f.write(content)
+ fname = yield self.write_to_file_and_backup(
+ content, self.filepaths.local_media_filepath_rel(media_id)
+ )
logger.info("Stored local media in file %r", fname)
@@ -115,7 +190,7 @@ class MediaRepository(object):
"media_length": content_length,
}
- yield self._generate_local_thumbnails(media_id, media_info)
+ yield self._generate_thumbnails(None, media_id, media_info)
defer.returnValue("mxc://%s/%s" % (self.server_name, media_id))
@@ -148,9 +223,10 @@ class MediaRepository(object):
def _download_remote_file(self, server_name, media_id):
file_id = random_string(24)
- fname = self.filepaths.remote_media_filepath(
+ fpath = self.filepaths.remote_media_filepath_rel(
server_name, file_id
)
+ fname = os.path.join(self.primary_base_path, fpath)
self._makedirs(fname)
try:
@@ -192,6 +268,8 @@ class MediaRepository(object):
server_name, media_id)
raise SynapseError(502, "Failed to fetch remote media")
+ yield self.copy_to_backup(fpath)
+
media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec()
@@ -244,7 +322,7 @@ class MediaRepository(object):
"filesystem_id": file_id,
}
- yield self._generate_remote_thumbnails(
+ yield self._generate_thumbnails(
server_name, media_id, media_info
)
@@ -253,9 +331,8 @@ class MediaRepository(object):
def _get_thumbnail_requirements(self, media_type):
return self.thumbnail_requirements.get(media_type, ())
- def _generate_thumbnail(self, input_path, t_path, t_width, t_height,
+ def _generate_thumbnail(self, thumbnailer, t_width, t_height,
t_method, t_type):
- thumbnailer = Thumbnailer(input_path)
m_width = thumbnailer.width
m_height = thumbnailer.height
@@ -267,72 +344,105 @@ class MediaRepository(object):
return
if t_method == "crop":
- t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
+ t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
elif t_method == "scale":
t_width, t_height = thumbnailer.aspect(t_width, t_height)
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
- t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
+ t_byte_source = thumbnailer.scale(t_width, t_height, t_type)
else:
- t_len = None
+ t_byte_source = None
- return t_len
+ return t_byte_source
@defer.inlineCallbacks
def generate_local_exact_thumbnail(self, media_id, t_width, t_height,
t_method, t_type):
input_path = self.filepaths.local_media_filepath(media_id)
- t_path = self.filepaths.local_media_thumbnail(
- media_id, t_width, t_height, t_type, t_method
- )
- self._makedirs(t_path)
-
- t_len = yield preserve_context_over_fn(
- threads.deferToThread,
+ thumbnailer = Thumbnailer(input_path)
+ t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
self._generate_thumbnail,
- input_path, t_path, t_width, t_height, t_method, t_type
- )
+ thumbnailer, t_width, t_height, t_method, t_type
+ ))
+
+ if t_byte_source:
+ try:
+ output_path = yield self.write_to_file_and_backup(
+ t_byte_source,
+ self.filepaths.local_media_thumbnail_rel(
+ media_id, t_width, t_height, t_type, t_method
+ )
+ )
+ finally:
+ t_byte_source.close()
+
+ logger.info("Stored thumbnail in file %r", output_path)
+
+ t_len = os.path.getsize(output_path)
- if t_len:
yield self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
)
- defer.returnValue(t_path)
+ defer.returnValue(output_path)
@defer.inlineCallbacks
def generate_remote_exact_thumbnail(self, server_name, file_id, media_id,
t_width, t_height, t_method, t_type):
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
- t_path = self.filepaths.remote_media_thumbnail(
- server_name, file_id, t_width, t_height, t_type, t_method
- )
- self._makedirs(t_path)
-
- t_len = yield preserve_context_over_fn(
- threads.deferToThread,
+ thumbnailer = Thumbnailer(input_path)
+ t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
self._generate_thumbnail,
- input_path, t_path, t_width, t_height, t_method, t_type
- )
+ thumbnailer, t_width, t_height, t_method, t_type
+ ))
+
+ if t_byte_source:
+ try:
+ output_path = yield self.write_to_file_and_backup(
+ t_byte_source,
+ self.filepaths.remote_media_thumbnail_rel(
+ server_name, file_id, t_width, t_height, t_type, t_method
+ )
+ )
+ finally:
+ t_byte_source.close()
+
+ logger.info("Stored thumbnail in file %r", output_path)
+
+ t_len = os.path.getsize(output_path)
- if t_len:
yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
)
- defer.returnValue(t_path)
+ defer.returnValue(output_path)
@defer.inlineCallbacks
- def _generate_local_thumbnails(self, media_id, media_info, url_cache=False):
+ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False):
+ """Generate and store thumbnails for an image.
+
+ Args:
+ server_name(str|None): The server name if remote media, else None if local
+ media_id(str)
+ media_info(dict)
+ url_cache(bool): If we are thumbnailing images downloaded for the URL cache,
+ used exclusively by the url previewer
+
+ Returns:
+ Deferred[dict]: Dict with "width" and "height" keys of original image
+ """
media_type = media_info["media_type"]
+ file_id = media_info.get("filesystem_id")
requirements = self._get_thumbnail_requirements(media_type)
if not requirements:
return
- if url_cache:
+ if server_name:
+ input_path = self.filepaths.remote_media_filepath(server_name, file_id)
+ elif url_cache:
input_path = self.filepaths.url_cache_filepath(media_id)
else:
input_path = self.filepaths.local_media_filepath(media_id)
@@ -348,135 +458,72 @@ class MediaRepository(object):
)
return
- local_thumbnails = []
-
- def generate_thumbnails():
- scales = set()
- crops = set()
- for r_width, r_height, r_method, r_type in requirements:
- if r_method == "scale":
- t_width, t_height = thumbnailer.aspect(r_width, r_height)
- scales.add((
- min(m_width, t_width), min(m_height, t_height), r_type,
- ))
- elif r_method == "crop":
- crops.add((r_width, r_height, r_type))
-
- for t_width, t_height, t_type in scales:
- t_method = "scale"
- if url_cache:
- t_path = self.filepaths.url_cache_thumbnail(
- media_id, t_width, t_height, t_type, t_method
- )
- else:
- t_path = self.filepaths.local_media_thumbnail(
- media_id, t_width, t_height, t_type, t_method
- )
- self._makedirs(t_path)
- t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
+ # We deduplicate the thumbnail sizes by ignoring the cropped versions if
+ # they have the same dimensions of a scaled one.
+ thumbnails = {}
+ for r_width, r_height, r_method, r_type in requirements:
+ if r_method == "crop":
+ thumbnails.setdefault((r_width, r_height, r_type), r_method)
+ elif r_method == "scale":
+ t_width, t_height = thumbnailer.aspect(r_width, r_height)
+ t_width = min(m_width, t_width)
+ t_height = min(m_height, t_height)
+ thumbnails[(t_width, t_height, r_type)] = r_method
+
+ # Now we generate the thumbnails for each dimension, store it
+ for (t_width, t_height, t_type), t_method in thumbnails.iteritems():
+ # Work out the correct file name for thumbnail
+ if server_name:
+ file_path = self.filepaths.remote_media_thumbnail_rel(
+ server_name, file_id, t_width, t_height, t_type, t_method
+ )
+ elif url_cache:
+ file_path = self.filepaths.url_cache_thumbnail_rel(
+ media_id, t_width, t_height, t_type, t_method
+ )
+ else:
+ file_path = self.filepaths.local_media_thumbnail_rel(
+ media_id, t_width, t_height, t_type, t_method
+ )
- local_thumbnails.append((
- media_id, t_width, t_height, t_type, t_method, t_len
+ # Generate the thumbnail
+ if t_method == "crop":
+ t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+ thumbnailer.crop,
+ t_width, t_height, t_type,
))
-
- for t_width, t_height, t_type in crops:
- if (t_width, t_height, t_type) in scales:
- # If the aspect ratio of the cropped thumbnail matches a purely
- # scaled one then there is no point in calculating a separate
- # thumbnail.
- continue
- t_method = "crop"
- if url_cache:
- t_path = self.filepaths.url_cache_thumbnail(
- media_id, t_width, t_height, t_type, t_method
- )
- else:
- t_path = self.filepaths.local_media_thumbnail(
- media_id, t_width, t_height, t_type, t_method
- )
- self._makedirs(t_path)
- t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
- local_thumbnails.append((
- media_id, t_width, t_height, t_type, t_method, t_len
+ elif t_method == "scale":
+ t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+ thumbnailer.scale,
+ t_width, t_height, t_type,
))
+ else:
+ logger.error("Unrecognized method: %r", t_method)
+ continue
- yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
-
- for l in local_thumbnails:
- yield self.store.store_local_thumbnail(*l)
-
- defer.returnValue({
- "width": m_width,
- "height": m_height,
- })
-
- @defer.inlineCallbacks
- def _generate_remote_thumbnails(self, server_name, media_id, media_info):
- media_type = media_info["media_type"]
- file_id = media_info["filesystem_id"]
- requirements = self._get_thumbnail_requirements(media_type)
- if not requirements:
- return
+ if not t_byte_source:
+ continue
- remote_thumbnails = []
+ try:
+ # Write to disk
+ output_path = yield self.write_to_file_and_backup(
+ t_byte_source, file_path,
+ )
+ finally:
+ t_byte_source.close()
- input_path = self.filepaths.remote_media_filepath(server_name, file_id)
- thumbnailer = Thumbnailer(input_path)
- m_width = thumbnailer.width
- m_height = thumbnailer.height
+ t_len = os.path.getsize(output_path)
- def generate_thumbnails():
- if m_width * m_height >= self.max_image_pixels:
- logger.info(
- "Image too large to thumbnail %r x %r > %r",
- m_width, m_height, self.max_image_pixels
- )
- return
-
- scales = set()
- crops = set()
- for r_width, r_height, r_method, r_type in requirements:
- if r_method == "scale":
- t_width, t_height = thumbnailer.aspect(r_width, r_height)
- scales.add((
- min(m_width, t_width), min(m_height, t_height), r_type,
- ))
- elif r_method == "crop":
- crops.add((r_width, r_height, r_type))
-
- for t_width, t_height, t_type in scales:
- t_method = "scale"
- t_path = self.filepaths.remote_media_thumbnail(
- server_name, file_id, t_width, t_height, t_type, t_method
- )
- self._makedirs(t_path)
- t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
- remote_thumbnails.append([
+ # Write to database
+ if server_name:
+ yield self.store.store_remote_media_thumbnail(
server_name, media_id, file_id,
t_width, t_height, t_type, t_method, t_len
- ])
-
- for t_width, t_height, t_type in crops:
- if (t_width, t_height, t_type) in scales:
- # If the aspect ratio of the cropped thumbnail matches a purely
- # scaled one then there is no point in calculating a separate
- # thumbnail.
- continue
- t_method = "crop"
- t_path = self.filepaths.remote_media_thumbnail(
- server_name, file_id, t_width, t_height, t_type, t_method
)
- self._makedirs(t_path)
- t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
- remote_thumbnails.append([
- server_name, media_id, file_id,
- t_width, t_height, t_type, t_method, t_len
- ])
-
- yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
-
- for r in remote_thumbnails:
- yield self.store.store_remote_media_thumbnail(*r)
+ else:
+ yield self.store.store_local_thumbnail(
+ media_id, t_width, t_height, t_type, t_method, t_len
+ )
defer.returnValue({
"width": m_width,
@@ -497,6 +544,8 @@ class MediaRepository(object):
logger.info("Deleting: %r", key)
+ # TODO: Should we delete from the backup store?
+
with (yield self.remote_media_linearizer.queue(key)):
full_path = self.filepaths.remote_media_filepath(origin, file_id)
try:
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index b81a336c5d..2a3e37fdf4 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -36,6 +36,9 @@ import cgi
import ujson as json
import urlparse
import itertools
+import datetime
+import errno
+import shutil
import logging
logger = logging.getLogger(__name__)
@@ -56,6 +59,7 @@ class PreviewUrlResource(Resource):
self.store = hs.get_datastore()
self.client = SpiderHttpClient(hs)
self.media_repo = media_repo
+ self.primary_base_path = media_repo.primary_base_path
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
@@ -70,6 +74,10 @@ class PreviewUrlResource(Resource):
self.downloads = {}
+ self._cleaner_loop = self.clock.looping_call(
+ self._expire_url_cache_data, 10 * 1000
+ )
+
def render_GET(self, request):
self._async_render_GET(request)
return NOT_DONE_YET
@@ -130,7 +138,7 @@ class PreviewUrlResource(Resource):
cache_result = yield self.store.get_url_cache(url, ts)
if (
cache_result and
- cache_result["download_ts"] + cache_result["expires"] > ts and
+ cache_result["expires_ts"] > ts and
cache_result["response_code"] / 100 == 2
):
respond_with_json_bytes(
@@ -163,8 +171,8 @@ class PreviewUrlResource(Resource):
logger.debug("got media_info of '%s'" % media_info)
if _is_media(media_info['media_type']):
- dims = yield self.media_repo._generate_local_thumbnails(
- media_info['filesystem_id'], media_info, url_cache=True,
+ dims = yield self.media_repo._generate_thumbnails(
+ None, media_info['filesystem_id'], media_info, url_cache=True,
)
og = {
@@ -209,8 +217,8 @@ class PreviewUrlResource(Resource):
if _is_media(image_info['media_type']):
# TODO: make sure we don't choke on white-on-transparent images
- dims = yield self.media_repo._generate_local_thumbnails(
- image_info['filesystem_id'], image_info, url_cache=True,
+ dims = yield self.media_repo._generate_thumbnails(
+ None, image_info['filesystem_id'], image_info, url_cache=True,
)
if dims:
og["og:image:width"] = dims['width']
@@ -239,7 +247,7 @@ class PreviewUrlResource(Resource):
url,
media_info["response_code"],
media_info["etag"],
- media_info["expires"],
+ media_info["expires"] + media_info["created_ts"],
json.dumps(og),
media_info["filesystem_id"],
media_info["created_ts"],
@@ -253,10 +261,10 @@ class PreviewUrlResource(Resource):
# we're most likely being explicitly triggered by a human rather than a
# bot, so are we really a robot?
- # XXX: horrible duplication with base_resource's _download_remote_file()
- file_id = random_string(24)
+ file_id = datetime.date.today().isoformat() + '_' + random_string(16)
- fname = self.filepaths.url_cache_filepath(file_id)
+ fpath = self.filepaths.url_cache_filepath_rel(file_id)
+ fname = os.path.join(self.primary_base_path, fpath)
self.media_repo._makedirs(fname)
try:
@@ -267,6 +275,8 @@ class PreviewUrlResource(Resource):
)
# FIXME: pass through 404s and other error messages nicely
+ yield self.media_repo.copy_to_backup(fpath)
+
media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec()
@@ -328,6 +338,91 @@ class PreviewUrlResource(Resource):
"etag": headers["ETag"][0] if "ETag" in headers else None,
})
+    @defer.inlineCallbacks
+    def _expire_url_cache_data(self):
+        """Clean up expired url cache content, media and thumbnails.
+
+        Deleting from disk is best-effort. A file or thumbnail directory that
+        is already missing counts as removed. Any other failure to delete is
+        logged and that media id is left out of the list handed to the store
+        for deletion.
+        """
+
+        # TODO: Delete from backup media store
+
+        now = self.clock.time_msec()
+
+        # First we delete expired url cache entries
+        media_ids = yield self.store.get_expired_url_cache(now)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except OSError as e:
+                # If the path doesn't exist, meh
+                if e.errno != errno.ENOENT:
+                    logger.warn("Failed to remove media: %r: %s", media_id, e)
+                    continue
+
+            removed_media.append(media_id)
+
+            self._try_rmdirs(
+                self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            )
+
+        yield self.store.delete_url_cache(removed_media)
+
+        if removed_media:
+            logger.info("Deleted %d entries from url cache", len(removed_media))
+
+        # Now we delete old images associated with the url cache.
+        # These may be cached for a bit on the client (i.e., they
+        # may have a room open with a preview url thing open).
+        # So we wait a couple of days before deleting, just in case.
+        expire_before = now - 2 * 24 * 60 * 60 * 1000
+        media_ids = yield self.store.get_url_cache_media_before(expire_before)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except OSError as e:
+                # If the path doesn't exist, meh
+                if e.errno != errno.ENOENT:
+                    logger.warn("Failed to remove media: %r: %s", media_id, e)
+                    continue
+
+            self._try_rmdirs(
+                self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            )
+
+            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
+            try:
+                shutil.rmtree(thumbnail_dir)
+            except OSError as e:
+                # If the path doesn't exist, meh
+                if e.errno != errno.ENOENT:
+                    logger.warn("Failed to remove media: %r: %s", media_id, e)
+                    continue
+
+            removed_media.append(media_id)
+
+            self._try_rmdirs(
+                self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+            )
+
+        yield self.store.delete_url_cache_media(removed_media)
+
+        if removed_media:
+            logger.info("Deleted %d media from url cache", len(removed_media))
+
+    @staticmethod
+    def _try_rmdirs(dirs):
+        """Best-effort removal of directories left behind by a deleted file.
+
+        The directories are tried in the order given and we stop at the first
+        one that cannot be removed (for instance because it still has other
+        entries in it), since os.rmdir only removes empty directories.
+
+        Args:
+            dirs (list[str]): directory paths to try to remove, in order
+        """
+        try:
+            for dir_path in dirs:
+                os.rmdir(dir_path)
+        except OSError:
+            pass
+
def decode_and_calc_og(body, media_uri, request_encoding=None):
from lxml import etree
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 3868d4f65f..e1ee535b9a 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -50,12 +50,16 @@ class Thumbnailer(object):
else:
return ((max_height * self.width) // self.height, max_height)
- def scale(self, output_path, width, height, output_type):
- """Rescales the image to the given dimensions"""
+ def scale(self, width, height, output_type):
+ """Rescales the image to the given dimensions.
+
+ Returns:
+ BytesIO: the bytes of the encoded image ready to be written to disk
+ """
scaled = self.image.resize((width, height), Image.ANTIALIAS)
- return self.save_image(scaled, output_type, output_path)
+ return self._encode_image(scaled, output_type)
- def crop(self, output_path, width, height, output_type):
+ def crop(self, width, height, output_type):
"""Rescales and crops the image to the given dimensions preserving
aspect::
(w_in / h_in) = (w_scaled / h_scaled)
@@ -65,6 +69,9 @@ class Thumbnailer(object):
Args:
max_width: The largest possible width.
max_height: The larget possible height.
+
+ Returns:
+ BytesIO: the bytes of the encoded image ready to be written to disk
"""
if width * self.height > height * self.width:
scaled_height = (width * self.height) // self.width
@@ -82,13 +89,9 @@ class Thumbnailer(object):
crop_left = (scaled_width - width) // 2
crop_right = width + crop_left
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
- return self.save_image(cropped, output_type, output_path)
+ return self._encode_image(cropped, output_type)
- def save_image(self, output_image, output_type, output_path):
+ def _encode_image(self, output_image, output_type):
output_bytes_io = BytesIO()
output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80)
- output_bytes = output_bytes_io.getvalue()
- with open(output_path, "wb") as output_file:
- output_file.write(output_bytes)
- logger.info("Stored thumbnail in file %r", output_path)
- return len(output_bytes)
+ return output_bytes_io
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 4ab33f73bf..f6f498cdc5 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -93,7 +93,7 @@ class UploadResource(Resource):
# TODO(markjh): parse content-dispostion
content_uri = yield self.media_repo.create_content(
- media_type, upload_name, request.content.read(),
+ media_type, upload_name, request.content,
content_length, requester.user
)
|