summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2022-06-01 11:57:49 +0100
committerGitHub <noreply@github.com>2022-06-01 10:57:49 +0000
commit5949ab86f8db0ef3dac2063e42210030f17786fb (patch)
tree14b89e4604c5a2ccbc3f6acdcbd7d8f144403f03
parentRemove most groups datastore code. (#12895) (diff)
downloadsynapse-5949ab86f8db0ef3dac2063e42210030f17786fb.tar.xz
Fix potential thumbnail memory leaks. (#12932)
-rw-r--r--changelog.d/12932.bugfix1
-rw-r--r--synapse/rest/media/v1/media_repository.py265
-rw-r--r--synapse/rest/media/v1/thumbnailer.py71
3 files changed, 202 insertions, 135 deletions
diff --git a/changelog.d/12932.bugfix b/changelog.d/12932.bugfix
new file mode 100644
index 0000000000..506f92b427
--- /dev/null
+++ b/changelog.d/12932.bugfix
@@ -0,0 +1 @@
+Fix potential memory leak when generating thumbnails.
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 20af366538..a551458a9f 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -587,15 +587,16 @@ class MediaRepository:
             )
             return None
 
-        t_byte_source = await defer_to_thread(
-            self.hs.get_reactor(),
-            self._generate_thumbnail,
-            thumbnailer,
-            t_width,
-            t_height,
-            t_method,
-            t_type,
-        )
+        with thumbnailer:
+            t_byte_source = await defer_to_thread(
+                self.hs.get_reactor(),
+                self._generate_thumbnail,
+                thumbnailer,
+                t_width,
+                t_height,
+                t_method,
+                t_type,
+            )
 
         if t_byte_source:
             try:
@@ -657,15 +658,16 @@ class MediaRepository:
             )
             return None
 
-        t_byte_source = await defer_to_thread(
-            self.hs.get_reactor(),
-            self._generate_thumbnail,
-            thumbnailer,
-            t_width,
-            t_height,
-            t_method,
-            t_type,
-        )
+        with thumbnailer:
+            t_byte_source = await defer_to_thread(
+                self.hs.get_reactor(),
+                self._generate_thumbnail,
+                thumbnailer,
+                t_width,
+                t_height,
+                t_method,
+                t_type,
+            )
 
         if t_byte_source:
             try:
@@ -749,119 +751,134 @@ class MediaRepository:
             )
             return None
 
-        m_width = thumbnailer.width
-        m_height = thumbnailer.height
-
-        if m_width * m_height >= self.max_image_pixels:
-            logger.info(
-                "Image too large to thumbnail %r x %r > %r",
-                m_width,
-                m_height,
-                self.max_image_pixels,
-            )
-            return None
-
-        if thumbnailer.transpose_method is not None:
-            m_width, m_height = await defer_to_thread(
-                self.hs.get_reactor(), thumbnailer.transpose
-            )
+        with thumbnailer:
+            m_width = thumbnailer.width
+            m_height = thumbnailer.height
 
-        # We deduplicate the thumbnail sizes by ignoring the cropped versions if
-        # they have the same dimensions of a scaled one.
-        thumbnails: Dict[Tuple[int, int, str], str] = {}
-        for requirement in requirements:
-            if requirement.method == "crop":
-                thumbnails.setdefault(
-                    (requirement.width, requirement.height, requirement.media_type),
-                    requirement.method,
-                )
-            elif requirement.method == "scale":
-                t_width, t_height = thumbnailer.aspect(
-                    requirement.width, requirement.height
+            if m_width * m_height >= self.max_image_pixels:
+                logger.info(
+                    "Image too large to thumbnail %r x %r > %r",
+                    m_width,
+                    m_height,
+                    self.max_image_pixels,
                 )
-                t_width = min(m_width, t_width)
-                t_height = min(m_height, t_height)
-                thumbnails[
-                    (t_width, t_height, requirement.media_type)
-                ] = requirement.method
-
-        # Now we generate the thumbnails for each dimension, store it
-        for (t_width, t_height, t_type), t_method in thumbnails.items():
-            # Generate the thumbnail
-            if t_method == "crop":
-                t_byte_source = await defer_to_thread(
-                    self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type
+                return None
+
+            if thumbnailer.transpose_method is not None:
+                m_width, m_height = await defer_to_thread(
+                    self.hs.get_reactor(), thumbnailer.transpose
                 )
-            elif t_method == "scale":
-                t_byte_source = await defer_to_thread(
-                    self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type
+
+            # We deduplicate the thumbnail sizes by ignoring the cropped versions if
+            # they have the same dimensions of a scaled one.
+            thumbnails: Dict[Tuple[int, int, str], str] = {}
+            for requirement in requirements:
+                if requirement.method == "crop":
+                    thumbnails.setdefault(
+                        (requirement.width, requirement.height, requirement.media_type),
+                        requirement.method,
+                    )
+                elif requirement.method == "scale":
+                    t_width, t_height = thumbnailer.aspect(
+                        requirement.width, requirement.height
+                    )
+                    t_width = min(m_width, t_width)
+                    t_height = min(m_height, t_height)
+                    thumbnails[
+                        (t_width, t_height, requirement.media_type)
+                    ] = requirement.method
+
+            # Now we generate the thumbnails for each dimension, store it
+            for (t_width, t_height, t_type), t_method in thumbnails.items():
+                # Generate the thumbnail
+                if t_method == "crop":
+                    t_byte_source = await defer_to_thread(
+                        self.hs.get_reactor(),
+                        thumbnailer.crop,
+                        t_width,
+                        t_height,
+                        t_type,
+                    )
+                elif t_method == "scale":
+                    t_byte_source = await defer_to_thread(
+                        self.hs.get_reactor(),
+                        thumbnailer.scale,
+                        t_width,
+                        t_height,
+                        t_type,
+                    )
+                else:
+                    logger.error("Unrecognized method: %r", t_method)
+                    continue
+
+                if not t_byte_source:
+                    continue
+
+                file_info = FileInfo(
+                    server_name=server_name,
+                    file_id=file_id,
+                    url_cache=url_cache,
+                    thumbnail=ThumbnailInfo(
+                        width=t_width,
+                        height=t_height,
+                        method=t_method,
+                        type=t_type,
+                    ),
                 )
-            else:
-                logger.error("Unrecognized method: %r", t_method)
-                continue
-
-            if not t_byte_source:
-                continue
-
-            file_info = FileInfo(
-                server_name=server_name,
-                file_id=file_id,
-                url_cache=url_cache,
-                thumbnail=ThumbnailInfo(
-                    width=t_width,
-                    height=t_height,
-                    method=t_method,
-                    type=t_type,
-                ),
-            )
 
-            with self.media_storage.store_into_file(file_info) as (f, fname, finish):
-                try:
-                    await self.media_storage.write_to_file(t_byte_source, f)
-                    await finish()
-                finally:
-                    t_byte_source.close()
-
-                t_len = os.path.getsize(fname)
-
-                # Write to database
-                if server_name:
-                    # Multiple remote media download requests can race (when
-                    # using multiple media repos), so this may throw a violation
-                    # constraint exception. If it does we'll delete the newly
-                    # generated thumbnail from disk (as we're in the ctx
-                    # manager).
-                    #
-                    # However: we've already called `finish()` so we may have
-                    # also written to the storage providers. This is preferable
-                    # to the alternative where we call `finish()` *after* this,
-                    # where we could end up having an entry in the DB but fail
-                    # to write the files to the storage providers.
+                with self.media_storage.store_into_file(file_info) as (
+                    f,
+                    fname,
+                    finish,
+                ):
                     try:
-                        await self.store.store_remote_media_thumbnail(
-                            server_name,
-                            media_id,
-                            file_id,
-                            t_width,
-                            t_height,
-                            t_type,
-                            t_method,
-                            t_len,
-                        )
-                    except Exception as e:
-                        thumbnail_exists = await self.store.get_remote_media_thumbnail(
-                            server_name,
-                            media_id,
-                            t_width,
-                            t_height,
-                            t_type,
+                        await self.media_storage.write_to_file(t_byte_source, f)
+                        await finish()
+                    finally:
+                        t_byte_source.close()
+
+                    t_len = os.path.getsize(fname)
+
+                    # Write to database
+                    if server_name:
+                        # Multiple remote media download requests can race (when
+                        # using multiple media repos), so this may throw a violation
+                        # constraint exception. If it does we'll delete the newly
+                        # generated thumbnail from disk (as we're in the ctx
+                        # manager).
+                        #
+                        # However: we've already called `finish()` so we may have
+                        # also written to the storage providers. This is preferable
+                        # to the alternative where we call `finish()` *after* this,
+                        # where we could end up having an entry in the DB but fail
+                        # to write the files to the storage providers.
+                        try:
+                            await self.store.store_remote_media_thumbnail(
+                                server_name,
+                                media_id,
+                                file_id,
+                                t_width,
+                                t_height,
+                                t_type,
+                                t_method,
+                                t_len,
+                            )
+                        except Exception as e:
+                            thumbnail_exists = (
+                                await self.store.get_remote_media_thumbnail(
+                                    server_name,
+                                    media_id,
+                                    t_width,
+                                    t_height,
+                                    t_type,
+                                )
+                            )
+                            if not thumbnail_exists:
+                                raise e
+                    else:
+                        await self.store.store_local_thumbnail(
+                            media_id, t_width, t_height, t_type, t_method, t_len
                         )
-                        if not thumbnail_exists:
-                            raise e
-                else:
-                    await self.store.store_local_thumbnail(
-                        media_id, t_width, t_height, t_type, t_method, t_len
-                    )
 
         return {"width": m_width, "height": m_height}
 
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 390491eb83..9b93b9b4f6 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -14,7 +14,8 @@
 # limitations under the License.
 import logging
 from io import BytesIO
-from typing import Tuple
+from types import TracebackType
+from typing import Optional, Tuple, Type
 
 from PIL import Image
 
@@ -45,6 +46,9 @@ class Thumbnailer:
         Image.MAX_IMAGE_PIXELS = max_image_pixels
 
     def __init__(self, input_path: str):
+        # Have we closed the image?
+        self._closed = False
+
         try:
             self.image = Image.open(input_path)
         except OSError as e:
@@ -89,7 +93,8 @@ class Thumbnailer:
             # Safety: `transpose` takes an int rather than e.g. an IntEnum.
             # self.transpose_method is set above to be a value in
             # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
-            self.image = self.image.transpose(self.transpose_method)  # type: ignore[arg-type]
+            with self.image:
+                self.image = self.image.transpose(self.transpose_method)  # type: ignore[arg-type]
             self.width, self.height = self.image.size
             self.transpose_method = None
             # We don't need EXIF any more
@@ -122,9 +127,11 @@ class Thumbnailer:
         # If the image has transparency, use RGBA instead.
         if self.image.mode in ["1", "L", "P"]:
             if self.image.info.get("transparency", None) is not None:
-                self.image = self.image.convert("RGBA")
+                with self.image:
+                    self.image = self.image.convert("RGBA")
             else:
-                self.image = self.image.convert("RGB")
+                with self.image:
+                    self.image = self.image.convert("RGB")
         return self.image.resize((width, height), Image.ANTIALIAS)
 
     def scale(self, width: int, height: int, output_type: str) -> BytesIO:
@@ -133,8 +140,8 @@ class Thumbnailer:
         Returns:
             BytesIO: the bytes of the encoded image ready to be written to disk
         """
-        scaled = self._resize(width, height)
-        return self._encode_image(scaled, output_type)
+        with self._resize(width, height) as scaled:
+            return self._encode_image(scaled, output_type)
 
     def crop(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales and crops the image to the given dimensions preserving
@@ -151,18 +158,21 @@ class Thumbnailer:
             BytesIO: the bytes of the encoded image ready to be written to disk
         """
         if width * self.height > height * self.width:
+            scaled_width = width
             scaled_height = (width * self.height) // self.width
-            scaled_image = self._resize(width, scaled_height)
             crop_top = (scaled_height - height) // 2
             crop_bottom = height + crop_top
-            cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
+            crop = (0, crop_top, width, crop_bottom)
         else:
             scaled_width = (height * self.width) // self.height
-            scaled_image = self._resize(scaled_width, height)
+            scaled_height = height
             crop_left = (scaled_width - width) // 2
             crop_right = width + crop_left
-            cropped = scaled_image.crop((crop_left, 0, crop_right, height))
-        return self._encode_image(cropped, output_type)
+            crop = (crop_left, 0, crop_right, height)
+
+        with self._resize(scaled_width, scaled_height) as scaled_image:
+            with scaled_image.crop(crop) as cropped:
+                return self._encode_image(cropped, output_type)
 
     def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
         output_bytes_io = BytesIO()
@@ -171,3 +181,42 @@ class Thumbnailer:
             output_image = output_image.convert("RGB")
         output_image.save(output_bytes_io, fmt, quality=80)
         return output_bytes_io
+
+    def close(self) -> None:
+        """Closes the underlying image file.
+
+        Once closed no other functions can be called.
+
+        Can be called multiple times.
+        """
+
+        if self._closed:
+            return
+
+        self._closed = True
+
+        # Since we run this on the finalizer then we need to handle `__init__`
+        # raising an exception before it can define `self.image`.
+        image = getattr(self, "image", None)
+        if image is None:
+            return
+
+        image.close()
+
+    def __enter__(self) -> "Thumbnailer":
+        """Make `Thumbnailer` a context manager that calls `close` on
+        `__exit__`.
+        """
+        return self
+
+    def __exit__(
+        self,
+        type: Optional[Type[BaseException]],
+        value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> None:
+        self.close()
+
+    def __del__(self) -> None:
+        # Make sure we actually do close the image, rather than leak data.
+        self.close()