diff --git a/changelog.d/17213.feature b/changelog.d/17213.feature
new file mode 100644
index 0000000000..ca60afa8f3
--- /dev/null
+++ b/changelog.d/17213.feature
@@ -0,0 +1 @@
+Support MSC3916 by adding unstable media endpoints to `_matrix/client` (#17213).
\ No newline at end of file
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index cda7afc5c4..75fe6d7b24 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -439,3 +439,7 @@ class ExperimentalConfig(Config):
self.msc4115_membership_on_events = experimental.get(
"msc4115_membership_on_events", False
)
+
+ self.msc3916_authenticated_media_enabled = experimental.get(
+ "msc3916_authenticated_media_enabled", False
+ )
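The flag defaults to `False`, so the new client-side media endpoints stay hidden unless it is switched on under `experimental_features`. A minimal sketch (not part of this diff) of enabling it the way the tests below do; the test-case name is hypothetical:

```python
# Sketch only: enable the flag via the override helper used by the tests in
# this PR. In a deployment the equivalent setting lives under
# `experimental_features` in homeserver.yaml.
from tests import unittest
from tests.unittest import override_config


class Msc3916FlagSketch(unittest.HomeserverTestCase):
    @override_config(
        {"experimental_features": {"msc3916_authenticated_media_enabled": True}}
    )
    def test_flag_is_read(self) -> None:
        self.assertTrue(
            self.hs.config.experimental.msc3916_authenticated_media_enabled
        )
```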
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index 5538020bec..cc3acf51e1 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -22,11 +22,27 @@
import logging
from io import BytesIO
from types import TracebackType
-from typing import Optional, Tuple, Type
+from typing import TYPE_CHECKING, List, Optional, Tuple, Type
from PIL import Image
+from synapse.api.errors import Codes, SynapseError, cs_error
+from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
+from synapse.http.server import respond_with_json
+from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import trace
+from synapse.media._base import (
+ FileInfo,
+ ThumbnailInfo,
+ respond_404,
+ respond_with_file,
+ respond_with_responder,
+)
+from synapse.media.media_storage import MediaStorage
+
+if TYPE_CHECKING:
+ from synapse.media.media_repository import MediaRepository
+ from synapse.server import HomeServer
logger = logging.getLogger(__name__)
@@ -231,3 +247,471 @@ class Thumbnailer:
def __del__(self) -> None:
# Make sure we actually do close the image, rather than leak data.
self.close()
+
+
+class ThumbnailProvider:
+    """Selects and serves (generating where necessary) thumbnails for media.
+
+    Shared between the media repository's thumbnail resource and the unstable
+    MSC3916 client media endpoints.
+    """
+ def __init__(
+ self,
+ hs: "HomeServer",
+ media_repo: "MediaRepository",
+ media_storage: MediaStorage,
+ ):
+ self.hs = hs
+ self.media_repo = media_repo
+ self.media_storage = media_storage
+ self.store = hs.get_datastores().main
+ self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+
+ async def respond_local_thumbnail(
+ self,
+ request: SynapseRequest,
+ media_id: str,
+ width: int,
+ height: int,
+ method: str,
+ m_type: str,
+ max_timeout_ms: int,
+ ) -> None:
+ media_info = await self.media_repo.get_local_media_info(
+ request, media_id, max_timeout_ms
+ )
+ if not media_info:
+ return
+
+ thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
+ await self._select_and_respond_with_thumbnail(
+ request,
+ width,
+ height,
+ method,
+ m_type,
+ thumbnail_infos,
+ media_id,
+ media_id,
+ url_cache=bool(media_info.url_cache),
+ server_name=None,
+ )
+
+ async def select_or_generate_local_thumbnail(
+ self,
+ request: SynapseRequest,
+ media_id: str,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ max_timeout_ms: int,
+ ) -> None:
+ media_info = await self.media_repo.get_local_media_info(
+ request, media_id, max_timeout_ms
+ )
+
+ if not media_info:
+ return
+
+ thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
+ for info in thumbnail_infos:
+ t_w = info.width == desired_width
+ t_h = info.height == desired_height
+ t_method = info.method == desired_method
+ t_type = info.type == desired_type
+
+ if t_w and t_h and t_method and t_type:
+ file_info = FileInfo(
+ server_name=None,
+ file_id=media_id,
+ url_cache=bool(media_info.url_cache),
+ thumbnail=info,
+ )
+
+ responder = await self.media_storage.fetch_media(file_info)
+ if responder:
+ await respond_with_responder(
+ request, responder, info.type, info.length
+ )
+ return
+
+ logger.debug("We don't have a thumbnail of that size. Generating")
+
+ # Okay, so we generate one.
+ file_path = await self.media_repo.generate_local_exact_thumbnail(
+ media_id,
+ desired_width,
+ desired_height,
+ desired_method,
+ desired_type,
+ url_cache=bool(media_info.url_cache),
+ )
+
+ if file_path:
+ await respond_with_file(request, desired_type, file_path)
+ else:
+ logger.warning("Failed to generate thumbnail")
+ raise SynapseError(400, "Failed to generate thumbnail.")
+
+ async def select_or_generate_remote_thumbnail(
+ self,
+ request: SynapseRequest,
+ server_name: str,
+ media_id: str,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ max_timeout_ms: int,
+ ) -> None:
+ media_info = await self.media_repo.get_remote_media_info(
+ server_name, media_id, max_timeout_ms
+ )
+ if not media_info:
+ respond_404(request)
+ return
+
+ thumbnail_infos = await self.store.get_remote_media_thumbnails(
+ server_name, media_id
+ )
+
+ file_id = media_info.filesystem_id
+
+ for info in thumbnail_infos:
+ t_w = info.width == desired_width
+ t_h = info.height == desired_height
+ t_method = info.method == desired_method
+ t_type = info.type == desired_type
+
+ if t_w and t_h and t_method and t_type:
+ file_info = FileInfo(
+ server_name=server_name,
+ file_id=file_id,
+ thumbnail=info,
+ )
+
+ responder = await self.media_storage.fetch_media(file_info)
+ if responder:
+ await respond_with_responder(
+ request, responder, info.type, info.length
+ )
+ return
+
+ logger.debug("We don't have a thumbnail of that size. Generating")
+
+ # Okay, so we generate one.
+ file_path = await self.media_repo.generate_remote_exact_thumbnail(
+ server_name,
+ file_id,
+ media_id,
+ desired_width,
+ desired_height,
+ desired_method,
+ desired_type,
+ )
+
+ if file_path:
+ await respond_with_file(request, desired_type, file_path)
+ else:
+ logger.warning("Failed to generate thumbnail")
+ raise SynapseError(400, "Failed to generate thumbnail.")
+
+ async def respond_remote_thumbnail(
+ self,
+ request: SynapseRequest,
+ server_name: str,
+ media_id: str,
+ width: int,
+ height: int,
+ method: str,
+ m_type: str,
+ max_timeout_ms: int,
+ ) -> None:
+ # TODO: Don't download the whole remote file
+ # We should proxy the thumbnail from the remote server instead of
+ # downloading the remote file and generating our own thumbnails.
+ media_info = await self.media_repo.get_remote_media_info(
+ server_name, media_id, max_timeout_ms
+ )
+ if not media_info:
+ return
+
+ thumbnail_infos = await self.store.get_remote_media_thumbnails(
+ server_name, media_id
+ )
+ await self._select_and_respond_with_thumbnail(
+ request,
+ width,
+ height,
+ method,
+ m_type,
+ thumbnail_infos,
+ media_id,
+ media_info.filesystem_id,
+ url_cache=False,
+ server_name=server_name,
+ )
+
+ async def _select_and_respond_with_thumbnail(
+ self,
+ request: SynapseRequest,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ thumbnail_infos: List[ThumbnailInfo],
+ media_id: str,
+ file_id: str,
+ url_cache: bool,
+ server_name: Optional[str] = None,
+ ) -> None:
+ """
+ Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
+
+ Args:
+ request: The incoming request.
+ desired_width: The desired width, the returned thumbnail may be larger than this.
+ desired_height: The desired height, the returned thumbnail may be larger than this.
+ desired_method: The desired method used to generate the thumbnail.
+ desired_type: The desired content-type of the thumbnail.
+            thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
+            media_id: The ID of the media that a thumbnail is being requested for.
+            file_id: The ID of the file under which the media and its thumbnails are stored.
+ url_cache: True if this is from a URL cache.
+ server_name: The server name, if this is a remote thumbnail.
+ """
+ logger.debug(
+ "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
+ media_id,
+ desired_width,
+ desired_height,
+ desired_method,
+ thumbnail_infos,
+ )
+
+ # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
+ # different code path to handle it.
+ assert not self.dynamic_thumbnails
+
+ if thumbnail_infos:
+ file_info = self._select_thumbnail(
+ desired_width,
+ desired_height,
+ desired_method,
+ desired_type,
+ thumbnail_infos,
+ file_id,
+ url_cache,
+ server_name,
+ )
+ if not file_info:
+ logger.info("Couldn't find a thumbnail matching the desired inputs")
+ respond_404(request)
+ return
+
+ # The thumbnail property must exist.
+ assert file_info.thumbnail is not None
+
+ responder = await self.media_storage.fetch_media(file_info)
+ if responder:
+ await respond_with_responder(
+ request,
+ responder,
+ file_info.thumbnail.type,
+ file_info.thumbnail.length,
+ )
+ return
+
+ # If we can't find the thumbnail we regenerate it. This can happen
+ # if e.g. we've deleted the thumbnails but still have the original
+ # image somewhere.
+ #
+            # Since we have an entry for the thumbnail in the DB we a) know we
+            # have successfully generated the thumbnail in the past (so we
+            # don't need to worry about repeatedly failing to generate
+            # thumbnails), and b) have already calculated the appropriate
+            # width/height/method so we can just call the "generate exact"
+            # methods.
+
+ # First let's check that we do actually have the original image
+ # still. This will throw a 404 if we don't.
+ # TODO: We should refetch the thumbnails for remote media.
+ await self.media_storage.ensure_media_is_in_local_cache(
+ FileInfo(server_name, file_id, url_cache=url_cache)
+ )
+
+ if server_name:
+ await self.media_repo.generate_remote_exact_thumbnail(
+ server_name,
+ file_id=file_id,
+ media_id=media_id,
+ t_width=file_info.thumbnail.width,
+ t_height=file_info.thumbnail.height,
+ t_method=file_info.thumbnail.method,
+ t_type=file_info.thumbnail.type,
+ )
+ else:
+ await self.media_repo.generate_local_exact_thumbnail(
+ media_id=media_id,
+ t_width=file_info.thumbnail.width,
+ t_height=file_info.thumbnail.height,
+ t_method=file_info.thumbnail.method,
+ t_type=file_info.thumbnail.type,
+ url_cache=url_cache,
+ )
+
+ responder = await self.media_storage.fetch_media(file_info)
+ await respond_with_responder(
+ request,
+ responder,
+ file_info.thumbnail.type,
+ file_info.thumbnail.length,
+ )
+ else:
+ # This might be because:
+ # 1. We can't create thumbnails for the given media (corrupted or
+ # unsupported file type), or
+ # 2. The thumbnailing process never ran or errored out initially
+ # when the media was first uploaded (these bugs should be
+ # reported and fixed).
+ # Note that we don't attempt to generate a thumbnail now because
+ # `dynamic_thumbnails` is disabled.
+ logger.info("Failed to find any generated thumbnails")
+
+ assert request.path is not None
+ respond_with_json(
+ request,
+ 400,
+ cs_error(
+ "Cannot find any thumbnails for the requested media ('%s'). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
+ % (
+ request.path.decode(),
+ ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
+ ),
+ code=Codes.UNKNOWN,
+ ),
+ send_cors=True,
+ )
+
+ def _select_thumbnail(
+ self,
+ desired_width: int,
+ desired_height: int,
+ desired_method: str,
+ desired_type: str,
+ thumbnail_infos: List[ThumbnailInfo],
+ file_id: str,
+ url_cache: bool,
+ server_name: Optional[str],
+ ) -> Optional[FileInfo]:
+ """
+ Choose an appropriate thumbnail from the previously generated thumbnails.
+
+ Args:
+ desired_width: The desired width, the returned thumbnail may be larger than this.
+ desired_height: The desired height, the returned thumbnail may be larger than this.
+ desired_method: The desired method used to generate the thumbnail.
+ desired_type: The desired content-type of the thumbnail.
+ thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
+ file_id: The ID of the media that a thumbnail is being requested for.
+ url_cache: True if this is from a URL cache.
+ server_name: The server name, if this is a remote thumbnail.
+
+ Returns:
+ The thumbnail which best matches the desired parameters.
+ """
+ desired_method = desired_method.lower()
+
+ # The chosen thumbnail.
+ thumbnail_info = None
+
+ d_w = desired_width
+ d_h = desired_height
+
+ if desired_method == "crop":
+ # Thumbnails that match equal or larger sizes of desired width/height.
+ crop_info_list: List[
+ Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
+ ] = []
+ # Other thumbnails.
+ crop_info_list2: List[
+ Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
+ ] = []
+ for info in thumbnail_infos:
+ # Skip thumbnails generated with different methods.
+ if info.method != "crop":
+ continue
+
+ t_w = info.width
+ t_h = info.height
+ aspect_quality = abs(d_w * t_h - d_h * t_w)
+ min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
+ size_quality = abs((d_w - t_w) * (d_h - t_h))
+ type_quality = desired_type != info.type
+ length_quality = info.length
+ if t_w >= d_w or t_h >= d_h:
+ crop_info_list.append(
+ (
+ aspect_quality,
+ min_quality,
+ size_quality,
+ type_quality,
+ length_quality,
+ info,
+ )
+ )
+ else:
+ crop_info_list2.append(
+ (
+ aspect_quality,
+ min_quality,
+ size_quality,
+ type_quality,
+ length_quality,
+ info,
+ )
+ )
+ # Pick the most appropriate thumbnail. Some values of `desired_width` and
+ # `desired_height` may result in a tie, in which case we avoid comparing on
+ # the thumbnail info and pick the thumbnail that appears earlier
+ # in the list of candidates.
+ if crop_info_list:
+ thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
+ elif crop_info_list2:
+ thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
+ elif desired_method == "scale":
+ # Thumbnails that match equal or larger sizes of desired width/height.
+ info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = []
+ # Other thumbnails.
+ info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = []
+
+ for info in thumbnail_infos:
+ # Skip thumbnails generated with different methods.
+ if info.method != "scale":
+ continue
+
+ t_w = info.width
+ t_h = info.height
+ size_quality = abs((d_w - t_w) * (d_h - t_h))
+ type_quality = desired_type != info.type
+ length_quality = info.length
+ if t_w >= d_w or t_h >= d_h:
+ info_list.append((size_quality, type_quality, length_quality, info))
+ else:
+ info_list2.append(
+ (size_quality, type_quality, length_quality, info)
+ )
+ # Pick the most appropriate thumbnail. Some values of `desired_width` and
+ # `desired_height` may result in a tie, in which case we avoid comparing on
+ # the thumbnail info and pick the thumbnail that appears earlier
+ # in the list of candidates.
+ if info_list:
+ thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
+ elif info_list2:
+ thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
+
+ if thumbnail_info:
+ return FileInfo(
+ file_id=file_id,
+ url_cache=url_cache,
+ server_name=server_name,
+ thumbnail=thumbnail_info,
+ )
+
+ # No matching thumbnail was found.
+ return None
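To make the selection logic concrete: in the crop branch above, each candidate is scored as a tuple `(aspect_quality, min_quality, size_quality, type_quality, length_quality, info)`, and `min(..., key=lambda t: t[:-1])` compares every field except the trailing info object, so ties fall back to candidate order. A standalone sketch with made-up sizes, using plain tuples in place of `ThumbnailInfo`:

```python
# Standalone sketch of the crop ranking; the candidate sizes/types are
# invented for illustration, not taken from this PR.
desired_w, desired_h, desired_type = 96, 96, "image/png"

candidates = [
    # (width, height, content_type, length_in_bytes)
    (128, 128, "image/jpeg", 4096),
    (96, 96, "image/png", 2048),
    (320, 240, "image/png", 9000),
]

ranked = []
for t_w, t_h, t_type, length in candidates:
    aspect_quality = abs(desired_w * t_h - desired_h * t_w)
    min_quality = 0 if desired_w <= t_w and desired_h <= t_h else 1
    size_quality = abs((desired_w - t_w) * (desired_h - t_h))
    type_quality = desired_type != t_type
    ranked.append(
        (aspect_quality, min_quality, size_quality, type_quality, length, (t_w, t_h, t_type))
    )

# The 96x96 PNG wins: exact aspect ratio, exact size, matching content type.
best = min(ranked, key=lambda t: t[:-1])[-1]
print(best)  # (96, 96, 'image/png')
```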
diff --git a/synapse/rest/client/media.py b/synapse/rest/client/media.py
new file mode 100644
index 0000000000..172d240783
--- /dev/null
+++ b/synapse/rest/client/media.py
@@ -0,0 +1,205 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright (C) 2024 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+
+import logging
+import re
+
+from synapse.http.server import (
+ HttpServer,
+ respond_with_json,
+ respond_with_json_bytes,
+ set_corp_headers,
+ set_cors_headers,
+)
+from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.site import SynapseRequest
+from synapse.media._base import (
+ DEFAULT_MAX_TIMEOUT_MS,
+ MAXIMUM_ALLOWED_MAX_TIMEOUT_MS,
+ respond_404,
+)
+from synapse.media.media_repository import MediaRepository
+from synapse.media.media_storage import MediaStorage
+from synapse.media.thumbnailer import ThumbnailProvider
+from synapse.server import HomeServer
+from synapse.util.stringutils import parse_and_validate_server_name
+
+logger = logging.getLogger(__name__)
+
+
+class UnstablePreviewURLServlet(RestServlet):
+ """
+    Same as `GET /_matrix/media/r0/preview_url`, this endpoint provides a generic
+    preview API for URLs, returning Open Graph (https://ogp.me/) responses (with
+    some Matrix-specific additions).
+
+ This does have trade-offs compared to other designs:
+
+ * Pros:
+      * Simple and flexible; can be used by any client at any point
+ * Cons:
+ * If each homeserver provides one of these independently, all the homeservers in a
+ room may needlessly DoS the target URI
+ * The URL metadata must be stored somewhere, rather than just using Matrix
+ itself to store the media.
+ * Matrix cannot be used to distribute the metadata between homeservers.
+ """
+
+ PATTERNS = [
+ re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/preview_url$")
+ ]
+
+ def __init__(
+ self,
+ hs: "HomeServer",
+ media_repo: "MediaRepository",
+ media_storage: MediaStorage,
+ ):
+ super().__init__()
+
+ self.auth = hs.get_auth()
+ self.clock = hs.get_clock()
+ self.media_repo = media_repo
+ self.media_storage = media_storage
+ assert self.media_repo.url_previewer is not None
+ self.url_previewer = self.media_repo.url_previewer
+
+ async def on_GET(self, request: SynapseRequest) -> None:
+ requester = await self.auth.get_user_by_req(request)
+ url = parse_string(request, "url", required=True)
+ ts = parse_integer(request, "ts")
+ if ts is None:
+ ts = self.clock.time_msec()
+
+ og = await self.url_previewer.preview(url, requester.user, ts)
+ respond_with_json_bytes(request, 200, og, send_cors=True)
+
+
+class UnstableMediaConfigResource(RestServlet):
+ PATTERNS = [
+ re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/config$")
+ ]
+
+ def __init__(self, hs: "HomeServer"):
+ super().__init__()
+ config = hs.config
+ self.clock = hs.get_clock()
+ self.auth = hs.get_auth()
+ self.limits_dict = {"m.upload.size": config.media.max_upload_size}
+
+ async def on_GET(self, request: SynapseRequest) -> None:
+ await self.auth.get_user_by_req(request)
+ respond_with_json(request, 200, self.limits_dict, send_cors=True)
+
+
+class UnstableThumbnailResource(RestServlet):
+    """
+    The thumbnail endpoint from `/_matrix/media`, mirrored on the unstable
+    MSC3916 client path and requiring authentication.
+    """
+ PATTERNS = [
+ re.compile(
+ "/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$"
+ )
+ ]
+
+ def __init__(
+ self,
+ hs: "HomeServer",
+ media_repo: "MediaRepository",
+ media_storage: MediaStorage,
+ ):
+ super().__init__()
+
+ self.store = hs.get_datastores().main
+ self.media_repo = media_repo
+ self.media_storage = media_storage
+ self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+ self._is_mine_server_name = hs.is_mine_server_name
+ self._server_name = hs.hostname
+ self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
+ self.thumbnailer = ThumbnailProvider(hs, media_repo, media_storage)
+ self.auth = hs.get_auth()
+
+ async def on_GET(
+ self, request: SynapseRequest, server_name: str, media_id: str
+ ) -> None:
+ # Validate the server name, raising if invalid
+ parse_and_validate_server_name(server_name)
+ await self.auth.get_user_by_req(request)
+
+ set_cors_headers(request)
+ set_corp_headers(request)
+ width = parse_integer(request, "width", required=True)
+ height = parse_integer(request, "height", required=True)
+ method = parse_string(request, "method", "scale")
+        # TODO Parse the Accept header to get a prioritised list of thumbnail types.
+ m_type = "image/png"
+ max_timeout_ms = parse_integer(
+ request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS
+ )
+ max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS)
+
+ if self._is_mine_server_name(server_name):
+ if self.dynamic_thumbnails:
+ await self.thumbnailer.select_or_generate_local_thumbnail(
+ request, media_id, width, height, method, m_type, max_timeout_ms
+ )
+ else:
+ await self.thumbnailer.respond_local_thumbnail(
+ request, media_id, width, height, method, m_type, max_timeout_ms
+ )
+ self.media_repo.mark_recently_accessed(None, media_id)
+ else:
+ # Don't let users download media from configured domains, even if it
+ # is already downloaded. This is Trust & Safety tooling to make some
+ # media inaccessible to local users.
+ # See `prevent_media_downloads_from` config docs for more info.
+ if server_name in self.prevent_media_downloads_from:
+ respond_404(request)
+ return
+
+ remote_resp_function = (
+ self.thumbnailer.select_or_generate_remote_thumbnail
+ if self.dynamic_thumbnails
+ else self.thumbnailer.respond_remote_thumbnail
+ )
+ await remote_resp_function(
+ request,
+ server_name,
+ media_id,
+ width,
+ height,
+ method,
+ m_type,
+ max_timeout_ms,
+ )
+ self.media_repo.mark_recently_accessed(server_name, media_id)
+
+
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+ if hs.config.experimental.msc3916_authenticated_media_enabled:
+ media_repo = hs.get_media_repository()
+ if hs.config.media.url_preview_enabled:
+ UnstablePreviewURLServlet(
+ hs, media_repo, media_repo.media_storage
+ ).register(http_server)
+ UnstableMediaConfigResource(hs).register(http_server)
+ UnstableThumbnailResource(hs, media_repo, media_repo.media_storage).register(
+ http_server
+ )
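These servlets are only registered when the experimental flag is enabled, and every endpoint requires an access token. A hedged sketch of exercising the new unstable paths from a Synapse test (the class and test names are hypothetical, mirroring the tests added later in this diff):

```python
# Sketch of hitting the unstable client media endpoints in a test.
from synapse.rest import admin
from synapse.rest.client import login, media

from tests import unittest
from tests.unittest import override_config


class UnstableMediaEndpointSketch(unittest.HomeserverTestCase):
    servlets = [
        media.register_servlets,
        admin.register_servlets,
        login.register_servlets,
    ]

    @override_config(
        {"experimental_features": {"msc3916_authenticated_media_enabled": True}}
    )
    def test_media_config(self) -> None:
        self.register_user("user", "password")
        tok = self.login("user", "password")

        channel = self.make_request(
            "GET",
            "/_matrix/client/unstable/org.matrix.msc3916/media/config",
            shorthand=False,
            access_token=tok,
        )
        self.assertEqual(channel.code, 200)
        self.assertIn("m.upload.size", channel.json_body)
```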
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
index 7cb335c7c3..fe8fbb06e4 100644
--- a/synapse/rest/media/thumbnail_resource.py
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -22,23 +22,18 @@
import logging
import re
-from typing import TYPE_CHECKING, List, Optional, Tuple
+from typing import TYPE_CHECKING
-from synapse.api.errors import Codes, SynapseError, cs_error
-from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
-from synapse.http.server import respond_with_json, set_corp_headers, set_cors_headers
+from synapse.http.server import set_corp_headers, set_cors_headers
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.media._base import (
DEFAULT_MAX_TIMEOUT_MS,
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS,
- FileInfo,
- ThumbnailInfo,
respond_404,
- respond_with_file,
- respond_with_responder,
)
from synapse.media.media_storage import MediaStorage
+from synapse.media.thumbnailer import ThumbnailProvider
from synapse.util.stringutils import parse_and_validate_server_name
if TYPE_CHECKING:
@@ -66,10 +61,11 @@ class ThumbnailResource(RestServlet):
self.store = hs.get_datastores().main
self.media_repo = media_repo
self.media_storage = media_storage
- self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self._is_mine_server_name = hs.is_mine_server_name
self._server_name = hs.hostname
self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
+ self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+ self.thumbnail_provider = ThumbnailProvider(hs, media_repo, media_storage)
async def on_GET(
self, request: SynapseRequest, server_name: str, media_id: str
@@ -91,11 +87,11 @@ class ThumbnailResource(RestServlet):
if self._is_mine_server_name(server_name):
if self.dynamic_thumbnails:
- await self._select_or_generate_local_thumbnail(
+ await self.thumbnail_provider.select_or_generate_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
else:
- await self._respond_local_thumbnail(
+ await self.thumbnail_provider.respond_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
self.media_repo.mark_recently_accessed(None, media_id)
@@ -109,9 +105,9 @@ class ThumbnailResource(RestServlet):
return
remote_resp_function = (
- self._select_or_generate_remote_thumbnail
+ self.thumbnail_provider.select_or_generate_remote_thumbnail
if self.dynamic_thumbnails
- else self._respond_remote_thumbnail
+ else self.thumbnail_provider.respond_remote_thumbnail
)
await remote_resp_function(
request,
@@ -124,457 +120,3 @@ class ThumbnailResource(RestServlet):
max_timeout_ms,
)
self.media_repo.mark_recently_accessed(server_name, media_id)
-
- async def _respond_local_thumbnail(
- self,
- request: SynapseRequest,
- media_id: str,
- width: int,
- height: int,
- method: str,
- m_type: str,
- max_timeout_ms: int,
- ) -> None:
- media_info = await self.media_repo.get_local_media_info(
- request, media_id, max_timeout_ms
- )
- if not media_info:
- return
-
- thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
- await self._select_and_respond_with_thumbnail(
- request,
- width,
- height,
- method,
- m_type,
- thumbnail_infos,
- media_id,
- media_id,
- url_cache=bool(media_info.url_cache),
- server_name=None,
- )
-
- async def _select_or_generate_local_thumbnail(
- self,
- request: SynapseRequest,
- media_id: str,
- desired_width: int,
- desired_height: int,
- desired_method: str,
- desired_type: str,
- max_timeout_ms: int,
- ) -> None:
- media_info = await self.media_repo.get_local_media_info(
- request, media_id, max_timeout_ms
- )
-
- if not media_info:
- return
-
- thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
- for info in thumbnail_infos:
- t_w = info.width == desired_width
- t_h = info.height == desired_height
- t_method = info.method == desired_method
- t_type = info.type == desired_type
-
- if t_w and t_h and t_method and t_type:
- file_info = FileInfo(
- server_name=None,
- file_id=media_id,
- url_cache=bool(media_info.url_cache),
- thumbnail=info,
- )
-
- responder = await self.media_storage.fetch_media(file_info)
- if responder:
- await respond_with_responder(
- request, responder, info.type, info.length
- )
- return
-
- logger.debug("We don't have a thumbnail of that size. Generating")
-
- # Okay, so we generate one.
- file_path = await self.media_repo.generate_local_exact_thumbnail(
- media_id,
- desired_width,
- desired_height,
- desired_method,
- desired_type,
- url_cache=bool(media_info.url_cache),
- )
-
- if file_path:
- await respond_with_file(request, desired_type, file_path)
- else:
- logger.warning("Failed to generate thumbnail")
- raise SynapseError(400, "Failed to generate thumbnail.")
-
- async def _select_or_generate_remote_thumbnail(
- self,
- request: SynapseRequest,
- server_name: str,
- media_id: str,
- desired_width: int,
- desired_height: int,
- desired_method: str,
- desired_type: str,
- max_timeout_ms: int,
- ) -> None:
- media_info = await self.media_repo.get_remote_media_info(
- server_name, media_id, max_timeout_ms
- )
- if not media_info:
- respond_404(request)
- return
-
- thumbnail_infos = await self.store.get_remote_media_thumbnails(
- server_name, media_id
- )
-
- file_id = media_info.filesystem_id
-
- for info in thumbnail_infos:
- t_w = info.width == desired_width
- t_h = info.height == desired_height
- t_method = info.method == desired_method
- t_type = info.type == desired_type
-
- if t_w and t_h and t_method and t_type:
- file_info = FileInfo(
- server_name=server_name,
- file_id=file_id,
- thumbnail=info,
- )
-
- responder = await self.media_storage.fetch_media(file_info)
- if responder:
- await respond_with_responder(
- request, responder, info.type, info.length
- )
- return
-
- logger.debug("We don't have a thumbnail of that size. Generating")
-
- # Okay, so we generate one.
- file_path = await self.media_repo.generate_remote_exact_thumbnail(
- server_name,
- file_id,
- media_id,
- desired_width,
- desired_height,
- desired_method,
- desired_type,
- )
-
- if file_path:
- await respond_with_file(request, desired_type, file_path)
- else:
- logger.warning("Failed to generate thumbnail")
- raise SynapseError(400, "Failed to generate thumbnail.")
-
- async def _respond_remote_thumbnail(
- self,
- request: SynapseRequest,
- server_name: str,
- media_id: str,
- width: int,
- height: int,
- method: str,
- m_type: str,
- max_timeout_ms: int,
- ) -> None:
- # TODO: Don't download the whole remote file
- # We should proxy the thumbnail from the remote server instead of
- # downloading the remote file and generating our own thumbnails.
- media_info = await self.media_repo.get_remote_media_info(
- server_name, media_id, max_timeout_ms
- )
- if not media_info:
- return
-
- thumbnail_infos = await self.store.get_remote_media_thumbnails(
- server_name, media_id
- )
- await self._select_and_respond_with_thumbnail(
- request,
- width,
- height,
- method,
- m_type,
- thumbnail_infos,
- media_id,
- media_info.filesystem_id,
- url_cache=False,
- server_name=server_name,
- )
-
- async def _select_and_respond_with_thumbnail(
- self,
- request: SynapseRequest,
- desired_width: int,
- desired_height: int,
- desired_method: str,
- desired_type: str,
- thumbnail_infos: List[ThumbnailInfo],
- media_id: str,
- file_id: str,
- url_cache: bool,
- server_name: Optional[str] = None,
- ) -> None:
- """
- Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
-
- Args:
- request: The incoming request.
- desired_width: The desired width, the returned thumbnail may be larger than this.
- desired_height: The desired height, the returned thumbnail may be larger than this.
- desired_method: The desired method used to generate the thumbnail.
- desired_type: The desired content-type of the thumbnail.
- thumbnail_infos: A list of thumbnail info of candidate thumbnails.
- file_id: The ID of the media that a thumbnail is being requested for.
- url_cache: True if this is from a URL cache.
- server_name: The server name, if this is a remote thumbnail.
- """
- logger.debug(
- "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
- media_id,
- desired_width,
- desired_height,
- desired_method,
- thumbnail_infos,
- )
-
- # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
- # different code path to handle it.
- assert not self.dynamic_thumbnails
-
- if thumbnail_infos:
- file_info = self._select_thumbnail(
- desired_width,
- desired_height,
- desired_method,
- desired_type,
- thumbnail_infos,
- file_id,
- url_cache,
- server_name,
- )
- if not file_info:
- logger.info("Couldn't find a thumbnail matching the desired inputs")
- respond_404(request)
- return
-
- # The thumbnail property must exist.
- assert file_info.thumbnail is not None
-
- responder = await self.media_storage.fetch_media(file_info)
- if responder:
- await respond_with_responder(
- request,
- responder,
- file_info.thumbnail.type,
- file_info.thumbnail.length,
- )
- return
-
- # If we can't find the thumbnail we regenerate it. This can happen
- # if e.g. we've deleted the thumbnails but still have the original
- # image somewhere.
- #
- # Since we have an entry for the thumbnail in the DB we a) know we
- # have have successfully generated the thumbnail in the past (so we
- # don't need to worry about repeatedly failing to generate
- # thumbnails), and b) have already calculated that appropriate
- # width/height/method so we can just call the "generate exact"
- # methods.
-
- # First let's check that we do actually have the original image
- # still. This will throw a 404 if we don't.
- # TODO: We should refetch the thumbnails for remote media.
- await self.media_storage.ensure_media_is_in_local_cache(
- FileInfo(server_name, file_id, url_cache=url_cache)
- )
-
- if server_name:
- await self.media_repo.generate_remote_exact_thumbnail(
- server_name,
- file_id=file_id,
- media_id=media_id,
- t_width=file_info.thumbnail.width,
- t_height=file_info.thumbnail.height,
- t_method=file_info.thumbnail.method,
- t_type=file_info.thumbnail.type,
- )
- else:
- await self.media_repo.generate_local_exact_thumbnail(
- media_id=media_id,
- t_width=file_info.thumbnail.width,
- t_height=file_info.thumbnail.height,
- t_method=file_info.thumbnail.method,
- t_type=file_info.thumbnail.type,
- url_cache=url_cache,
- )
-
- responder = await self.media_storage.fetch_media(file_info)
- await respond_with_responder(
- request,
- responder,
- file_info.thumbnail.type,
- file_info.thumbnail.length,
- )
- else:
- # This might be because:
- # 1. We can't create thumbnails for the given media (corrupted or
- # unsupported file type), or
- # 2. The thumbnailing process never ran or errored out initially
- # when the media was first uploaded (these bugs should be
- # reported and fixed).
- # Note that we don't attempt to generate a thumbnail now because
- # `dynamic_thumbnails` is disabled.
- logger.info("Failed to find any generated thumbnails")
-
- assert request.path is not None
- respond_with_json(
- request,
- 400,
- cs_error(
- "Cannot find any thumbnails for the requested media ('%s'). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
- % (
- request.path.decode(),
- ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
- ),
- code=Codes.UNKNOWN,
- ),
- send_cors=True,
- )
-
- def _select_thumbnail(
- self,
- desired_width: int,
- desired_height: int,
- desired_method: str,
- desired_type: str,
- thumbnail_infos: List[ThumbnailInfo],
- file_id: str,
- url_cache: bool,
- server_name: Optional[str],
- ) -> Optional[FileInfo]:
- """
- Choose an appropriate thumbnail from the previously generated thumbnails.
-
- Args:
- desired_width: The desired width, the returned thumbnail may be larger than this.
- desired_height: The desired height, the returned thumbnail may be larger than this.
- desired_method: The desired method used to generate the thumbnail.
- desired_type: The desired content-type of the thumbnail.
- thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
- file_id: The ID of the media that a thumbnail is being requested for.
- url_cache: True if this is from a URL cache.
- server_name: The server name, if this is a remote thumbnail.
-
- Returns:
- The thumbnail which best matches the desired parameters.
- """
- desired_method = desired_method.lower()
-
- # The chosen thumbnail.
- thumbnail_info = None
-
- d_w = desired_width
- d_h = desired_height
-
- if desired_method == "crop":
- # Thumbnails that match equal or larger sizes of desired width/height.
- crop_info_list: List[
- Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
- ] = []
- # Other thumbnails.
- crop_info_list2: List[
- Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
- ] = []
- for info in thumbnail_infos:
- # Skip thumbnails generated with different methods.
- if info.method != "crop":
- continue
-
- t_w = info.width
- t_h = info.height
- aspect_quality = abs(d_w * t_h - d_h * t_w)
- min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
- size_quality = abs((d_w - t_w) * (d_h - t_h))
- type_quality = desired_type != info.type
- length_quality = info.length
- if t_w >= d_w or t_h >= d_h:
- crop_info_list.append(
- (
- aspect_quality,
- min_quality,
- size_quality,
- type_quality,
- length_quality,
- info,
- )
- )
- else:
- crop_info_list2.append(
- (
- aspect_quality,
- min_quality,
- size_quality,
- type_quality,
- length_quality,
- info,
- )
- )
- # Pick the most appropriate thumbnail. Some values of `desired_width` and
- # `desired_height` may result in a tie, in which case we avoid comparing on
- # the thumbnail info and pick the thumbnail that appears earlier
- # in the list of candidates.
- if crop_info_list:
- thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
- elif crop_info_list2:
- thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
- elif desired_method == "scale":
- # Thumbnails that match equal or larger sizes of desired width/height.
- info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = []
- # Other thumbnails.
- info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = []
-
- for info in thumbnail_infos:
- # Skip thumbnails generated with different methods.
- if info.method != "scale":
- continue
-
- t_w = info.width
- t_h = info.height
- size_quality = abs((d_w - t_w) * (d_h - t_h))
- type_quality = desired_type != info.type
- length_quality = info.length
- if t_w >= d_w or t_h >= d_h:
- info_list.append((size_quality, type_quality, length_quality, info))
- else:
- info_list2.append(
- (size_quality, type_quality, length_quality, info)
- )
- # Pick the most appropriate thumbnail. Some values of `desired_width` and
- # `desired_height` may result in a tie, in which case we avoid comparing on
- # the thumbnail info and pick the thumbnail that appears earlier
- # in the list of candidates.
- if info_list:
- thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
- elif info_list2:
- thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
-
- if thumbnail_info:
- return FileInfo(
- file_id=file_id,
- url_cache=url_cache,
- server_name=server_name,
- thumbnail=thumbnail_info,
- )
-
- # No matching thumbnail was found.
- return None
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
index cae67e11c8..1bd51ceba2 100644
--- a/tests/media/test_media_storage.py
+++ b/tests/media/test_media_storage.py
@@ -18,6 +18,7 @@
# [This file includes modifications made by New Vector Limited]
#
#
+import itertools
import os
import shutil
import tempfile
@@ -46,11 +47,11 @@ from synapse.media._base import FileInfo, ThumbnailInfo
from synapse.media.filepath import MediaFilePaths
from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
from synapse.media.storage_provider import FileStorageProviderBackend
+from synapse.media.thumbnailer import ThumbnailProvider
from synapse.module_api import ModuleApi
from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
from synapse.rest import admin
-from synapse.rest.client import login
-from synapse.rest.media.thumbnail_resource import ThumbnailResource
+from synapse.rest.client import login, media
from synapse.server import HomeServer
from synapse.types import JsonDict, RoomAlias
from synapse.util import Clock
@@ -153,68 +154,54 @@ class _TestImage:
is_inline: bool = True
-@parameterized_class(
- ("test_image",),
- [
- # small png
- (
- _TestImage(
- SMALL_PNG,
- b"image/png",
- b".png",
- unhexlify(
- b"89504e470d0a1a0a0000000d4948445200000020000000200806"
- b"000000737a7af40000001a49444154789cedc101010000008220"
- b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
- b"44ae426082"
- ),
- unhexlify(
- b"89504e470d0a1a0a0000000d4948445200000001000000010806"
- b"0000001f15c4890000000d49444154789c636060606000000005"
- b"0001a5f645400000000049454e44ae426082"
- ),
- ),
- ),
- # small png with transparency.
- (
- _TestImage(
- unhexlify(
- b"89504e470d0a1a0a0000000d49484452000000010000000101000"
- b"00000376ef9240000000274524e5300010194fdae0000000a4944"
- b"4154789c636800000082008177cd72b60000000049454e44ae426"
- b"082"
- ),
- b"image/png",
- b".png",
- # Note that we don't check the output since it varies across
- # different versions of Pillow.
- ),
- ),
- # small lossless webp
- (
- _TestImage(
- unhexlify(
- b"524946461a000000574542505650384c0d0000002f0000001007"
- b"1011118888fe0700"
- ),
- b"image/webp",
- b".webp",
- ),
- ),
- # an empty file
- (
- _TestImage(
- b"",
- b"image/gif",
- b".gif",
- expected_found=False,
- unable_to_thumbnail=True,
- ),
- ),
- # An SVG.
- (
- _TestImage(
- b"""<?xml version="1.0"?>
+small_png = _TestImage(
+ SMALL_PNG,
+ b"image/png",
+ b".png",
+ unhexlify(
+ b"89504e470d0a1a0a0000000d4948445200000020000000200806"
+ b"000000737a7af40000001a49444154789cedc101010000008220"
+ b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
+ b"44ae426082"
+ ),
+ unhexlify(
+ b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+ b"0000001f15c4890000000d49444154789c636060606000000005"
+ b"0001a5f645400000000049454e44ae426082"
+ ),
+)
+
+small_png_with_transparency = _TestImage(
+ unhexlify(
+ b"89504e470d0a1a0a0000000d49484452000000010000000101000"
+ b"00000376ef9240000000274524e5300010194fdae0000000a4944"
+ b"4154789c636800000082008177cd72b60000000049454e44ae426"
+ b"082"
+ ),
+ b"image/png",
+ b".png",
+ # Note that we don't check the output since it varies across
+ # different versions of Pillow.
+)
+
+small_lossless_webp = _TestImage(
+ unhexlify(
+ b"524946461a000000574542505650384c0d0000002f0000001007" b"1011118888fe0700"
+ ),
+ b"image/webp",
+ b".webp",
+)
+
+empty_file = _TestImage(
+ b"",
+ b"image/gif",
+ b".gif",
+ expected_found=False,
+ unable_to_thumbnail=True,
+)
+
+SVG = _TestImage(
+ b"""<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
@@ -223,19 +210,32 @@ class _TestImage:
<circle cx="100" cy="100" r="50" stroke="black"
stroke-width="5" fill="red" />
</svg>""",
- b"image/svg",
- b".svg",
- expected_found=False,
- unable_to_thumbnail=True,
- is_inline=False,
- ),
- ),
- ],
+ b"image/svg",
+ b".svg",
+ expected_found=False,
+ unable_to_thumbnail=True,
+ is_inline=False,
)
+test_images = [
+ small_png,
+ small_png_with_transparency,
+ small_lossless_webp,
+ empty_file,
+ SVG,
+]
+urls = [
+ "_matrix/media/r0/thumbnail",
+ "_matrix/client/unstable/org.matrix.msc3916/media/thumbnail",
+]
+
+
+@parameterized_class(("test_image", "url"), itertools.product(test_images, urls))
class MediaRepoTests(unittest.HomeserverTestCase):
+ servlets = [media.register_servlets]
test_image: ClassVar[_TestImage]
hijack_auth = True
user_id = "@test:user"
+ url: ClassVar[str]
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
self.fetches: List[
@@ -298,6 +298,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
"config": {"directory": self.storage_path},
}
config["media_storage_providers"] = [provider_config]
+ config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}
hs = self.setup_test_homeserver(config=config, federation_http_client=client)
@@ -502,7 +503,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
params = "?width=32&height=32&method=scale"
channel = self.make_request(
"GET",
- f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
+ f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@@ -530,7 +531,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel = self.make_request(
"GET",
- f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
+ f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@@ -566,12 +567,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
params = "?width=32&height=32&method=" + method
channel = self.make_request(
"GET",
- f"/_matrix/media/r0/thumbnail/{self.media_id}{params}",
+ f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
self.pump()
-
headers = {
b"Content-Length": [b"%d" % (len(self.test_image.data))],
b"Content-Type": [self.test_image.content_type],
@@ -580,7 +580,6 @@ class MediaRepoTests(unittest.HomeserverTestCase):
(self.test_image.data, (len(self.test_image.data), headers))
)
self.pump()
-
if expected_found:
self.assertEqual(channel.code, 200)
@@ -603,7 +602,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel.json_body,
{
"errcode": "M_UNKNOWN",
- "error": "Cannot find any thumbnails for the requested media ('/_matrix/media/r0/thumbnail/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
+ "error": f"Cannot find any thumbnails for the requested media ('/{self.url}/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
},
)
else:
@@ -613,7 +612,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel.json_body,
{
"errcode": "M_NOT_FOUND",
- "error": "Not found '/_matrix/media/r0/thumbnail/example.com/12345'",
+ "error": f"Not found '/{self.url}/example.com/12345'",
},
)
@@ -625,12 +624,12 @@ class MediaRepoTests(unittest.HomeserverTestCase):
content_type = self.test_image.content_type.decode()
media_repo = self.hs.get_media_repository()
- thumbnail_resouce = ThumbnailResource(
+ thumbnail_provider = ThumbnailProvider(
self.hs, media_repo, media_repo.media_storage
)
self.assertIsNotNone(
- thumbnail_resouce._select_thumbnail(
+ thumbnail_provider._select_thumbnail(
desired_width=desired_size,
desired_height=desired_size,
desired_method=method,
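The `itertools.product(test_images, urls)` parameterization above runs every image fixture against both the legacy `/_matrix/media/r0/thumbnail` path and the unstable client path; `parameterized_class` generates one `MediaRepoTests` variant per pair. A small sketch of the expansion:

```python
# Sketch of the (image, url) pairs the parameterization expands to; the
# image names here are just labels for the fixtures defined above.
import itertools

images = ["small_png", "small_png_with_transparency", "small_lossless_webp", "empty_file", "SVG"]
urls = [
    "_matrix/media/r0/thumbnail",
    "_matrix/client/unstable/org.matrix.msc3916/media/thumbnail",
]

for image, url in itertools.product(images, urls):
    print(image, "->", url)  # each pair drives a separate generated test class
```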
diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py
new file mode 100644
index 0000000000..600cbf8963
--- /dev/null
+++ b/tests/rest/client/test_media.py
@@ -0,0 +1,1609 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+# Copyright (C) 2024 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+import base64
+import json
+import os
+import re
+from typing import Any, Dict, Optional, Sequence, Tuple, Type
+from urllib.parse import quote, urlencode
+
+from twisted.internet._resolver import HostResolution
+from twisted.internet.address import IPv4Address, IPv6Address
+from twisted.internet.error import DNSLookupError
+from twisted.internet.interfaces import IAddress, IResolutionReceiver
+from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor
+from twisted.web.resource import Resource
+
+from synapse.config.oembed import OEmbedEndpointConfig
+from synapse.media._base import FileInfo
+from synapse.media.url_previewer import IMAGE_CACHE_EXPIRY_MS
+from synapse.rest import admin
+from synapse.rest.client import login, media
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
+from synapse.util.stringutils import parse_and_validate_mxc_uri
+
+from tests import unittest
+from tests.server import FakeTransport, ThreadedMemoryReactorClock
+from tests.test_utils import SMALL_PNG
+from tests.unittest import override_config
+
+try:
+ import lxml
+except ImportError:
+ lxml = None # type: ignore[assignment]
+
+
+class UnstableMediaDomainBlockingTests(unittest.HomeserverTestCase):
+ remote_media_id = "doesnotmatter"
+ remote_server_name = "evil.com"
+ servlets = [
+ media.register_servlets,
+ admin.register_servlets,
+ login.register_servlets,
+ ]
+
+ def make_homeserver(
+ self, reactor: ThreadedMemoryReactorClock, clock: Clock
+ ) -> HomeServer:
+ config = self.default_config()
+
+ self.storage_path = self.mktemp()
+ self.media_store_path = self.mktemp()
+ os.mkdir(self.storage_path)
+ os.mkdir(self.media_store_path)
+ config["media_store_path"] = self.media_store_path
+
+ provider_config = {
+ "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+ "store_local": True,
+ "store_synchronous": False,
+ "store_remote": True,
+ "config": {"directory": self.storage_path},
+ }
+
+ config["media_storage_providers"] = [provider_config]
+
+ return self.setup_test_homeserver(config=config)
+
+ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+ self.store = hs.get_datastores().main
+
+ # Inject a piece of media. We'll use this to ensure we're returning a sane
+ # response when we're not supposed to block it, distinguishing a media block
+ # from a regular 404.
+ file_id = "abcdefg12345"
+ file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id)
+ with hs.get_media_repository().media_storage.store_into_file(file_info) as (
+ f,
+ fname,
+ finish,
+ ):
+ f.write(SMALL_PNG)
+ self.get_success(finish())
+
+ self.get_success(
+ self.store.store_cached_remote_media(
+ origin=self.remote_server_name,
+ media_id=self.remote_media_id,
+ media_type="image/png",
+ media_length=1,
+ time_now_ms=clock.time_msec(),
+ upload_name="test.png",
+ filesystem_id=file_id,
+ )
+ )
+ self.register_user("user", "password")
+ self.tok = self.login("user", "password")
+
+ @override_config(
+ {
+ # Disable downloads from the domain we'll be trying to download from.
+ # Should result in a 404.
+ "prevent_media_downloads_from": ["evil.com"],
+ "dynamic_thumbnails": True,
+ "experimental_features": {"msc3916_authenticated_media_enabled": True},
+ }
+ )
+ def test_cannot_download_blocked_media_thumbnail(self) -> None:
+ """
+ Same test as test_cannot_download_blocked_media but for thumbnails.
+ """
+ response = self.make_request(
+ "GET",
+ f"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+ shorthand=False,
+ content={"width": 100, "height": 100},
+ access_token=self.tok,
+ )
+ self.assertEqual(response.code, 404)
+
+ @override_config(
+ {
+ # Disable downloads from a domain we won't be requesting downloads from.
+ # This proves we haven't broken anything.
+ "prevent_media_downloads_from": ["not-listed.com"],
+ "dynamic_thumbnails": True,
+ "experimental_features": {"msc3916_authenticated_media_enabled": True},
+ }
+ )
+ def test_remote_media_thumbnail_normally_unblocked(self) -> None:
+ """
+ Same test as test_remote_media_normally_unblocked but for thumbnails.
+ """
+ response = self.make_request(
+ "GET",
+ f"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+ shorthand=False,
+ access_token=self.tok,
+ )
+ self.assertEqual(response.code, 200)
+
+
+class UnstableURLPreviewTests(unittest.HomeserverTestCase):
+ if not lxml:
+ skip = "url preview feature requires lxml"
+
+ servlets = [media.register_servlets]
+ hijack_auth = True
+ user_id = "@test:user"
+ end_content = (
+ b"<html><head>"
+ b'<meta property="og:title" content="~matrix~" />'
+ b'<meta property="og:description" content="hi" />'
+ b"</head></html>"
+ )
+
+ def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+ config = self.default_config()
+ config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}
+ config["url_preview_enabled"] = True
+ config["max_spider_size"] = 9999999
+ config["url_preview_ip_range_blacklist"] = (
+ "192.168.1.1",
+ "1.0.0.0/8",
+ "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+ "2001:800::/21",
+ )
+ config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
+ config["url_preview_accept_language"] = [
+ "en-UK",
+ "en-US;q=0.9",
+ "fr;q=0.8",
+ "*;q=0.7",
+ ]
+
+ self.storage_path = self.mktemp()
+ self.media_store_path = self.mktemp()
+ os.mkdir(self.storage_path)
+ os.mkdir(self.media_store_path)
+ config["media_store_path"] = self.media_store_path
+
+ provider_config = {
+ "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+ "store_local": True,
+ "store_synchronous": False,
+ "store_remote": True,
+ "config": {"directory": self.storage_path},
+ }
+
+ config["media_storage_providers"] = [provider_config]
+
+ hs = self.setup_test_homeserver(config=config)
+
+ # After the hs is created, modify the parsed oEmbed config (to avoid
+ # messing with files).
+ #
+ # Note that HTTP URLs are used to avoid having to deal with TLS in tests.
+ hs.config.oembed.oembed_patterns = [
+ OEmbedEndpointConfig(
+ api_endpoint="http://publish.twitter.com/oembed",
+ url_patterns=[
+ re.compile(r"http://twitter\.com/.+/status/.+"),
+ ],
+ formats=None,
+ ),
+ OEmbedEndpointConfig(
+ api_endpoint="http://www.hulu.com/api/oembed.{format}",
+ url_patterns=[
+ re.compile(r"http://www\.hulu\.com/watch/.+"),
+ ],
+ formats=["json"],
+ ),
+ ]
+
+ return hs
+
+ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+ self.media_repo = hs.get_media_repository()
+ assert self.media_repo.url_previewer is not None
+ self.url_previewer = self.media_repo.url_previewer
+
+ self.lookups: Dict[str, Any] = {}
+
+ class Resolver:
+ def resolveHostName(
+ _self,
+ resolutionReceiver: IResolutionReceiver,
+ hostName: str,
+ portNumber: int = 0,
+ addressTypes: Optional[Sequence[Type[IAddress]]] = None,
+ transportSemantics: str = "TCP",
+ ) -> IResolutionReceiver:
+ resolution = HostResolution(hostName)
+ resolutionReceiver.resolutionBegan(resolution)
+ if hostName not in self.lookups:
+ raise DNSLookupError("OH NO")
+
+ for i in self.lookups[hostName]:
+ resolutionReceiver.addressResolved(i[0]("TCP", i[1], portNumber))
+ resolutionReceiver.resolutionComplete()
+ return resolutionReceiver
+
+ self.reactor.nameResolver = Resolver() # type: ignore[assignment]
+
+ def create_resource_dict(self) -> Dict[str, Resource]:
+ """Create a resource tree for the test server
+
+ A resource tree is a mapping from path to twisted.web.resource.
+
+ The default implementation creates a JsonResource and calls each function in
+ `servlets` to register servlets against it.
+ """
+ resources = super().create_resource_dict()
+ resources["/_matrix/media"] = self.hs.get_media_repository_resource()
+ return resources
+
+ def _assert_small_png(self, json_body: JsonDict) -> None:
+ """Assert properties from the SMALL_PNG test image."""
+ self.assertTrue(json_body["og:image"].startswith("mxc://"))
+ self.assertEqual(json_body["og:image:height"], 1)
+ self.assertEqual(json_body["og:image:width"], 1)
+ self.assertEqual(json_body["og:image:type"], "image/png")
+ self.assertEqual(json_body["matrix:image:size"], 67)
+
+ def test_cache_returns_correct_type(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+ % (len(self.end_content),)
+ + self.end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ # Check the cache returns the correct response
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ )
+
+ # Check the cache response has the same content
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ # Clear the in-memory cache
+ self.assertIn("http://matrix.org", self.url_previewer._cache)
+ self.url_previewer._cache.pop("http://matrix.org")
+ self.assertNotIn("http://matrix.org", self.url_previewer._cache)
+
+ # Check the database cache returns the correct response
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ )
+
+ # Check the cache response has the same content
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ def test_non_ascii_preview_httpequiv(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = (
+ b"<html><head>"
+ b'<meta http-equiv="Content-Type" content="text/html; charset=windows-1251"/>'
+ b'<meta property="og:title" content="\xe4\xea\xe0" />'
+ b'<meta property="og:description" content="hi" />'
+ b"</head></html>"
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
+
+ def test_video_rejected(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = b"anything"
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: video/mp4\r\n\r\n"
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "Requested file's content type not allowed for this operation: video/mp4",
+ },
+ )
+
+ def test_audio_rejected(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = b"anything"
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: audio/aac\r\n\r\n"
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "Requested file's content type not allowed for this operation: audio/aac",
+ },
+ )
+
+ def test_non_ascii_preview_content_type(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = (
+ b"<html><head>"
+ b'<meta property="og:title" content="\xe4\xea\xe0" />'
+ b'<meta property="og:description" content="hi" />'
+ b"</head></html>"
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
+
+ def test_overlong_title(self) -> None:
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = (
+ b"<html><head>"
+ b"<title>" + b"x" * 2000 + b"</title>"
+ b'<meta property="og:description" content="hi" />'
+ b"</head></html>"
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ res = channel.json_body
+ # We should only see the `og:description` field, as `title` is too long and should be stripped out
+ self.assertCountEqual(["og:description"], res.keys())
+
+ def test_ipaddr(self) -> None:
+ """
+ IP addresses can be previewed directly.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+ % (len(self.end_content),)
+ + self.end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ def test_blocked_ip_specific(self) -> None:
+ """
+ Blocked IP addresses, found via DNS, are not spidered.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+
+ # No requests made.
+ self.assertEqual(len(self.reactor.tcpClients), 0)
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "DNS resolution failure during URL preview generation",
+ },
+ )
+
+ def test_blocked_ip_range(self) -> None:
+ """
+ IPs inside blocked IP ranges, found via DNS, are not spidered.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "DNS resolution failure during URL preview generation",
+ },
+ )
+
+ def test_blocked_ip_specific_direct(self) -> None:
+ """
+ Blocked IP addresses, accessed directly, are not spidered.
+ """
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://192.168.1.1",
+ shorthand=False,
+ )
+
+ # No requests made.
+ self.assertEqual(len(self.reactor.tcpClients), 0)
+ self.assertEqual(
+ channel.json_body,
+ {"errcode": "M_UNKNOWN", "error": "IP address blocked"},
+ )
+ self.assertEqual(channel.code, 403)
+
+ def test_blocked_ip_range_direct(self) -> None:
+ """
+ Blocked IP ranges, accessed directly, are not spidered.
+ """
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://1.1.1.2",
+ shorthand=False,
+ )
+
+ self.assertEqual(channel.code, 403)
+ self.assertEqual(
+ channel.json_body,
+ {"errcode": "M_UNKNOWN", "error": "IP address blocked"},
+ )
+
+ def test_blocked_ip_range_whitelisted_ip(self) -> None:
+ """
+ IP addresses that are blocked, but subsequently whitelisted, can be
+ spidered.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+ % (len(self.end_content),)
+ + self.end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ def test_blocked_ip_with_external_ip(self) -> None:
+ """
+ If a hostname resolves to a blocked IP, the preview is rejected even if
+ a non-blocked IP is also returned.
+ """
+ # Hardcode the URL resolving to the IP we want.
+ self.lookups["example.com"] = [
+ (IPv4Address, "1.1.1.2"),
+ (IPv4Address, "10.1.2.3"),
+ ]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "DNS resolution failure during URL preview generation",
+ },
+ )
+
+ def test_blocked_ipv6_specific(self) -> None:
+ """
+ Blocked IPv6 addresses, found via DNS, are not spidered.
+ """
+ self.lookups["example.com"] = [
+ (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")
+ ]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+
+ # No requests made.
+ self.assertEqual(len(self.reactor.tcpClients), 0)
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "DNS resolution failure during URL preview generation",
+ },
+ )
+
+ def test_blocked_ipv6_range(self) -> None:
+ """
+ IPs inside blocked IPv6 ranges, found via DNS, are not spidered.
+ """
+ self.lookups["example.com"] = [(IPv6Address, "2001:800::1")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+
+ self.assertEqual(channel.code, 502)
+ self.assertEqual(
+ channel.json_body,
+ {
+ "errcode": "M_UNKNOWN",
+ "error": "DNS resolution failure during URL preview generation",
+ },
+ )
+
+ def test_OPTIONS(self) -> None:
+ """
+ An OPTIONS request returns a 204 with no content.
+ """
+ channel = self.make_request(
+ "OPTIONS",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ )
+ self.assertEqual(channel.code, 204)
+
+ def test_accept_language_config_option(self) -> None:
+ """
+ The configured Accept-Language header is sent to the remote server.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+ # Build and make a request to the server
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://example.com",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ # Extract Synapse's tcp client
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+
+ # Build a fake remote server to reply with
+ server = AccumulatingProtocol()
+
+ # Connect the two together
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+
+ # Tell Synapse that it has received some data from the remote server
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+ % (len(self.end_content),)
+ + self.end_content
+ )
+
+ # Move the reactor along until we get a response on our original channel
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+ )
+
+ # Check that the server received the Accept-Language header as part
+ # of the request from Synapse
+ self.assertIn(
+ (
+ b"Accept-Language: en-UK\r\n"
+ b"Accept-Language: en-US;q=0.9\r\n"
+ b"Accept-Language: fr;q=0.8\r\n"
+ b"Accept-Language: *;q=0.7"
+ ),
+ server.data,
+ )
+
+ def test_image(self) -> None:
+ """An image should be precached if mentioned in the HTML."""
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
+
+ result = (
+ b"""<html><body><img src="http://cdn.matrix.org/foo.png"></body></html>"""
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ # Respond with the HTML.
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+ self.pump()
+
+ # Respond with the photo.
+ client = self.reactor.tcpClients[1][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: image/png\r\n\r\n"
+ )
+ % (len(SMALL_PNG),)
+ + SMALL_PNG
+ )
+ self.pump()
+
+ # The image should be in the result.
+ self.assertEqual(channel.code, 200)
+ self._assert_small_png(channel.json_body)
+
+ def test_nonexistent_image(self) -> None:
+ """If the preview image doesn't exist, ensure some data is returned."""
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ result = (
+ b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+
+ self.pump()
+
+ # There should not be a second connection.
+ self.assertEqual(len(self.reactor.tcpClients), 1)
+
+ # The image should not be in the result.
+ self.assertEqual(channel.code, 200)
+ self.assertNotIn("og:image", channel.json_body)
+
+ @unittest.override_config(
+ {"url_preview_url_blacklist": [{"netloc": "cdn.matrix.org"}]}
+ )
+ def test_image_blocked(self) -> None:
+ """If the preview image doesn't exist, ensure some data is returned."""
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
+
+ result = (
+ b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+ )
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+ self.pump()
+
+ # There should not be a second connection.
+ self.assertEqual(len(self.reactor.tcpClients), 1)
+
+ # The image should not be in the result.
+ self.assertEqual(channel.code, 200)
+ self.assertNotIn("og:image", channel.json_body)
+
+ def test_oembed_failure(self) -> None:
+ """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ result = b"""
+ <title>oEmbed Autodiscovery Fail</title>
+ <link rel="alternate" type="application/json+oembed"
+ href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
+ title="matrixdotorg" />
+ """
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+
+ # The title from the HTML should still be returned despite the oEmbed failure.
+ self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
+
+ def test_data_url(self) -> None:
+ """
+ Requesting to preview a data URL is not supported.
+ """
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ data = base64.b64encode(SMALL_PNG).decode()
+
+ query_params = urlencode(
+ {
+ "url": f'<html><head><img src="data:image/png;base64,{data}" /></head></html>'
+ }
+ )
+
+ channel = self.make_request(
+ "GET",
+ f"/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?{query_params}",
+ shorthand=False,
+ )
+ self.pump()
+
+ self.assertEqual(channel.code, 500)
+
+ def test_inline_data_url(self) -> None:
+ """
+ An inline image (as a data URL) should be parsed properly.
+ """
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ data = base64.b64encode(SMALL_PNG)
+
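+ # Embed the test PNG directly in the page as a base64 data URI.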
+ end_content = (
+ b"<html><head>" b'<img src="data:image/png;base64,%s" />' b"</head></html>"
+ ) % (data,)
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ self._assert_small_png(channel.json_body)
+
+ def test_oembed_photo(self) -> None:
+ """Test an oEmbed endpoint which returns a 'photo' type which redirects the preview to a new URL."""
+ self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ result = {
+ "version": "1.0",
+ "type": "photo",
+ "url": "http://cdn.twitter.com/matrixdotorg",
+ }
+ oembed_content = json.dumps(result).encode("utf-8")
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
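+ # Respond to the oEmbed API request with a "photo" answer pointing at the image URL.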
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+ )
+ % (len(oembed_content),)
+ + oembed_content
+ )
+
+ self.pump()
+
+ # Ensure a second request is made to the photo URL.
+ client = self.reactor.tcpClients[1][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: image/png\r\n\r\n"
+ )
+ % (len(SMALL_PNG),)
+ + SMALL_PNG
+ )
+
+ self.pump()
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"/matrixdotorg", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(body["og:url"], "http://twitter.com/matrixdotorg/status/12345")
+ self._assert_small_png(body)
+
+ def test_oembed_rich(self) -> None:
+ """Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
+ self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ result = {
+ "version": "1.0",
+ "type": "rich",
+ # Note that this provides the author, not the title.
+ "author_name": "Alice",
+ "html": "<div>Content Preview</div>",
+ }
+ end_content = json.dumps(result).encode("utf-8")
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
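+ # Respond to the oEmbed API request with a "rich" answer containing HTML.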
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+
+ # Double check that the proper host is being connected to. (Note that
+ # twitter.com can't be resolved so this is already implicitly checked.)
+ self.assertIn(b"\r\nHost: publish.twitter.com\r\n", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(
+ body,
+ {
+ "og:url": "http://twitter.com/matrixdotorg/status/12345",
+ "og:title": "Alice",
+ "og:description": "Content Preview",
+ },
+ )
+
+ def test_oembed_format(self) -> None:
+ """Test an oEmbed endpoint which requires the format in the URL."""
+ self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]
+
+ result = {
+ "version": "1.0",
+ "type": "rich",
+ "html": "<div>Content Preview</div>",
+ }
+ end_content = json.dumps(result).encode("utf-8")
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://www.hulu.com/watch/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+
+ # The {format} should have been turned into json.
+ self.assertIn(b"/api/oembed.json", server.data)
+ # A URL parameter of format=json should be provided.
+ self.assertIn(b"format=json", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(
+ body,
+ {
+ "og:url": "http://www.hulu.com/watch/12345",
+ "og:description": "Content Preview",
+ },
+ )
+
+ @unittest.override_config(
+ {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
+ )
+ def test_oembed_blocked(self) -> None:
+ """The oEmbed URL should not be downloaded if the oEmbed URL is blocked."""
+ self.lookups["twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(channel.code, 403, channel.result)
+
+ def test_oembed_autodiscovery(self) -> None:
+ """
+ Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
+ 1. Request a preview of a URL which is not known to the oEmbed code.
+ 2. It returns HTML including a link to an oEmbed preview.
+ 3. The oEmbed preview is requested and returns a URL for an image.
+ 4. The image is requested for thumbnailing.
+ """
+ # This is a little cheesy in that we use the www subdomain (which isn't in
+ # the list of oEmbed patterns) to get a "raw" HTML response.
+ self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ result = b"""
+ <link rel="alternate" type="application/json+oembed"
+ href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+ title="matrixdotorg" />
+ """
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
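+ # Respond to the initial HTML request with a page advertising an oEmbed endpoint.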
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+ self.pump()
+
+ # The oEmbed response.
+ result2 = {
+ "version": "1.0",
+ "type": "photo",
+ "url": "http://cdn.twitter.com/matrixdotorg",
+ }
+ oembed_content = json.dumps(result2).encode("utf-8")
+
+ # Ensure a second request is made to the oEmbed URL.
+ client = self.reactor.tcpClients[1][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+ )
+ % (len(oembed_content),)
+ + oembed_content
+ )
+ self.pump()
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"/oembed?", server.data)
+
+ # Ensure a third request is made to the photo URL.
+ client = self.reactor.tcpClients[2][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: image/png\r\n\r\n"
+ )
+ % (len(SMALL_PNG),)
+ + SMALL_PNG
+ )
+ self.pump()
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"/matrixdotorg", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(
+ body["og:url"], "http://www.twitter.com/matrixdotorg/status/12345"
+ )
+ self._assert_small_png(body)
+
+ @unittest.override_config(
+ {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
+ )
+ def test_oembed_autodiscovery_blocked(self) -> None:
+ """
+ If the discovered oEmbed URL is blocked, it should be discarded.
+ """
+ # This is a little cheesy in that we use the www subdomain (which isn't in
+ # the list of oEmbed patterns) to get a "raw" HTML response.
+ self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.4")]
+
+ result = b"""
+ <title>Test</title>
+ <link rel="alternate" type="application/json+oembed"
+ href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+ title="matrixdotorg" />
+ """
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
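+ # Respond with HTML that advertises an oEmbed endpoint on the blocked host.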
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+
+ self.pump()
+
+ # Ensure there's no additional connections.
+ self.assertEqual(len(self.reactor.tcpClients), 1)
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"\r\nHost: www.twitter.com\r\n", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(body["og:title"], "Test")
+ self.assertNotIn("og:image", body)
+
+ def _download_image(self) -> Tuple[str, str]:
+ """Downloads an image into the URL cache.
+ Returns:
+ A (host, media_id) tuple representing the MXC URI of the image.
+ """
+ self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url=http://cdn.twitter.com/matrixdotorg",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: image/png\r\n\r\n"
+ % (len(SMALL_PNG),)
+ + SMALL_PNG
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ mxc_uri = body["og:image"]
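+ # The MXC URI should not include an explicit port.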
+ host, _port, media_id = parse_and_validate_mxc_uri(mxc_uri)
+ self.assertIsNone(_port)
+ return host, media_id
+
+ def test_storage_providers_exclude_files(self) -> None:
+ """Test that files are not stored in or fetched from storage providers."""
+ host, media_id = self._download_image()
+
+ rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
+ media_store_path = os.path.join(self.media_store_path, rel_file_path)
+ storage_provider_path = os.path.join(self.storage_path, rel_file_path)
+
+ # Check storage
+ self.assertTrue(os.path.isfile(media_store_path))
+ self.assertFalse(
+ os.path.isfile(storage_provider_path),
+ "URL cache file was unexpectedly stored in a storage provider",
+ )
+
+ # Check fetching
+ channel = self.make_request(
+ "GET",
+ f"/_matrix/media/v3/download/{host}/{media_id}",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(channel.code, 200)
+
+ # Move cached file into the storage provider
+ os.makedirs(os.path.dirname(storage_provider_path), exist_ok=True)
+ os.rename(media_store_path, storage_provider_path)
+
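+ # Now that the file only exists in the storage provider, the download should fail.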
+ channel = self.make_request(
+ "GET",
+ f"/_matrix/media/v3/download/{host}/{media_id}",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(
+ channel.code,
+ 404,
+ "URL cache file was unexpectedly retrieved from a storage provider",
+ )
+
+ def test_storage_providers_exclude_thumbnails(self) -> None:
+ """Test that thumbnails are not stored in or fetched from storage providers."""
+ host, media_id = self._download_image()
+
+ rel_thumbnail_path = (
+ self.media_repo.filepaths.url_cache_thumbnail_directory_rel(media_id)
+ )
+ media_store_thumbnail_path = os.path.join(
+ self.media_store_path, rel_thumbnail_path
+ )
+ storage_provider_thumbnail_path = os.path.join(
+ self.storage_path, rel_thumbnail_path
+ )
+
+ # Check storage
+ self.assertTrue(os.path.isdir(media_store_thumbnail_path))
+ self.assertFalse(
+ os.path.isdir(storage_provider_thumbnail_path),
+ "URL cache thumbnails were unexpectedly stored in a storage provider",
+ )
+
+ # Check fetching
+ channel = self.make_request(
+ "GET",
+ f"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(channel.code, 200)
+
+ # Remove the original, otherwise thumbnails will regenerate
+ rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
+ media_store_path = os.path.join(self.media_store_path, rel_file_path)
+ os.remove(media_store_path)
+
+ # Move cached thumbnails into the storage provider
+ os.makedirs(os.path.dirname(storage_provider_thumbnail_path), exist_ok=True)
+ os.rename(media_store_thumbnail_path, storage_provider_thumbnail_path)
+
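+ # Now that the thumbnails only exist in the storage provider, the request should 404.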
+ channel = self.make_request(
+ "GET",
+ f"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(
+ channel.code,
+ 404,
+ "URL cache thumbnail was unexpectedly retrieved from a storage provider",
+ )
+
+ def test_cache_expiry(self) -> None:
+ """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
+ _host, media_id = self._download_image()
+
+ file_path = self.media_repo.filepaths.url_cache_filepath(media_id)
+ file_dirs = self.media_repo.filepaths.url_cache_filepath_dirs_to_delete(
+ media_id
+ )
+ thumbnail_dir = self.media_repo.filepaths.url_cache_thumbnail_directory(
+ media_id
+ )
+ thumbnail_dirs = self.media_repo.filepaths.url_cache_thumbnail_dirs_to_delete(
+ media_id
+ )
+
+ self.assertTrue(os.path.isfile(file_path))
+ self.assertTrue(os.path.isdir(thumbnail_dir))
+
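+ # Advance the clock well past the expiry time and run the cache expiry job.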
+ self.reactor.advance(IMAGE_CACHE_EXPIRY_MS * 1000 + 1)
+ self.get_success(self.url_previewer._expire_url_cache_data())
+
+ for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
+ self.assertFalse(
+ os.path.exists(path),
+ f"{os.path.relpath(path, self.media_store_path)} was not deleted",
+ )
+
+ @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
+ def test_blocked_port(self) -> None:
+ """Tests that blocking URLs with a port makes previewing such URLs
+ fail with a 403 error and doesn't impact other previews.
+ """
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ bad_url = quote("http://matrix.org:8888/foo")
+ good_url = quote("http://matrix.org/foo")
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url="
+ + bad_url,
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(channel.code, 403, channel.result)
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url="
+ + good_url,
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
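+ # The URL without an explicit port is not blocked, so the preview should succeed.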
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+ % (len(self.end_content),)
+ + self.end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+
+ @unittest.override_config(
+ {"url_preview_url_blacklist": [{"netloc": "example.com"}]}
+ )
+ def test_blocked_url(self) -> None:
+ """Tests that blocking URLs with a host makes previewing such URLs
+ fail with a 403 error.
+ """
+ self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+ bad_url = quote("http://example.com/foo")
+
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/preview_url?url="
+ + bad_url,
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+ self.assertEqual(channel.code, 403, channel.result)
+
+
+class UnstableMediaConfigTest(unittest.HomeserverTestCase):
+ servlets = [
+ media.register_servlets,
+ admin.register_servlets,
+ login.register_servlets,
+ ]
+
+ def make_homeserver(
+ self, reactor: ThreadedMemoryReactorClock, clock: Clock
+ ) -> HomeServer:
+ config = self.default_config()
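+ # Enable MSC3916 so the unstable client media endpoints are available.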
+ config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}
+
+ self.storage_path = self.mktemp()
+ self.media_store_path = self.mktemp()
+ os.mkdir(self.storage_path)
+ os.mkdir(self.media_store_path)
+ config["media_store_path"] = self.media_store_path
+
+ provider_config = {
+ "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+ "store_local": True,
+ "store_synchronous": False,
+ "store_remote": True,
+ "config": {"directory": self.storage_path},
+ }
+
+ config["media_storage_providers"] = [provider_config]
+
+ return self.setup_test_homeserver(config=config)
+
+ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+ self.register_user("user", "password")
+ self.tok = self.login("user", "password")
+
+ def test_media_config(self) -> None:
+ channel = self.make_request(
+ "GET",
+ "/_matrix/client/unstable/org.matrix.msc3916/media/config",
+ shorthand=False,
+ access_token=self.tok,
+ )
+ self.assertEqual(channel.code, 200)
+ self.assertEqual(
+ channel.json_body["m.upload.size"], self.hs.config.media.max_upload_size
+ )
|