summary refs log tree commit diff
path: root/synapse/rest/media
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/rest/media')
-rw-r--r--synapse/rest/media/v1/_base.py7
-rw-r--r--synapse/rest/media/v1/config_resource.py6
-rw-r--r--synapse/rest/media/v1/download_resource.py5
-rw-r--r--synapse/rest/media/v1/filepath.py11
-rw-r--r--synapse/rest/media/v1/media_repository.py34
-rw-r--r--synapse/rest/media/v1/media_storage.py6
-rw-r--r--synapse/rest/media/v1/oembed.py49
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py16
-rw-r--r--synapse/rest/media/v1/storage_provider.py15
-rw-r--r--synapse/rest/media/v1/thumbnail_resource.py17
-rw-r--r--synapse/rest/media/v1/upload_resource.py6
11 files changed, 112 insertions, 60 deletions
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py

index 7c881f2bdb..014fa893d6 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py
@@ -27,6 +27,7 @@ from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError, cs_error from synapse.http.server import finish_request, respond_with_json +from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable from synapse.util.stringutils import is_ascii @@ -74,7 +75,7 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: ) -def respond_404(request: Request) -> None: +def respond_404(request: SynapseRequest) -> None: respond_with_json( request, 404, @@ -84,7 +85,7 @@ def respond_404(request: Request) -> None: async def respond_with_file( - request: Request, + request: SynapseRequest, media_type: str, file_path: str, file_size: Optional[int] = None, @@ -221,7 +222,7 @@ def _can_encode_filename_as_token(x: str) -> bool: async def respond_with_responder( - request: Request, + request: SynapseRequest, responder: "Optional[Responder]", media_type: str, file_size: Optional[int], diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py
index a1d36e5cf1..a95804d327 100644 --- a/synapse/rest/media/v1/config_resource.py +++ b/synapse/rest/media/v1/config_resource.py
@@ -16,8 +16,6 @@ from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.site import SynapseRequest @@ -33,11 +31,11 @@ class MediaConfigResource(DirectServeJsonResource): config = hs.config self.clock = hs.get_clock() self.auth = hs.get_auth() - self.limits_dict = {"m.upload.size": config.max_upload_size} + self.limits_dict = {"m.upload.size": config.media.max_upload_size} async def _async_render_GET(self, request: SynapseRequest) -> None: await self.auth.get_user_by_req(request) respond_with_json(request, 200, self.limits_dict, send_cors=True) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: respond_with_json(request, 200, {}, send_cors=True) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
index d6d938953e..6180fa575e 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py
@@ -15,10 +15,9 @@ import logging from typing import TYPE_CHECKING -from twisted.web.server import Request - from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_boolean +from synapse.http.site import SynapseRequest from ._base import parse_media_id, respond_404 @@ -37,7 +36,7 @@ class DownloadResource(DirectServeJsonResource): self.media_repo = media_repo self.server_name = hs.hostname - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) request.setHeader( b"Content-Security-Policy", diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index 39bbe4e874..08bd85f664 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py
@@ -195,23 +195,24 @@ class MediaFilePaths: url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) - def url_cache_thumbnail_directory(self, media_id: str) -> str: + def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form <DATE><RANDOM_STRING> # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): - return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] - ) + return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:]) else: return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_thumbnail_directory = _wrap_in_base_path( + url_cache_thumbnail_directory_rel + ) + def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form <DATE><RANDOM_STRING> diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 50e4c9e29f..abd88a2d4f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py
@@ -23,7 +23,6 @@ import twisted.internet.error import twisted.web.http from twisted.internet.defer import Deferred from twisted.web.resource import Resource -from twisted.web.server import Request from synapse.api.errors import ( FederationDeniedError, @@ -34,6 +33,7 @@ from synapse.api.errors import ( ) from synapse.config._base import ConfigError from synapse.config.repository import ThumbnailRequirement +from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import UserID @@ -76,29 +76,35 @@ class MediaRepository: self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastore() - self.max_upload_size = hs.config.max_upload_size - self.max_image_pixels = hs.config.max_image_pixels + self.max_upload_size = hs.config.media.max_upload_size + self.max_image_pixels = hs.config.media.max_image_pixels Thumbnailer.set_limits(self.max_image_pixels) - self.primary_base_path: str = hs.config.media_store_path + self.primary_base_path: str = hs.config.media.media_store_path self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path) - self.dynamic_thumbnails = hs.config.dynamic_thumbnails - self.thumbnail_requirements = hs.config.thumbnail_requirements + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails + self.thumbnail_requirements = hs.config.media.thumbnail_requirements self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes: Set[Tuple[str, str]] = set() self.recently_accessed_locals: Set[str] = set() - self.federation_domain_whitelist = hs.config.federation_domain_whitelist + self.federation_domain_whitelist = ( + hs.config.federation.federation_domain_whitelist + ) # List of StorageProviders where we should search for media and # potentially upload to. storage_providers = [] - for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + for ( + clz, + provider_config, + wrapper_config, + ) in hs.config.media.media_storage_providers: backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, @@ -187,7 +193,7 @@ class MediaRepository: return "mxc://%s/%s" % (self.server_name, media_id) async def get_local_media( - self, request: Request, media_id: str, name: Optional[str] + self, request: SynapseRequest, media_id: str, name: Optional[str] ) -> None: """Responds to requests for local media, if exists, or returns 404. @@ -221,7 +227,11 @@ class MediaRepository: ) async def get_remote_media( - self, request: Request, server_name: str, media_id: str, name: Optional[str] + self, + request: SynapseRequest, + server_name: str, + media_id: str, + name: Optional[str], ) -> None: """Respond to requests for remote media. @@ -969,7 +979,7 @@ class MediaRepositoryResource(Resource): def __init__(self, hs: "HomeServer"): # If we're not configured to use it, raise if we somehow got here. - if not hs.config.can_load_media_repo: + if not hs.config.media.can_load_media_repo: raise ConfigError("Synapse is not configured to use a media repo.") super().__init__() @@ -980,7 +990,7 @@ class MediaRepositoryResource(Resource): self.putChild( b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage) ) - if hs.config.url_preview_enabled: + if hs.config.media.url_preview_enabled: self.putChild( b"preview_url", PreviewUrlResource(hs, media_repo, media_repo.media_storage), diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 01fada8fb5..fca239d8c7 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py
@@ -132,8 +132,7 @@ class MediaStorage: fname = os.path.join(self.local_media_directory, path) dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) finished_called = [False] @@ -244,8 +243,7 @@ class MediaStorage: return legacy_local_path dirname = os.path.dirname(local_path) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) for provider in self.storage_providers: res: Any = await provider.fetch(path, file_info) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index 8b74e72655..e04671fb95 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py
@@ -13,7 +13,7 @@ # limitations under the License. import logging import urllib.parse -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional import attr @@ -22,6 +22,8 @@ from synapse.types import JsonDict from synapse.util import json_decoder if TYPE_CHECKING: + from lxml import etree + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,7 +33,7 @@ logger = logging.getLogger(__name__) class OEmbedResult: # The Open Graph result (converted from the oEmbed result). open_graph_result: JsonDict - # Number of seconds to cache the content, according to the oEmbed response. + # Number of milliseconds to cache the content, according to the oEmbed response. # # This will be None if no cache-age is provided in the oEmbed response (or # if the oEmbed response cannot be turned into an Open Graph response). @@ -119,10 +121,22 @@ class OEmbedProvider: # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") if cache_age: - cache_age = int(cache_age) + cache_age = int(cache_age) * 1000 # The results. - open_graph_response = {"og:title": oembed.get("title")} + open_graph_response = { + "og:url": url, + } + + # Use either title or author's name as the title. + title = oembed.get("title") or oembed.get("author_name") + if title: + open_graph_response["og:title"] = title + + # Use the provider name and as the site. + provider_name = oembed.get("provider_name") + if provider_name: + open_graph_response["og:site_name"] = provider_name # If a thumbnail exists, use it. Note that dimensions will be calculated later. if "thumbnail_url" in oembed: @@ -137,6 +151,15 @@ class OEmbedProvider: # If this is a photo, use the full image, not the thumbnail. open_graph_response["og:image"] = oembed["url"] + elif oembed_type == "video": + open_graph_response["og:type"] = "video.other" + calc_description_and_urls(open_graph_response, oembed["html"]) + open_graph_response["og:video:width"] = oembed["width"] + open_graph_response["og:video:height"] = oembed["height"] + + elif oembed_type == "link": + open_graph_response["og:type"] = "website" + else: raise RuntimeError(f"Unknown oEmbed type: {oembed_type}") @@ -149,6 +172,14 @@ class OEmbedProvider: return OEmbedResult(open_graph_response, cache_age) +def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]: + results = [] + for tag in tree.xpath("//*/" + tag_name): + if "src" in tag.attrib: + results.append(tag.attrib["src"]) + return results + + def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: """ Calculate description for an HTML document. @@ -179,6 +210,16 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> if tree is None: return + # Attempt to find interesting URLs (images, videos, embeds). + if "og:image" not in open_graph_response: + image_urls = _fetch_urls(tree, "img") + if image_urls: + open_graph_response["og:image"] = image_urls[0] + + video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed") + if video_urls: + open_graph_response["og:video"] = video_urls[0] + from synapse.rest.media.v1.preview_url_resource import _calc_description description = _calc_description(tree) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0a0b476d2b..79a42b2455 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -29,7 +29,6 @@ import attr from twisted.internet.defer import Deferred from twisted.internet.error import DNSLookupError -from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError from synapse.http.client import SimpleHttpClient @@ -126,14 +125,14 @@ class PreviewUrlResource(DirectServeJsonResource): self.auth = hs.get_auth() self.clock = hs.get_clock() self.filepaths = media_repo.filepaths - self.max_spider_size = hs.config.max_spider_size + self.max_spider_size = hs.config.media.max_spider_size self.server_name = hs.hostname self.store = hs.get_datastore() self.client = SimpleHttpClient( hs, treq_args={"browser_like_redirects": True}, - ip_whitelist=hs.config.url_preview_ip_range_whitelist, - ip_blacklist=hs.config.url_preview_ip_range_blacklist, + ip_whitelist=hs.config.media.url_preview_ip_range_whitelist, + ip_blacklist=hs.config.media.url_preview_ip_range_blacklist, use_proxy=True, ) self.media_repo = media_repo @@ -151,8 +150,8 @@ class PreviewUrlResource(DirectServeJsonResource): or instance_running_jobs == hs.get_instance_name() ) - self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist - self.url_preview_accept_language = hs.config.url_preview_accept_language + self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist + self.url_preview_accept_language = hs.config.media.url_preview_accept_language # memory cache mapping urls to an ObservableDeferred returning # JSON-encoded OG metadata @@ -168,7 +167,7 @@ class PreviewUrlResource(DirectServeJsonResource): self._start_expire_url_cache_data, 10 * 1000 ) - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: request.setHeader(b"Allow", b"OPTIONS, GET") respond_with_json(request, 200, {}, send_cors=True) @@ -305,7 +304,7 @@ class PreviewUrlResource(DirectServeJsonResource): with open(media_info.filename, "rb") as file: body = file.read() - oembed_response = self._oembed.parse_oembed_response(media_info.uri, body) + oembed_response = self._oembed.parse_oembed_response(url, body) og = oembed_response.open_graph_result # Use the cache age from the oEmbed result, instead of the HTTP response. @@ -486,7 +485,6 @@ class PreviewUrlResource(DirectServeJsonResource): async def _expire_url_cache_data(self) -> None: """Clean up expired url cache content, media and thumbnails.""" - # TODO: Delete from backup media store assert self._worker_run_media_background_jobs diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 6c9969e55f..18bf977d3d 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py
@@ -93,6 +93,11 @@ class StorageProviderWrapper(StorageProvider): if file_info.server_name and not self.store_remote: return None + if file_info.url_cache: + # The URL preview cache is short lived and not worth offloading or + # backing up. + return None + if self.store_synchronous: # store_file is supposed to return an Awaitable, but guard # against improper implementations. @@ -110,6 +115,11 @@ class StorageProviderWrapper(StorageProvider): run_in_background(store) async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + if file_info.url_cache: + # Files in the URL preview cache definitely aren't stored here, + # so avoid any potentially slow I/O or network access. + return None + # store_file is supposed to return an Awaitable, but guard # against improper implementations. return await maybe_awaitable(self.backend.fetch(path, file_info)) @@ -125,7 +135,7 @@ class FileStorageProviderBackend(StorageProvider): def __init__(self, hs: "HomeServer", config: str): self.hs = hs - self.cache_directory = hs.config.media_store_path + self.cache_directory = hs.config.media.media_store_path self.base_directory = config def __str__(self) -> str: @@ -138,8 +148,7 @@ class FileStorageProviderBackend(StorageProvider): backup_fname = os.path.join(self.base_directory, path) dirname = os.path.dirname(backup_fname) - if not os.path.exists(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) await defer_to_thread( self.hs.get_reactor(), shutil.copyfile, primary_fname, backup_fname diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 22f43d8531..ed91ef5a42 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -17,11 +17,10 @@ import logging from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from twisted.web.server import Request - from synapse.api.errors import SynapseError from synapse.http.server import DirectServeJsonResource, set_cors_headers from synapse.http.servlet import parse_integer, parse_string +from synapse.http.site import SynapseRequest from synapse.rest.media.v1.media_storage import MediaStorage from ._base import ( @@ -54,10 +53,10 @@ class ThumbnailResource(DirectServeJsonResource): self.store = hs.get_datastore() self.media_repo = media_repo self.media_storage = media_storage - self.dynamic_thumbnails = hs.config.dynamic_thumbnails + self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails self.server_name = hs.hostname - async def _async_render_GET(self, request: Request) -> None: + async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) server_name, media_id, _ = parse_media_id(request) width = parse_integer(request, "width", required=True) @@ -88,7 +87,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _respond_local_thumbnail( self, - request: Request, + request: SynapseRequest, media_id: str, width: int, height: int, @@ -121,7 +120,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _select_or_generate_local_thumbnail( self, - request: Request, + request: SynapseRequest, media_id: str, desired_width: int, desired_height: int, @@ -186,7 +185,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _select_or_generate_remote_thumbnail( self, - request: Request, + request: SynapseRequest, server_name: str, media_id: str, desired_width: int, @@ -249,7 +248,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _respond_remote_thumbnail( self, - request: Request, + request: SynapseRequest, server_name: str, media_id: str, width: int, @@ -280,7 +279,7 @@ class ThumbnailResource(DirectServeJsonResource): async def _select_and_respond_with_thumbnail( self, - request: Request, + request: SynapseRequest, desired_width: int, desired_height: int, desired_method: str, diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 146adca8f1..7dcb1428e4 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py
@@ -16,8 +16,6 @@ import logging from typing import IO, TYPE_CHECKING, Dict, List, Optional -from twisted.web.server import Request - from synapse.api.errors import Codes, SynapseError from synapse.http.server import DirectServeJsonResource, respond_with_json from synapse.http.servlet import parse_bytes_from_args @@ -43,10 +41,10 @@ class UploadResource(DirectServeJsonResource): self.clock = hs.get_clock() self.server_name = hs.hostname self.auth = hs.get_auth() - self.max_upload_size = hs.config.max_upload_size + self.max_upload_size = hs.config.media.max_upload_size self.clock = hs.get_clock() - async def _async_render_OPTIONS(self, request: Request) -> None: + async def _async_render_OPTIONS(self, request: SynapseRequest) -> None: respond_with_json(request, 200, {}, send_cors=True) async def _async_render_POST(self, request: SynapseRequest) -> None: