diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 7c881f2bdb..014fa893d6 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -27,6 +27,7 @@ from twisted.web.server import Request
from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.http.server import finish_request, respond_with_json
+from synapse.http.site import SynapseRequest
from synapse.logging.context import make_deferred_yieldable
from synapse.util.stringutils import is_ascii
@@ -74,7 +75,7 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
)
-def respond_404(request: Request) -> None:
+def respond_404(request: SynapseRequest) -> None:
respond_with_json(
request,
404,
@@ -84,7 +85,7 @@ def respond_404(request: Request) -> None:
async def respond_with_file(
- request: Request,
+ request: SynapseRequest,
media_type: str,
file_path: str,
file_size: Optional[int] = None,
@@ -221,7 +222,7 @@ def _can_encode_filename_as_token(x: str) -> bool:
async def respond_with_responder(
- request: Request,
+ request: SynapseRequest,
responder: "Optional[Responder]",
media_type: str,
file_size: Optional[int],
diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py
index a1d36e5cf1..a95804d327 100644
--- a/synapse/rest/media/v1/config_resource.py
+++ b/synapse/rest/media/v1/config_resource.py
@@ -16,8 +16,6 @@
from typing import TYPE_CHECKING
-from twisted.web.server import Request
-
from synapse.http.server import DirectServeJsonResource, respond_with_json
from synapse.http.site import SynapseRequest
@@ -33,11 +31,11 @@ class MediaConfigResource(DirectServeJsonResource):
config = hs.config
self.clock = hs.get_clock()
self.auth = hs.get_auth()
- self.limits_dict = {"m.upload.size": config.max_upload_size}
+ self.limits_dict = {"m.upload.size": config.media.max_upload_size}
async def _async_render_GET(self, request: SynapseRequest) -> None:
await self.auth.get_user_by_req(request)
respond_with_json(request, 200, self.limits_dict, send_cors=True)
- async def _async_render_OPTIONS(self, request: Request) -> None:
+ async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
respond_with_json(request, 200, {}, send_cors=True)
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
index d6d938953e..6180fa575e 100644
--- a/synapse/rest/media/v1/download_resource.py
+++ b/synapse/rest/media/v1/download_resource.py
@@ -15,10 +15,9 @@
import logging
from typing import TYPE_CHECKING
-from twisted.web.server import Request
-
from synapse.http.server import DirectServeJsonResource, set_cors_headers
from synapse.http.servlet import parse_boolean
+from synapse.http.site import SynapseRequest
from ._base import parse_media_id, respond_404
@@ -37,7 +36,7 @@ class DownloadResource(DirectServeJsonResource):
self.media_repo = media_repo
self.server_name = hs.hostname
- async def _async_render_GET(self, request: Request) -> None:
+ async def _async_render_GET(self, request: SynapseRequest) -> None:
set_cors_headers(request)
request.setHeader(
b"Content-Security-Policy",
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index 39bbe4e874..08bd85f664 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -195,23 +195,24 @@ class MediaFilePaths:
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
- def url_cache_thumbnail_directory(self, media_id: str) -> str:
+ def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
# Media id is of the form <DATE><RANDOM_STRING>
# E.g.: 2017-09-28-fsdRDt24DS234dsf
if NEW_FORMAT_ID_RE.match(media_id):
- return os.path.join(
- self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:]
- )
+ return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:])
else:
return os.path.join(
- self.base_path,
"url_cache_thumbnails",
media_id[0:2],
media_id[2:4],
media_id[4:],
)
+ url_cache_thumbnail_directory = _wrap_in_base_path(
+ url_cache_thumbnail_directory_rel
+ )
+
def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
"The dirs to try and remove if we delete the media_id thumbnails"
# Media id is of the form <DATE><RANDOM_STRING>
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 50e4c9e29f..abd88a2d4f 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -23,7 +23,6 @@ import twisted.internet.error
import twisted.web.http
from twisted.internet.defer import Deferred
from twisted.web.resource import Resource
-from twisted.web.server import Request
from synapse.api.errors import (
FederationDeniedError,
@@ -34,6 +33,7 @@ from synapse.api.errors import (
)
from synapse.config._base import ConfigError
from synapse.config.repository import ThumbnailRequirement
+from synapse.http.site import SynapseRequest
from synapse.logging.context import defer_to_thread
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import UserID
@@ -76,29 +76,35 @@ class MediaRepository:
self.clock = hs.get_clock()
self.server_name = hs.hostname
self.store = hs.get_datastore()
- self.max_upload_size = hs.config.max_upload_size
- self.max_image_pixels = hs.config.max_image_pixels
+ self.max_upload_size = hs.config.media.max_upload_size
+ self.max_image_pixels = hs.config.media.max_image_pixels
Thumbnailer.set_limits(self.max_image_pixels)
- self.primary_base_path: str = hs.config.media_store_path
+ self.primary_base_path: str = hs.config.media.media_store_path
self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path)
- self.dynamic_thumbnails = hs.config.dynamic_thumbnails
- self.thumbnail_requirements = hs.config.thumbnail_requirements
+ self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+ self.thumbnail_requirements = hs.config.media.thumbnail_requirements
self.remote_media_linearizer = Linearizer(name="media_remote")
self.recently_accessed_remotes: Set[Tuple[str, str]] = set()
self.recently_accessed_locals: Set[str] = set()
- self.federation_domain_whitelist = hs.config.federation_domain_whitelist
+ self.federation_domain_whitelist = (
+ hs.config.federation.federation_domain_whitelist
+ )
# List of StorageProviders where we should search for media and
# potentially upload to.
storage_providers = []
- for clz, provider_config, wrapper_config in hs.config.media_storage_providers:
+ for (
+ clz,
+ provider_config,
+ wrapper_config,
+ ) in hs.config.media.media_storage_providers:
backend = clz(hs, provider_config)
provider = StorageProviderWrapper(
backend,
@@ -187,7 +193,7 @@ class MediaRepository:
return "mxc://%s/%s" % (self.server_name, media_id)
async def get_local_media(
- self, request: Request, media_id: str, name: Optional[str]
+ self, request: SynapseRequest, media_id: str, name: Optional[str]
) -> None:
"""Responds to requests for local media, if exists, or returns 404.
@@ -221,7 +227,11 @@ class MediaRepository:
)
async def get_remote_media(
- self, request: Request, server_name: str, media_id: str, name: Optional[str]
+ self,
+ request: SynapseRequest,
+ server_name: str,
+ media_id: str,
+ name: Optional[str],
) -> None:
"""Respond to requests for remote media.
@@ -969,7 +979,7 @@ class MediaRepositoryResource(Resource):
def __init__(self, hs: "HomeServer"):
# If we're not configured to use it, raise if we somehow got here.
- if not hs.config.can_load_media_repo:
+ if not hs.config.media.can_load_media_repo:
raise ConfigError("Synapse is not configured to use a media repo.")
super().__init__()
@@ -980,7 +990,7 @@ class MediaRepositoryResource(Resource):
self.putChild(
b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage)
)
- if hs.config.url_preview_enabled:
+ if hs.config.media.url_preview_enabled:
self.putChild(
b"preview_url",
PreviewUrlResource(hs, media_repo, media_repo.media_storage),
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 01fada8fb5..fca239d8c7 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -132,8 +132,7 @@ class MediaStorage:
fname = os.path.join(self.local_media_directory, path)
dirname = os.path.dirname(fname)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
+ os.makedirs(dirname, exist_ok=True)
finished_called = [False]
@@ -244,8 +243,7 @@ class MediaStorage:
return legacy_local_path
dirname = os.path.dirname(local_path)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
+ os.makedirs(dirname, exist_ok=True)
for provider in self.storage_providers:
res: Any = await provider.fetch(path, file_info)
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index 8b74e72655..e04671fb95 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -13,7 +13,7 @@
# limitations under the License.
import logging
import urllib.parse
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, List, Optional
import attr
@@ -22,6 +22,8 @@ from synapse.types import JsonDict
from synapse.util import json_decoder
if TYPE_CHECKING:
+ from lxml import etree
+
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
@@ -31,7 +33,7 @@ logger = logging.getLogger(__name__)
class OEmbedResult:
# The Open Graph result (converted from the oEmbed result).
open_graph_result: JsonDict
- # Number of seconds to cache the content, according to the oEmbed response.
+ # Number of milliseconds to cache the content, according to the oEmbed response.
#
# This will be None if no cache-age is provided in the oEmbed response (or
# if the oEmbed response cannot be turned into an Open Graph response).
@@ -119,10 +121,22 @@ class OEmbedProvider:
# Ensure the cache age is None or an int.
cache_age = oembed.get("cache_age")
if cache_age:
- cache_age = int(cache_age)
+ cache_age = int(cache_age) * 1000
# The results.
- open_graph_response = {"og:title": oembed.get("title")}
+ open_graph_response = {
+ "og:url": url,
+ }
+
+ # Use either title or author's name as the title.
+ title = oembed.get("title") or oembed.get("author_name")
+ if title:
+ open_graph_response["og:title"] = title
+
+ # Use the provider name as the site name.
+ provider_name = oembed.get("provider_name")
+ if provider_name:
+ open_graph_response["og:site_name"] = provider_name
# If a thumbnail exists, use it. Note that dimensions will be calculated later.
if "thumbnail_url" in oembed:
@@ -137,6 +151,15 @@ class OEmbedProvider:
# If this is a photo, use the full image, not the thumbnail.
open_graph_response["og:image"] = oembed["url"]
+ elif oembed_type == "video":
+ open_graph_response["og:type"] = "video.other"
+ calc_description_and_urls(open_graph_response, oembed["html"])
+ open_graph_response["og:video:width"] = oembed["width"]
+ open_graph_response["og:video:height"] = oembed["height"]
+
+ elif oembed_type == "link":
+ open_graph_response["og:type"] = "website"
+
else:
raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
@@ -149,6 +172,14 @@ class OEmbedProvider:
return OEmbedResult(open_graph_response, cache_age)
+def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
+ results = []
+ for tag in tree.xpath("//*/" + tag_name):
+ if "src" in tag.attrib:
+ results.append(tag.attrib["src"])
+ return results
+
+
def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
"""
Calculate description for an HTML document.
@@ -179,6 +210,16 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
if tree is None:
return
+ # Attempt to find interesting URLs (images, videos, embeds).
+ if "og:image" not in open_graph_response:
+ image_urls = _fetch_urls(tree, "img")
+ if image_urls:
+ open_graph_response["og:image"] = image_urls[0]
+
+ video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed")
+ if video_urls:
+ open_graph_response["og:video"] = video_urls[0]
+
from synapse.rest.media.v1.preview_url_resource import _calc_description
description = _calc_description(tree)
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0a0b476d2b..79a42b2455 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -29,7 +29,6 @@ import attr
from twisted.internet.defer import Deferred
from twisted.internet.error import DNSLookupError
-from twisted.web.server import Request
from synapse.api.errors import Codes, SynapseError
from synapse.http.client import SimpleHttpClient
@@ -126,14 +125,14 @@ class PreviewUrlResource(DirectServeJsonResource):
self.auth = hs.get_auth()
self.clock = hs.get_clock()
self.filepaths = media_repo.filepaths
- self.max_spider_size = hs.config.max_spider_size
+ self.max_spider_size = hs.config.media.max_spider_size
self.server_name = hs.hostname
self.store = hs.get_datastore()
self.client = SimpleHttpClient(
hs,
treq_args={"browser_like_redirects": True},
- ip_whitelist=hs.config.url_preview_ip_range_whitelist,
- ip_blacklist=hs.config.url_preview_ip_range_blacklist,
+ ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
+ ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
use_proxy=True,
)
self.media_repo = media_repo
@@ -151,8 +150,8 @@ class PreviewUrlResource(DirectServeJsonResource):
or instance_running_jobs == hs.get_instance_name()
)
- self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
- self.url_preview_accept_language = hs.config.url_preview_accept_language
+ self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
+ self.url_preview_accept_language = hs.config.media.url_preview_accept_language
# memory cache mapping urls to an ObservableDeferred returning
# JSON-encoded OG metadata
@@ -168,7 +167,7 @@ class PreviewUrlResource(DirectServeJsonResource):
self._start_expire_url_cache_data, 10 * 1000
)
- async def _async_render_OPTIONS(self, request: Request) -> None:
+ async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
request.setHeader(b"Allow", b"OPTIONS, GET")
respond_with_json(request, 200, {}, send_cors=True)
@@ -305,7 +304,7 @@ class PreviewUrlResource(DirectServeJsonResource):
with open(media_info.filename, "rb") as file:
body = file.read()
- oembed_response = self._oembed.parse_oembed_response(media_info.uri, body)
+ oembed_response = self._oembed.parse_oembed_response(url, body)
og = oembed_response.open_graph_result
# Use the cache age from the oEmbed result, instead of the HTTP response.
@@ -486,7 +485,6 @@ class PreviewUrlResource(DirectServeJsonResource):
async def _expire_url_cache_data(self) -> None:
"""Clean up expired url cache content, media and thumbnails."""
- # TODO: Delete from backup media store
assert self._worker_run_media_background_jobs
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 6c9969e55f..18bf977d3d 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -93,6 +93,11 @@ class StorageProviderWrapper(StorageProvider):
if file_info.server_name and not self.store_remote:
return None
+ if file_info.url_cache:
+ # The URL preview cache is short lived and not worth offloading or
+ # backing up.
+ return None
+
if self.store_synchronous:
# store_file is supposed to return an Awaitable, but guard
# against improper implementations.
@@ -110,6 +115,11 @@ class StorageProviderWrapper(StorageProvider):
run_in_background(store)
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
+ if file_info.url_cache:
+ # Files in the URL preview cache definitely aren't stored here,
+ # so avoid any potentially slow I/O or network access.
+ return None
+
# store_file is supposed to return an Awaitable, but guard
# against improper implementations.
return await maybe_awaitable(self.backend.fetch(path, file_info))
@@ -125,7 +135,7 @@ class FileStorageProviderBackend(StorageProvider):
def __init__(self, hs: "HomeServer", config: str):
self.hs = hs
- self.cache_directory = hs.config.media_store_path
+ self.cache_directory = hs.config.media.media_store_path
self.base_directory = config
def __str__(self) -> str:
@@ -138,8 +148,7 @@ class FileStorageProviderBackend(StorageProvider):
backup_fname = os.path.join(self.base_directory, path)
dirname = os.path.dirname(backup_fname)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
+ os.makedirs(dirname, exist_ok=True)
await defer_to_thread(
self.hs.get_reactor(), shutil.copyfile, primary_fname, backup_fname
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 22f43d8531..ed91ef5a42 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -17,11 +17,10 @@
import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
-from twisted.web.server import Request
-
from synapse.api.errors import SynapseError
from synapse.http.server import DirectServeJsonResource, set_cors_headers
from synapse.http.servlet import parse_integer, parse_string
+from synapse.http.site import SynapseRequest
from synapse.rest.media.v1.media_storage import MediaStorage
from ._base import (
@@ -54,10 +53,10 @@ class ThumbnailResource(DirectServeJsonResource):
self.store = hs.get_datastore()
self.media_repo = media_repo
self.media_storage = media_storage
- self.dynamic_thumbnails = hs.config.dynamic_thumbnails
+ self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self.server_name = hs.hostname
- async def _async_render_GET(self, request: Request) -> None:
+ async def _async_render_GET(self, request: SynapseRequest) -> None:
set_cors_headers(request)
server_name, media_id, _ = parse_media_id(request)
width = parse_integer(request, "width", required=True)
@@ -88,7 +87,7 @@ class ThumbnailResource(DirectServeJsonResource):
async def _respond_local_thumbnail(
self,
- request: Request,
+ request: SynapseRequest,
media_id: str,
width: int,
height: int,
@@ -121,7 +120,7 @@ class ThumbnailResource(DirectServeJsonResource):
async def _select_or_generate_local_thumbnail(
self,
- request: Request,
+ request: SynapseRequest,
media_id: str,
desired_width: int,
desired_height: int,
@@ -186,7 +185,7 @@ class ThumbnailResource(DirectServeJsonResource):
async def _select_or_generate_remote_thumbnail(
self,
- request: Request,
+ request: SynapseRequest,
server_name: str,
media_id: str,
desired_width: int,
@@ -249,7 +248,7 @@ class ThumbnailResource(DirectServeJsonResource):
async def _respond_remote_thumbnail(
self,
- request: Request,
+ request: SynapseRequest,
server_name: str,
media_id: str,
width: int,
@@ -280,7 +279,7 @@ class ThumbnailResource(DirectServeJsonResource):
async def _select_and_respond_with_thumbnail(
self,
- request: Request,
+ request: SynapseRequest,
desired_width: int,
desired_height: int,
desired_method: str,
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 146adca8f1..7dcb1428e4 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -16,8 +16,6 @@
import logging
from typing import IO, TYPE_CHECKING, Dict, List, Optional
-from twisted.web.server import Request
-
from synapse.api.errors import Codes, SynapseError
from synapse.http.server import DirectServeJsonResource, respond_with_json
from synapse.http.servlet import parse_bytes_from_args
@@ -43,10 +41,10 @@ class UploadResource(DirectServeJsonResource):
self.clock = hs.get_clock()
self.server_name = hs.hostname
self.auth = hs.get_auth()
- self.max_upload_size = hs.config.max_upload_size
+ self.max_upload_size = hs.config.media.max_upload_size
self.clock = hs.get_clock()
- async def _async_render_OPTIONS(self, request: Request) -> None:
+ async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
respond_with_json(request, 200, {}, send_cors=True)
async def _async_render_POST(self, request: SynapseRequest) -> None:
|