diff options
Diffstat (limited to 'synapse/rest/media')
-rw-r--r-- | synapse/rest/media/v1/media_storage.py | 6 | ||||
-rw-r--r-- | synapse/rest/media/v1/oembed.py | 10 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_url_resource.py | 35 |
3 files changed, 35 insertions, 16 deletions
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index fca239d8c7..9f6c251caf 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -343,7 +343,7 @@ class SpamMediaException(NotFoundError): """ -@attr.s(slots=True) +@attr.s(slots=True, auto_attribs=True) class ReadableFileWrapper: """Wrapper that allows reading a file in chunks, yielding to the reactor, and writing to a callback. @@ -354,8 +354,8 @@ class ReadableFileWrapper: CHUNK_SIZE = 2 ** 14 - clock = attr.ib(type=Clock) - path = attr.ib(type=str) + clock: Clock + path: str async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None: """Reads the file in chunks and calls the callback with each chunk.""" diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index cce1527ed9..2177b46c9e 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -33,6 +33,8 @@ logger = logging.getLogger(__name__) class OEmbedResult: # The Open Graph result (converted from the oEmbed result). open_graph_result: JsonDict + # The author_name of the oEmbed result + author_name: Optional[str] # Number of milliseconds to cache the content, according to the oEmbed response. # # This will be None if no cache-age is provided in the oEmbed response (or @@ -154,11 +156,12 @@ class OEmbedProvider: "og:url": url, } - # Use either title or author's name as the title. - title = oembed.get("title") or oembed.get("author_name") + title = oembed.get("title") if title: open_graph_response["og:title"] = title + author_name = oembed.get("author_name") + # Use the provider name and as the site. provider_name = oembed.get("provider_name") if provider_name: @@ -193,9 +196,10 @@ class OEmbedProvider: # Trap any exception and let the code follow as usual. logger.warning("Error parsing oEmbed metadata from %s: %r", url, e) open_graph_response = {} + author_name = None cache_age = None - return OEmbedResult(open_graph_response, cache_age) + return OEmbedResult(open_graph_response, author_name, cache_age) def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a3829d943b..e8881bc870 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -262,6 +262,7 @@ class PreviewUrlResource(DirectServeJsonResource): # The number of milliseconds that the response should be considered valid. expiration_ms = media_info.expires + author_name: Optional[str] = None if _is_media(media_info.media_type): file_id = media_info.filesystem_id @@ -294,17 +295,25 @@ class PreviewUrlResource(DirectServeJsonResource): # Check if this HTML document points to oEmbed information and # defer to that. oembed_url = self._oembed.autodiscover_from_html(tree) - og = {} + og_from_oembed: JsonDict = {} if oembed_url: oembed_info = await self._download_url(oembed_url, user) - og, expiration_ms = await self._handle_oembed_response( + ( + og_from_oembed, + author_name, + expiration_ms, + ) = await self._handle_oembed_response( url, oembed_info, expiration_ms ) - # If there was no oEmbed URL (or oEmbed parsing failed), attempt - # to generate the Open Graph information from the HTML. - if not oembed_url or not og: - og = parse_html_to_open_graph(tree, media_info.uri) + # Parse Open Graph information from the HTML in case the oEmbed + # response failed or is incomplete. + og_from_html = parse_html_to_open_graph(tree, media_info.uri) + + # Compile the Open Graph response by using the scraped + # information from the HTML and overlaying any information + # from the oEmbed response. + og = {**og_from_html, **og_from_oembed} await self._precache_image_url(user, media_info, og) else: @@ -312,7 +321,7 @@ class PreviewUrlResource(DirectServeJsonResource): elif oembed_url: # Handle the oEmbed information. - og, expiration_ms = await self._handle_oembed_response( + og, author_name, expiration_ms = await self._handle_oembed_response( url, media_info, expiration_ms ) await self._precache_image_url(user, media_info, og) @@ -321,6 +330,11 @@ class PreviewUrlResource(DirectServeJsonResource): logger.warning("Failed to find any OG data in %s", url) og = {} + # If we don't have a title but we have author_name, copy it as + # title + if not og.get("og:title") and author_name: + og["og:title"] = author_name + # filter out any stupidly long values keys_to_remove = [] for k, v in og.items(): @@ -484,7 +498,7 @@ class PreviewUrlResource(DirectServeJsonResource): async def _handle_oembed_response( self, url: str, media_info: MediaInfo, expiration_ms: int - ) -> Tuple[JsonDict, int]: + ) -> Tuple[JsonDict, Optional[str], int]: """ Parse the downloaded oEmbed info. @@ -497,11 +511,12 @@ class PreviewUrlResource(DirectServeJsonResource): Returns: A tuple of: The Open Graph dictionary, if the oEmbed info can be parsed. + The author name if it could be retrieved from oEmbed. The (possibly updated) length of time, in milliseconds, the media is valid for. """ # If JSON was not returned, there's nothing to do. if not _is_json(media_info.media_type): - return {}, expiration_ms + return {}, None, expiration_ms with open(media_info.filename, "rb") as file: body = file.read() @@ -513,7 +528,7 @@ class PreviewUrlResource(DirectServeJsonResource): if open_graph_result and oembed_response.cache_age is not None: expiration_ms = oembed_response.cache_age - return open_graph_result, expiration_ms + return open_graph_result, oembed_response.author_name, expiration_ms def _start_expire_url_cache_data(self) -> Deferred: return run_as_background_process( |