diff options
author | Patrick Cloke <clokep@users.noreply.github.com> | 2021-09-22 09:45:20 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-22 09:45:20 -0400 |
commit | 6fc8be9a1b2046e69e8c6f731442887e3addeec0 (patch) | |
tree | 9c47c86d1d925ff7ff30f40ec0d0ee4d89743d77 /synapse/rest/media/v1/oembed.py | |
parent | Fix /initialSync error due to unhashable `RoomStreamToken` (#10827) (diff) | |
download | synapse-6fc8be9a1b2046e69e8c6f731442887e3addeec0.tar.xz |
Include more information in oEmbed previews. (#10819)
* Improved titles (fall back to the author name if there's no title) and include the site name.
* Handle photo/video payloads.
* Include the original URL in the Open Graph response.
* Fix the expiration time (by properly converting from seconds to milliseconds).
Diffstat (limited to 'synapse/rest/media/v1/oembed.py')
-rw-r--r-- | synapse/rest/media/v1/oembed.py | 49 |
1 files changed, 45 insertions, 4 deletions
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 8b74e72655..e04671fb95 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -13,7 +13,7 @@ # limitations under the License. import logging import urllib.parse -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional import attr @@ -22,6 +22,8 @@ from synapse.types import JsonDict from synapse.util import json_decoder if TYPE_CHECKING: + from lxml import etree + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,7 +33,7 @@ logger = logging.getLogger(__name__) class OEmbedResult: # The Open Graph result (converted from the oEmbed result). open_graph_result: JsonDict - # Number of seconds to cache the content, according to the oEmbed response. + # Number of milliseconds to cache the content, according to the oEmbed response. # # This will be None if no cache-age is provided in the oEmbed response (or # if the oEmbed response cannot be turned into an Open Graph response). @@ -119,10 +121,22 @@ class OEmbedProvider: # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") if cache_age: - cache_age = int(cache_age) + cache_age = int(cache_age) * 1000 # The results. - open_graph_response = {"og:title": oembed.get("title")} + open_graph_response = { + "og:url": url, + } + + # Use either title or author's name as the title. + title = oembed.get("title") or oembed.get("author_name") + if title: + open_graph_response["og:title"] = title + + # Use the provider name and as the site. + provider_name = oembed.get("provider_name") + if provider_name: + open_graph_response["og:site_name"] = provider_name # If a thumbnail exists, use it. Note that dimensions will be calculated later. if "thumbnail_url" in oembed: @@ -137,6 +151,15 @@ class OEmbedProvider: # If this is a photo, use the full image, not the thumbnail. 
open_graph_response["og:image"] = oembed["url"] + elif oembed_type == "video": + open_graph_response["og:type"] = "video.other" + calc_description_and_urls(open_graph_response, oembed["html"]) + open_graph_response["og:video:width"] = oembed["width"] + open_graph_response["og:video:height"] = oembed["height"] + + elif oembed_type == "link": + open_graph_response["og:type"] = "website" + else: raise RuntimeError(f"Unknown oEmbed type: {oembed_type}") @@ -149,6 +172,14 @@ class OEmbedProvider: return OEmbedResult(open_graph_response, cache_age) +def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]: + results = [] + for tag in tree.xpath("//*/" + tag_name): + if "src" in tag.attrib: + results.append(tag.attrib["src"]) + return results + + def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: """ Calculate description for an HTML document. @@ -179,6 +210,16 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> if tree is None: return + # Attempt to find interesting URLs (images, videos, embeds). + if "og:image" not in open_graph_response: + image_urls = _fetch_urls(tree, "img") + if image_urls: + open_graph_response["og:image"] = image_urls[0] + + video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed") + if video_urls: + open_graph_response["og:video"] = video_urls[0] + from synapse.rest.media.v1.preview_url_resource import _calc_description description = _calc_description(tree) |