diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index cce1527ed9..2177b46c9e 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)
class OEmbedResult:
# The Open Graph result (converted from the oEmbed result).
open_graph_result: JsonDict
+ # The author_name of the oEmbed result
+ author_name: Optional[str]
# Number of milliseconds to cache the content, according to the oEmbed response.
#
# This will be None if no cache-age is provided in the oEmbed response (or
@@ -154,11 +156,12 @@ class OEmbedProvider:
"og:url": url,
}
- # Use either title or author's name as the title.
- title = oembed.get("title") or oembed.get("author_name")
+ title = oembed.get("title")
if title:
open_graph_response["og:title"] = title
+ author_name = oembed.get("author_name")
+
# Use the provider name and as the site.
provider_name = oembed.get("provider_name")
if provider_name:
@@ -193,9 +196,10 @@ class OEmbedProvider:
# Trap any exception and let the code follow as usual.
logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
open_graph_response = {}
+ author_name = None
cache_age = None
- return OEmbedResult(open_graph_response, cache_age)
+ return OEmbedResult(open_graph_response, author_name, cache_age)
def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index a3829d943b..e8881bc870 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -262,6 +262,7 @@ class PreviewUrlResource(DirectServeJsonResource):
# The number of milliseconds that the response should be considered valid.
expiration_ms = media_info.expires
+ author_name: Optional[str] = None
if _is_media(media_info.media_type):
file_id = media_info.filesystem_id
@@ -294,17 +295,25 @@ class PreviewUrlResource(DirectServeJsonResource):
# Check if this HTML document points to oEmbed information and
# defer to that.
oembed_url = self._oembed.autodiscover_from_html(tree)
- og = {}
+ og_from_oembed: JsonDict = {}
if oembed_url:
oembed_info = await self._download_url(oembed_url, user)
- og, expiration_ms = await self._handle_oembed_response(
+ (
+ og_from_oembed,
+ author_name,
+ expiration_ms,
+ ) = await self._handle_oembed_response(
url, oembed_info, expiration_ms
)
- # If there was no oEmbed URL (or oEmbed parsing failed), attempt
- # to generate the Open Graph information from the HTML.
- if not oembed_url or not og:
- og = parse_html_to_open_graph(tree, media_info.uri)
+ # Parse Open Graph information from the HTML in case the oEmbed
+ # response failed or is incomplete.
+ og_from_html = parse_html_to_open_graph(tree, media_info.uri)
+
+ # Compile the Open Graph response by using the scraped
+ # information from the HTML and overlaying any information
+ # from the oEmbed response.
+ og = {**og_from_html, **og_from_oembed}
await self._precache_image_url(user, media_info, og)
else:
@@ -312,7 +321,7 @@ class PreviewUrlResource(DirectServeJsonResource):
elif oembed_url:
# Handle the oEmbed information.
- og, expiration_ms = await self._handle_oembed_response(
+ og, author_name, expiration_ms = await self._handle_oembed_response(
url, media_info, expiration_ms
)
await self._precache_image_url(user, media_info, og)
@@ -321,6 +330,11 @@ class PreviewUrlResource(DirectServeJsonResource):
logger.warning("Failed to find any OG data in %s", url)
og = {}
+ # If we don't have a title but we have author_name, copy it as
+ # title
+ if not og.get("og:title") and author_name:
+ og["og:title"] = author_name
+
# filter out any stupidly long values
keys_to_remove = []
for k, v in og.items():
@@ -484,7 +498,7 @@ class PreviewUrlResource(DirectServeJsonResource):
async def _handle_oembed_response(
self, url: str, media_info: MediaInfo, expiration_ms: int
- ) -> Tuple[JsonDict, int]:
+ ) -> Tuple[JsonDict, Optional[str], int]:
"""
Parse the downloaded oEmbed info.
@@ -497,11 +511,12 @@ class PreviewUrlResource(DirectServeJsonResource):
Returns:
A tuple of:
The Open Graph dictionary, if the oEmbed info can be parsed.
+ The author name if it could be retrieved from oEmbed.
The (possibly updated) length of time, in milliseconds, the media is valid for.
"""
# If JSON was not returned, there's nothing to do.
if not _is_json(media_info.media_type):
- return {}, expiration_ms
+ return {}, None, expiration_ms
with open(media_info.filename, "rb") as file:
body = file.read()
@@ -513,7 +528,7 @@ class PreviewUrlResource(DirectServeJsonResource):
if open_graph_result and oembed_response.cache_age is not None:
expiration_ms = oembed_response.cache_age
- return open_graph_result, expiration_ms
+ return open_graph_result, oembed_response.author_name, expiration_ms
def _start_expire_url_cache_data(self) -> Deferred:
return run_as_background_process(
|