diff options
author | Patrick Cloke <patrickc@matrix.org> | 2023-05-26 16:26:14 -0400 |
---|---|---|
committer | Patrick Cloke <patrickc@matrix.org> | 2023-05-26 16:26:14 -0400 |
commit | 1b12e34d067b7f9d96401b35f36272eb42187273 (patch) | |
tree | a84b717e98f5e26ab0faedcb58a2fa8289985061 /synapse | |
parent | Add Unix socket support for Redis connections (#15644) (diff) | |
download | synapse-clokep/oembed-and-html.tar.xz |
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/media/oembed.py | 1 | ||||
-rw-r--r-- | synapse/media/url_previewer.py | 82 | ||||
-rw-r--r-- | synapse/res/providers.json | 2 |
3 files changed, 41 insertions, 44 deletions
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py index c0eaf04be5..699e11f04e 100644 --- a/synapse/media/oembed.py +++ b/synapse/media/oembed.py @@ -136,6 +136,7 @@ class OEmbedProvider: Returns: json-encoded Open Graph data """ + breakpoint() try: # oEmbed responses *must* be UTF-8 according to the spec. diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index 70b32cee17..60aac58c2a 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -218,7 +218,7 @@ class UrlPreviewer: if not observable: download = run_in_background(self._do_preview, url, user, ts) observable = ObservableDeferred(download, consumeErrors=True) - self._cache[url] = observable + # self._cache[url] = observable else: logger.info("Returning cached response") @@ -239,7 +239,8 @@ class UrlPreviewer: # historical previews, if we have any) cache_result = await self.store.get_url_cache(url, ts) if ( - cache_result + False + and cache_result and cache_result["expires_ts"] > ts and cache_result["response_code"] / 100 == 2 ): @@ -250,12 +251,12 @@ class UrlPreviewer: og = og.encode("utf8") return og - # If this URL can be accessed via an allowed oEmbed, use that instead. + # Check if this URL has a corresponding oEmbed URL. url_to_download = url oembed_url = self._oembed.get_oembed_url(url) - if oembed_url: - url_to_download = oembed_url + # TODO If fetching the URL fails and we have an oEmbed URL, try that + # instead. media_info = await self._handle_url(url_to_download, user) logger.debug("got media_info of '%s'", media_info) @@ -291,55 +292,48 @@ class UrlPreviewer: body = file.read() tree = decode_body(body, media_info.uri, media_info.media_type) + og_from_html: JsonDict = {} if tree is not None: - # Check if this HTML document points to oEmbed information and - # defer to that. - oembed_url = self._oembed.autodiscover_from_html(tree) - og_from_oembed: JsonDict = {} - # Only download to the oEmbed URL if it is allowed. 
- if oembed_url: - try: - oembed_info = await self._handle_url( - oembed_url, user, allow_data_urls=True - ) - except Exception as e: - # Fetching the oEmbed info failed, don't block the entire URL preview. - logger.warning( - "oEmbed fetch failed during URL preview: %s errored with %s", - oembed_url, - e, - ) - else: - ( - og_from_oembed, - author_name, - expiration_ms, - ) = await self._handle_oembed_response( - url, oembed_info, expiration_ms - ) + # Attempt to autodiscover an oEmbed URL in the document if one + # is not already known. + if not oembed_url: + oembed_url = self._oembed.autodiscover_from_html(tree) # Parse Open Graph information from the HTML in case the oEmbed # response failed or is incomplete. og_from_html = parse_html_to_open_graph(tree) - # Compile the Open Graph response by using the scraped - # information from the HTML and overlaying any information - # from the oEmbed response. - og = {**og_from_html, **og_from_oembed} - - await self._precache_image_url(user, media_info, og) - else: - og = {} + og_from_oembed: JsonDict = {} + # If an oEmbed URL exists, also fetch it. + if oembed_url: + try: + oembed_info = await self._handle_url( + oembed_url, user, allow_data_urls=True + ) + except Exception as e: + # Fetching the oEmbed info failed, don't block the entire URL preview. + logger.warning( + "oEmbed fetch failed during URL preview: %s errored with %s", + oembed_url, + e, + ) + else: + ( + og_from_oembed, + author_name, + expiration_ms, + ) = await self._handle_oembed_response( + url, oembed_info, expiration_ms + ) - elif oembed_url: - # Handle the oEmbed information. - og, author_name, expiration_ms = await self._handle_oembed_response( - url, media_info, expiration_ms - ) + # Compile the Open Graph response by using the scraped + # information from the HTML and overlaying any information + # from the oEmbed response. 
+ og = {**og_from_html, **og_from_oembed} await self._precache_image_url(user, media_info, og) else: - logger.warning("Failed to find any OG data in %s", url) + logger.warning("Failed to find any Open Graph data in %s", url) og = {} # If we don't have a title but we have author_name, copy it as diff --git a/synapse/res/providers.json b/synapse/res/providers.json index 2dc9fec8e3..c196eea0dc 100644 --- a/synapse/res/providers.json +++ b/synapse/res/providers.json @@ -5,6 +5,8 @@ "endpoints": [ { "schemes": [ + "https://twitter.com/*/status/*", + "https://*.twitter.com/*/status/*", "https://twitter.com/*/moments/*", "https://*.twitter.com/*/moments/*" ], |