summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Cloke <patrickc@matrix.org> 2023-05-26 16:26:14 -0400
committer Patrick Cloke <patrickc@matrix.org> 2023-05-26 16:26:14 -0400
commit 1b12e34d067b7f9d96401b35f36272eb42187273 (patch)
tree a84b717e98f5e26ab0faedcb58a2fa8289985061
parent Add Unix socket support for Redis connections (#15644) (diff)
download synapse-clokep/oembed-and-html.tar.xz
-rw-r--r-- synapse/media/oembed.py 1
-rw-r--r-- synapse/media/url_previewer.py 82
-rw-r--r-- synapse/res/providers.json 2
3 files changed, 41 insertions, 44 deletions
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py
index c0eaf04be5..699e11f04e 100644
--- a/synapse/media/oembed.py
+++ b/synapse/media/oembed.py
@@ -136,6 +136,7 @@ class OEmbedProvider:
         Returns:
             json-encoded Open Graph data
         """
+        breakpoint()
 
         try:
             # oEmbed responses *must* be UTF-8 according to the spec.
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index 70b32cee17..60aac58c2a 100644
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -218,7 +218,7 @@ class UrlPreviewer:
         if not observable:
             download = run_in_background(self._do_preview, url, user, ts)
             observable = ObservableDeferred(download, consumeErrors=True)
-            self._cache[url] = observable
+            # self._cache[url] = observable
         else:
             logger.info("Returning cached response")
 
@@ -239,7 +239,8 @@ class UrlPreviewer:
         # historical previews, if we have any)
         cache_result = await self.store.get_url_cache(url, ts)
         if (
-            cache_result
+            False
+            and cache_result
             and cache_result["expires_ts"] > ts
             and cache_result["response_code"] / 100 == 2
         ):
@@ -250,12 +251,12 @@ class UrlPreviewer:
                 og = og.encode("utf8")
             return og
 
-        # If this URL can be accessed via an allowed oEmbed, use that instead.
+        # Check if this URL has a corresponding oEmbed URL.
         url_to_download = url
         oembed_url = self._oembed.get_oembed_url(url)
-        if oembed_url:
-            url_to_download = oembed_url
 
+        # TODO If fetching the URL fails and we have an oEmbed URL, try that
+        # instead.
         media_info = await self._handle_url(url_to_download, user)
 
         logger.debug("got media_info of '%s'", media_info)
@@ -291,55 +292,48 @@ class UrlPreviewer:
                 body = file.read()
 
             tree = decode_body(body, media_info.uri, media_info.media_type)
+            og_from_html: JsonDict = {}
             if tree is not None:
-                # Check if this HTML document points to oEmbed information and
-                # defer to that.
-                oembed_url = self._oembed.autodiscover_from_html(tree)
-                og_from_oembed: JsonDict = {}
-                # Only download to the oEmbed URL if it is allowed.
-                if oembed_url:
-                    try:
-                        oembed_info = await self._handle_url(
-                            oembed_url, user, allow_data_urls=True
-                        )
-                    except Exception as e:
-                        # Fetching the oEmbed info failed, don't block the entire URL preview.
-                        logger.warning(
-                            "oEmbed fetch failed during URL preview: %s errored with %s",
-                            oembed_url,
-                            e,
-                        )
-                    else:
-                        (
-                            og_from_oembed,
-                            author_name,
-                            expiration_ms,
-                        ) = await self._handle_oembed_response(
-                            url, oembed_info, expiration_ms
-                        )
+                # Attempt to autodiscover an oEmbed URL in the document if one
+                # is not already known.
+                if not oembed_url:
+                    oembed_url = self._oembed.autodiscover_from_html(tree)
 
                 # Parse Open Graph information from the HTML in case the oEmbed
                 # response failed or is incomplete.
                 og_from_html = parse_html_to_open_graph(tree)
 
-                # Compile the Open Graph response by using the scraped
-                # information from the HTML and overlaying any information
-                # from the oEmbed response.
-                og = {**og_from_html, **og_from_oembed}
-
-                await self._precache_image_url(user, media_info, og)
-            else:
-                og = {}
+            og_from_oembed: JsonDict = {}
+            # If an oEmbed URL exists, also fetch it.
+            if oembed_url:
+                try:
+                    oembed_info = await self._handle_url(
+                        oembed_url, user, allow_data_urls=True
+                    )
+                except Exception as e:
+                    # Fetching the oEmbed info failed, don't block the entire URL preview.
+                    logger.warning(
+                        "oEmbed fetch failed during URL preview: %s errored with %s",
+                        oembed_url,
+                        e,
+                    )
+                else:
+                    (
+                        og_from_oembed,
+                        author_name,
+                        expiration_ms,
+                    ) = await self._handle_oembed_response(
+                        url, oembed_info, expiration_ms
+                    )
 
-        elif oembed_url:
-            # Handle the oEmbed information.
-            og, author_name, expiration_ms = await self._handle_oembed_response(
-                url, media_info, expiration_ms
-            )
+            # Compile the Open Graph response by using the scraped
+            # information from the HTML and overlaying any information
+            # from the oEmbed response.
+            og = {**og_from_html, **og_from_oembed}
             await self._precache_image_url(user, media_info, og)
 
         else:
-            logger.warning("Failed to find any OG data in %s", url)
+            logger.warning("Failed to find any Open Graph data in %s", url)
             og = {}
 
         # If we don't have a title but we have author_name, copy it as
diff --git a/synapse/res/providers.json b/synapse/res/providers.json
index 2dc9fec8e3..c196eea0dc 100644
--- a/synapse/res/providers.json
+++ b/synapse/res/providers.json
@@ -5,6 +5,8 @@
         "endpoints": [
             {
                 "schemes": [
+                    "https://twitter.com/*/status/*",
+                    "https://*.twitter.com/*/status/*",
                     "https://twitter.com/*/moments/*",
                     "https://*.twitter.com/*/moments/*"
                 ],