summary refs log tree commit diff
path: root/synapse/rest/media/v1/oembed.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/rest/media/v1/oembed.py')
-rw-r--r-- synapse/rest/media/v1/oembed.py 135
1 files changed, 135 insertions, 0 deletions
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
new file mode 100644
index 0000000000..afe41823e4
--- /dev/null
+++ b/synapse/rest/media/v1/oembed.py
@@ -0,0 +1,135 @@
+#  Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Optional
+
+import attr
+
+from synapse.http.client import SimpleHttpClient
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s(slots=True, auto_attribs=True)
+class OEmbedResult:
+    # Parsed oEmbed metadata. Either HTML content or URL must be provided.
+    html: Optional[str]
+    url: Optional[str]
+    title: Optional[str]
+    # Number of seconds to cache the content, or None if unknown.
+    cache_age: Optional[int]
+
+
+class OEmbedError(Exception):
+    """An error occurred while fetching or processing oEmbed content."""
+
+
+class OEmbedProvider:
+    """
+    A helper for accessing oEmbed content.
+
+    It can be used to check if a URL should be accessed via oEmbed and for
+    requesting/parsing oEmbed content.
+    """
+
+    def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
+        # Map each configured URL pattern to the API endpoint that serves it.
+        self._oembed_patterns = {}
+        for oembed_endpoint in hs.config.oembed.oembed_patterns:
+            for pattern in oembed_endpoint.url_patterns:
+                self._oembed_patterns[pattern] = oembed_endpoint.api_endpoint
+        self._client = client
+
+    def get_oembed_url(self, url: str) -> Optional[str]:
+        """
+        Check whether the URL should be downloaded as oEmbed content instead.
+
+        Args:
+            url: The URL to check.
+
+        Returns:
+            The oEmbed endpoint to use instead, or None to use the original URL.
+        """
+        for url_pattern, endpoint in self._oembed_patterns.items():
+            if url_pattern.fullmatch(url):
+                return endpoint
+
+        return None  # No pattern matched.
+
+    async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
+        """
+        Request content from an oEmbed endpoint.
+
+        Args:
+            endpoint: The oEmbed API endpoint.
+            url: The URL to pass to the API.
+
+        Returns:
+            An object representing the metadata returned, with html or url set.
+
+        Raises:
+            OEmbedError if fetching or parsing of the oEmbed information fails.
+        """
+        try:
+            logger.debug("Trying to get oEmbed content for url '%s'", url)
+            result = await self._client.get_json(
+                endpoint,
+                # TODO Specify max height / width.
+                # Note that only the JSON format is supported.
+                args={"url": url},
+            )
+
+            # Ensure there's a version of 1.0.
+            if result.get("version") != "1.0":
+                raise OEmbedError("Invalid version: %s" % (result.get("version"),))
+
+            # The cache age is optional: normalise it to an int, or to None if
+            # it is missing or malformed, instead of failing the whole lookup.
+            try:
+                cache_age = int(result["cache_age"])
+            except (KeyError, TypeError, ValueError, OverflowError):
+                cache_age = None
+
+            oembed_type = result.get("type")
+            oembed_result = OEmbedResult(None, None, result.get("title"), cache_age)
+
+            # Only return results that carry content; otherwise fall through.
+            if oembed_type == "rich" and result.get("html"):
+                oembed_result.html = result["html"]
+                return oembed_result
+
+            if oembed_type == "photo" and result.get("url"):
+                oembed_result.url = result["url"]
+                return oembed_result
+
+            # TODO Handle link and video types; for now fall back to the thumbnail.
+            if result.get("thumbnail_url"):
+                oembed_result.url = result["thumbnail_url"]
+                return oembed_result
+
+            raise OEmbedError("Incompatible oEmbed information.")
+
+        except OEmbedError as e:
+            # Trap OEmbedErrors first so we can directly re-raise them.
+            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
+            raise
+
+        except Exception as e:
+            # Trap any exception and let the code follow as usual.
+            # FIXME: pass through 404s and other error messages nicely
+            logger.warning("Error downloading oEmbed metadata from %s: %r", url, e)
+            raise OEmbedError() from e