summary refs log tree commit diff
path: root/synapse/rest/media/v1/oembed.py
blob: 2e6706dbfa7f23ac250a9663a0036f11e56bf904 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#  Copyright 2021 The Matrix.org Foundation C.I.C.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
import logging
from typing import TYPE_CHECKING, Optional

import attr

from synapse.http.client import SimpleHttpClient

if TYPE_CHECKING:
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)


@attr.s(slots=True, auto_attribs=True)
class OEmbedResult:
    # Either HTML content or URL must be provided.
    html: Optional[str]
    url: Optional[str]
    title: Optional[str]
    # Number of seconds to cache the content.
    cache_age: int


class OEmbedError(Exception):
    """An error occurred processing the oEmbed object."""


class OEmbedProvider:
    """
    A helper for accessing oEmbed content.

    It can be used to check if a URL should be accessed via oEmbed and for
    requesting/parsing oEmbed content.
    """

    def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
        self._oembed_patterns = {}
        for oembed_endpoint in hs.config.oembed.oembed_patterns:
            api_endpoint = oembed_endpoint.api_endpoint

            # Only JSON is supported at the moment. This could be declared in
            # the formats field. Otherwise, if the endpoint ends in .xml assume
            # it doesn't support JSON.
            if (
                oembed_endpoint.formats is not None
                and "json" not in oembed_endpoint.formats
            ) or api_endpoint.endswith(".xml"):
                logger.info(
                    "Ignoring oEmbed endpoint due to not supporting JSON: %s",
                    api_endpoint,
                )
                continue

            # Iterate through each URL pattern and point it to the endpoint.
            for pattern in oembed_endpoint.url_patterns:
                self._oembed_patterns[pattern] = api_endpoint
        self._client = client

    def get_oembed_url(self, url: str) -> Optional[str]:
        """
        Check whether the URL should be downloaded as oEmbed content instead.

        Args:
            url: The URL to check.

        Returns:
            A URL to use instead or None if the original URL should be used.
        """
        for url_pattern, endpoint in self._oembed_patterns.items():
            if url_pattern.fullmatch(url):
                return endpoint

        # No match.
        return None

    async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
        """
        Request content from an oEmbed endpoint.

        Args:
            endpoint: The oEmbed API endpoint.
            url: The URL to pass to the API.

        Returns:
            An object representing the metadata returned.

        Raises:
            OEmbedError if fetching or parsing of the oEmbed information fails.
        """
        try:
            logger.debug("Trying to get oEmbed content for url '%s'", url)

            # Note that only the JSON format is supported, some endpoints want
            # this in the URL, others want it as an argument.
            endpoint = endpoint.replace("{format}", "json")

            result = await self._client.get_json(
                endpoint,
                # TODO Specify max height / width.
                args={"url": url, "format": "json"},
            )

            # Ensure there's a version of 1.0.
            if result.get("version") != "1.0":
                raise OEmbedError("Invalid version: %s" % (result.get("version"),))

            oembed_type = result.get("type")

            # Ensure the cache age is None or an int.
            cache_age = result.get("cache_age")
            if cache_age:
                cache_age = int(cache_age)

            oembed_result = OEmbedResult(None, None, result.get("title"), cache_age)

            # HTML content.
            if oembed_type == "rich":
                oembed_result.html = result.get("html")
                return oembed_result

            if oembed_type == "photo":
                oembed_result.url = result.get("url")
                return oembed_result

            # TODO Handle link and video types.

            if "thumbnail_url" in result:
                oembed_result.url = result.get("thumbnail_url")
                return oembed_result

            raise OEmbedError("Incompatible oEmbed information.")

        except OEmbedError as e:
            # Trap OEmbedErrors first so we can directly re-raise them.
            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
            raise

        except Exception as e:
            # Trap any exception and let the code follow as usual.
            # FIXME: pass through 404s and other error messages nicely
            logger.warning("Error downloading oEmbed metadata from %s: %r", url, e)
            raise OEmbedError() from e