diff --git a/changelog.d/14781.misc b/changelog.d/14781.misc
new file mode 100644
index 0000000000..04f565b410
--- /dev/null
+++ b/changelog.d/14781.misc
@@ -0,0 +1 @@
+Unescape HTML entities in URL preview titles that are generated from oEmbed responses.
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index 827afd868d..a3738a6250 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import html
import logging
import urllib.parse
from typing import TYPE_CHECKING, List, Optional
@@ -161,7 +162,9 @@ class OEmbedProvider:
title = oembed.get("title")
if title and isinstance(title, str):
- open_graph_response["og:title"] = title
+ # A common WordPress plug-in seems to incorrectly escape entities
+ # in the oEmbed response.
+ open_graph_response["og:title"] = html.unescape(title)
author_name = oembed.get("author_name")
if not isinstance(author_name, str):
@@ -180,9 +183,9 @@ class OEmbedProvider:
# Process each type separately.
oembed_type = oembed.get("type")
if oembed_type == "rich":
- html = oembed.get("html")
- if isinstance(html, str):
- calc_description_and_urls(open_graph_response, html)
+ html_str = oembed.get("html")
+ if isinstance(html_str, str):
+ calc_description_and_urls(open_graph_response, html_str)
elif oembed_type == "photo":
# If this is a photo, use the full image, not the thumbnail.
@@ -192,8 +195,8 @@ class OEmbedProvider:
elif oembed_type == "video":
open_graph_response["og:type"] = "video.other"
- html = oembed.get("html")
- if html and isinstance(html, str):
+ html_str = oembed.get("html")
+ if html_str and isinstance(html_str, str):
calc_description_and_urls(open_graph_response, oembed["html"])
for size in ("width", "height"):
val = oembed.get(size)
diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py
index 319ae8b1cc..3f7f1dbab9 100644
--- a/tests/rest/media/v1/test_oembed.py
+++ b/tests/rest/media/v1/test_oembed.py
@@ -150,3 +150,13 @@ class OEmbedTests(HomeserverTestCase):
result = self.parse_response({"type": "link"})
self.assertIn("og:type", result.open_graph_result)
self.assertEqual(result.open_graph_result["og:type"], "website")
+
+    def test_title_html_entities(self) -> None:
+        """Test that HTML entities in an oEmbed title are unescaped."""
+        # The input must carry the *escaped* entity, or the test is a no-op.
+        result = self.parse_response(
+            {"title": "Why JSON isn&#8217;t a Good Configuration Language"}
+        )
+        self.assertEqual(
+            result.open_graph_result["og:title"],
+            "Why JSON isn’t a Good Configuration Language",
+        )
|