From 317248d42cb05ffa39119d6fefb7da286cb46225 Mon Sep 17 00:00:00 2001
From: reivilibre
Date: Thu, 26 May 2022 16:07:27 +0100
Subject: Improve URL previews by not including the content of media tags in
the generated description. (#12887)
---
changelog.d/12887.misc | 1 +
synapse/rest/media/v1/preview_html.py | 10 +++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
create mode 100644 changelog.d/12887.misc
diff --git a/changelog.d/12887.misc b/changelog.d/12887.misc
new file mode 100644
index 0000000000..7f6f731832
--- /dev/null
+++ b/changelog.d/12887.misc
@@ -0,0 +1 @@
+Improve URL previews by not including the content of media tags in the generated description.
\ No newline at end of file
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index ca73965fc2..0358c68a64 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -246,7 +246,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
Grabs any text nodes which are inside the <body/> tag, unless they are within
an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
- if they are within a <script/> or <style/> tag.
+ if they are within a <script/>, <svg/> or <style/> tag, or if they are within
+ a tag whose content is usually only shown to old browsers
+ (<iframe/>, <video/>, <canvas/>, <picture/>).
This is a very very very coarse approximation to a plain text render of the page.
@@ -268,6 +270,12 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
"script",
"noscript",
"style",
+ "svg",
+ "iframe",
+ "video",
+ "canvas",
+ "img",
+ "picture",
etree.Comment,
)
--
cgit 1.4.1