summary refs log tree commit diff
path: root/synapse/rest/media/v1/preview_html.py
diff options
context:
space:
mode:
author: reivilibre <oliverw@matrix.org> 2022-05-26 16:07:27 +0100
committer: GitHub <noreply@github.com> 2022-05-26 16:07:27 +0100
commit: 317248d42cb05ffa39119d6fefb7da286cb46225 (patch)
tree: 752951aeb2d7c7abe0b033f73684547f7e92fc0c /synapse/rest/media/v1/preview_html.py
parent: Update changelog (diff)
download: synapse-317248d42cb05ffa39119d6fefb7da286cb46225.tar.xz
Improve URL previews by not including the content of media tags in the generated description. (#12887)
Diffstat (limited to 'synapse/rest/media/v1/preview_html.py')
-rw-r--r-- synapse/rest/media/v1/preview_html.py | 10
1 files changed, 9 insertions, 1 deletions
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index ca73965fc2..0358c68a64 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -246,7 +246,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
 
     Grabs any text nodes which are inside the <body/> tag, unless they are within
     an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
-    if they are within a <script/> or <style/> tag.
+    if they are within a <script/>, <svg/> or <style/> tag, or if they are within
+    a tag whose content is usually only shown to old browsers
+    (<iframe/>, <video/>, <canvas/>, <picture/>).
 
     This is a very very very coarse approximation to a plain text render of the page.
 
@@ -268,6 +270,12 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
         "script",
         "noscript",
         "style",
+        "svg",
+        "iframe",
+        "video",
+        "canvas",
+        "img",
+        "picture",
         etree.Comment,
     )