diff options
author | reivilibre <oliverw@matrix.org> | 2022-05-26 16:07:27 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-26 16:07:27 +0100 |
commit | 317248d42cb05ffa39119d6fefb7da286cb46225 (patch) | |
tree | 752951aeb2d7c7abe0b033f73684547f7e92fc0c | |
parent | Update changelog (diff) | |
download | synapse-317248d42cb05ffa39119d6fefb7da286cb46225.tar.xz |
Improve URL previews by not including the content of media tags in the generated description. (#12887)
-rw-r--r-- | changelog.d/12887.misc | 1 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_html.py | 10 |
2 files changed, 10 insertions, 1 deletions
diff --git a/changelog.d/12887.misc b/changelog.d/12887.misc
new file mode 100644
index 0000000000..7f6f731832
--- /dev/null
+++ b/changelog.d/12887.misc
@@ -0,0 +1 @@
+Improve URL previews by not including the content of media tags in the generated description.
\ No newline at end of file
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index ca73965fc2..0358c68a64 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -246,7 +246,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
 
     Grabs any text nodes which are inside the <body/> tag, unless they are within
     an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
-    if they are within a <script/> or <style/> tag.
+    if they are within a <script/>, <svg/> or <style/> tag, or if they are within
+    a tag whose content is usually only shown to old browsers
+    (<iframe/>, <video/>, <canvas/>, <picture/>).
 
     This is a very very very coarse approximation to a plain text render of the page.
 
@@ -268,6 +270,12 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
         "script",
         "noscript",
         "style",
+        "svg",
+        "iframe",
+        "video",
+        "canvas",
+        "img",
+        "picture",
         etree.Comment,
     )
 