summary refs log tree commit diff
path: root/synapse/rest/media
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2021-11-02 14:28:27 +0000
committerErik Johnston <erik@matrix.org>2021-11-02 14:28:27 +0000
commit237f7eb87ad097ecf40f945630ac49cef1aa1154 (patch)
tree2fb7f95f8130b4756b5603ff8994c687c8726a9e /synapse/rest/media
parentAdd remaining type hints to `synapse.events`. (#11098) (diff)
parentUpdate changelog (diff)
downloadsynapse-237f7eb87ad097ecf40f945630ac49cef1aa1154.tar.xz
Merge remote-tracking branch 'origin/master' into develop
Diffstat (limited to 'synapse/rest/media')
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py9
1 files changed, 6 insertions, 3 deletions
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py

index 278fd901e2..8ca97b5b18 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -718,9 +718,12 @@ def decode_body( if not body: return None + # The idea here is that multiple encodings are tried until one works. + # Unfortunately the result is never used and then LXML will decode the string + # again with the found encoding. for encoding in get_html_media_encodings(body, content_type): try: - body_str = body.decode(encoding) + body.decode(encoding) except Exception: pass else: @@ -732,11 +735,11 @@ def decode_body( from lxml import etree # Create an HTML parser. - parser = etree.HTMLParser(recover=True, encoding="utf-8") + parser = etree.HTMLParser(recover=True, encoding=encoding) # Attempt to parse the body. Returns None if the body was successfully # parsed, but no tree was found. - return etree.fromstring(body_str, parser) + return etree.fromstring(body, parser) def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: