summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2021-11-02 10:04:06 +0000
committer Erik Johnston <erik@matrix.org> 2021-11-02 10:04:06 +0000
commit 459d2ae1579dbfe3ceea94e3839d22f3b285ecbb (patch)
tree 9322693c9e5896b787f2e29d7617af2f2793ceaf /synapse
parent Don't seqscan event_json due to relates_to_id (diff)
parent Force deb compression with `xz`. (#11197) (diff)
download synapse-459d2ae1579dbfe3ceea94e3839d22f3b285ecbb.tar.xz
Merge remote-tracking branch 'origin/release-v1.46' into matrix-org-hotfixes
Diffstat (limited to 'synapse')
-rw-r--r-- synapse/app/_base.py 1
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 9
2 files changed, 7 insertions, 3 deletions
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 03627cdcba..f4c3f867a8 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -346,6 +346,7 @@ async def start(hs: "HomeServer"):
     # numbers of DNS requests don't starve out other users of the threadpool.
     resolver_threadpool = ThreadPool(name="gai_resolver")
     resolver_threadpool.start()
+    reactor.addSystemEventTrigger("during", "shutdown", resolver_threadpool.stop)
     reactor.installNameResolver(
         GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool)
     )
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 278fd901e2..8ca97b5b18 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -718,9 +718,12 @@ def decode_body(
     if not body:
         return None
 
+    # The idea here is that multiple encodings are tried until one works.
+    # Unfortunately the result is never used and then LXML will decode the string
+    # again with the found encoding.
     for encoding in get_html_media_encodings(body, content_type):
         try:
-            body_str = body.decode(encoding)
+            body.decode(encoding)
         except Exception:
             pass
         else:
@@ -732,11 +735,11 @@ def decode_body(
     from lxml import etree
 
     # Create an HTML parser.
-    parser = etree.HTMLParser(recover=True, encoding="utf-8")
+    parser = etree.HTMLParser(recover=True, encoding=encoding)
 
     # Attempt to parse the body. Returns None if the body was successfully
     # parsed, but no tree was found.
-    return etree.fromstring(body_str, parser)
+    return etree.fromstring(body, parser)
 
 
 def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: