diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 03627cdcba..f4c3f867a8 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -346,6 +346,7 @@ async def start(hs: "HomeServer"):
# numbers of DNS requests don't starve out other users of the threadpool.
resolver_threadpool = ThreadPool(name="gai_resolver")
resolver_threadpool.start()
+ reactor.addSystemEventTrigger("during", "shutdown", resolver_threadpool.stop)
reactor.installNameResolver(
GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool)
)
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 278fd901e2..8ca97b5b18 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -718,9 +718,12 @@ def decode_body(
if not body:
return None
+ # The idea here is that multiple encodings are tried until one works.
+ # Unfortunately the result is never used and then LXML will decode the string
+ # again with the found encoding.
for encoding in get_html_media_encodings(body, content_type):
try:
- body_str = body.decode(encoding)
+ body.decode(encoding)
except Exception:
pass
else:
@@ -732,11 +735,11 @@ def decode_body(
from lxml import etree
# Create an HTML parser.
- parser = etree.HTMLParser(recover=True, encoding="utf-8")
+ parser = etree.HTMLParser(recover=True, encoding=encoding)
# Attempt to parse the body. Returns None if the body was successfully
# parsed, but no tree was found.
- return etree.fromstring(body_str, parser)
+ return etree.fromstring(body, parser)
def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
|