From 72626b78ef4aa9ab0bd11e332495f34bd43bbc26 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Oct 2021 12:33:21 +0100 Subject: Fix thread BG update to not seq scan event_json (#11192) For some reason the query optimiser decided to seq scan both tables, rather than index scanning `event_json`. --- synapse/storage/databases/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index f92d824876..ae3a8a63e4 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1108,7 +1108,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): """ SELECT event_id, json FROM event_json LEFT JOIN event_relations USING (event_id) - WHERE event_id > ? AND relates_to_id IS NULL + WHERE event_id > ? AND event_relations.event_id IS NULL ORDER BY event_id LIMIT ? """, (last_event_id, batch_size), -- cgit 1.4.1 From f3a4be870092e28531957702931c5d74b9d1f18f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 27 Oct 2021 13:04:56 +0100 Subject: Shut down the DNS threadpool (#11190) The DNS threadpool must be explicitly stopped, otherwise Synapse will hang indefinitely when asked to shut down. --- changelog.d/11190.bugfix | 1 + synapse/app/_base.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/11190.bugfix (limited to 'synapse') diff --git a/changelog.d/11190.bugfix b/changelog.d/11190.bugfix new file mode 100644 index 0000000000..0d913805ac --- /dev/null +++ b/changelog.d/11190.bugfix @@ -0,0 +1 @@ +Fix a performance regression introduced in 1.44.0 which could cause client requests to time out when making large numbers of outbound requests. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 03627cdcba..f4c3f867a8 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -346,6 +346,7 @@ async def start(hs: "HomeServer"): # numbers of DNS requests don't starve out other users of the threadpool. resolver_threadpool = ThreadPool(name="gai_resolver") resolver_threadpool.start() + reactor.addSystemEventTrigger("during", "shutdown", resolver_threadpool.stop) reactor.installNameResolver( GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) ) -- cgit 1.4.1 From b3e843be88d67633d11711ecc80d4e0390b1e723 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 27 Oct 2021 10:48:02 -0400 Subject: Fix URL preview errors when previewing XML documents. (#11196) --- changelog.d/11196.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 9 ++++++--- tests/test_preview.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 changelog.d/11196.bugfix (limited to 'synapse') diff --git a/changelog.d/11196.bugfix b/changelog.d/11196.bugfix new file mode 100644 index 0000000000..3861eeb908 --- /dev/null +++ b/changelog.d/11196.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 278fd901e2..8ca97b5b18 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -718,9 +718,12 @@ def decode_body( if not body: return None + # The idea here is that multiple encodings are tried until one works. + # Unfortunately the result is never used and then LXML will decode the string + # again with the found encoding. for encoding in get_html_media_encodings(body, content_type): try: - body_str = body.decode(encoding) + body.decode(encoding) except Exception: pass else: @@ -732,11 +735,11 @@ def decode_body( from lxml import etree # Create an HTML parser. - parser = etree.HTMLParser(recover=True, encoding="utf-8") + parser = etree.HTMLParser(recover=True, encoding=encoding) # Attempt to parse the body. Returns None if the body was successfully # parsed, but no tree was found. - return etree.fromstring(body_str, parser) + return etree.fromstring(body, parser) def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: diff --git a/tests/test_preview.py b/tests/test_preview.py index 9a576f9a4e..40b89fb2ef 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -277,6 +277,21 @@ class CalcOgTestCase(unittest.TestCase): tree = decode_body(html, "http://example.com/test.html") self.assertIsNone(tree) + def test_xml(self): + """Test decoding XML and ensure it works properly.""" + # Note that the strip() call is important to ensure the xml tag starts + # at the initial byte. + html = b""" + + + + + FooSome text. + """.strip() + tree = decode_body(html, "http://example.com/test.html") + og = _calc_og(tree, "http://example.com/test.html") + self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) + def test_invalid_encoding(self): """An invalid character encoding should be ignored and treated as UTF-8, if possible.""" html = b""" -- cgit 1.4.1