summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2021-11-02 14:28:27 +0000
committer Erik Johnston <erik@matrix.org> 2021-11-02 14:28:27 +0000
commit 237f7eb87ad097ecf40f945630ac49cef1aa1154 (patch)
tree 2fb7f95f8130b4756b5603ff8994c687c8726a9e
parent Add remaining type hints to `synapse.events`. (#11098) (diff)
parent Update changelog (diff)
download synapse-237f7eb87ad097ecf40f945630ac49cef1aa1154.tar.xz
Merge remote-tracking branch 'origin/master' into develop
-rw-r--r-- CHANGES.md 13
-rw-r--r-- debian/changelog 10
-rwxr-xr-x debian/rules 6
-rw-r--r-- synapse/__init__.py 2
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 9
-rw-r--r-- tests/test_preview.py 15
6 files changed, 49 insertions, 6 deletions
diff --git a/CHANGES.md b/CHANGES.md
index f61d5c706f..e74544f489 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,8 +1,17 @@
-Synapse 1.46.0rc1 (2021-10-27)
-==============================
+Synapse 1.46.0 (2021-11-02)
+===========================
 
 The cause of the [performance regression affecting Synapse 1.44](https://github.com/matrix-org/synapse/issues/11049) has been identified and fixed. ([\#11177](https://github.com/matrix-org/synapse/issues/11177))
 
+Bugfixes
+--------
+
+- Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. ([\#11196](https://github.com/matrix-org/synapse/issues/11196))
+
+
+Synapse 1.46.0rc1 (2021-10-27)
+==============================
+
 Features
 --------
 
diff --git a/debian/changelog b/debian/changelog
index 24842192b8..14748f8c25 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,16 @@ matrix-synapse-py3 (1.47.0+nmu1) UNRELEASED; urgency=medium
 
  -- root <root@cae79a6e79d7>  Fri, 22 Oct 2021 22:20:31 +0000
 
+matrix-synapse-py3 (1.46.0) stable; urgency=medium
+
+  [ Richard van der Hoff ]
+  * Compress debs with xz, to fix incompatibility of impish debs with reprepro.
+
+  [ Synapse Packaging team ]
+  * New synapse release 1.46.0.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 02 Nov 2021 13:22:53 +0000
+
 matrix-synapse-py3 (1.46.0~rc1) stable; urgency=medium
 
   * New synapse release 1.46.0~rc1.
diff --git a/debian/rules b/debian/rules
index b9d490adc9..5baf2475f0 100755
--- a/debian/rules
+++ b/debian/rules
@@ -51,5 +51,11 @@ override_dh_shlibdeps:
 override_dh_virtualenv:
 	./debian/build_virtualenv
 
+override_dh_builddeb:
+        # force the compression to xz, to stop dpkg-deb on impish defaulting to zstd
+        # (which requires reprepro 5.3.0-1.3, which is currently only in 'experimental' in Debian:
+        # https://metadata.ftp-master.debian.org/changelogs/main/r/reprepro/reprepro_5.3.0-1.3_changelog)
+	dh_builddeb -- -Zxz
+
 %:
 	dh $@ --with python-virtualenv
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 355b36fc63..5ef34bce40 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.46.0rc1"
+__version__ = "1.46.0"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 278fd901e2..8ca97b5b18 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -718,9 +718,12 @@ def decode_body(
     if not body:
         return None
 
+    # Try each candidate encoding in turn until one decodes without error.
+    # The decoded string is discarded: LXML is given the raw bytes and
+    # decodes them again itself, using the encoding found here.
     for encoding in get_html_media_encodings(body, content_type):
         try:
-            body_str = body.decode(encoding)
+            body.decode(encoding)
         except Exception:
             pass
         else:
@@ -732,11 +735,11 @@ def decode_body(
     from lxml import etree
 
     # Create an HTML parser.
-    parser = etree.HTMLParser(recover=True, encoding="utf-8")
+    parser = etree.HTMLParser(recover=True, encoding=encoding)
 
     # Attempt to parse the body. Returns None if the body was successfully
     # parsed, but no tree was found.
-    return etree.fromstring(body_str, parser)
+    return etree.fromstring(body, parser)
 
 
 def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
diff --git a/tests/test_preview.py b/tests/test_preview.py
index 9a576f9a4e..40b89fb2ef 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -277,6 +277,21 @@ class CalcOgTestCase(unittest.TestCase):
         tree = decode_body(html, "http://example.com/test.html")
         self.assertIsNone(tree)
 
+    def test_xml(self):
+        """Test decoding XML and ensure it works properly."""
+        # Note that the strip() call is important to ensure the xml tag starts
+        # at the initial byte.
+        html = b"""
+        <?xml version="1.0" encoding="UTF-8"?>
+
+        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+        <head><title>Foo</title></head><body>Some text.</body></html>
+        """.strip()
+        tree = decode_body(html, "http://example.com/test.html")
+        og = _calc_og(tree, "http://example.com/test.html")
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
     def test_invalid_encoding(self):
         """An invalid character encoding should be ignored and treated as UTF-8, if possible."""
         html = b"""