summary refs log tree commit diff
diff options
context:
space:
mode:
author Marcin Bachry <hegel666@gmail.com> 2016-12-14 22:38:18 +0100
committer Marcin Bachry <hegel666@gmail.com> 2016-12-14 22:38:18 +0100
commit 24c16fc3494ce91ba97a06f5d42cdea1c4c38c93 (patch)
tree fadb35dae7b6276937c0e88f0415aa3d52fb88f0
parent Fixup membership query (diff)
download synapse-24c16fc3494ce91ba97a06f5d42cdea1c4c38c93.tar.xz
Fix crash in url preview when html tag has no text
Signed-off-by: Marcin Bachry <hegel666@gmail.com>
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 5
-rw-r--r-- tests/test_preview.py 50
2 files changed, 54 insertions, 1 deletions
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 6a5a57102f..99760d622f 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -381,7 +381,10 @@ def _calc_og(tree, media_uri):
     if 'og:title' not in og:
         # do some basic spidering of the HTML
         title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
-        og['og:title'] = title[0].text.strip() if title else None
+        if title and title[0].text is not None:
+            og['og:title'] = title[0].text.strip()
+        else:
+            og['og:title'] = None
 
     if 'og:image' not in og:
         # TODO: extract a favicon failing all else
diff --git a/tests/test_preview.py b/tests/test_preview.py
index ffa52e5dd4..5bd36c74aa 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase):
             u"og:title": u"Foo",
             u"og:description": u"Some text."
         })
+
+    def test_missing_title(self):
+        html = u"""
+        <html>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEquals(og, {
+            u"og:title": None,
+            u"og:description": u"Some text."
+        })
+
+    def test_h1_as_title(self):
+        html = u"""
+        <html>
+        <meta property="og:description" content="Some text."/>
+        <body>
+        <h1>Title</h1>
+        </body>
+        </html>
+        """
+
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEquals(og, {
+            u"og:title": u"Title",
+            u"og:description": u"Some text."
+        })
+
+    def test_missing_title_and_broken_h1(self):
+        html = u"""
+        <html>
+        <body>
+        <h1><a href="foo"/></h1>
+        Some text.
+        </body>
+        </html>
+        """
+
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEquals(og, {
+            u"og:title": None,
+            u"og:description": u"Some text."
+        })