summary refs log tree commit diff
path: root/synapse/rest/media
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2019-11-05 17:08:59 +0000
committerGitHub <noreply@github.com>2019-11-05 17:08:59 +0000
commit02f99906f2d61771fa52aed719c518c6bb128f95 (patch)
treec0445991317e3dc26def157bfeb70d76d1b830c5 /synapse/rest/media
parentImprove documentation for EventContext fields (#6319) (diff)
parentMerge branch 'develop' into rav/url_preview_limit_title (diff)
downloadsynapse-02f99906f2d61771fa52aed719c518c6bb128f95.tar.xz
Merge pull request #6331 from matrix-org/rav/url_preview_limit_title
Strip overlong OpenGraph data from url preview
Diffstat (limited to 'synapse/rest/media')
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py20
1 files changed, 19 insertions, 1 deletions
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 531d923f76..69544b3711 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -56,6 +56,9 @@ logger = logging.getLogger(__name__)
 _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
 _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
 
+OG_TAG_NAME_MAXLEN = 50
+OG_TAG_VALUE_MAXLEN = 1000
+
 
 class PreviewUrlResource(DirectServeResource):
     isLeaf = True
@@ -171,7 +174,7 @@ class PreviewUrlResource(DirectServeResource):
             ts (int):
 
         Returns:
-            Deferred[str]: json-encoded og data
+            Deferred[bytes]: json-encoded og data
         """
         # check the URL cache in the DB (which will also provide us with
         # historical previews, if we have any)
@@ -272,6 +275,17 @@ class PreviewUrlResource(DirectServeResource):
             logger.warning("Failed to find any OG data in %s", url)
             og = {}
 
+        # filter out any stupidly long values
+        keys_to_remove = []
+        for k, v in og.items():
+            if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN:
+                logger.warning(
+                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
+                )
+                keys_to_remove.append(k)
+        for k in keys_to_remove:
+            del og[k]
+
         logger.debug("Calculated OG for %s as %s", url, og)
 
         jsonog = json.dumps(og)
@@ -506,6 +520,10 @@ def _calc_og(tree, media_uri):
     og = {}
     for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
         if "content" in tag.attrib:
+            # if we've got more than 50 tags, someone is taking the piss
+            if len(og) >= 50:
+                logger.warning("Skipping OG for page with too many 'og:' tags")
+                return {}
             og[tag.attrib["property"]] = tag.attrib["content"]
 
     # TODO: grab article: meta tags too, e.g.: