summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2019-11-05 18:18:02 +0000
committerRichard van der Hoff <richard@matrix.org>2019-11-05 18:18:02 +0000
commit7fa4586e36a65aed67615b3f0c94aef884889d4e (patch)
tree9348bf2e3a53daf8a647d7a3c9cd2d188af43da8 /synapse
parentMerge branch 'release-v1.5.0' of github.com:matrix-org/synapse into matrix-or... (diff)
parentFix exception when OpenGraph tag values are ints (diff)
downloadsynapse-7fa4586e36a65aed67615b3f0c94aef884889d4e.tar.xz
Merge branch 'rav/url_preview_limit_title_2' into matrix-org-hotfixes
Diffstat (limited to 'synapse')
-rw-r--r--synapse/__init__.py2
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py21
2 files changed, 21 insertions, 2 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py

index d0f92ffbf3..8587ffa76f 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py
@@ -36,7 +36,7 @@ try: except ImportError: pass -__version__ = "1.5.0rc2" +__version__ = "1.5.0" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 094ebad770..999fcc2f04 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -56,6 +56,9 @@ logger = logging.getLogger(__name__) _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) +OG_TAG_NAME_MAXLEN = 50 +OG_TAG_VALUE_MAXLEN = 1000 + class PreviewUrlResource(DirectServeResource): isLeaf = True @@ -169,7 +172,7 @@ class PreviewUrlResource(DirectServeResource): ts (int): Returns: - Deferred[str]: json-encoded og data + Deferred[bytes]: json-encoded og data """ # check the URL cache in the DB (which will also provide us with # historical previews, if we have any) @@ -270,6 +273,18 @@ class PreviewUrlResource(DirectServeResource): logger.warn("Failed to find any OG data in %s", url) og = {} + # filter out any stupidly long values + keys_to_remove = [] + for k, v in og.items(): + # values can be numeric as well as strings, hence the cast to str + if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN: + logger.warning( + "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN] + ) + keys_to_remove.append(k) + for k in keys_to_remove: + del og[k] + logger.debug("Calculated OG for %s as %s", url, og) jsonog = json.dumps(og) @@ -504,6 +519,10 @@ def _calc_og(tree, media_uri): og = {} for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): if "content" in tag.attrib: + # if we've got more than 50 tags, someone is taking the piss + if len(og) >= 50: + logger.warning("Skipping OG for page with too many 'og:' tags") + return {} og[tag.attrib["property"]] = tag.attrib["content"] # TODO: grab article: meta tags too, e.g.: