summary refs log tree commit diff
path: root/synapse/rest/media/v1/preview_html.py
diff options
context:
space:
mode:
authorBrendan Abolivier <babolivier@matrix.org>2022-02-08 13:25:54 +0000
committerBrendan Abolivier <babolivier@matrix.org>2022-02-08 13:25:54 +0000
commit0b561a0ea1384db214c274f45b160c538d2ab65d (patch)
treeaad71a937464551ac28cae53e36820f669431980 /synapse/rest/media/v1/preview_html.py
parentUse changelog from develop (diff)
parentFix wording (diff)
downloadsynapse-0b561a0ea1384db214c274f45b160c538d2ab65d.tar.xz
Merge branch 'release-v1.52'
Diffstat (limited to 'synapse/rest/media/v1/preview_html.py')
-rw-r--r--synapse/rest/media/v1/preview_html.py31
1 files changed, 25 insertions, 6 deletions
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py

index 30b067dd42..872a9e72e8 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py
@@ -321,14 +321,33 @@ def _iterate_over_text( def rebase_url(url: str, base: str) -> str: - base_parts = list(urlparse.urlparse(base)) + """ + Resolves a potentially relative `url` against an absolute `base` URL. + + For example: + + >>> rebase_url("subpage", "https://example.com/foo/") + 'https://example.com/foo/subpage' + >>> rebase_url("sibling", "https://example.com/foo") + 'https://example.com/sibling' + >>> rebase_url("/bar", "https://example.com/foo/") + 'https://example.com/bar' + >>> rebase_url("https://alice.com/a/", "https://example.com/foo/") + 'https://alice.com/a' + """ + base_parts = urlparse.urlparse(base) + # Convert the parsed URL to a list for (potential) modification. url_parts = list(urlparse.urlparse(url)) - if not url_parts[0]: # fix up schema - url_parts[0] = base_parts[0] or "http" - if not url_parts[1]: # fix up hostname - url_parts[1] = base_parts[1] + # Add a scheme, if one does not exist. + if not url_parts[0]: + url_parts[0] = base_parts.scheme or "http" + # Fix up the hostname, if this is not a data URL. + if url_parts[0] != "data" and not url_parts[1]: + url_parts[1] = base_parts.netloc + # If the path does not start with a /, nest it under the base path's last + # directory. if not url_parts[2].startswith("/"): - url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2] + url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2] return urlparse.urlunparse(url_parts)