1 files changed, 25 insertions, 6 deletions
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index 30b067dd42..872a9e72e8 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -321,14 +321,33 @@ def _iterate_over_text(
def rebase_url(url: str, base: str) -> str:
- base_parts = list(urlparse.urlparse(base))
+ """
+ Resolves a potentially relative `url` against an absolute `base` URL.
+
+ For example:
+
+ >>> rebase_url("subpage", "https://example.com/foo/")
+ 'https://example.com/foo/subpage'
+ >>> rebase_url("sibling", "https://example.com/foo")
+ 'https://example.com/sibling'
+ >>> rebase_url("/bar", "https://example.com/foo/")
+ 'https://example.com/bar'
+ >>> rebase_url("https://alice.com/a/", "https://example.com/foo/")
+ 'https://alice.com/a/'
+ """
+ base_parts = urlparse.urlparse(base)
+ # Convert the parsed URL to a list for (potential) modification.
url_parts = list(urlparse.urlparse(url))
- if not url_parts[0]: # fix up schema
- url_parts[0] = base_parts[0] or "http"
- if not url_parts[1]: # fix up hostname
- url_parts[1] = base_parts[1]
+ # Add a scheme, if one does not exist.
+ if not url_parts[0]:
+ url_parts[0] = base_parts.scheme or "http"
+ # Fix up the hostname, if this is not a data URL.
+ if url_parts[0] != "data" and not url_parts[1]:
+ url_parts[1] = base_parts.netloc
+ # If the path does not start with a /, nest it under the base path's last
+ # directory.
if not url_parts[2].startswith("/"):
- url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
+ url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2]
return urlparse.urlunparse(url_parts)
|