Merge branch 'release-v1.52'

author: Brendan Abolivier <babolivier@matrix.org> 2022-02-08 13:25:54 +0000
committer: Brendan Abolivier <babolivier@matrix.org> 2022-02-08 13:25:54 +0000
commit: 0b561a0ea1384db214c274f45b160c538d2ab65d (patch)
tree: aad71a937464551ac28cae53e36820f669431980 /synapse/rest/media/v1/preview_html.py
parent: Use changelog from develop (diff)
parent: Fix wording (diff)
download: synapse-0b561a0ea1384db214c274f45b160c538d2ab65d.tar.xz
1 files changed, 25 insertions, 6 deletions
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index 30b067dd42..872a9e72e8 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -321,14 +321,33 @@ def _iterate_over_text(
 
 
 def rebase_url(url: str, base: str) -> str:
-    base_parts = list(urlparse.urlparse(base))
+    """
+    Resolves a potentially relative `url` against an absolute `base` URL.
+
+    For example:
+
+        >>> rebase_url("subpage", "https://example.com/foo/")
+        'https://example.com/foo/subpage'
+        >>> rebase_url("sibling", "https://example.com/foo")
+        'https://example.com/sibling'
+        >>> rebase_url("/bar", "https://example.com/foo/")
+        'https://example.com/bar'
+        >>> rebase_url("https://alice.com/a/", "https://example.com/foo/")
+        'https://alice.com/a/'
+    """
+    base_parts = urlparse.urlparse(base)
+    # Convert the parsed URL to a list for (potential) modification.
     url_parts = list(urlparse.urlparse(url))
-    if not url_parts[0]:  # fix up schema
-        url_parts[0] = base_parts[0] or "http"
-    if not url_parts[1]:  # fix up hostname
-        url_parts[1] = base_parts[1]
+    # Add a scheme, if one does not exist.
+    if not url_parts[0]:
+        url_parts[0] = base_parts.scheme or "http"
+    # Fix up the hostname, if this is not a data URL.
+    if url_parts[0] != "data" and not url_parts[1]:
+        url_parts[1] = base_parts.netloc
+        # If the path does not start with a /, nest it under the base path's last
+        # directory.
         if not url_parts[2].startswith("/"):
-            url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
+            url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2]
     return urlparse.urlunparse(url_parts)
author	Brendan Abolivier <babolivier@matrix.org>	2022-02-08 13:25:54 +0000
committer	Brendan Abolivier <babolivier@matrix.org>	2022-02-08 13:25:54 +0000
commit	0b561a0ea1384db214c274f45b160c538d2ab65d (patch)
tree	aad71a937464551ac28cae53e36820f669431980 /synapse/rest/media/v1/preview_html.py
parent	Use changelog from develop (diff)
parent	Fix wording (diff)
download	synapse-0b561a0ea1384db214c274f45b160c538d2ab65d.tar.xz