diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index 872a9e72e8..4cc9c66fbe 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -16,7 +16,6 @@ import itertools
import logging
import re
from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Union
-from urllib import parse as urlparse
if TYPE_CHECKING:
from lxml import etree
@@ -144,9 +143,7 @@ def decode_body(
return etree.fromstring(body, parser)
-def parse_html_to_open_graph(
- tree: "etree.Element", media_uri: str
-) -> Dict[str, Optional[str]]:
+def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
"""
Parse the HTML document into an Open Graph response.
@@ -155,7 +152,6 @@ def parse_html_to_open_graph(
Args:
tree: The parsed HTML document.
- media_url: The URI used to download the body.
Returns:
The Open Graph response as a dictionary.
@@ -209,7 +205,7 @@ def parse_html_to_open_graph(
"//*/meta[translate(@itemprop, 'IMAGE', 'image')='image']/@content"
)
if meta_image:
- og["og:image"] = rebase_url(meta_image[0], media_uri)
+ og["og:image"] = meta_image[0]
else:
# TODO: consider inlined CSS styles as well as width & height attribs
images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
@@ -320,37 +316,6 @@ def _iterate_over_text(
)
-def rebase_url(url: str, base: str) -> str:
- """
- Resolves a potentially relative `url` against an absolute `base` URL.
-
- For example:
-
- >>> rebase_url("subpage", "https://example.com/foo/")
- 'https://example.com/foo/subpage'
- >>> rebase_url("sibling", "https://example.com/foo")
- 'https://example.com/sibling'
- >>> rebase_url("/bar", "https://example.com/foo/")
- 'https://example.com/bar'
- >>> rebase_url("https://alice.com/a/", "https://example.com/foo/")
- 'https://alice.com/a'
- """
- base_parts = urlparse.urlparse(base)
- # Convert the parsed URL to a list for (potential) modification.
- url_parts = list(urlparse.urlparse(url))
- # Add a scheme, if one does not exist.
- if not url_parts[0]:
- url_parts[0] = base_parts.scheme or "http"
- # Fix up the hostname, if this is not a data URL.
- if url_parts[0] != "data" and not url_parts[1]:
- url_parts[1] = base_parts.netloc
- # If the path does not start with a /, nest it under the base path's last
- # directory.
- if not url_parts[2].startswith("/"):
- url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2]
- return urlparse.urlunparse(url_parts)
-
-
def summarize_paragraphs(
text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500
) -> Optional[str]:
|