From 066171643b5812b05dd9352ee650f524567de877 Mon Sep 17 00:00:00 2001 From: AndrewRyanChama <89478935+AndrewRyanChama@users.noreply.github.com> Date: Tue, 22 Feb 2022 04:11:39 -0800 Subject: Fetch images when previewing Twitter URLs. (#11985) By including "bot" in the User-Agent, which some sites use to decide whether to include additional Open Graph information. --- synapse/res/providers.json | 4 +--- synapse/rest/media/v1/preview_url_resource.py | 10 +++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/res/providers.json b/synapse/res/providers.json index f1838f9559..7b9958e454 100644 --- a/synapse/res/providers.json +++ b/synapse/res/providers.json @@ -5,8 +5,6 @@ "endpoints": [ { "schemes": [ - "https://twitter.com/*/status/*", - "https://*.twitter.com/*/status/*", "https://twitter.com/*/moments/*", "https://*.twitter.com/*/moments/*" ], @@ -14,4 +12,4 @@ } ] } -] \ No newline at end of file +] diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 8d3d1e54dc..c08b60d10a 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -402,7 +402,15 @@ class PreviewUrlResource(DirectServeJsonResource): url, output_stream=output_stream, max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, + headers={ + b"Accept-Language": self.url_preview_accept_language, + # Use a custom user agent for the preview because some sites will only return + # Open Graph metadata to crawler user agents. Omit the Synapse version + # string to avoid leaking information. + b"User-Agent": [ + "Synapse (bot; +https://github.com/matrix-org/synapse)" + ], + }, is_allowed_content_type=_is_previewable, ) except SynapseError: -- cgit 1.4.1