summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
author AndrewRyanChama <89478935+AndrewRyanChama@users.noreply.github.com> 2022-02-22 04:11:39 -0800
committer GitHub <noreply@github.com> 2022-02-22 07:11:39 -0500
commit 066171643b5812b05dd9352ee650f524567de877 (patch)
tree 8ad178c43f79808b359c640690394332c5a0676b /synapse
parent Merge remote-tracking branch 'origin/master' into develop (diff)
download synapse-066171643b5812b05dd9352ee650f524567de877.tar.xz
Fetch images when previewing Twitter URLs. (#11985)
By including "bot" in the User-Agent, which some sites use
to decide whether to include additional Open Graph information.
Diffstat (limited to 'synapse')
-rw-r--r-- synapse/res/providers.json 4
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 10
2 files changed, 10 insertions, 4 deletions
diff --git a/synapse/res/providers.json b/synapse/res/providers.json
index f1838f9559..7b9958e454 100644
--- a/synapse/res/providers.json
+++ b/synapse/res/providers.json
@@ -5,8 +5,6 @@
         "endpoints": [
             {
                 "schemes": [
-                    "https://twitter.com/*/status/*",
-                    "https://*.twitter.com/*/status/*",
                     "https://twitter.com/*/moments/*",
                     "https://*.twitter.com/*/moments/*"
                 ],
@@ -14,4 +12,4 @@
             }
         ]
     }
-]
\ No newline at end of file
+]
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 8d3d1e54dc..c08b60d10a 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -402,7 +402,15 @@ class PreviewUrlResource(DirectServeJsonResource):
                 url,
                 output_stream=output_stream,
                 max_size=self.max_spider_size,
-                headers={"Accept-Language": self.url_preview_accept_language},
+                headers={
+                    b"Accept-Language": self.url_preview_accept_language,
+                    # Use a custom user agent for the preview because some sites will only return
+                    # Open Graph metadata to crawler user agents. Omit the Synapse version
+                    # string to avoid leaking information.
+                    b"User-Agent": [
+                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
+                    ],
+                },
                 is_allowed_content_type=_is_previewable,
             )
         except SynapseError: