summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/7265.feature1
-rw-r--r--docs/sample_config.yaml25
-rw-r--r--synapse/config/repository.py29
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py8
-rw-r--r--tests/rest/media/v1/test_url_preview.py55
5 files changed, 116 insertions, 2 deletions
diff --git a/changelog.d/7265.feature b/changelog.d/7265.feature
new file mode 100644
index 0000000000..345b63e0b7
--- /dev/null
+++ b/changelog.d/7265.feature
@@ -0,0 +1 @@
+Add a config option for specifying the value of the Accept-Language HTTP header when generating URL previews.
\ No newline at end of file
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 3417813750..81dccbd997 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store"
 #
 #max_spider_size: 10M
 
+# A list of values for the Accept-Language HTTP header used when
+# downloading webpages during URL preview generation. This allows
+# Synapse to specify the preferred languages that URL previews should
+# be in when communicating with remote servers.
+#
+# Each value is a IETF language tag; a 2-3 letter identifier for a
+# language, optionally followed by subtags separated by '-', specifying
+# a country or region variant.
+#
+# Multiple values can be provided, and a weight can be added to each by
+# using quality value syntax (;q=). '*' translates to any language.
+#
+# Defaults to "en".
+#
+# Example:
+#
+# url_preview_accept_language:
+#   - en-UK
+#   - en-US;q=0.9
+#   - fr;q=0.8
+#   - *;q=0.7
+#
+url_preview_accept_language:
+#   - en
+
 
 ## Captcha ##
 # See docs/CAPTCHA_SETUP for full details of configuring this.
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 7d2dd27fd0..7193ea1114 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -192,6 +192,10 @@ class ContentRepositoryConfig(Config):
 
             self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())
 
+            self.url_preview_accept_language = config.get(
+                "url_preview_accept_language"
+            ) or ["en"]
+
     def generate_config_section(self, data_dir_path, **kwargs):
         media_store = os.path.join(data_dir_path, "media_store")
         uploads_path = os.path.join(data_dir_path, "uploads")
@@ -329,6 +333,31 @@ class ContentRepositoryConfig(Config):
         # The largest allowed URL preview spidering size in bytes
         #
         #max_spider_size: 10M
+
+        # A list of values for the Accept-Language HTTP header used when
+        # downloading webpages during URL preview generation. This allows
+        # Synapse to specify the preferred languages that URL previews should
+        # be in when communicating with remote servers.
+        #
+        # Each value is a IETF language tag; a 2-3 letter identifier for a
+        # language, optionally followed by subtags separated by '-', specifying
+        # a country or region variant.
+        #
+        # Multiple values can be provided, and a weight can be added to each by
+        # using quality value syntax (;q=). '*' translates to any language.
+        #
+        # Defaults to "en".
+        #
+        # Example:
+        #
+        # url_preview_accept_language:
+        #   - en-UK
+        #   - en-US;q=0.9
+        #   - fr;q=0.8
+        #   - *;q=0.7
+        #
+        url_preview_accept_language:
+        #   - en
         """
             % locals()
         )
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index c46676f8fc..f68e18ea8a 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -86,6 +86,7 @@ class PreviewUrlResource(DirectServeResource):
         self.media_storage = media_storage
 
         self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
+        self.url_preview_accept_language = hs.config.url_preview_accept_language
 
         # memory cache mapping urls to an ObservableDeferred returning
         # JSON-encoded OG metadata
@@ -315,9 +316,12 @@ class PreviewUrlResource(DirectServeResource):
 
         with self.media_storage.store_into_file(file_info) as (f, fname, finish):
             try:
-                logger.debug("Trying to get url '%s'", url)
+                logger.debug("Trying to get preview for url '%s'", url)
                 length, headers, uri, code = await self.client.get_file(
-                    url, output_stream=f, max_size=self.max_spider_size
+                    url,
+                    output_stream=f,
+                    max_size=self.max_spider_size,
+                    headers={"Accept-Language": self.url_preview_accept_language},
                 )
             except SynapseError:
                 # Pass SynapseErrors through directly, so that the servlet
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 852b8ab11c..2826211f32 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -74,6 +74,12 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         )
         config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
         config["url_preview_url_blacklist"] = []
+        config["url_preview_accept_language"] = [
+            "en-UK",
+            "en-US;q=0.9",
+            "fr;q=0.8",
+            "*;q=0.7",
+        ]
 
         self.storage_path = self.mktemp()
         self.media_store_path = self.mktemp()
@@ -507,3 +513,52 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         self.pump()
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body, {})
+
+    def test_accept_language_config_option(self):
+        """
+        Accept-Language header is sent to the remote server
+        """
+        self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")]
+
+        # Build and make a request to the server
+        request, channel = self.make_request(
+            "GET", "url_preview?url=http://example.com", shorthand=False
+        )
+        request.render(self.preview_url)
+        self.pump()
+
+        # Extract Synapse's tcp client
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+
+        # Build a fake remote server to reply with
+        server = AccumulatingProtocol()
+
+        # Connect the two together
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+
+        # Tell Synapse that it has received some data from the remote server
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        # Move the reactor along until we get a response on our original channel
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+        # Check that the server received the Accept-Language header as part
+        # of the request from Synapse
+        self.assertIn(
+            (
+                b"Accept-Language: en-UK\r\n"
+                b"Accept-Language: en-US;q=0.9\r\n"
+                b"Accept-Language: fr;q=0.8\r\n"
+                b"Accept-Language: *;q=0.7"
+            ),
+            server.data,
+        )