summary refs log tree commit diff
diff options
context:
space:
mode:
authorTim Leung <tim95@hotmail.co.uk>2021-02-26 17:37:57 +0000
committerGitHub <noreply@github.com>2021-02-26 17:37:57 +0000
commitddb240293a3d7e0a903f322088e937d7e4f3de68 (patch)
treeb3b840433330217df6d031d97a962be73d482269
parentSSO: redirect to public URL before setting cookies (#9436) (diff)
downloadsynapse-ddb240293a3d7e0a903f322088e937d7e4f3de68.tar.xz
Add support for no_proxy and case insensitive env variables (#9372)
### Changes proposed in this PR

- Add support for the `no_proxy` and `NO_PROXY` environment variables
  - Internally rely on urllib's [`proxy_bypass_environment`](https://github.com/python/cpython/blob/bdb941be423bde8b02a5695ccf51c303d6204bed/Lib/urllib/request.py#L2519)
- Extract env variables using urllib's `getproxies`/[`getproxies_environment`](https://github.com/python/cpython/blob/bdb941be423bde8b02a5695ccf51c303d6204bed/Lib/urllib/request.py#L2488) which supports lowercase + uppercase, preferring lowercase, except for `HTTP_PROXY` in a CGI environment

This does contain behaviour changes for consumers so making sure these are called out:
- `no_proxy`/`NO_PROXY` is now respected
- lowercase `https_proxy` is now allowed and taken over `HTTPS_PROXY`

Related to #9306 which also uses `ProxyAgent`

Signed-off-by: Timothy Leung tim95@hotmail.co.uk
-rw-r--r--changelog.d/9372.feature1
-rw-r--r--synapse/http/client.py10
-rw-r--r--synapse/http/proxyagent.py37
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py3
-rw-r--r--synapse/server.py10
-rw-r--r--tests/http/test_proxyagent.py117
6 files changed, 114 insertions, 64 deletions
diff --git a/changelog.d/9372.feature b/changelog.d/9372.feature
new file mode 100644
index 0000000000..3cb01004c9
--- /dev/null
+++ b/changelog.d/9372.feature
@@ -0,0 +1 @@
+The `no_proxy` and `NO_PROXY` environment variables are now respected in proxied HTTP clients with the lowercase form taking precedence if both are present. Additionally, the lowercase `https_proxy` environment variable is now respected in proxied HTTP clients on top of existing support for the uppercase `HTTPS_PROXY` form and takes precedence if both are present. Contributed by Timothy Leung.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index e54d9bd213..a910548f1e 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -289,8 +289,7 @@ class SimpleHttpClient:
         treq_args: Dict[str, Any] = {},
         ip_whitelist: Optional[IPSet] = None,
         ip_blacklist: Optional[IPSet] = None,
-        http_proxy: Optional[bytes] = None,
-        https_proxy: Optional[bytes] = None,
+        use_proxy: bool = False,
     ):
         """
         Args:
@@ -300,8 +299,8 @@ class SimpleHttpClient:
                 we may not request.
             ip_whitelist: The whitelisted IP addresses, that we can
                request if it were otherwise caught in a blacklist.
-            http_proxy: proxy server to use for http connections. host[:port]
-            https_proxy: proxy server to use for https connections. host[:port]
+            use_proxy: Whether proxy settings should be discovered and used
+                from conventional environment variables.
         """
         self.hs = hs
 
@@ -345,8 +344,7 @@ class SimpleHttpClient:
             connectTimeout=15,
             contextFactory=self.hs.get_http_client_context_factory(),
             pool=pool,
-            http_proxy=http_proxy,
-            https_proxy=https_proxy,
+            use_proxy=use_proxy,
         )
 
         if self._ip_blacklist:
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index b730d2c634..3d553ae236 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 import logging
 import re
+from urllib.request import getproxies_environment, proxy_bypass_environment
 
 from zope.interface import implementer
 
@@ -58,6 +59,9 @@ class ProxyAgent(_AgentBase):
 
         pool (HTTPConnectionPool|None): connection pool to be used. If None, a
             non-persistent pool instance will be created.
+
+        use_proxy (bool): Whether proxy settings should be discovered and used
+            from conventional environment variables.
     """
 
     def __init__(
@@ -68,8 +72,7 @@ class ProxyAgent(_AgentBase):
         connectTimeout=None,
         bindAddress=None,
         pool=None,
-        http_proxy=None,
-        https_proxy=None,
+        use_proxy=False,
     ):
         _AgentBase.__init__(self, reactor, pool)
 
@@ -84,6 +87,15 @@ class ProxyAgent(_AgentBase):
         if bindAddress is not None:
             self._endpoint_kwargs["bindAddress"] = bindAddress
 
+        http_proxy = None
+        https_proxy = None
+        no_proxy = None
+        if use_proxy:
+            proxies = getproxies_environment()
+            http_proxy = proxies["http"].encode() if "http" in proxies else None
+            https_proxy = proxies["https"].encode() if "https" in proxies else None
+            no_proxy = proxies["no"] if "no" in proxies else None
+
         self.http_proxy_endpoint = _http_proxy_endpoint(
             http_proxy, self.proxy_reactor, **self._endpoint_kwargs
         )
@@ -92,6 +104,8 @@ class ProxyAgent(_AgentBase):
             https_proxy, self.proxy_reactor, **self._endpoint_kwargs
         )
 
+        self.no_proxy = no_proxy
+
         self._policy_for_https = contextFactory
         self._reactor = reactor
 
@@ -139,13 +153,28 @@ class ProxyAgent(_AgentBase):
         pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
         request_path = parsed_uri.originForm
 
-        if parsed_uri.scheme == b"http" and self.http_proxy_endpoint:
+        should_skip_proxy = False
+        if self.no_proxy is not None:
+            should_skip_proxy = proxy_bypass_environment(
+                parsed_uri.host.decode(),
+                proxies={"no": self.no_proxy},
+            )
+
+        if (
+            parsed_uri.scheme == b"http"
+            and self.http_proxy_endpoint
+            and not should_skip_proxy
+        ):
             # Cache *all* connections under the same key, since we are only
             # connecting to a single destination, the proxy:
             pool_key = ("http-proxy", self.http_proxy_endpoint)
             endpoint = self.http_proxy_endpoint
             request_path = uri
-        elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint:
+        elif (
+            parsed_uri.scheme == b"https"
+            and self.https_proxy_endpoint
+            and not should_skip_proxy
+        ):
             endpoint = HTTPConnectProxyEndpoint(
                 self.proxy_reactor,
                 self.https_proxy_endpoint,
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 6104ef4e46..89dc6b1c98 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -149,8 +149,7 @@ class PreviewUrlResource(DirectServeJsonResource):
             treq_args={"browser_like_redirects": True},
             ip_whitelist=hs.config.url_preview_ip_range_whitelist,
             ip_blacklist=hs.config.url_preview_ip_range_blacklist,
-            http_proxy=os.getenvb(b"http_proxy"),
-            https_proxy=os.getenvb(b"HTTPS_PROXY"),
+            use_proxy=True,
         )
         self.media_repo = media_repo
         self.primary_base_path = media_repo.primary_base_path
diff --git a/synapse/server.py b/synapse/server.py
index 4b9ec7f0ae..1d4370e0ba 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -24,7 +24,6 @@
 import abc
 import functools
 import logging
-import os
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -370,11 +369,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         """
         An HTTP client that uses configured HTTP(S) proxies.
         """
-        return SimpleHttpClient(
-            self,
-            http_proxy=os.getenvb(b"http_proxy"),
-            https_proxy=os.getenvb(b"HTTPS_PROXY"),
-        )
+        return SimpleHttpClient(self, use_proxy=True)
 
     @cache_in_self
     def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
@@ -386,8 +381,7 @@ class HomeServer(metaclass=abc.ABCMeta):
             self,
             ip_whitelist=self.config.ip_range_whitelist,
             ip_blacklist=self.config.ip_range_blacklist,
-            http_proxy=os.getenvb(b"http_proxy"),
-            https_proxy=os.getenvb(b"HTTPS_PROXY"),
+            use_proxy=True,
         )
 
     @cache_in_self
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index 9a56e1c14a..505ffcd300 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import os
+from unittest.mock import patch
 
 import treq
 from netaddr import IPSet
@@ -100,22 +102,36 @@ class MatrixFederationAgentTests(TestCase):
 
         return http_protocol
 
-    def test_http_request(self):
-        agent = ProxyAgent(self.reactor)
+    def _test_request_direct_connection(self, agent, scheme, hostname, path):
+        """Runs a test case for a direct connection not going through a proxy.
 
-        self.reactor.lookups["test.com"] = "1.2.3.4"
-        d = agent.request(b"GET", b"http://test.com")
+        Args:
+            agent (ProxyAgent): the proxy agent being tested
+
+            scheme (bytes): expected to be either "http" or "https"
+
+            hostname (bytes): the hostname to connect to in the test
+
+            path (bytes): the path to connect to in the test
+        """
+        is_https = scheme == b"https"
+
+        self.reactor.lookups[hostname.decode()] = "1.2.3.4"
+        d = agent.request(b"GET", scheme + b"://" + hostname + b"/" + path)
 
         # there should be a pending TCP connection
         clients = self.reactor.tcpClients
         self.assertEqual(len(clients), 1)
         (host, port, client_factory, _timeout, _bindAddress) = clients[0]
         self.assertEqual(host, "1.2.3.4")
-        self.assertEqual(port, 80)
+        self.assertEqual(port, 443 if is_https else 80)
 
         # make a test server, and wire up the client
         http_server = self._make_connection(
-            client_factory, _get_test_protocol_factory()
+            client_factory,
+            _get_test_protocol_factory(),
+            ssl=is_https,
+            expected_sni=hostname if is_https else None,
         )
 
         # the FakeTransport is async, so we need to pump the reactor
@@ -126,8 +142,8 @@ class MatrixFederationAgentTests(TestCase):
 
         request = http_server.requests[0]
         self.assertEqual(request.method, b"GET")
-        self.assertEqual(request.path, b"/")
-        self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"])
+        self.assertEqual(request.path, b"/" + path)
+        self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [hostname])
         request.write(b"result")
         request.finish()
 
@@ -137,48 +153,58 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
+    def test_http_request(self):
+        agent = ProxyAgent(self.reactor)
+        self._test_request_direct_connection(agent, b"http", b"test.com", b"")
+
     def test_https_request(self):
         agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy())
+        self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
-        self.reactor.lookups["test.com"] = "1.2.3.4"
-        d = agent.request(b"GET", b"https://test.com/abc")
+    def test_http_request_use_proxy_empty_environment(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
-        # there should be a pending TCP connection
-        clients = self.reactor.tcpClients
-        self.assertEqual(len(clients), 1)
-        (host, port, client_factory, _timeout, _bindAddress) = clients[0]
-        self.assertEqual(host, "1.2.3.4")
-        self.assertEqual(port, 443)
+    @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"})
+    def test_http_request_via_uppercase_no_proxy(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
-        # make a test server, and wire up the client
-        http_server = self._make_connection(
-            client_factory,
-            _get_test_protocol_factory(),
-            ssl=True,
-            expected_sni=b"test.com",
-        )
-
-        # the FakeTransport is async, so we need to pump the reactor
-        self.reactor.advance(0)
-
-        # now there should be a pending request
-        self.assertEqual(len(http_server.requests), 1)
+    @patch.dict(
+        os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"}
+    )
+    def test_http_request_via_no_proxy(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
-        request = http_server.requests[0]
-        self.assertEqual(request.method, b"GET")
-        self.assertEqual(request.path, b"/abc")
-        self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"])
-        request.write(b"result")
-        request.finish()
+    @patch.dict(
+        os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"}
+    )
+    def test_https_request_via_no_proxy(self):
+        agent = ProxyAgent(
+            self.reactor,
+            contextFactory=get_test_https_policy(),
+            use_proxy=True,
+        )
+        self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
-        self.reactor.advance(0)
+    @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"})
+    def test_http_request_via_no_proxy_star(self):
+        agent = ProxyAgent(self.reactor, use_proxy=True)
+        self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
-        resp = self.successResultOf(d)
-        body = self.successResultOf(treq.content(resp))
-        self.assertEqual(body, b"result")
+    @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"})
+    def test_https_request_via_no_proxy_star(self):
+        agent = ProxyAgent(
+            self.reactor,
+            contextFactory=get_test_https_policy(),
+            use_proxy=True,
+        )
+        self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
+    @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
     def test_http_request_via_proxy(self):
-        agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888")
+        agent = ProxyAgent(self.reactor, use_proxy=True)
 
         self.reactor.lookups["proxy.com"] = "1.2.3.5"
         d = agent.request(b"GET", b"http://test.com")
@@ -214,11 +240,12 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
+    @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
     def test_https_request_via_proxy(self):
         agent = ProxyAgent(
             self.reactor,
             contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
+            use_proxy=True,
         )
 
         self.reactor.lookups["proxy.com"] = "1.2.3.5"
@@ -294,6 +321,7 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
+    @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
     def test_http_request_via_proxy_with_blacklist(self):
         # The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
@@ -301,7 +329,7 @@ class MatrixFederationAgentTests(TestCase):
                 self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
             ),
             self.reactor,
-            http_proxy=b"proxy.com:8888",
+            use_proxy=True,
         )
 
         self.reactor.lookups["proxy.com"] = "1.2.3.5"
@@ -338,7 +366,8 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
-    def test_https_request_via_proxy_with_blacklist(self):
+    @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"})
+    def test_https_request_via_uppercase_proxy_with_blacklist(self):
         # The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
             BlacklistingReactorWrapper(
@@ -346,7 +375,7 @@ class MatrixFederationAgentTests(TestCase):
             ),
             self.reactor,
             contextFactory=get_test_https_policy(),
-            https_proxy=b"proxy.com",
+            use_proxy=True,
         )
 
         self.reactor.lookups["proxy.com"] = "1.2.3.5"