From 8520bc3109c2de6175391497941f3fc0b74c08e5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 25 Jan 2019 12:38:16 +0000 Subject: Fix Host header sent by MatrixFederationAgent (#4468) Move the Host header logic down here so that (a) it is used if we reuse the agent elsewhere, and (b) we can mess about with it with .well-known. --- synapse/http/federation/matrix_federation_agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'synapse/http/federation/matrix_federation_agent.py') diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 0ec28c6696..1788e9a34a 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -19,6 +19,7 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import URI, Agent, HTTPConnectionPool +from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent from synapse.http.endpoint import parse_server_name @@ -109,6 +110,15 @@ class MatrixFederationAgent(object): else: target = pick_server_from_list(server_list) + # make sure that the Host header is set correctly + if headers is None: + headers = Headers() + else: + headers = headers.copy() + + if not headers.hasHeader(b'host'): + headers.addRawHeader(b'host', server_name_bytes) + class EndpointFactory(object): @staticmethod def endpointForURI(_uri): -- cgit 1.5.1 From d840019192769f900f6a1a5c768368a080e651cd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 28 Jan 2019 09:56:59 +0000 Subject: Fix idna and ipv6 literal handling in MatrixFederationAgent (#4487) Turns out that the library does a better job of parsing URIs than our reinvented wheel. Who knew. There are two things going on here. The first is that, unlike parse_server_name, URI.fromBytes will strip off square brackets from IPv6 literals, which means that it is valid input to ClientTLSOptionsFactory and HostnameEndpoint. The second is that we stay in `bytes` throughout (except for the argument to ClientTLSOptionsFactory), which avoids the weirdness of (sometimes) ending up with idna-encoded values being held in `unicode` variables. TBH it probably would have been ok but it made the tests fragile. --- changelog.d/4487.misc | 1 + synapse/http/federation/matrix_federation_agent.py | 23 +-- .../federation/test_matrix_federation_agent.py | 181 ++++++++++++++++++++- 3 files changed, 193 insertions(+), 12 deletions(-) create mode 100644 changelog.d/4487.misc (limited to 'synapse/http/federation/matrix_federation_agent.py') diff --git a/changelog.d/4487.misc b/changelog.d/4487.misc new file mode 100644 index 0000000000..79de8eb3ad --- /dev/null +++ b/changelog.d/4487.misc @@ -0,0 +1 @@ +Fix idna and ipv6 literal handling in MatrixFederationAgent diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 1788e9a34a..9526f39cca 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -22,7 +22,6 @@ from twisted.web.client import URI, Agent, HTTPConnectionPool from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent -from synapse.http.endpoint import parse_server_name from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list from synapse.util.logcontext import make_deferred_yieldable @@ -87,9 +86,7 @@ class MatrixFederationAgent(object): from being sent). """ - parsed_uri = URI.fromBytes(uri) - server_name_bytes = parsed_uri.netloc - host, port = parse_server_name(server_name_bytes.decode("ascii")) + parsed_uri = URI.fromBytes(uri, defaultPort=-1) # XXX disabling TLS is really only supported here for the benefit of the # unit tests. We should make the UTs cope with TLS rather than having to make @@ -97,16 +94,20 @@ class MatrixFederationAgent(object): if self._tls_client_options_factory is None: tls_options = None else: - tls_options = self._tls_client_options_factory.get_options(host) + tls_options = self._tls_client_options_factory.get_options( + parsed_uri.host.decode("ascii") + ) - if port is not None: - target = (host, port) + if parsed_uri.port != -1: + # there was an explicit port in the URI + target = parsed_uri.host, parsed_uri.port else: - service_name = b"_matrix._tcp.%s" % (server_name_bytes, ) + service_name = b"_matrix._tcp.%s" % (parsed_uri.host, ) server_list = yield self._srv_resolver.resolve_service(service_name) if not server_list: - target = (host, 8448) - logger.debug("No SRV record for %s, using %s", host, target) + target = (parsed_uri.host, 8448) + logger.debug( + "No SRV record for %s, using %s", service_name, target) else: target = pick_server_from_list(server_list) @@ -117,7 +118,7 @@ class MatrixFederationAgent(object): headers = headers.copy() if not headers.hasHeader(b'host'): - headers.addRawHeader(b'host', server_name_bytes) + headers.addRawHeader(b'host', parsed_uri.netloc) class EndpointFactory(object): @staticmethod diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 261afb5f41..f144092a51 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -209,6 +209,95 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_get_ipv6_address(self): + """ + Test the behaviour when the server name contains an explicit IPv6 address + (with no port) + """ + + # the SRV lookup will return an empty list (XXX: why do we even do an SRV lookup?) + self.mock_resolver.resolve_service.side_effect = lambda _: [] + + # then there will be a getaddrinfo on the IP + self.reactor.lookups["::1"] = "::1" + + test_d = self._make_get_request(b"matrix://[::1]/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.::1", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '::1') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=None, + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'[::1]'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_get_ipv6_address_with_port(self): + """ + Test the behaviour when the server name contains an explicit IPv6 address + (with explicit port) + """ + + # there will be a getaddrinfo on the IP + self.reactor.lookups["::1"] = "::1" + + test_d = self._make_get_request(b"matrix://[::1]:80/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '::1') + self.assertEqual(port, 80) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=None, + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'[::1]:80'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def test_get_hostname_no_srv(self): """ Test the behaviour when the server name has no port, and no SRV record @@ -258,7 +347,7 @@ class MatrixFederationAgentTests(TestCase): Test the behaviour when there is a single SRV record """ self.mock_resolver.resolve_service.side_effect = lambda _: [ - Server(host="srvtarget", port=8443) + Server(host=b"srvtarget", port=8443) ] self.reactor.lookups["srvtarget"] = "1.2.3.4" @@ -298,6 +387,96 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_idna_servername(self): + """test the behaviour when the server name has idna chars in""" + + self.mock_resolver.resolve_service.side_effect = lambda _: [] + + # hostnameendpoint does the lookup on the unicode value (getaddrinfo encodes + # it back to idna) + self.reactor.lookups[u"bücher.com"] = "1.2.3.4" + + # this is idna for bücher.com + test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.xn--bcher-kva.com", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'xn--bcher-kva.com', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'xn--bcher-kva.com'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_idna_srv_target(self): + """test the behaviour when the target of a SRV record has idna chars""" + + self.mock_resolver.resolve_service.side_effect = lambda _: [ + Server(host=b"xn--trget-3qa.com", port=8443) # târget.com + ] + self.reactor.lookups[u"târget.com"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.xn--bcher-kva.com", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'xn--bcher-kva.com', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'xn--bcher-kva.com'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def _check_logcontext(context): current = LoggingContext.current_context() -- cgit 1.5.1 From 51958df766667a75236764c9df57cfd60d57bd5e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sun, 27 Jan 2019 23:24:17 +0000 Subject: MatrixFederationAgent: factor out routing logic This is going to get too big and unmanageable. --- synapse/http/federation/matrix_federation_agent.py | 80 +++++++++++++++++----- 1 file changed, 62 insertions(+), 18 deletions(-) (limited to 'synapse/http/federation/matrix_federation_agent.py') diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 9526f39cca..2a8bceed9a 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -14,6 +14,7 @@ # limitations under the License. import logging +import attr from zope.interface import implementer from twisted.internet import defer @@ -85,9 +86,11 @@ class MatrixFederationAgent(object): response from being received (including problems that prevent the request from being sent). """ - parsed_uri = URI.fromBytes(uri, defaultPort=-1) + res = yield self._route_matrix_uri(parsed_uri) + # set up the TLS connection params + # # XXX disabling TLS is really only supported here for the benefit of the # unit tests. We should make the UTs cope with TLS rather than having to make # the code support the unit tests. @@ -95,22 +98,9 @@ class MatrixFederationAgent(object): tls_options = None else: tls_options = self._tls_client_options_factory.get_options( - parsed_uri.host.decode("ascii") + res.tls_server_name.decode("ascii") ) - if parsed_uri.port != -1: - # there was an explicit port in the URI - target = parsed_uri.host, parsed_uri.port - else: - service_name = b"_matrix._tcp.%s" % (parsed_uri.host, ) - server_list = yield self._srv_resolver.resolve_service(service_name) - if not server_list: - target = (parsed_uri.host, 8448) - logger.debug( - "No SRV record for %s, using %s", service_name, target) - else: - target = pick_server_from_list(server_list) - # make sure that the Host header is set correctly if headers is None: headers = Headers() @@ -118,13 +108,13 @@ class MatrixFederationAgent(object): headers = headers.copy() if not headers.hasHeader(b'host'): - headers.addRawHeader(b'host', parsed_uri.netloc) + headers.addRawHeader(b'host', res.host_header) class EndpointFactory(object): @staticmethod def endpointForURI(_uri): - logger.info("Connecting to %s:%s", target[0], target[1]) - ep = HostnameEndpoint(self._reactor, host=target[0], port=target[1]) + logger.info("Connecting to %s:%s", res.target_host, res.target_port) + ep = HostnameEndpoint(self._reactor, res.target_host, res.target_port) if tls_options is not None: ep = wrapClientTLS(tls_options, ep) return ep @@ -134,3 +124,57 @@ class MatrixFederationAgent(object): agent.request(method, uri, headers, bodyProducer) ) defer.returnValue(res) + + @defer.inlineCallbacks + def _route_matrix_uri(self, parsed_uri): + """Helper for `request`: determine the routing for a Matrix URI + + Args: + parsed_uri (twisted.web.client.URI): uri to route. Note that it should be + parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1 + if there is no explicit port given. + + Returns: + Deferred[_RoutingResult] + """ + if parsed_uri.port != -1: + # there is an explicit port + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=parsed_uri.host, + target_port=parsed_uri.port, + )) + + # try a SRV lookup + service_name = b"_matrix._tcp.%s" % (parsed_uri.host,) + server_list = yield self._srv_resolver.resolve_service(service_name) + + if not server_list: + target_host = parsed_uri.host + port = 8448 + logger.debug( + "No SRV record for %s, using %s:%i", + parsed_uri.host.decode("ascii"), target_host.decode("ascii"), port, + ) + else: + target_host, port = pick_server_from_list(server_list) + logger.debug( + "Picked %s:%i from SRV records for %s", + target_host.decode("ascii"), port, parsed_uri.host.decode("ascii"), + ) + + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=target_host, + target_port=port, + )) + + +@attr.s +class _RoutingResult(object): + host_header = attr.ib() + tls_server_name = attr.ib() + target_host = attr.ib() + target_port = attr.ib() -- cgit 1.5.1 From 0fd5b3b53e312a48d98afec27ff3686d8ffce199 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 26 Jan 2019 21:48:50 +0000 Subject: Handle IP literals explicitly We don't want to be doing .well-known lookups on these guys. --- synapse/http/federation/matrix_federation_agent.py | 19 +++++++++++++++++++ tests/http/federation/test_matrix_federation_agent.py | 19 ++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) (limited to 'synapse/http/federation/matrix_federation_agent.py') diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 2a8bceed9a..4d674cdb93 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -15,6 +15,7 @@ import logging import attr +from netaddr import IPAddress from zope.interface import implementer from twisted.internet import defer @@ -137,6 +138,24 @@ class MatrixFederationAgent(object): Returns: Deferred[_RoutingResult] """ + # check for an IP literal + try: + ip_address = IPAddress(parsed_uri.host.decode("ascii")) + except Exception: + # not an IP address + ip_address = None + + if ip_address: + port = parsed_uri.port + if port == -1: + port = 8448 + defer.returnValue(_RoutingResult( + host_header=parsed_uri.netloc, + tls_server_name=parsed_uri.host, + target_host=parsed_uri.host, + target_port=port, + )) + if parsed_uri.port != -1: # there is an explicit port defer.returnValue(_RoutingResult( diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index f144092a51..8257594fb8 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -166,11 +166,7 @@ class MatrixFederationAgentTests(TestCase): """ Test the behaviour when the server name contains an explicit IP (with no port) """ - - # the SRV lookup will return an empty list (XXX: why do we even do an SRV lookup?) - self.mock_resolver.resolve_service.side_effect = lambda _: [] - - # then there will be a getaddrinfo on the IP + # there will be a getaddrinfo on the IP self.reactor.lookups["1.2.3.4"] = "1.2.3.4" test_d = self._make_get_request(b"matrix://1.2.3.4/foo/bar") @@ -178,10 +174,6 @@ class MatrixFederationAgentTests(TestCase): # Nothing happened yet self.assertNoResult(test_d) - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.1.2.3.4", - ) - # Make sure treq is trying to connect clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) @@ -215,10 +207,7 @@ class MatrixFederationAgentTests(TestCase): (with no port) """ - # the SRV lookup will return an empty list (XXX: why do we even do an SRV lookup?) - self.mock_resolver.resolve_service.side_effect = lambda _: [] - - # then there will be a getaddrinfo on the IP + # there will be a getaddrinfo on the IP self.reactor.lookups["::1"] = "::1" test_d = self._make_get_request(b"matrix://[::1]/foo/bar") @@ -226,10 +215,6 @@ class MatrixFederationAgentTests(TestCase): # Nothing happened yet self.assertNoResult(test_d) - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.::1", - ) - # Make sure treq is trying to connect clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) -- cgit 1.5.1 From 3bd0f1a4a3b735794f4a19d352b36c2e86e86dc0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 28 Jan 2019 12:43:09 +0000 Subject: docstrings for _RoutingResult --- synapse/http/federation/matrix_federation_agent.py | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'synapse/http/federation/matrix_federation_agent.py') diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 4d674cdb93..4a6f634c8b 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -193,7 +193,43 @@ class MatrixFederationAgent(object): @attr.s class _RoutingResult(object): + """The result returned by `_route_matrix_uri`. + + Contains the parameters needed to direct a federation connection to a particular + server. + + Where a SRV record points to several servers, this object contains a single server + chosen from the list. + """ + host_header = attr.ib() + """ + The value we should assign to the Host header (host:port from the matrix + URI, or .well-known). + + :type: bytes + """ + tls_server_name = attr.ib() + """ + The server name we should set in the SNI (typically host, without port, from the + matrix URI or .well-known) + + :type: bytes + """ + target_host = attr.ib() + """ + The hostname (or IP literal) we should route the TCP connection to (the target of the + SRV record, or the hostname from the URL/.well-known) + + :type: bytes + """ + target_port = attr.ib() + """ + The port we should route the TCP connection to (the target of the SRV record, or + the port from the URL/.well-known, or 8448) + + :type: int + """ -- cgit 1.5.1