diff --git a/changelog.d/16137.feature b/changelog.d/16137.feature
new file mode 100644
index 0000000000..bba6f161cd
--- /dev/null
+++ b/changelog.d/16137.feature
@@ -0,0 +1 @@
+Support resolving homeservers using `matrix-fed` DNS SRV records from [MSC4040](https://github.com/matrix-org/matrix-spec-proposals/pull/4040).
diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py
index 5ad334b4d8..e8baeac5e2 100755
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -329,6 +329,17 @@ class MatrixConnectionAdapter(HTTPAdapter):
raise ValueError("Invalid host:port '%s'" % (server_name,))
return out[0], port, out[0]
+ # Look up SRV for Matrix 1.8 `matrix-fed` service first
+ try:
+ srv = srvlookup.lookup("matrix-fed", "tcp", server_name)[0]
+ print(
+ f"SRV lookup on _matrix-fed._tcp.{server_name} gave {srv}",
+ file=sys.stderr,
+ )
+ return srv.host, srv.port, server_name
+ except Exception:
+ pass
+ # Fall back to deprecated `matrix` service
try:
srv = srvlookup.lookup("matrix", "tcp", server_name)[0]
print(
@@ -337,6 +348,7 @@ class MatrixConnectionAdapter(HTTPAdapter):
)
return srv.host, srv.port, server_name
except Exception:
+ # Fall even further back to just port 8448
return server_name, 8448, server_name
@staticmethod
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 91a24efcd0..a3a396bb37 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -399,15 +399,34 @@ class MatrixHostnameEndpoint:
if port or _is_ip_literal(host):
return [Server(host, port or 8448)]
+ # Check _matrix-fed._tcp SRV record.
logger.debug("Looking up SRV record for %s", host.decode(errors="replace"))
+ server_list = await self._srv_resolver.resolve_service(
+ b"_matrix-fed._tcp." + host
+ )
+
+ if server_list:
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug(
+ "Got %s from SRV lookup for %s",
+ ", ".join(map(str, server_list)),
+ host.decode(errors="replace"),
+ )
+ return server_list
+
+ # No _matrix-fed._tcp SRV record, fallback to legacy _matrix._tcp SRV record.
+ logger.debug(
+ "Looking up deprecated SRV record for %s", host.decode(errors="replace")
+ )
server_list = await self._srv_resolver.resolve_service(b"_matrix._tcp." + host)
if server_list:
- logger.debug(
- "Got %s from SRV lookup for %s",
- ", ".join(map(str, server_list)),
- host.decode(errors="replace"),
- )
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug(
+ "Got %s from deprecated SRV lookup for %s",
+ ", ".join(map(str, server_list)),
+ host.decode(errors="replace"),
+ )
return server_list
# No SRV records, so we fallback to host and 8448
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index 0d17f2fe5b..9f63fa6fa8 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -15,7 +15,7 @@ import base64
import logging
import os
from typing import Generator, List, Optional, cast
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, call, patch
import treq
from netaddr import IPSet
@@ -651,9 +651,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
# .well-known request fails.
self.reactor.pump((0.4,))
- # now there should be a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv1"
+ # now there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv1"), call(b"_matrix._tcp.testserv1")]
)
# we should fall back to a direct connection
@@ -737,9 +737,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
# .well-known request fails.
self.reactor.pump((0.4,))
- # now there should be a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv"
+ # now there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
)
# we should fall back to a direct connection
@@ -788,9 +788,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
content=b'{ "m.server": "target-server" }',
)
- # there should be a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.target-server"
+ # there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [
+ call(b"_matrix-fed._tcp.target-server"),
+ call(b"_matrix._tcp.target-server"),
+ ]
)
# now we should get a connection to the target server
@@ -878,9 +881,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.reactor.pump((0.1,))
- # there should be a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.target-server"
+ # there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [
+ call(b"_matrix-fed._tcp.target-server"),
+ call(b"_matrix._tcp.target-server"),
+ ]
)
# now we should get a connection to the target server
@@ -942,9 +948,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
client_factory, expected_sni=b"testserv", content=b"NOT JSON"
)
- # now there should be a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv"
+ # now there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
)
# we should fall back to a direct connection
@@ -1016,14 +1022,14 @@ class MatrixFederationAgentTests(unittest.TestCase):
# there should be no requests
self.assertEqual(len(http_proto.requests), 0)
- # and there should be a SRV lookup instead
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv"
+ # and there should be two SRV lookups instead
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
)
def test_get_hostname_srv(self) -> None:
"""
- Test the behaviour when there is a single SRV record
+ Test the behaviour when there is a single SRV record for _matrix-fed.
"""
self.agent = self._make_agent()
@@ -1039,7 +1045,51 @@ class MatrixFederationAgentTests(unittest.TestCase):
# the request for a .well-known will have failed with a DNS lookup error.
self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv"
+ b"_matrix-fed._tcp.testserv"
+ )
+
+ # Make sure treq is trying to connect
+ clients = self.reactor.tcpClients
+ self.assertEqual(len(clients), 1)
+ (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+ self.assertEqual(host, "1.2.3.4")
+ self.assertEqual(port, 8443)
+
+ # make a test server, and wire up the client
+ http_server = self._make_connection(client_factory, expected_sni=b"testserv")
+
+ self.assertEqual(len(http_server.requests), 1)
+ request = http_server.requests[0]
+ self.assertEqual(request.method, b"GET")
+ self.assertEqual(request.path, b"/foo/bar")
+ self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"])
+
+ # finish the request
+ request.finish()
+ self.reactor.pump((0.1,))
+ self.successResultOf(test_d)
+
+ def test_get_hostname_srv_legacy(self) -> None:
+ """
+ Test the behaviour when there is a single SRV record for _matrix.
+ """
+ self.agent = self._make_agent()
+
+ # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+ self.mock_resolver.resolve_service.side_effect = [
+ [],
+ [Server(host=b"srvtarget", port=8443)],
+ ]
+ self.reactor.lookups["srvtarget"] = "1.2.3.4"
+
+ test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+ # Nothing happened yet
+ self.assertNoResult(test_d)
+
+ # the request for a .well-known will have failed with a DNS lookup error.
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
)
# Make sure treq is trying to connect
@@ -1065,7 +1115,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
def test_get_well_known_srv(self) -> None:
"""Test the behaviour when the .well-known redirects to a place where there
- is a SRV.
+ is a _matrix-fed SRV record.
"""
self.agent = self._make_agent()
@@ -1096,7 +1146,72 @@ class MatrixFederationAgentTests(unittest.TestCase):
# there should be a SRV lookup
self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.target-server"
+ b"_matrix-fed._tcp.target-server"
+ )
+
+ # now we should get a connection to the target of the SRV record
+ self.assertEqual(len(clients), 2)
+ (host, port, client_factory, _timeout, _bindAddress) = clients[1]
+ self.assertEqual(host, "5.6.7.8")
+ self.assertEqual(port, 8443)
+
+ # make a test server, and wire up the client
+ http_server = self._make_connection(
+ client_factory, expected_sni=b"target-server"
+ )
+
+ self.assertEqual(len(http_server.requests), 1)
+ request = http_server.requests[0]
+ self.assertEqual(request.method, b"GET")
+ self.assertEqual(request.path, b"/foo/bar")
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"host"), [b"target-server"]
+ )
+
+ # finish the request
+ request.finish()
+ self.reactor.pump((0.1,))
+ self.successResultOf(test_d)
+
+ def test_get_well_known_srv_legacy(self) -> None:
+ """Test the behaviour when the .well-known redirects to a place where there
+ is a _matrix SRV record.
+ """
+ self.agent = self._make_agent()
+
+ self.reactor.lookups["testserv"] = "1.2.3.4"
+ self.reactor.lookups["srvtarget"] = "5.6.7.8"
+
+ test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+ # Nothing happened yet
+ self.assertNoResult(test_d)
+
+ # there should be an attempt to connect on port 443 for the .well-known
+ clients = self.reactor.tcpClients
+ self.assertEqual(len(clients), 1)
+ (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+ self.assertEqual(host, "1.2.3.4")
+ self.assertEqual(port, 443)
+
+ # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+ self.mock_resolver.resolve_service.side_effect = [
+ [],
+ [Server(host=b"srvtarget", port=8443)],
+ ]
+
+ self._handle_well_known_connection(
+ client_factory,
+ expected_sni=b"testserv",
+ content=b'{ "m.server": "target-server" }',
+ )
+
+ # there should be two SRV lookups
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [
+ call(b"_matrix-fed._tcp.target-server"),
+ call(b"_matrix._tcp.target-server"),
+ ]
)
# now we should get a connection to the target of the SRV record
@@ -1158,8 +1273,11 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.reactor.pump((0.4,))
# now there should have been a SRV lookup
- self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.xn--bcher-kva.com"
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [
+ call(b"_matrix-fed._tcp.xn--bcher-kva.com"),
+ call(b"_matrix._tcp.xn--bcher-kva.com"),
+ ]
)
# We should fall back to port 8448
@@ -1188,7 +1306,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.successResultOf(test_d)
def test_idna_srv_target(self) -> None:
- """test the behaviour when the target of a SRV record has idna chars"""
+ """test the behaviour when the target of a _matrix-fed SRV record has idna chars"""
self.agent = self._make_agent()
self.mock_resolver.resolve_service.return_value = [
@@ -1204,7 +1322,57 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.assertNoResult(test_d)
self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.xn--bcher-kva.com"
+ b"_matrix-fed._tcp.xn--bcher-kva.com"
+ )
+
+ # Make sure treq is trying to connect
+ clients = self.reactor.tcpClients
+ self.assertEqual(len(clients), 1)
+ (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+ self.assertEqual(host, "1.2.3.4")
+ self.assertEqual(port, 8443)
+
+ # make a test server, and wire up the client
+ http_server = self._make_connection(
+ client_factory, expected_sni=b"xn--bcher-kva.com"
+ )
+
+ self.assertEqual(len(http_server.requests), 1)
+ request = http_server.requests[0]
+ self.assertEqual(request.method, b"GET")
+ self.assertEqual(request.path, b"/foo/bar")
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"host"), [b"xn--bcher-kva.com"]
+ )
+
+ # finish the request
+ request.finish()
+ self.reactor.pump((0.1,))
+ self.successResultOf(test_d)
+
+ def test_idna_srv_target_legacy(self) -> None:
+ """test the behaviour when the target of a _matrix SRV record has idna chars"""
+ self.agent = self._make_agent()
+
+ # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+ self.mock_resolver.resolve_service.side_effect = [
+ [],
+ [Server(host=b"xn--trget-3qa.com", port=8443)],
+ ] # târget.com
+ self.reactor.lookups["xn--trget-3qa.com"] = "1.2.3.4"
+
+ test_d = self._make_get_request(
+ b"matrix-federation://xn--bcher-kva.com/foo/bar"
+ )
+
+ # Nothing happened yet
+ self.assertNoResult(test_d)
+
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [
+ call(b"_matrix-fed._tcp.xn--bcher-kva.com"),
+ call(b"_matrix._tcp.xn--bcher-kva.com"),
+ ]
)
# Make sure treq is trying to connect
@@ -1394,7 +1562,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.assertIsNone(r.delegated_server)
def test_srv_fallbacks(self) -> None:
- """Test that other SRV results are tried if the first one fails."""
+ """Test that other SRV results are tried if the first one fails for _matrix-fed SRV."""
self.agent = self._make_agent()
self.mock_resolver.resolve_service.return_value = [
@@ -1409,7 +1577,67 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.assertNoResult(test_d)
self.mock_resolver.resolve_service.assert_called_once_with(
- b"_matrix._tcp.testserv"
+ b"_matrix-fed._tcp.testserv"
+ )
+
+ # We should see an attempt to connect to the first server
+ clients = self.reactor.tcpClients
+ self.assertEqual(len(clients), 1)
+ (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+ self.assertEqual(host, "1.2.3.4")
+ self.assertEqual(port, 8443)
+
+        # Fail the connection attempt
+ client_factory.clientConnectionFailed(None, Exception("nope"))
+
+ # There's a 300ms delay in HostnameEndpoint
+ self.reactor.pump((0.4,))
+
+ # Hasn't failed yet
+ self.assertNoResult(test_d)
+
+        # We should now see an attempt to connect to the second server
+ clients = self.reactor.tcpClients
+ self.assertEqual(len(clients), 1)
+ (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+ self.assertEqual(host, "1.2.3.4")
+ self.assertEqual(port, 8444)
+
+ # make a test server, and wire up the client
+ http_server = self._make_connection(client_factory, expected_sni=b"testserv")
+
+ self.assertEqual(len(http_server.requests), 1)
+ request = http_server.requests[0]
+ self.assertEqual(request.method, b"GET")
+ self.assertEqual(request.path, b"/foo/bar")
+ self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"])
+
+ # finish the request
+ request.finish()
+ self.reactor.pump((0.1,))
+ self.successResultOf(test_d)
+
+ def test_srv_fallbacks_legacy(self) -> None:
+ """Test that other SRV results are tried if the first one fails for _matrix SRV."""
+ self.agent = self._make_agent()
+
+ # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+ self.mock_resolver.resolve_service.side_effect = [
+ [],
+ [
+ Server(host=b"target.com", port=8443),
+ Server(host=b"target.com", port=8444),
+ ],
+ ]
+ self.reactor.lookups["target.com"] = "1.2.3.4"
+
+ test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+ # Nothing happened yet
+ self.assertNoResult(test_d)
+
+ self.mock_resolver.resolve_service.assert_has_calls(
+ [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
)
# We should see an attempt to connect to the first server
@@ -1449,6 +1677,43 @@ class MatrixFederationAgentTests(unittest.TestCase):
self.reactor.pump((0.1,))
self.successResultOf(test_d)
+    def test_srv_no_fallback_to_legacy(self) -> None:
+        """Test that _matrix SRV results are not tried if the _matrix-fed one fails."""
+        self.agent = self._make_agent()
+
+        # _matrix-fed yields one server (its connection will be failed below);
+        # the second entry is what a (forbidden) legacy _matrix lookup would get.
+        self.mock_resolver.resolve_service.side_effect = [
+            [Server(host=b"target.com", port=8443)],
+            [],
+        ]
+        self.reactor.lookups["target.com"] = "1.2.3.4"
+
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        # Only the _matrix-fed is checked, _matrix is ignored.
+        self.mock_resolver.resolve_service.assert_called_once_with(
+            b"_matrix-fed._tcp.testserv"
+        )
+
+        # We should see an attempt to connect to the only server
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8443)
+
+        # Fail the connection attempt
+        client_factory.clientConnectionFailed(None, Exception("nope"))
+
+        # There's a 300ms delay in HostnameEndpoint
+        self.reactor.pump((0.4,))
+        # The request must have failed by now, with no legacy _matrix SRV lookup.
+        self.failureResultOf(test_d)
+        self.assertEqual(self.mock_resolver.resolve_service.call_count, 1)
+
class TestCachePeriodFromHeaders(unittest.TestCase):
def test_cache_control(self) -> None:
|