From 30fba6210834a4ecd91badf0c8f3eb278b72e746 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 2 Dec 2020 11:09:24 -0500 Subject: Apply an IP range blacklist to push and key revocation requests. (#8821) Replaces the `federation_ip_range_blacklist` configuration setting with an `ip_range_blacklist` setting with wider scope. It now applies to: * Federation * Identity servers * Push notifications * Checking key validitity for third-party invite events The old `federation_ip_range_blacklist` setting is still honored if present, but with reduced scope (it only applies to federation and identity servers). --- synapse/http/client.py | 46 +++++++++++++++------- synapse/http/federation/matrix_federation_agent.py | 16 ++++++-- synapse/http/matrixfederationclient.py | 26 ++++-------- 3 files changed, 52 insertions(+), 36 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/client.py b/synapse/http/client.py index e5b13593f2..df7730078f 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -125,7 +125,7 @@ def _make_scheduler(reactor): return _scheduler -class IPBlacklistingResolver: +class _IPBlacklistingResolver: """ A proxy for reactor.nameResolver which only produces non-blacklisted IP addresses, preventing DNS rebinding attacks on URL preview. @@ -199,6 +199,35 @@ class IPBlacklistingResolver: return r +@implementer(IReactorPluggableNameResolver) +class BlacklistingReactorWrapper: + """ + A Reactor wrapper which will prevent DNS resolution to blacklisted IP + addresses, to prevent DNS rebinding. + """ + + def __init__( + self, + reactor: IReactorPluggableNameResolver, + ip_whitelist: Optional[IPSet], + ip_blacklist: IPSet, + ): + self._reactor = reactor + + # We need to use a DNS resolver which filters out blacklisted IP + # addresses, to prevent DNS rebinding. + self._nameResolver = _IPBlacklistingResolver( + self._reactor, ip_whitelist, ip_blacklist + ) + + def __getattr__(self, attr: str) -> Any: + # Passthrough to the real reactor except for the DNS resolver. + if attr == "nameResolver": + return self._nameResolver + else: + return getattr(self._reactor, attr) + + class BlacklistingAgentWrapper(Agent): """ An Agent wrapper which will prevent access to IP addresses being accessed @@ -292,22 +321,11 @@ class SimpleHttpClient: self.user_agent = self.user_agent.encode("ascii") if self._ip_blacklist: - real_reactor = hs.get_reactor() # If we have an IP blacklist, we need to use a DNS resolver which # filters out blacklisted IP addresses, to prevent DNS rebinding. - nameResolver = IPBlacklistingResolver( - real_reactor, self._ip_whitelist, self._ip_blacklist + self.reactor = BlacklistingReactorWrapper( + hs.get_reactor(), self._ip_whitelist, self._ip_blacklist ) - - @implementer(IReactorPluggableNameResolver) - class Reactor: - def __getattr__(_self, attr): - if attr == "nameResolver": - return nameResolver - else: - return getattr(real_reactor, attr) - - self.reactor = Reactor() else: self.reactor = hs.get_reactor() diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index e77f9587d0..3b756a7dc2 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -16,7 +16,7 @@ import logging import urllib.parse from typing import List, Optional -from netaddr import AddrFormatError, IPAddress +from netaddr import AddrFormatError, IPAddress, IPSet from zope.interface import implementer from twisted.internet import defer @@ -31,6 +31,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IAgentEndpointFactory, IBodyProducer from synapse.crypto.context_factory import FederationPolicyForHTTPS +from synapse.http.client import BlacklistingAgentWrapper from synapse.http.federation.srv_resolver import Server, SrvResolver from synapse.http.federation.well_known_resolver import WellKnownResolver from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -70,6 +71,7 @@ class MatrixFederationAgent: reactor: IReactorCore, tls_client_options_factory: Optional[FederationPolicyForHTTPS], user_agent: bytes, + ip_blacklist: IPSet, _srv_resolver: Optional[SrvResolver] = None, _well_known_resolver: Optional[WellKnownResolver] = None, ): @@ -90,12 +92,18 @@ class MatrixFederationAgent: self.user_agent = user_agent if _well_known_resolver is None: + # Note that the name resolver has already been wrapped in a + # IPBlacklistingResolver by MatrixFederationHttpClient. _well_known_resolver = WellKnownResolver( self._reactor, - agent=Agent( + agent=BlacklistingAgentWrapper( + Agent( + self._reactor, + pool=self._pool, + contextFactory=tls_client_options_factory, + ), self._reactor, - pool=self._pool, - contextFactory=tls_client_options_factory, + ip_blacklist=ip_blacklist, ), user_agent=self.user_agent, ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 4e27f93b7a..c962994727 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -26,11 +26,10 @@ import treq from canonicaljson import encode_canonical_json from prometheus_client import Counter from signedjson.sign import sign_json -from zope.interface import implementer from twisted.internet import defer from twisted.internet.error import DNSLookupError -from twisted.internet.interfaces import IReactorPluggableNameResolver, IReactorTime +from twisted.internet.interfaces import IReactorTime from twisted.internet.task import _EPSILON, Cooperator from twisted.web.http_headers import Headers from twisted.web.iweb import IBodyProducer, IResponse @@ -45,7 +44,7 @@ from synapse.api.errors import ( from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, - IPBlacklistingResolver, + BlacklistingReactorWrapper, encode_query_args, readBodyToFile, ) @@ -221,31 +220,22 @@ class MatrixFederationHttpClient: self.signing_key = hs.signing_key self.server_name = hs.hostname - real_reactor = hs.get_reactor() - # We need to use a DNS resolver which filters out blacklisted IP # addresses, to prevent DNS rebinding. - nameResolver = IPBlacklistingResolver( - real_reactor, None, hs.config.federation_ip_range_blacklist + self.reactor = BlacklistingReactorWrapper( + hs.get_reactor(), None, hs.config.federation_ip_range_blacklist ) - @implementer(IReactorPluggableNameResolver) - class Reactor: - def __getattr__(_self, attr): - if attr == "nameResolver": - return nameResolver - else: - return getattr(real_reactor, attr) - - self.reactor = Reactor() - user_agent = hs.version_string if hs.config.user_agent_suffix: user_agent = "%s %s" % (user_agent, hs.config.user_agent_suffix) user_agent = user_agent.encode("ascii") self.agent = MatrixFederationAgent( - self.reactor, tls_client_options_factory, user_agent + self.reactor, + tls_client_options_factory, + user_agent, + hs.config.federation_ip_range_blacklist, ) # Use a BlacklistingAgentWrapper to prevent circumventing the IP -- cgit 1.5.1 From 1821f7cc265ab01cfee4055cfddb90563b61ce5b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 10 Dec 2020 12:42:55 +0000 Subject: Fix buglet in DirectRenderJsonResource (#8897) this was using `canonical_json` without setting it, so when you used it as a standalone class, you would get exceptions. --- changelog.d/8897.feature | 1 + synapse/http/server.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8897.feature (limited to 'synapse/http') diff --git a/changelog.d/8897.feature b/changelog.d/8897.feature new file mode 100644 index 0000000000..d450ef4998 --- /dev/null +++ b/changelog.d/8897.feature @@ -0,0 +1 @@ +Add support for allowing users to pick their own user ID during a single-sign-on login. diff --git a/synapse/http/server.py b/synapse/http/server.py index 6a4e429a6c..e464bfe6c7 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -275,6 +275,10 @@ class DirectServeJsonResource(_AsyncResource): formatting responses and errors as JSON. """ + def __init__(self, canonical_json=False, extract_context=False): + super().__init__(extract_context) + self.canonical_json = canonical_json + def _send_response( self, request: Request, code: int, response_object: Any, ): @@ -318,9 +322,7 @@ class JsonResource(DirectServeJsonResource): ) def __init__(self, hs, canonical_json=True, extract_context=False): - super().__init__(extract_context) - - self.canonical_json = canonical_json + super().__init__(canonical_json, extract_context) self.clock = hs.get_clock() self.path_regexs = {} self.hs = hs -- cgit 1.5.1 From 1619802228033455ff6e5863c52556996b38e8c6 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 14 Dec 2020 14:19:47 -0500 Subject: Various clean-ups to the logging context code (#8935) --- changelog.d/8916.misc | 2 +- changelog.d/8935.misc | 1 + synapse/config/logger.py | 2 +- synapse/http/site.py | 3 +-- synapse/logging/context.py | 24 +++++------------------- synapse/metrics/background_process_metrics.py | 7 +++---- synapse/replication/tcp/protocol.py | 3 +-- tests/handlers/test_federation.py | 6 +++--- tests/logging/test_terse_json.py | 7 ++----- tests/test_federation.py | 2 +- tests/test_utils/logging_setup.py | 2 +- 11 files changed, 20 insertions(+), 39 deletions(-) create mode 100644 changelog.d/8935.misc (limited to 'synapse/http') diff --git a/changelog.d/8916.misc b/changelog.d/8916.misc index c71ef480e6..bf94135fd5 100644 --- a/changelog.d/8916.misc +++ b/changelog.d/8916.misc @@ -1 +1 @@ -Improve structured logging tests. +Various clean-ups to the structured logging and logging context code. diff --git a/changelog.d/8935.misc b/changelog.d/8935.misc new file mode 100644 index 0000000000..bf94135fd5 --- /dev/null +++ b/changelog.d/8935.misc @@ -0,0 +1 @@ +Various clean-ups to the structured logging and logging context code. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index d4e887a3e0..4df3f93c1c 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -206,7 +206,7 @@ def _setup_stdlib_logging(config, log_config_path, logBeginner: LogBeginner) -> # filter options, but care must when using e.g. MemoryHandler to buffer # writes. - log_context_filter = LoggingContextFilter(request="") + log_context_filter = LoggingContextFilter() log_metadata_filter = MetadataFilter({"server_name": config.server_name}) old_factory = logging.getLogRecordFactory() diff --git a/synapse/http/site.py b/synapse/http/site.py index 5f0581dc3f..5a5790831b 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -128,8 +128,7 @@ class SynapseRequest(Request): # create a LogContext for this request request_id = self.get_request_id() - logcontext = self.logcontext = LoggingContext(request_id) - logcontext.request = request_id + self.logcontext = LoggingContext(request_id, request=request_id) # override the Server header which is set by twisted self.setHeader("Server", self.site.server_version_string) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index ca0c774cc5..a507a83e93 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -203,10 +203,6 @@ class _Sentinel: def copy_to(self, record): pass - def copy_to_twisted_log_entry(self, record): - record["request"] = None - record["scope"] = None - def start(self, rusage: "Optional[resource._RUsage]"): pass @@ -372,13 +368,6 @@ class LoggingContext: # we also track the current scope: record.scope = self.scope - def copy_to_twisted_log_entry(self, record) -> None: - """ - Copy logging fields from this context to a Twisted log record. - """ - record["request"] = self.request - record["scope"] = self.scope - def start(self, rusage: "Optional[resource._RUsage]") -> None: """ Record that this logcontext is currently running. @@ -542,13 +531,10 @@ class LoggingContext: class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each record. - Args: - **defaults: Default values to avoid formatters complaining about - missing fields """ - def __init__(self, **defaults) -> None: - self.defaults = defaults + def __init__(self, request: str = ""): + self._default_request = request def filter(self, record) -> Literal[True]: """Add each fields from the logging contexts to the record. @@ -556,14 +542,14 @@ class LoggingContextFilter(logging.Filter): True to include the record in the log output. """ context = current_context() - for key, value in self.defaults.items(): - setattr(record, key, value) + record.request = self._default_request # context should never be None, but if it somehow ends up being, then # we end up in a death spiral of infinite loops, so let's check, for # robustness' sake. if context is not None: - context.copy_to(record) + # Logging is interested in the request. + record.request = context.request return True diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 76b7decf26..70e0fa45d9 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -199,8 +199,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext(desc) as context: - context.request = "%s-%i" % (desc, count) + with BackgroundProcessLoggingContext(desc, "%s-%i" % (desc, count)) as context: try: ctx = noop_context_manager() if bg_start_span: @@ -244,8 +243,8 @@ class BackgroundProcessLoggingContext(LoggingContext): __slots__ = ["_proc"] - def __init__(self, name: str): - super().__init__(name) + def __init__(self, name: str, request: Optional[str] = None): + super().__init__(name, request=request) self._proc = _BackgroundProcess(name, self) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index a509e599c2..804da994ea 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -172,8 +172,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. ctx_name = "replication-conn-%s" % self.conn_id - self._logging_context = BackgroundProcessLoggingContext(ctx_name) - self._logging_context.request = ctx_name + self._logging_context = BackgroundProcessLoggingContext(ctx_name, ctx_name) def connectionMade(self): logger.info("[%s] Connection established", self.id()) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index d0452e1490..0b24b89a2e 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -126,7 +126,7 @@ class FederationTestCase(unittest.HomeserverTestCase): room_version, ) - with LoggingContext(request="send_rejected"): + with LoggingContext("send_rejected"): d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) self.get_success(d) @@ -178,7 +178,7 @@ class FederationTestCase(unittest.HomeserverTestCase): room_version, ) - with LoggingContext(request="send_rejected"): + with LoggingContext("send_rejected"): d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) self.get_success(d) @@ -198,7 +198,7 @@ class FederationTestCase(unittest.HomeserverTestCase): # the auth code requires that a signature exists, but doesn't check that # signature... go figure. join_event.signatures[other_server] = {"x": "y"} - with LoggingContext(request="send_join"): + with LoggingContext("send_join"): d = run_in_background( self.handler.on_send_join_request, other_server, join_event ) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index f6e7e5fdaa..48a74e2eee 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -117,11 +117,10 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase): """ handler = logging.StreamHandler(self.output) handler.setFormatter(JsonFormatter()) - handler.addFilter(LoggingContextFilter(request="")) + handler.addFilter(LoggingContextFilter()) logger = self.get_logger(handler) - with LoggingContext() as context_one: - context_one.request = "test" + with LoggingContext(request="test"): logger.info("Hello there, %s!", "wally") log = self.get_log_line() @@ -132,9 +131,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase): "level", "namespace", "request", - "scope", ] self.assertCountEqual(log.keys(), expected_log_keys) self.assertEqual(log["log"], "Hello there, wally!") self.assertEqual(log["request"], "test") - self.assertIsNone(log["scope"]) diff --git a/tests/test_federation.py b/tests/test_federation.py index fa45f8b3b7..fc9aab32d0 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -134,7 +134,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase): } ) - with LoggingContext(request="lying_event"): + with LoggingContext(): failure = self.get_failure( self.handler.on_receive_pdu( "test.serv", lying_event, sent_to_us_directly=True diff --git a/tests/test_utils/logging_setup.py b/tests/test_utils/logging_setup.py index fdfb840b62..52ae5c5713 100644 --- a/tests/test_utils/logging_setup.py +++ b/tests/test_utils/logging_setup.py @@ -48,7 +48,7 @@ def setup_logging(): handler = ToTwistedHandler() formatter = logging.Formatter(log_format) handler.setFormatter(formatter) - handler.addFilter(LoggingContextFilter(request="")) + handler.addFilter(LoggingContextFilter()) root_logger.addHandler(handler) log_level = os.environ.get("SYNAPSE_TEST_LOG_LEVEL", "ERROR") -- cgit 1.5.1 From ff5c4da1289cb5e097902b3e55b771be342c29d6 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 16 Dec 2020 17:25:24 -0500 Subject: Add a maximum size for well-known lookups. (#8950) --- changelog.d/8950.misc | 1 + synapse/http/client.py | 32 ++++++++++++---------- synapse/http/federation/well_known_resolver.py | 25 +++++++++++++++-- synapse/http/matrixfederationclient.py | 13 +++++++-- .../federation/test_matrix_federation_agent.py | 27 ++++++++++++++++++ 5 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 changelog.d/8950.misc (limited to 'synapse/http') diff --git a/changelog.d/8950.misc b/changelog.d/8950.misc new file mode 100644 index 0000000000..42e0335afc --- /dev/null +++ b/changelog.d/8950.misc @@ -0,0 +1 @@ +Add a maximum size of 50 kilobytes to .well-known lookups. diff --git a/synapse/http/client.py b/synapse/http/client.py index df7730078f..29f40ddf5f 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -720,11 +720,14 @@ class SimpleHttpClient: try: length = await make_deferred_yieldable( - readBodyToFile(response, output_stream, max_size) + read_body_with_max_size(response, output_stream, max_size) + ) + except BodyExceededMaxSize: + SynapseError( + 502, + "Requested file is too large > %r bytes" % (max_size,), + Codes.TOO_LARGE, ) - except SynapseError: - # This can happen e.g. because the body is too large. - raise except Exception as e: raise SynapseError(502, ("Failed to download remote body: %s" % e)) from e @@ -748,7 +751,11 @@ def _timeout_to_request_timed_out_error(f: Failure): return f -class _ReadBodyToFileProtocol(protocol.Protocol): +class BodyExceededMaxSize(Exception): + """The maximum allowed size of the HTTP body was exceeded.""" + + +class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): def __init__( self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int] ): @@ -761,13 +768,7 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.stream.write(data) self.length += len(data) if self.max_size is not None and self.length >= self.max_size: - self.deferred.errback( - SynapseError( - 502, - "Requested file is too large > %r bytes" % (self.max_size,), - Codes.TOO_LARGE, - ) - ) + self.deferred.errback(BodyExceededMaxSize()) self.deferred = defer.Deferred() self.transport.loseConnection() @@ -782,12 +783,15 @@ class _ReadBodyToFileProtocol(protocol.Protocol): self.deferred.errback(reason) -def readBodyToFile( +def read_body_with_max_size( response: IResponse, stream: BinaryIO, max_size: Optional[int] ) -> defer.Deferred: """ Read a HTTP response body to a file-object. Optionally enforcing a maximum file size. + If the maximum file size is reached, the returned Deferred will resolve to a + Failure with a BodyExceededMaxSize exception. + Args: response: The HTTP response to read from. stream: The file-object to write to. @@ -798,7 +802,7 @@ def readBodyToFile( """ d = defer.Deferred() - response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) + response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size)) return d diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index 5e08ef1664..b3b6dbcab0 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -15,17 +15,19 @@ import logging import random import time +from io import BytesIO from typing import Callable, Dict, Optional, Tuple import attr from twisted.internet import defer from twisted.internet.interfaces import IReactorTime -from twisted.web.client import RedirectAgent, readBody +from twisted.web.client import RedirectAgent from twisted.web.http import stringToDatetime from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IResponse +from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size from synapse.logging.context import make_deferred_yieldable from synapse.util import Clock, json_decoder from synapse.util.caches.ttlcache import TTLCache @@ -53,6 +55,9 @@ WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600 # lower bound for .well-known cache period WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 +# The maximum size (in bytes) to allow a well-known file to be. +WELL_KNOWN_MAX_SIZE = 50 * 1024 # 50 KiB + # Attempt to refetch a cached well-known N% of the TTL before it expires. # e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then # we'll start trying to refetch 1 minute before it expires. @@ -229,6 +234,9 @@ class WellKnownResolver: server_name: name of the server, from the requested url retry: Whether to retry the request if it fails. + Raises: + _FetchWellKnownFailure if we fail to lookup a result + Returns: Returns the response object and body. Response may be a non-200 response. """ @@ -250,7 +258,11 @@ class WellKnownResolver: b"GET", uri, headers=Headers(headers) ) ) - body = await make_deferred_yieldable(readBody(response)) + body_stream = BytesIO() + await make_deferred_yieldable( + read_body_with_max_size(response, body_stream, WELL_KNOWN_MAX_SIZE) + ) + body = body_stream.getvalue() if 500 <= response.code < 600: raise Exception("Non-200 response %s" % (response.code,)) @@ -259,6 +271,15 @@ class WellKnownResolver: except defer.CancelledError: # Bail if we've been cancelled raise + except BodyExceededMaxSize: + # If the well-known file was too large, do not keep attempting + # to download it, but consider it a temporary error. + logger.warning( + "Requested .well-known file for %s is too large > %r bytes", + server_name.decode("ascii"), + WELL_KNOWN_MAX_SIZE, + ) + raise _FetchWellKnownFailure(temporary=True) except Exception as e: if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS: logger.info("Error fetching %s: %s", uri_str, e) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c962994727..b261e078c4 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -37,16 +37,19 @@ from twisted.web.iweb import IBodyProducer, IResponse import synapse.metrics import synapse.util.retryutils from synapse.api.errors import ( + Codes, FederationDeniedError, HttpResponseException, RequestSendFailed, + SynapseError, ) from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, BlacklistingReactorWrapper, + BodyExceededMaxSize, encode_query_args, - readBodyToFile, + read_body_with_max_size, ) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging.context import make_deferred_yieldable @@ -975,9 +978,15 @@ class MatrixFederationHttpClient: headers = dict(response.headers.getAllRawHeaders()) try: - d = readBodyToFile(response, output_stream, max_size) + d = read_body_with_max_size(response, output_stream, max_size) d.addTimeout(self.default_timeout, self.reactor) length = await make_deferred_yieldable(d) + except BodyExceededMaxSize: + msg = "Requested file is too large > %r bytes" % (max_size,) + logger.warning( + "{%s} [%s] %s", request.txn_id, request.destination, msg, + ) + SynapseError(502, msg, Codes.TOO_LARGE) except Exception as e: logger.warning( "{%s} [%s] Error reading response: %s", diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 626acdcaa3..4e51839d0f 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -36,6 +36,7 @@ from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.http.federation.srv_resolver import Server from synapse.http.federation.well_known_resolver import ( + WELL_KNOWN_MAX_SIZE, WellKnownResolver, _cache_period_from_headers, ) @@ -1107,6 +1108,32 @@ class MatrixFederationAgentTests(unittest.TestCase): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, None) + def test_well_known_too_large(self): + """A well-known query that returns a result which is too large should be rejected.""" + self.reactor.lookups["testserv"] = "1.2.3.4" + + fetch_d = defer.ensureDeferred( + self.well_known_resolver.get_well_known(b"testserv") + ) + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 443) + + self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + response_headers={b"Cache-Control": b"max-age=1000"}, + content=b'{ "m.server": "' + (b"a" * WELL_KNOWN_MAX_SIZE) + b'" }', + ) + + # The result is sucessful, but disabled delegation. + r = self.successResultOf(fetch_d) + self.assertIsNone(r.delegated_server) + def test_srv_fallbacks(self): """Test that other SRV results are tried if the first one fails. """ -- cgit 1.5.1 From 2ec8ca5e6046b207eabe008101631b978758ac6d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 12 Jan 2021 12:34:16 +0000 Subject: Remove SynapseRequest.get_user_agent (#9069) SynapseRequest is in danger of becoming a bit of a dumping-ground for "useful stuff relating to Requests", which isn't really its intention (its purpose is to override render, finished and connectionLost to set up the LoggingContext and write the right entries to the request log). Putting utility functions inside SynapseRequest means that lots of our code ends up requiring a SynapseRequest when there is nothing synapse-specific about the Request at all, and any old twisted.web.iweb.IRequest will do. This increases code coupling and makes testing more difficult. In short: move get_user_agent out to a utility function. --- changelog.d/9069.misc | 1 + synapse/api/auth.py | 3 ++- synapse/handlers/auth.py | 6 +++--- synapse/handlers/sso.py | 5 +++-- synapse/http/__init__.py | 15 +++++++++++++++ synapse/http/site.py | 18 ++---------------- tests/handlers/test_cas.py | 2 +- tests/handlers/test_oidc.py | 3 +-- tests/handlers/test_saml.py | 2 +- 9 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 changelog.d/9069.misc (limited to 'synapse/http') diff --git a/changelog.d/9069.misc b/changelog.d/9069.misc new file mode 100644 index 0000000000..5e9e62d252 --- /dev/null +++ b/changelog.d/9069.misc @@ -0,0 +1 @@ +Remove `SynapseRequest.get_user_agent`. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 48c4d7b0be..6d6703250b 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -33,6 +33,7 @@ from synapse.api.errors import ( from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.appservice import ApplicationService from synapse.events import EventBase +from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest from synapse.logging import opentracing as opentracing from synapse.storage.databases.main.registration import TokenLookupResult @@ -187,7 +188,7 @@ class Auth: """ try: ip_addr = self.hs.get_ip_from_request(request) - user_agent = request.get_user_agent("") + user_agent = get_request_user_agent(request) access_token = self.get_access_token_from_request(request) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f4434673dc..5b86ee85c7 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -49,8 +49,10 @@ from synapse.api.errors import ( UserDeactivatedError, ) from synapse.api.ratelimiting import Ratelimiter +from synapse.handlers._base import BaseHandler from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker +from synapse.http import get_request_user_agent from synapse.http.server import finish_request, respond_with_html from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread @@ -62,8 +64,6 @@ from synapse.util.async_helpers import maybe_awaitable from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.threepids import canonicalise_email -from ._base import BaseHandler - if TYPE_CHECKING: from synapse.app.homeserver import HomeServer @@ -539,7 +539,7 @@ class AuthHandler(BaseHandler): # authentication flow. await self.store.set_ui_auth_clientdict(sid, clientdict) - user_agent = request.get_user_agent("") + user_agent = get_request_user_agent(request) await self.store.add_user_agent_ip_to_ui_auth_session( session.session_id, user_agent, clientip diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 2da1ea2223..740df7e4a0 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -23,6 +23,7 @@ from typing_extensions import NoReturn, Protocol from twisted.web.http import Request from synapse.api.errors import Codes, RedirectException, SynapseError +from synapse.http import get_request_user_agent from synapse.http.server import respond_with_html from synapse.http.site import SynapseRequest from synapse.types import JsonDict, UserID, contains_invalid_mxid_characters @@ -362,7 +363,7 @@ class SsoHandler: attributes, auth_provider_id, remote_user_id, - request.get_user_agent(""), + get_request_user_agent(request), request.getClientIP(), ) @@ -628,7 +629,7 @@ class SsoHandler: attributes, session.auth_provider_id, session.remote_user_id, - request.get_user_agent(""), + get_request_user_agent(request), request.getClientIP(), ) diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 59b01b812c..4bc3cb53f0 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -17,6 +17,7 @@ import re from twisted.internet import task from twisted.web.client import FileBodyProducer +from twisted.web.iweb import IRequest from synapse.api.errors import SynapseError @@ -50,3 +51,17 @@ class QuieterFileBodyProducer(FileBodyProducer): FileBodyProducer.stopProducing(self) except task.TaskStopped: pass + + +def get_request_user_agent(request: IRequest, default: str = "") -> str: + """Return the last User-Agent header, or the given default. + """ + # There could be raw utf-8 bytes in the User-Agent header. + + # N.B. if you don't do this, the logger explodes cryptically + # with maximum recursion trying to log errors about + # the charset problem. + # c.f. https://github.com/matrix-org/synapse/issues/3471 + + h = request.getHeader(b"User-Agent") + return h.decode("ascii", "replace") if h else default diff --git a/synapse/http/site.py b/synapse/http/site.py index 5a5790831b..12ec3f851f 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ from twisted.python.failure import Failure from twisted.web.server import Request, Site from synapse.config.server import ListenerConfig -from synapse.http import redact_uri +from synapse.http import get_request_user_agent, redact_uri from synapse.http.request_metrics import RequestMetrics, requests_counter from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.types import Requester @@ -113,15 +113,6 @@ class SynapseRequest(Request): method = self.method.decode("ascii") return method - def get_user_agent(self, default: str) -> str: - """Return the last User-Agent header, or the given default. - """ - user_agent = self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] - if user_agent is None: - return default - - return user_agent.decode("ascii", "replace") - def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. @@ -292,12 +283,7 @@ class SynapseRequest(Request): # and can see that we're doing something wrong. authenticated_entity = repr(self.requester) # type: ignore[unreachable] - # ...or could be raw utf-8 bytes in the User-Agent header. - # N.B. if you don't do this, the logger explodes cryptically - # with maximum recursion trying to log errors about - # the charset problem. - # c.f. https://github.com/matrix-org/synapse/issues/3471 - user_agent = self.get_user_agent("-") + user_agent = get_request_user_agent(self, "-") code = str(self.code) if not self.finished: diff --git a/tests/handlers/test_cas.py b/tests/handlers/test_cas.py index bd7a1b6891..c37bb6440e 100644 --- a/tests/handlers/test_cas.py +++ b/tests/handlers/test_cas.py @@ -118,4 +118,4 @@ class CasHandlerTestCase(HomeserverTestCase): def _mock_request(): """Returns a mock which will stand in as a SynapseRequest""" - return Mock(spec=["getClientIP", "get_user_agent"]) + return Mock(spec=["getClientIP", "getHeader"]) diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py index f5df657814..4ce0f74f22 100644 --- a/tests/handlers/test_oidc.py +++ b/tests/handlers/test_oidc.py @@ -1011,7 +1011,7 @@ def _build_callback_request( "addCookie", "requestHeaders", "getClientIP", - "get_user_agent", + "getHeader", ] ) @@ -1020,5 +1020,4 @@ def _build_callback_request( request.args[b"code"] = [code.encode("utf-8")] request.args[b"state"] = [state.encode("utf-8")] request.getClientIP.return_value = ip_address - request.get_user_agent.return_value = user_agent return request diff --git a/tests/handlers/test_saml.py b/tests/handlers/test_saml.py index 548038214b..261c7083d1 100644 --- a/tests/handlers/test_saml.py +++ b/tests/handlers/test_saml.py @@ -262,4 +262,4 @@ class SamlHandlerTestCase(HomeserverTestCase): def _mock_request(): """Returns a mock which will stand in as a SynapseRequest""" - return Mock(spec=["getClientIP", "get_user_agent"]) + return Mock(spec=["getClientIP", "getHeader"]) -- cgit 1.5.1 From 723b19748aca0de4ae0eb2d6d89844fad04f29ae Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 12 Jan 2021 11:07:01 -0500 Subject: Handle bad JSON data being returned from the federation API. (#9070) --- changelog.d/9070.bugfix | 1 + synapse/http/matrixfederationclient.py | 10 ++++++++++ tests/http/test_fedclient.py | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9070.bugfix (limited to 'synapse/http') diff --git a/changelog.d/9070.bugfix b/changelog.d/9070.bugfix new file mode 100644 index 0000000000..72b8fe9f1c --- /dev/null +++ b/changelog.d/9070.bugfix @@ -0,0 +1 @@ +Fix `JSONDecodeError` spamming the logs when sending transactions to remote servers. diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b261e078c4..b7103d6541 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -174,6 +174,16 @@ async def _handle_json_response( d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) body = await make_deferred_yieldable(d) + except ValueError as e: + # The JSON content was invalid. + logger.warning( + "{%s} [%s] Failed to parse JSON response - %s %s", + request.txn_id, + request.destination, + request.method, + request.uri.decode("ascii"), + ) + raise RequestSendFailed(e, can_retry=False) from e except defer.TimeoutError as e: logger.warning( "{%s} [%s] Timed out reading response - %s %s", diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 212484a7fe..9c52c8fdca 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -560,4 +560,4 @@ class FederationClientTests(HomeserverTestCase): self.pump() f = self.failureResultOf(test_d) - self.assertIsInstance(f.value, ValueError) + self.assertIsInstance(f.value, RequestSendFailed) -- cgit 1.5.1 From e385c8b4734b95c0738d6f4022d7bbb1621167db Mon Sep 17 00:00:00 2001 From: Marcus Date: Tue, 12 Jan 2021 18:20:30 +0100 Subject: Don't apply the IP range blacklist to proxy connections (#9084) It is expected that the proxy would be on a private IP address so the configured proxy should be connected to regardless of the IP range blacklist. --- changelog.d/9084.bugfix | 1 + synapse/http/client.py | 1 + synapse/http/proxyagent.py | 16 +++++- tests/http/test_proxyagent.py | 130 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 changelog.d/9084.bugfix (limited to 'synapse/http') diff --git a/changelog.d/9084.bugfix b/changelog.d/9084.bugfix new file mode 100644 index 0000000000..415dd8b259 --- /dev/null +++ b/changelog.d/9084.bugfix @@ -0,0 +1 @@ +Don't blacklist connections to the configured proxy. Contributed by @Bubu. diff --git a/synapse/http/client.py b/synapse/http/client.py index 29f40ddf5f..5f74ee1149 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -341,6 +341,7 @@ class SimpleHttpClient: self.agent = ProxyAgent( self.reactor, + hs.get_reactor(), connectTimeout=15, contextFactory=self.hs.get_http_client_context_factory(), pool=pool, diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index e32d3f43e0..b730d2c634 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -39,6 +39,10 @@ class ProxyAgent(_AgentBase): reactor: twisted reactor to place outgoing connections. + proxy_reactor: twisted reactor to use for connections to the proxy server + reactor might have some blacklisting applied (i.e. for DNS queries), + but we need unblocked access to the proxy. + contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the verification parameters of OpenSSL. The default is to use a `BrowserLikePolicyForHTTPS`, so unless you have special @@ -59,6 +63,7 @@ class ProxyAgent(_AgentBase): def __init__( self, reactor, + proxy_reactor=None, contextFactory=BrowserLikePolicyForHTTPS(), connectTimeout=None, bindAddress=None, @@ -68,6 +73,11 @@ class ProxyAgent(_AgentBase): ): _AgentBase.__init__(self, reactor, pool) + if proxy_reactor is None: + self.proxy_reactor = reactor + else: + self.proxy_reactor = proxy_reactor + self._endpoint_kwargs = {} if connectTimeout is not None: self._endpoint_kwargs["timeout"] = connectTimeout @@ -75,11 +85,11 @@ class ProxyAgent(_AgentBase): self._endpoint_kwargs["bindAddress"] = bindAddress self.http_proxy_endpoint = _http_proxy_endpoint( - http_proxy, reactor, **self._endpoint_kwargs + http_proxy, self.proxy_reactor, **self._endpoint_kwargs ) self.https_proxy_endpoint = _http_proxy_endpoint( - https_proxy, reactor, **self._endpoint_kwargs + https_proxy, self.proxy_reactor, **self._endpoint_kwargs ) self._policy_for_https = contextFactory @@ -137,7 +147,7 @@ class ProxyAgent(_AgentBase): request_path = uri elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: endpoint = HTTPConnectProxyEndpoint( - self._reactor, + self.proxy_reactor, self.https_proxy_endpoint, parsed_uri.host, parsed_uri.port, diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index 22abf76515..9a56e1c14a 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -15,12 +15,14 @@ import logging import treq +from netaddr import IPSet from twisted.internet import interfaces # noqa: F401 from twisted.internet.protocol import Factory from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.web.http import HTTPChannel +from synapse.http.client import BlacklistingReactorWrapper from synapse.http.proxyagent import ProxyAgent from tests.http import TestServerTLSConnectionFactory, get_test_https_policy @@ -292,6 +294,134 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + def test_http_request_via_proxy_with_blacklist(self): + # The blacklist includes the configured proxy IP. + agent = ProxyAgent( + BlacklistingReactorWrapper( + self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) + ), + self.reactor, + http_proxy=b"proxy.com:8888", + ) + + self.reactor.lookups["proxy.com"] = "1.2.3.5" + d = agent.request(b"GET", b"http://test.com") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.5") + self.assertEqual(port, 8888) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, _get_test_protocol_factory() + ) + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"http://test.com") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + + def test_https_request_via_proxy_with_blacklist(self): + # The blacklist includes the configured proxy IP. + agent = ProxyAgent( + BlacklistingReactorWrapper( + self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) + ), + self.reactor, + contextFactory=get_test_https_policy(), + https_proxy=b"proxy.com", + ) + + self.reactor.lookups["proxy.com"] = "1.2.3.5" + d = agent.request(b"GET", b"https://test.com/abc") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.5") + self.assertEqual(port, 1080) + + # make a test HTTP server, and wire up the client + proxy_server = self._make_connection( + client_factory, _get_test_protocol_factory() + ) + + # fish the transports back out so that we can do the old switcheroo + s2c_transport = proxy_server.transport + client_protocol = s2c_transport.other + c2s_transport = client_protocol.transport + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending CONNECT request + self.assertEqual(len(proxy_server.requests), 1) + + request = proxy_server.requests[0] + self.assertEqual(request.method, b"CONNECT") + self.assertEqual(request.path, b"test.com:443") + + # tell the proxy server not to close the connection + proxy_server.persistent = True + + # this just stops the http Request trying to do a chunked response + # request.setHeader(b"Content-Length", b"0") + request.finish() + + # now we can replace the proxy channel with a new, SSL-wrapped HTTP channel + ssl_factory = _wrap_server_factory_for_tls(_get_test_protocol_factory()) + ssl_protocol = ssl_factory.buildProtocol(None) + http_server = ssl_protocol.wrappedProtocol + + ssl_protocol.makeConnection( + FakeTransport(client_protocol, self.reactor, ssl_protocol) + ) + c2s_transport.other = ssl_protocol + + self.reactor.advance(0) + + server_name = ssl_protocol._tlsConnection.get_servername() + expected_sni = b"test.com" + self.assertEqual( + server_name, + expected_sni, + "Expected SNI %s but got %s" % (expected_sni, server_name), + ) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/abc") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + def _wrap_server_factory_for_tls(factory, sanlist=None): """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory -- cgit 1.5.1 From aee8e6a95d26391a8449409c836fa3965cdc6c51 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 13 Jan 2021 13:27:49 -0500 Subject: Reduce scope of exception handler. (#9106) Removes a bare `except Exception` clause and replaces it with catching a specific exception around the portion that might throw. --- changelog.d/9106.misc | 1 + synapse/http/client.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/9106.misc (limited to 'synapse/http') diff --git a/changelog.d/9106.misc b/changelog.d/9106.misc new file mode 100644 index 0000000000..4cd2605754 --- /dev/null +++ b/changelog.d/9106.misc @@ -0,0 +1 @@ +Reduce the scope of caught exceptions in `BlacklistingAgentWrapper`. diff --git a/synapse/http/client.py b/synapse/http/client.py index 5f74ee1149..dc4b81ca60 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -32,7 +32,7 @@ from typing import ( import treq from canonicaljson import encode_canonical_json -from netaddr import IPAddress, IPSet +from netaddr import AddrFormatError, IPAddress, IPSet from prometheus_client import Counter from zope.interface import implementer, provider @@ -261,16 +261,16 @@ class BlacklistingAgentWrapper(Agent): try: ip_address = IPAddress(h.hostname) - + except AddrFormatError: + # Not an IP + pass + else: if check_against_blacklist( ip_address, self._ip_whitelist, self._ip_blacklist ): logger.info("Blocking access to %s due to blacklist" % (ip_address,)) e = SynapseError(403, "IP address blocked by IP blacklist entry") return defer.fail(Failure(e)) - except Exception: - # Not an IP - pass return self._agent.request( method, uri, headers=headers, bodyProducer=bodyProducer -- cgit 1.5.1 From 12702be951de37b0166827ef5f4ccf630a2a59c4 Mon Sep 17 00:00:00 2001 From: Tim Leung Date: Thu, 14 Jan 2021 11:59:26 +0000 Subject: Fix wrong arguments being passed to BlacklistingAgentWrapper (#9098) A reactor was being passed instead of a whitelist for the BlacklistingAgentWrapper used by the WellyKnownResolver. This coulld cause exceptions when attempting to connect to IP addresses that are blacklisted, but in reality this did not have any observable affect since this code is not used for IP literals. --- changelog.d/9098.misc | 1 + synapse/http/federation/matrix_federation_agent.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/9098.misc (limited to 'synapse/http') diff --git a/changelog.d/9098.misc b/changelog.d/9098.misc new file mode 100644 index 0000000000..907020d428 --- /dev/null +++ b/changelog.d/9098.misc @@ -0,0 +1 @@ +Fix the wrong arguments being passed to `BlacklistingAgentWrapper` from `MatrixFederationAgent`. Contributed by Timothy Leung. diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 3b756a7dc2..4c06a117d3 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -102,7 +102,6 @@ class MatrixFederationAgent: pool=self._pool, contextFactory=tls_client_options_factory, ), - self._reactor, ip_blacklist=ip_blacklist, ), user_agent=self.user_agent, -- cgit 1.5.1 From 74dd90604189a0310c7b2f7eed0e6b2ac26d04f1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 15 Jan 2021 11:00:13 -0500 Subject: Avoid raising the body exceeded error multiple times. (#9108) Previously this code generated unreferenced `Deferred` instances which caused "Unhandled Deferreds" errors to appear in error situations. --- changelog.d/9108.bugfix | 1 + synapse/http/client.py | 12 ++- .../federation/test_matrix_federation_agent.py | 4 +- tests/http/test_client.py | 101 +++++++++++++++++++++ 4 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 changelog.d/9108.bugfix create mode 100644 tests/http/test_client.py (limited to 'synapse/http') diff --git a/changelog.d/9108.bugfix b/changelog.d/9108.bugfix new file mode 100644 index 0000000000..465ef63508 --- /dev/null +++ b/changelog.d/9108.bugfix @@ -0,0 +1 @@ +Fix "Unhandled error in Deferred: BodyExceededMaxSize" errors when .well-known files that are too large. diff --git a/synapse/http/client.py b/synapse/http/client.py index dc4b81ca60..df498c8645 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -766,14 +766,24 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): self.max_size = max_size def dataReceived(self, data: bytes) -> None: + # If the deferred was called, bail early. + if self.deferred.called: + return + self.stream.write(data) self.length += len(data) + # The first time the maximum size is exceeded, error and cancel the + # connection. dataReceived might be called again if data was received + # in the meantime. if self.max_size is not None and self.length >= self.max_size: self.deferred.errback(BodyExceededMaxSize()) - self.deferred = defer.Deferred() self.transport.loseConnection() def connectionLost(self, reason: Failure) -> None: + # If the maximum size was already exceeded, there's nothing to do. + if self.deferred.called: + return + if reason.check(ResponseDone): self.deferred.callback(self.length) elif reason.check(PotentialDataLoss): diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 4e51839d0f..686012dd25 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -1095,7 +1095,7 @@ class MatrixFederationAgentTests(unittest.TestCase): # Expire both caches and repeat the request self.reactor.pump((10000.0,)) - # Repated the request, this time it should fail if the lookup fails. + # Repeat the request, this time it should fail if the lookup fails. fetch_d = defer.ensureDeferred( self.well_known_resolver.get_well_known(b"testserv") ) @@ -1130,7 +1130,7 @@ class MatrixFederationAgentTests(unittest.TestCase): content=b'{ "m.server": "' + (b"a" * WELL_KNOWN_MAX_SIZE) + b'" }', ) - # The result is sucessful, but disabled delegation. + # The result is successful, but disabled delegation. r = self.successResultOf(fetch_d) self.assertIsNone(r.delegated_server) diff --git a/tests/http/test_client.py b/tests/http/test_client.py new file mode 100644 index 0000000000..f17c122e93 --- /dev/null +++ b/tests/http/test_client.py @@ -0,0 +1,101 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from io import BytesIO + +from mock import Mock + +from twisted.python.failure import Failure +from twisted.web.client import ResponseDone + +from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size + +from tests.unittest import TestCase + + +class ReadBodyWithMaxSizeTests(TestCase): + def setUp(self): + """Start reading the body, returns the response, result and proto""" + self.response = Mock() + self.result = BytesIO() + self.deferred = read_body_with_max_size(self.response, self.result, 6) + + # Fish the protocol out of the response. + self.protocol = self.response.deliverBody.call_args[0][0] + self.protocol.transport = Mock() + + def _cleanup_error(self): + """Ensure that the error in the Deferred is handled gracefully.""" + called = [False] + + def errback(f): + called[0] = True + + self.deferred.addErrback(errback) + self.assertTrue(called[0]) + + def test_no_error(self): + """A response that is NOT too large.""" + + # Start sending data. + self.protocol.dataReceived(b"12345") + # Close the connection. + self.protocol.connectionLost(Failure(ResponseDone())) + + self.assertEqual(self.result.getvalue(), b"12345") + self.assertEqual(self.deferred.result, 5) + + def test_too_large(self): + """A response which is too large raises an exception.""" + + # Start sending data. + self.protocol.dataReceived(b"1234567890") + # Close the connection. + self.protocol.connectionLost(Failure(ResponseDone())) + + self.assertEqual(self.result.getvalue(), b"1234567890") + self.assertIsInstance(self.deferred.result, Failure) + self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize) + self._cleanup_error() + + def test_multiple_packets(self): + """Data should be accummulated through mutliple packets.""" + + # Start sending data. + self.protocol.dataReceived(b"12") + self.protocol.dataReceived(b"34") + # Close the connection. + self.protocol.connectionLost(Failure(ResponseDone())) + + self.assertEqual(self.result.getvalue(), b"1234") + self.assertEqual(self.deferred.result, 4) + + def test_additional_data(self): + """A connection can receive data after being closed.""" + + # Start sending data. + self.protocol.dataReceived(b"1234567890") + self.assertIsInstance(self.deferred.result, Failure) + self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize) + self.protocol.transport.loseConnection.assert_called_once() + + # More data might have come in. + self.protocol.dataReceived(b"1234567890") + # Close the connection. + self.protocol.connectionLost(Failure(ResponseDone())) + + self.assertEqual(self.result.getvalue(), b"1234567890") + self.assertIsInstance(self.deferred.result, Failure) + self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize) + self._cleanup_error() -- cgit 1.5.1 From 2b467d0b61ff094a305c2c92f0cec6ca04b5d16d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 18 Jan 2021 10:21:42 -0500 Subject: Properly raise an exception when the body exceeds the max size. (#9145) ...instead of just creating the exception object and doing nothing with it. --- changelog.d/9145.bugfix | 1 + synapse/http/client.py | 2 +- synapse/http/matrixfederationclient.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/9145.bugfix (limited to 'synapse/http') diff --git a/changelog.d/9145.bugfix b/changelog.d/9145.bugfix new file mode 100644 index 0000000000..947cf1dc25 --- /dev/null +++ b/changelog.d/9145.bugfix @@ -0,0 +1 @@ +Fix "UnboundLocalError: local variable 'length' referenced before assignment" errors when the response body exceeds the expected size. This bug was introduced in v1.25.0. diff --git a/synapse/http/client.py b/synapse/http/client.py index df498c8645..37ccf5ab98 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -724,7 +724,7 @@ class SimpleHttpClient: read_body_with_max_size(response, output_stream, max_size) ) except BodyExceededMaxSize: - SynapseError( + raise SynapseError( 502, "Requested file is too large > %r bytes" % (max_size,), Codes.TOO_LARGE, diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b7103d6541..19293bf673 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -996,7 +996,7 @@ class MatrixFederationHttpClient: logger.warning( "{%s} [%s] %s", request.txn_id, request.destination, msg, ) - SynapseError(502, msg, Codes.TOO_LARGE) + raise SynapseError(502, msg, Codes.TOO_LARGE) except Exception as e: logger.warning( "{%s} [%s] Error reading response: %s", -- cgit 1.5.1 From 0cd2938bc854d947ae8102ded688a626c9fac5b5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 20 Jan 2021 13:15:14 +0000 Subject: Support icons for Identity Providers (#9154) --- changelog.d/9154.feature | 1 + docs/sample_config.yaml | 4 ++ mypy.ini | 1 + synapse/config/oidc_config.py | 20 ++++++ synapse/config/server.py | 2 +- synapse/federation/federation_server.py | 2 +- synapse/federation/transport/server.py | 2 +- synapse/handlers/cas_handler.py | 4 ++ synapse/handlers/oidc_handler.py | 3 + synapse/handlers/room.py | 2 +- synapse/handlers/saml_handler.py | 4 ++ synapse/handlers/sso.py | 5 ++ synapse/http/endpoint.py | 79 --------------------- synapse/res/templates/sso_login_idp_picker.html | 3 + synapse/rest/client/v1/room.py | 3 +- synapse/storage/databases/main/room.py | 6 +- synapse/types.py | 2 +- synapse/util/stringutils.py | 92 +++++++++++++++++++++++++ tests/http/test_endpoint.py | 2 +- 19 files changed, 146 insertions(+), 91 deletions(-) create mode 100644 changelog.d/9154.feature delete mode 100644 synapse/http/endpoint.py (limited to 'synapse/http') diff --git a/changelog.d/9154.feature b/changelog.d/9154.feature new file mode 100644 index 0000000000..01a24dcf49 --- /dev/null +++ b/changelog.d/9154.feature @@ -0,0 +1 @@ +Add support for multiple SSO Identity Providers. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 7fdd798d70..b49a5da8cc 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1726,6 +1726,10 @@ saml2_config: # idp_name: A user-facing name for this identity provider, which is used to # offer the user a choice of login mechanisms. # +# idp_icon: An optional icon for this identity provider, which is presented +# by identity picker pages. If given, must be an MXC URI of the format +# mxc:/// +# # discover: set to 'false' to disable the use of the OIDC discovery mechanism # to discover endpoints. Defaults to true. # diff --git a/mypy.ini b/mypy.ini index b996867121..bd99069c81 100644 --- a/mypy.ini +++ b/mypy.ini @@ -100,6 +100,7 @@ files = synapse/util/async_helpers.py, synapse/util/caches, synapse/util/metrics.py, + synapse/util/stringutils.py, tests/replication, tests/test_utils, tests/handlers/test_password_providers.py, diff --git a/synapse/config/oidc_config.py b/synapse/config/oidc_config.py index df55367434..f257fcd412 100644 --- a/synapse/config/oidc_config.py +++ b/synapse/config/oidc_config.py @@ -23,6 +23,7 @@ from synapse.config._util import validate_config from synapse.python_dependencies import DependencyException, check_requirements from synapse.types import Collection, JsonDict from synapse.util.module_loader import load_module +from synapse.util.stringutils import parse_and_validate_mxc_uri from ._base import Config, ConfigError @@ -66,6 +67,10 @@ class OIDCConfig(Config): # idp_name: A user-facing name for this identity provider, which is used to # offer the user a choice of login mechanisms. # + # idp_icon: An optional icon for this identity provider, which is presented + # by identity picker pages. If given, must be an MXC URI of the format + # mxc:/// + # # discover: set to 'false' to disable the use of the OIDC discovery mechanism # to discover endpoints. Defaults to true. # @@ -207,6 +212,7 @@ OIDC_PROVIDER_CONFIG_SCHEMA = { "properties": { "idp_id": {"type": "string", "minLength": 1, "maxLength": 128}, "idp_name": {"type": "string"}, + "idp_icon": {"type": "string"}, "discover": {"type": "boolean"}, "issuer": {"type": "string"}, "client_id": {"type": "string"}, @@ -336,9 +342,20 @@ def _parse_oidc_config_dict( config_path + ("idp_id",), ) + # MSC2858 also specifies that the idp_icon must be a valid MXC uri + idp_icon = oidc_config.get("idp_icon") + if idp_icon is not None: + try: + parse_and_validate_mxc_uri(idp_icon) + except ValueError as e: + raise ConfigError( + "idp_icon must be a valid MXC URI", config_path + ("idp_icon",) + ) from e + return OidcProviderConfig( idp_id=idp_id, idp_name=oidc_config.get("idp_name", "OIDC"), + idp_icon=idp_icon, discover=oidc_config.get("discover", True), issuer=oidc_config["issuer"], client_id=oidc_config["client_id"], @@ -366,6 +383,9 @@ class OidcProviderConfig: # user-facing name for this identity provider. idp_name = attr.ib(type=str) + # Optional MXC URI for icon for this IdP. + idp_icon = attr.ib(type=Optional[str]) + # whether the OIDC discovery mechanism is used to discover endpoints discover = attr.ib(type=bool) diff --git a/synapse/config/server.py b/synapse/config/server.py index 75ba161f35..47a0370173 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -26,7 +26,7 @@ import yaml from netaddr import IPSet from synapse.api.room_versions import KNOWN_ROOM_VERSIONS -from synapse.http.endpoint import parse_and_validate_server_name +from synapse.util.stringutils import parse_and_validate_server_name from ._base import Config, ConfigError diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e5339aca23..171d25c945 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -49,7 +49,6 @@ from synapse.events import EventBase from synapse.federation.federation_base import FederationBase, event_from_pdu_json from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction -from synapse.http.endpoint import parse_server_name from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import ( make_deferred_yieldable, @@ -66,6 +65,7 @@ from synapse.types import JsonDict, get_domain_from_id from synapse.util import glob_to_regex, json_decoder, unwrapFirstError from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.caches.response_cache import ResponseCache +from synapse.util.stringutils import parse_server_name if TYPE_CHECKING: from synapse.server import HomeServer diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index cfd094e58f..95c64510a9 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -28,7 +28,6 @@ from synapse.api.urls import ( FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX, ) -from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.server import JsonResource from synapse.http.servlet import ( parse_boolean_from_args, @@ -45,6 +44,7 @@ from synapse.logging.opentracing import ( ) from synapse.server import HomeServer from synapse.types import ThirdPartyInstanceID, get_domain_from_id +from synapse.util.stringutils import parse_and_validate_server_name from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) diff --git a/synapse/handlers/cas_handler.py b/synapse/handlers/cas_handler.py index f3430c6713..0f342c607b 100644 --- a/synapse/handlers/cas_handler.py +++ b/synapse/handlers/cas_handler.py @@ -80,6 +80,10 @@ class CasHandler: # user-facing name of this auth provider self.idp_name = "CAS" + # we do not currently support icons for CAS auth, but this is required by + # the SsoIdentityProvider protocol type. + self.idp_icon = None + self._sso_handler = hs.get_sso_handler() self._sso_handler.register_identity_provider(self) diff --git a/synapse/handlers/oidc_handler.py b/synapse/handlers/oidc_handler.py index ba686d74b2..1607e12935 100644 --- a/synapse/handlers/oidc_handler.py +++ b/synapse/handlers/oidc_handler.py @@ -271,6 +271,9 @@ class OidcProvider: # user-facing name of this auth provider self.idp_name = provider.idp_name + # MXC URI for icon for this auth provider + self.idp_icon = provider.idp_icon + self._sso_handler = hs.get_sso_handler() self._sso_handler.register_identity_provider(self) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 3bece6d668..ee27d99135 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -38,7 +38,6 @@ from synapse.api.filtering import Filter from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.events import EventBase from synapse.events.utils import copy_power_levels_contents -from synapse.http.endpoint import parse_and_validate_server_name from synapse.storage.state import StateFilter from synapse.types import ( JsonDict, @@ -55,6 +54,7 @@ from synapse.types import ( from synapse.util import stringutils from synapse.util.async_helpers import Linearizer from synapse.util.caches.response_cache import ResponseCache +from synapse.util.stringutils import parse_and_validate_server_name from synapse.visibility import filter_events_for_client from ._base import BaseHandler diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index a8376543c9..38461cf79d 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -78,6 +78,10 @@ class SamlHandler(BaseHandler): # user-facing name of this auth provider self.idp_name = "SAML" + # we do not currently support icons for SAML auth, but this is required by + # the SsoIdentityProvider protocol type. + self.idp_icon = None + # a map from saml session id to Saml2SessionData object self._outstanding_requests_dict = {} # type: Dict[str, Saml2SessionData] diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index dcc85e9871..d493327a10 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -75,6 +75,11 @@ class SsoIdentityProvider(Protocol): def idp_name(self) -> str: """User-facing name for this provider""" + @property + def idp_icon(self) -> Optional[str]: + """Optional MXC URI for user-facing icon""" + return None + @abc.abstractmethod async def handle_redirect_request( self, diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py deleted file mode 100644 index 92a5b606c8..0000000000 --- a/synapse/http/endpoint.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import re - -logger = logging.getLogger(__name__) - - -def parse_server_name(server_name): - """Split a server name into host/port parts. - - Args: - server_name (str): server name to parse - - Returns: - Tuple[str, int|None]: host/port parts. - - Raises: - ValueError if the server name could not be parsed. - """ - try: - if server_name[-1] == "]": - # ipv6 literal, hopefully - return server_name, None - - domain_port = server_name.rsplit(":", 1) - domain = domain_port[0] - port = int(domain_port[1]) if domain_port[1:] else None - return domain, port - except Exception: - raise ValueError("Invalid server name '%s'" % server_name) - - -VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z") - - -def parse_and_validate_server_name(server_name): - """Split a server name into host/port parts and do some basic validation. - - Args: - server_name (str): server name to parse - - Returns: - Tuple[str, int|None]: host/port parts. - - Raises: - ValueError if the server name could not be parsed. - """ - host, port = parse_server_name(server_name) - - # these tests don't need to be bulletproof as we'll find out soon enough - # if somebody is giving us invalid data. What we *do* need is to be sure - # that nobody is sneaking IP literals in that look like hostnames, etc. - - # look for ipv6 literals - if host[0] == "[": - if host[-1] != "]": - raise ValueError("Mismatched [...] in server name '%s'" % (server_name,)) - return host, port - - # otherwise it should only be alphanumerics. - if not VALID_HOST_REGEX.match(host): - raise ValueError( - "Server name '%s' contains invalid characters" % (server_name,) - ) - - return host, port diff --git a/synapse/res/templates/sso_login_idp_picker.html b/synapse/res/templates/sso_login_idp_picker.html index f53c9cd679..5b38481012 100644 --- a/synapse/res/templates/sso_login_idp_picker.html +++ b/synapse/res/templates/sso_login_idp_picker.html @@ -17,6 +17,9 @@
  • +{% if p.idp_icon %} + +{% endif %}
  • {% endfor %} diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index e6725b03b0..f95627ee61 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -32,7 +32,6 @@ from synapse.api.errors import ( ) from synapse.api.filtering import Filter from synapse.events.utils import format_event_for_client_v2 -from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.servlet import ( RestServlet, assert_params_in_dict, @@ -47,7 +46,7 @@ from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID from synapse.util import json_decoder -from synapse.util.stringutils import random_string +from synapse.util.stringutils import parse_and_validate_server_name, random_string if TYPE_CHECKING: import synapse.server diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 284f2ce77c..a9fcb5f59c 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -16,7 +16,6 @@ import collections import logging -import re from abc import abstractmethod from enum import Enum from typing import Any, Dict, List, Optional, Tuple @@ -30,6 +29,7 @@ from synapse.storage.databases.main.search import SearchStore from synapse.types import JsonDict, ThirdPartyInstanceID from synapse.util import json_encoder from synapse.util.caches.descriptors import cached +from synapse.util.stringutils import MXC_REGEX logger = logging.getLogger(__name__) @@ -660,8 +660,6 @@ class RoomWorkerStore(SQLBaseStore): The local and remote media as a lists of tuples where the key is the hostname and the value is the media ID. """ - mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") - sql = """ SELECT stream_ordering, json FROM events JOIN event_json USING (room_id, event_id) @@ -688,7 +686,7 @@ class RoomWorkerStore(SQLBaseStore): for url in (content_url, thumbnail_url): if not url: continue - matches = mxc_re.match(url) + matches = MXC_REGEX.match(url) if matches: hostname = matches.group(1) media_id = matches.group(2) diff --git a/synapse/types.py b/synapse/types.py index 20a43d05bf..eafe729dfe 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -37,7 +37,7 @@ from signedjson.key import decode_verify_key_bytes from unpaddedbase64 import decode_base64 from synapse.api.errors import Codes, SynapseError -from synapse.http.endpoint import parse_and_validate_server_name +from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: from synapse.appservice.api import ApplicationService diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index b103c8694c..f8038bf861 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -18,6 +18,7 @@ import random import re import string from collections.abc import Iterable +from typing import Optional, Tuple from synapse.api.errors import Codes, SynapseError @@ -26,6 +27,15 @@ _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@" # https://matrix.org/docs/spec/client_server/r0.6.0#post-matrix-client-r0-register-email-requesttoken client_secret_regex = re.compile(r"^[0-9a-zA-Z\.\=\_\-]+$") +# https://matrix.org/docs/spec/client_server/r0.6.1#matrix-content-mxc-uris, +# together with https://github.com/matrix-org/matrix-doc/issues/2177 which basically +# says "there is no grammar for media ids" +# +# The server_name part of this is purposely lax: use parse_and_validate_mxc for +# additional validation. +# +MXC_REGEX = re.compile("^mxc://([^/]+)/([^/#?]+)$") + # random_string and random_string_with_symbols are used for a range of things, # some cryptographically important, some less so. We use SystemRandom to make sure # we get cryptographically-secure randoms. @@ -59,6 +69,88 @@ def assert_valid_client_secret(client_secret): ) +def parse_server_name(server_name: str) -> Tuple[str, Optional[int]]: + """Split a server name into host/port parts. + + Args: + server_name: server name to parse + + Returns: + host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + try: + if server_name[-1] == "]": + # ipv6 literal, hopefully + return server_name, None + + domain_port = server_name.rsplit(":", 1) + domain = domain_port[0] + port = int(domain_port[1]) if domain_port[1:] else None + return domain, port + except Exception: + raise ValueError("Invalid server name '%s'" % server_name) + + +VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z") + + +def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]]: + """Split a server name into host/port parts and do some basic validation. + + Args: + server_name: server name to parse + + Returns: + host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + host, port = parse_server_name(server_name) + + # these tests don't need to be bulletproof as we'll find out soon enough + # if somebody is giving us invalid data. What we *do* need is to be sure + # that nobody is sneaking IP literals in that look like hostnames, etc. + + # look for ipv6 literals + if host[0] == "[": + if host[-1] != "]": + raise ValueError("Mismatched [...] in server name '%s'" % (server_name,)) + return host, port + + # otherwise it should only be alphanumerics. + if not VALID_HOST_REGEX.match(host): + raise ValueError( + "Server name '%s' contains invalid characters" % (server_name,) + ) + + return host, port + + +def parse_and_validate_mxc_uri(mxc: str) -> Tuple[str, Optional[int], str]: + """Parse the given string as an MXC URI + + Checks that the "server name" part is a valid server name + + Args: + mxc: the (alleged) MXC URI to be checked + Returns: + hostname, port, media id + Raises: + ValueError if the URI cannot be parsed + """ + m = MXC_REGEX.match(mxc) + if not m: + raise ValueError("mxc URI %r did not match expected format" % (mxc,)) + server_name = m.group(1) + media_id = m.group(2) + host, port = parse_and_validate_server_name(server_name) + return host, port, media_id + + def shortstr(iterable: Iterable, maxitems: int = 5) -> str: """If iterable has maxitems or fewer, return the stringification of a list containing those items. diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py index b2e9533b07..d06ea518ce 100644 --- a/tests/http/test_endpoint.py +++ b/tests/http/test_endpoint.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.endpoint import parse_and_validate_server_name, parse_server_name +from synapse.util.stringutils import parse_and_validate_server_name, parse_server_name from tests import unittest -- cgit 1.5.1 From a737cc27134c50059440ca33510b0baea53b4225 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 27 Jan 2021 12:41:24 +0000 Subject: Implement MSC2858 support (#9183) Fixes #8928. --- changelog.d/9183.feature | 1 + synapse/config/_base.pyi | 2 + synapse/config/experimental.py | 29 ++++++++++++ synapse/config/homeserver.py | 2 + synapse/handlers/sso.py | 23 +++++++--- synapse/http/server.py | 44 ++++++++++++++---- synapse/rest/client/v1/login.py | 55 ++++++++++++++++++++--- tests/rest/client/v1/test_login.py | 92 ++++++++++++++++++++++++++++++++++++++ tests/utils.py | 3 +- 9 files changed, 230 insertions(+), 21 deletions(-) create mode 100644 changelog.d/9183.feature create mode 100644 synapse/config/experimental.py (limited to 'synapse/http') diff --git a/changelog.d/9183.feature b/changelog.d/9183.feature new file mode 100644 index 0000000000..2d5c735042 --- /dev/null +++ b/changelog.d/9183.feature @@ -0,0 +1 @@ +Add experimental support for allowing clients to pick an SSO Identity Provider ([MSC2858](https://github.com/matrix-org/matrix-doc/pull/2858). diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 29aa064e57..3ccea4b02d 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -9,6 +9,7 @@ from synapse.config import ( consent_config, database, emailconfig, + experimental, groups, jwt_config, key, @@ -48,6 +49,7 @@ def path_exists(file_path: str): ... class RootConfig: server: server.ServerConfig + experimental: experimental.ExperimentalConfig tls: tls.TlsConfig database: database.DatabaseConfig logging: logger.LoggingConfig diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py new file mode 100644 index 0000000000..b1c1c51e4d --- /dev/null +++ b/synapse/config/experimental.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.config._base import Config +from synapse.types import JsonDict + + +class ExperimentalConfig(Config): + """Config section for enabling experimental features""" + + section = "experimental" + + def read_config(self, config: JsonDict, **kwargs): + experimental = config.get("experimental_features") or {} + + # MSC2858 (multiple SSO identity providers) + self.msc2858_enabled = experimental.get("msc2858_enabled", False) # type: bool diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 4bd2b3587b..64a2429f77 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -24,6 +24,7 @@ from .cas import CasConfig from .consent_config import ConsentConfig from .database import DatabaseConfig from .emailconfig import EmailConfig +from .experimental import ExperimentalConfig from .federation import FederationConfig from .groups import GroupsConfig from .jwt_config import JWTConfig @@ -57,6 +58,7 @@ class HomeServerConfig(RootConfig): config_classes = [ ServerConfig, + ExperimentalConfig, TlsConfig, FederationConfig, CacheConfig, diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index d493327a10..afc1341d09 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -23,7 +23,7 @@ from typing_extensions import NoReturn, Protocol from twisted.web.http import Request from synapse.api.constants import LoginType -from synapse.api.errors import Codes, RedirectException, SynapseError +from synapse.api.errors import Codes, NotFoundError, RedirectException, SynapseError from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http import get_request_user_agent from synapse.http.server import respond_with_html @@ -235,7 +235,10 @@ class SsoHandler: respond_with_html(request, code, html) async def handle_redirect_request( - self, request: SynapseRequest, client_redirect_url: bytes, + self, + request: SynapseRequest, + client_redirect_url: bytes, + idp_id: Optional[str], ) -> str: """Handle a request to /login/sso/redirect @@ -243,6 +246,7 @@ class SsoHandler: request: incoming HTTP request client_redirect_url: the URL that we should redirect the client to after login. + idp_id: optional identity provider chosen by the client Returns: the URI to redirect to @@ -252,10 +256,19 @@ class SsoHandler: 400, "Homeserver not configured for SSO.", errcode=Codes.UNRECOGNIZED ) + # if the client chose an IdP, use that + idp = None # type: Optional[SsoIdentityProvider] + if idp_id: + idp = self._identity_providers.get(idp_id) + if not idp: + raise NotFoundError("Unknown identity provider") + # if we only have one auth provider, redirect to it directly - if len(self._identity_providers) == 1: - ap = next(iter(self._identity_providers.values())) - return await ap.handle_redirect_request(request, client_redirect_url) + elif len(self._identity_providers) == 1: + idp = next(iter(self._identity_providers.values())) + + if idp: + return await idp.handle_redirect_request(request, client_redirect_url) # otherwise, redirect to the IDP picker return "/_synapse/client/pick_idp?" + urlencode( diff --git a/synapse/http/server.py b/synapse/http/server.py index e464bfe6c7..d69d579b3a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -22,10 +22,22 @@ import types import urllib from http import HTTPStatus from io import BytesIO -from typing import Any, Callable, Dict, Iterator, List, Tuple, Union +from typing import ( + Any, + Awaitable, + Callable, + Dict, + Iterable, + Iterator, + List, + Pattern, + Tuple, + Union, +) import jinja2 from canonicaljson import iterencode_canonical_json +from typing_extensions import Protocol from zope.interface import implementer from twisted.internet import defer, interfaces @@ -168,11 +180,25 @@ def wrap_async_request_handler(h): return preserve_fn(wrapped_async_request_handler) -class HttpServer: +# Type of a callback method for processing requests +# it is actually called with a SynapseRequest and a kwargs dict for the params, +# but I can't figure out how to represent that. +ServletCallback = Callable[ + ..., Union[None, Awaitable[None], Tuple[int, Any], Awaitable[Tuple[int, Any]]] +] + + +class HttpServer(Protocol): """ Interface for registering callbacks on a HTTP server """ - def register_paths(self, method, path_patterns, callback): + def register_paths( + self, + method: str, + path_patterns: Iterable[Pattern], + callback: ServletCallback, + servlet_classname: str, + ) -> None: """ Register a callback that gets fired if we receive a http request with the given method for a path that matches the given regex. @@ -180,12 +206,14 @@ class HttpServer: an unpacked tuple. Args: - method (str): The method to listen to. - path_patterns (list): The regex used to match requests. - callback (function): The function to fire if we receive a matched + method: The HTTP method to listen to. + path_patterns: The regex used to match requests. + callback: The function to fire if we receive a matched request. The first argument will be the request object and subsequent arguments will be any matched groups from the regex. - This should return a tuple of (code, response). + This should return either tuple of (code, response), or None. + servlet_classname (str): The name of the handler to be used in prometheus + and opentracing logs. """ pass @@ -354,7 +382,7 @@ class JsonResource(DirectServeJsonResource): def _get_handler_for_request( self, request: SynapseRequest - ) -> Tuple[Callable, str, Dict[str, str]]: + ) -> Tuple[ServletCallback, str, Dict[str, str]]: """Finds a callback method to handle the given request. Returns: diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index be938df962..0a561eea60 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -19,7 +19,8 @@ from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Optional from synapse.api.errors import Codes, LoginError, SynapseError from synapse.api.ratelimiting import Ratelimiter from synapse.appservice import ApplicationService -from synapse.http.server import finish_request +from synapse.handlers.sso import SsoIdentityProvider +from synapse.http.server import HttpServer, finish_request from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, @@ -60,11 +61,14 @@ class LoginRestServlet(RestServlet): self.saml2_enabled = hs.config.saml2_enabled self.cas_enabled = hs.config.cas_enabled self.oidc_enabled = hs.config.oidc_enabled + self._msc2858_enabled = hs.config.experimental.msc2858_enabled self.auth = hs.get_auth() self.auth_handler = self.hs.get_auth_handler() self.registration_handler = hs.get_registration_handler() + self._sso_handler = hs.get_sso_handler() + self._well_known_builder = WellKnownBuilder(hs) self._address_ratelimiter = Ratelimiter( clock=hs.get_clock(), @@ -89,8 +93,17 @@ class LoginRestServlet(RestServlet): flows.append({"type": LoginRestServlet.CAS_TYPE}) if self.cas_enabled or self.saml2_enabled or self.oidc_enabled: - flows.append({"type": LoginRestServlet.SSO_TYPE}) - # While its valid for us to advertise this login type generally, + sso_flow = {"type": LoginRestServlet.SSO_TYPE} # type: JsonDict + + if self._msc2858_enabled: + sso_flow["org.matrix.msc2858.identity_providers"] = [ + _get_auth_flow_dict_for_idp(idp) + for idp in self._sso_handler.get_identity_providers().values() + ] + + flows.append(sso_flow) + + # While it's valid for us to advertise this login type generally, # synapse currently only gives out these tokens as part of the # SSO login flow. # Generally we don't want to advertise login flows that clients @@ -311,8 +324,20 @@ class LoginRestServlet(RestServlet): return result +def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict: + """Return an entry for the login flow dict + + Returns an entry suitable for inclusion in "identity_providers" in the + response to GET /_matrix/client/r0/login + """ + e = {"id": idp.idp_id, "name": idp.idp_name} # type: JsonDict + if idp.idp_icon: + e["icon"] = idp.idp_icon + return e + + class SsoRedirectServlet(RestServlet): - PATTERNS = client_patterns("/login/(cas|sso)/redirect", v1=True) + PATTERNS = client_patterns("/login/(cas|sso)/redirect$", v1=True) def __init__(self, hs: "HomeServer"): # make sure that the relevant handlers are instantiated, so that they @@ -324,13 +349,31 @@ class SsoRedirectServlet(RestServlet): if hs.config.oidc_enabled: hs.get_oidc_handler() self._sso_handler = hs.get_sso_handler() + self._msc2858_enabled = hs.config.experimental.msc2858_enabled + + def register(self, http_server: HttpServer) -> None: + super().register(http_server) + if self._msc2858_enabled: + # expose additional endpoint for MSC2858 support + http_server.register_paths( + "GET", + client_patterns( + "/org.matrix.msc2858/login/sso/redirect/(?P[A-Za-z0-9_.~-]+)$", + releases=(), + unstable=True, + ), + self.on_GET, + self.__class__.__name__, + ) - async def on_GET(self, request: SynapseRequest): + async def on_GET( + self, request: SynapseRequest, idp_id: Optional[str] = None + ) -> None: client_redirect_url = parse_string( request, "redirectUrl", required=True, encoding=None ) sso_url = await self._sso_handler.handle_redirect_request( - request, client_redirect_url + request, client_redirect_url, idp_id, ) logger.info("Redirecting to %s", sso_url) request.redirect(sso_url) diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index 2672ce24c6..e2bb945453 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -75,6 +75,10 @@ TEST_CLIENT_REDIRECT_URL = 'https://x?&q"+%3D%2B"="fö%26=o"' # the query params in TEST_CLIENT_REDIRECT_URL EXPECTED_CLIENT_REDIRECT_URL_PARAMS = [("", ""), ('q" =+"', '"fö&=o"')] +# (possibly experimental) login flows we expect to appear in the list after the normal +# ones +ADDITIONAL_LOGIN_FLOWS = [{"type": "uk.half-shot.msc2778.login.application_service"}] + class LoginRestServletTestCase(unittest.HomeserverTestCase): @@ -426,6 +430,57 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): d["/_synapse/oidc"] = OIDCResource(self.hs) return d + def test_get_login_flows(self): + """GET /login should return password and SSO flows""" + channel = self.make_request("GET", "/_matrix/client/r0/login") + self.assertEqual(channel.code, 200, channel.result) + + expected_flows = [ + {"type": "m.login.cas"}, + {"type": "m.login.sso"}, + {"type": "m.login.token"}, + {"type": "m.login.password"}, + ] + ADDITIONAL_LOGIN_FLOWS + + self.assertCountEqual(channel.json_body["flows"], expected_flows) + + @override_config({"experimental_features": {"msc2858_enabled": True}}) + def test_get_msc2858_login_flows(self): + """The SSO flow should include IdP info if MSC2858 is enabled""" + channel = self.make_request("GET", "/_matrix/client/r0/login") + self.assertEqual(channel.code, 200, channel.result) + + # stick the flows results in a dict by type + flow_results = {} # type: Dict[str, Any] + for f in channel.json_body["flows"]: + flow_type = f["type"] + self.assertNotIn( + flow_type, flow_results, "duplicate flow type %s" % (flow_type,) + ) + flow_results[flow_type] = f + + self.assertIn("m.login.sso", flow_results, "m.login.sso was not returned") + sso_flow = flow_results.pop("m.login.sso") + # we should have a set of IdPs + self.assertCountEqual( + sso_flow["org.matrix.msc2858.identity_providers"], + [ + {"id": "cas", "name": "CAS"}, + {"id": "saml", "name": "SAML"}, + {"id": "oidc-idp1", "name": "IDP1"}, + {"id": "oidc", "name": "OIDC"}, + ], + ) + + # the rest of the flows are simple + expected_flows = [ + {"type": "m.login.cas"}, + {"type": "m.login.token"}, + {"type": "m.login.password"}, + ] + ADDITIONAL_LOGIN_FLOWS + + self.assertCountEqual(flow_results.values(), expected_flows) + def test_multi_sso_redirect(self): """/login/sso/redirect should redirect to an identity picker""" # first hit the redirect url, which should redirect to our idp picker @@ -564,6 +619,43 @@ class MultiSSOTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 400, channel.result) + def test_client_idp_redirect_msc2858_disabled(self): + """If the client tries to pick an IdP but MSC2858 is disabled, return a 400""" + channel = self.make_request( + "GET", + "/_matrix/client/unstable/org.matrix.msc2858/login/sso/redirect/oidc?redirectUrl=" + + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL), + ) + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.json_body["errcode"], "M_UNRECOGNIZED") + + @override_config({"experimental_features": {"msc2858_enabled": True}}) + def test_client_idp_redirect_to_unknown(self): + """If the client tries to pick an unknown IdP, return a 404""" + channel = self.make_request( + "GET", + "/_matrix/client/unstable/org.matrix.msc2858/login/sso/redirect/xxx?redirectUrl=" + + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL), + ) + self.assertEqual(channel.code, 404, channel.result) + self.assertEqual(channel.json_body["errcode"], "M_NOT_FOUND") + + @override_config({"experimental_features": {"msc2858_enabled": True}}) + def test_client_idp_redirect_to_oidc(self): + """If the client pick a known IdP, redirect to it""" + channel = self.make_request( + "GET", + "/_matrix/client/unstable/org.matrix.msc2858/login/sso/redirect/oidc?redirectUrl=" + + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL), + ) + + self.assertEqual(channel.code, 302, channel.result) + oidc_uri = channel.headers.getRawHeaders("Location")[0] + oidc_uri_path, oidc_uri_query = oidc_uri.split("?", 1) + + # it should redirect us to the auth page of the OIDC server + self.assertEqual(oidc_uri_path, TEST_OIDC_AUTH_ENDPOINT) + @staticmethod def _get_value_from_macaroon(macaroon: pymacaroons.Macaroon, key: str) -> str: prefix = key + " = " diff --git a/tests/utils.py b/tests/utils.py index 09614093bc..022223cf24 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -33,7 +33,6 @@ from synapse.api.room_versions import RoomVersions from synapse.config.database import DatabaseConnectionConfig from synapse.config.homeserver import HomeServerConfig from synapse.config.server import DEFAULT_ROOM_VERSION -from synapse.http.server import HttpServer from synapse.logging.context import current_context, set_current_context from synapse.server import HomeServer from synapse.storage import DataStore @@ -351,7 +350,7 @@ def mock_getRawHeaders(headers=None): # This is a mock /resource/ not an entire server -class MockHttpResource(HttpServer): +class MockHttpResource: def __init__(self, prefix=""): self.callbacks = [] # 3-tuple of method/pattern/function self.prefix = prefix -- cgit 1.5.1 From f78d07bf005f7212bcc74256721677a3b255ea0e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 1 Feb 2021 13:15:51 +0000 Subject: Split out a separate endpoint to complete SSO registration (#9262) There are going to be a couple of paths to get to the final step of SSO reg, and I want the URL in the browser to consistent. So, let's move the final step onto a separate path, which we redirect to. --- changelog.d/9262.feature | 1 + synapse/app/homeserver.py | 2 + synapse/handlers/sso.py | 81 ++++++++++++++++++++++------ synapse/http/server.py | 7 +++ synapse/rest/synapse/client/pick_username.py | 16 +++--- synapse/rest/synapse/client/sso_register.py | 50 +++++++++++++++++ tests/rest/client/v1/test_login.py | 14 ++++- 7 files changed, 145 insertions(+), 26 deletions(-) create mode 100644 changelog.d/9262.feature create mode 100644 synapse/rest/synapse/client/sso_register.py (limited to 'synapse/http') diff --git a/changelog.d/9262.feature b/changelog.d/9262.feature new file mode 100644 index 0000000000..c21b197ca1 --- /dev/null +++ b/changelog.d/9262.feature @@ -0,0 +1 @@ +Improve the user experience of setting up an account via single-sign on. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 57a2f5237c..86d6f73674 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -62,6 +62,7 @@ from synapse.rest.health import HealthResource from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.synapse.client.pick_idp import PickIdpResource from synapse.rest.synapse.client.pick_username import pick_username_resource +from synapse.rest.synapse.client.sso_register import SsoRegisterResource from synapse.rest.well_known import WellKnownResource from synapse.server import HomeServer from synapse.storage import DataStore @@ -192,6 +193,7 @@ class SynapseHomeServer(HomeServer): "/_synapse/admin": AdminRestResource(self), "/_synapse/client/pick_username": pick_username_resource(self), "/_synapse/client/pick_idp": PickIdpResource(self), + "/_synapse/client/sso_register": SsoRegisterResource(self), } ) diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 3308b037d2..50c5ae142a 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -21,12 +21,13 @@ import attr from typing_extensions import NoReturn, Protocol from twisted.web.http import Request +from twisted.web.iweb import IRequest from synapse.api.constants import LoginType from synapse.api.errors import Codes, NotFoundError, RedirectException, SynapseError from synapse.handlers.ui_auth import UIAuthSessionDataConstants from synapse.http import get_request_user_agent -from synapse.http.server import respond_with_html +from synapse.http.server import respond_with_html, respond_with_redirect from synapse.http.site import SynapseRequest from synapse.types import JsonDict, UserID, contains_invalid_mxid_characters from synapse.util.async_helpers import Linearizer @@ -141,6 +142,9 @@ class UsernameMappingSession: # expiry time for the session, in milliseconds expiry_time_ms = attr.ib(type=int) + # choices made by the user + chosen_localpart = attr.ib(type=Optional[str], default=None) + # the HTTP cookie used to track the mapping session id USERNAME_MAPPING_SESSION_COOKIE_NAME = b"username_mapping_session" @@ -647,6 +651,25 @@ class SsoHandler: ) respond_with_html(request, 200, html) + def get_mapping_session(self, session_id: str) -> UsernameMappingSession: + """Look up the given username mapping session + + If it is not found, raises a SynapseError with an http code of 400 + + Args: + session_id: session to look up + Returns: + active mapping session + Raises: + SynapseError if the session is not found/has expired + """ + self._expire_old_sessions() + session = self._username_mapping_sessions.get(session_id) + if session: + return session + logger.info("Couldn't find session id %s", session_id) + raise SynapseError(400, "unknown session") + async def check_username_availability( self, localpart: str, session_id: str, ) -> bool: @@ -663,12 +686,7 @@ class SsoHandler: # make sure that there is a valid mapping session, to stop people dictionary- # scanning for accounts - - self._expire_old_sessions() - session = self._username_mapping_sessions.get(session_id) - if not session: - logger.info("Couldn't find session id %s", session_id) - raise SynapseError(400, "unknown session") + self.get_mapping_session(session_id) logger.info( "[session %s] Checking for availability of username %s", @@ -696,16 +714,33 @@ class SsoHandler: localpart: localpart requested by the user session_id: ID of the username mapping session, extracted from a cookie """ - self._expire_old_sessions() - session = self._username_mapping_sessions.get(session_id) - if not session: - logger.info("Couldn't find session id %s", session_id) - raise SynapseError(400, "unknown session") + session = self.get_mapping_session(session_id) + + # update the session with the user's choices + session.chosen_localpart = localpart + + # we're done; now we can register the user + respond_with_redirect(request, b"/_synapse/client/sso_register") + + async def register_sso_user(self, request: Request, session_id: str) -> None: + """Called once we have all the info we need to register a new user. - logger.info("[session %s] Registering localpart %s", session_id, localpart) + Does so and serves an HTTP response + + Args: + request: HTTP request + session_id: ID of the username mapping session, extracted from a cookie + """ + session = self.get_mapping_session(session_id) + + logger.info( + "[session %s] Registering localpart %s", + session_id, + session.chosen_localpart, + ) attributes = UserAttributes( - localpart=localpart, + localpart=session.chosen_localpart, display_name=session.display_name, emails=session.emails, ) @@ -720,7 +755,12 @@ class SsoHandler: request.getClientIP(), ) - logger.info("[session %s] Registered userid %s", session_id, user_id) + logger.info( + "[session %s] Registered userid %s with attributes %s", + session_id, + user_id, + attributes, + ) # delete the mapping session and the cookie del self._username_mapping_sessions[session_id] @@ -751,3 +791,14 @@ class SsoHandler: for session_id in to_expire: logger.info("Expiring mapping session %s", session_id) del self._username_mapping_sessions[session_id] + + +def get_username_mapping_session_cookie_from_request(request: IRequest) -> str: + """Extract the session ID from the cookie + + Raises a SynapseError if the cookie isn't found + """ + session_id = request.getCookie(USERNAME_MAPPING_SESSION_COOKIE_NAME) + if not session_id: + raise SynapseError(code=400, msg="missing session_id") + return session_id.decode("ascii", errors="replace") diff --git a/synapse/http/server.py b/synapse/http/server.py index d69d579b3a..8249732b27 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -761,6 +761,13 @@ def set_clickjacking_protection_headers(request: Request): request.setHeader(b"Content-Security-Policy", b"frame-ancestors 'none';") +def respond_with_redirect(request: Request, url: bytes) -> None: + """Write a 302 response to the request, if it is still alive.""" + logger.debug("Redirect to %s", url.decode("utf-8")) + request.redirect(url) + finish_request(request) + + def finish_request(request: Request): """ Finish writing the response to the request. diff --git a/synapse/rest/synapse/client/pick_username.py b/synapse/rest/synapse/client/pick_username.py index d3b6803e65..1bc737bad0 100644 --- a/synapse/rest/synapse/client/pick_username.py +++ b/synapse/rest/synapse/client/pick_username.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import TYPE_CHECKING import pkg_resources @@ -20,8 +21,7 @@ from twisted.web.http import Request from twisted.web.resource import Resource from twisted.web.static import File -from synapse.api.errors import SynapseError -from synapse.handlers.sso import USERNAME_MAPPING_SESSION_COOKIE_NAME +from synapse.handlers.sso import get_username_mapping_session_cookie_from_request from synapse.http.server import DirectServeHtmlResource, DirectServeJsonResource from synapse.http.servlet import parse_string from synapse.http.site import SynapseRequest @@ -61,12 +61,10 @@ class AvailabilityCheckResource(DirectServeJsonResource): async def _async_render_GET(self, request: Request): localpart = parse_string(request, "username", required=True) - session_id = request.getCookie(USERNAME_MAPPING_SESSION_COOKIE_NAME) - if not session_id: - raise SynapseError(code=400, msg="missing session_id") + session_id = get_username_mapping_session_cookie_from_request(request) is_available = await self._sso_handler.check_username_availability( - localpart, session_id.decode("ascii", errors="replace") + localpart, session_id ) return 200, {"available": is_available} @@ -79,10 +77,8 @@ class SubmitResource(DirectServeHtmlResource): async def _async_render_POST(self, request: SynapseRequest): localpart = parse_string(request, "username", required=True) - session_id = request.getCookie(USERNAME_MAPPING_SESSION_COOKIE_NAME) - if not session_id: - raise SynapseError(code=400, msg="missing session_id") + session_id = get_username_mapping_session_cookie_from_request(request) await self._sso_handler.handle_submit_username_request( - request, localpart, session_id.decode("ascii", errors="replace") + request, localpart, session_id ) diff --git a/synapse/rest/synapse/client/sso_register.py b/synapse/rest/synapse/client/sso_register.py new file mode 100644 index 0000000000..dfefeb7796 --- /dev/null +++ b/synapse/rest/synapse/client/sso_register.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING + +from twisted.web.http import Request + +from synapse.api.errors import SynapseError +from synapse.handlers.sso import get_username_mapping_session_cookie_from_request +from synapse.http.server import DirectServeHtmlResource + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class SsoRegisterResource(DirectServeHtmlResource): + """A resource which completes SSO registration + + This resource gets mounted at /_synapse/client/sso_register, and is shown + after we collect username and/or consent for a new SSO user. It (finally) registers + the user, and confirms redirect to the client + """ + + def __init__(self, hs: "HomeServer"): + super().__init__() + self._sso_handler = hs.get_sso_handler() + + async def _async_render_GET(self, request: Request) -> None: + try: + session_id = get_username_mapping_session_cookie_from_request(request) + except SynapseError as e: + logger.warning("Error fetching session cookie: %s", e) + self._sso_handler.render_error(request, "bad_session", e.msg, code=e.code) + return + await self._sso_handler.register_sso_user(request, session_id) diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index e2bb945453..f01215ed1c 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -31,6 +31,7 @@ from synapse.rest.client.v2_alpha import devices, register from synapse.rest.client.v2_alpha.account import WhoamiRestServlet from synapse.rest.synapse.client.pick_idp import PickIdpResource from synapse.rest.synapse.client.pick_username import pick_username_resource +from synapse.rest.synapse.client.sso_register import SsoRegisterResource from synapse.types import create_requester from tests import unittest @@ -1215,6 +1216,7 @@ class UsernamePickerTestCase(HomeserverTestCase): d = super().create_resource_dict() d["/_synapse/client/pick_username"] = pick_username_resource(self.hs) + d["/_synapse/client/sso_register"] = SsoRegisterResource(self.hs) d["/_synapse/oidc"] = OIDCResource(self.hs) return d @@ -1253,7 +1255,7 @@ class UsernamePickerTestCase(HomeserverTestCase): self.assertApproximates(session.expiry_time_ms, expected_expiry, tolerance=1000) # Now, submit a username to the username picker, which should serve a redirect - # back to the client + # to the completion page submit_path = picker_url + "/submit" content = urlencode({b"username": b"bobby"}).encode("utf8") chan = self.make_request( @@ -1270,6 +1272,16 @@ class UsernamePickerTestCase(HomeserverTestCase): ) self.assertEqual(chan.code, 302, chan.result) location_headers = chan.headers.getRawHeaders("Location") + + # send a request to the completion page, which should 302 to the client redirectUrl + chan = self.make_request( + "GET", + path=location_headers[0], + custom_headers=[("Cookie", "username_mapping_session=" + session_id)], + ) + self.assertEqual(chan.code, 302, chan.result) + location_headers = chan.headers.getRawHeaders("Location") + # ensure that the returned location matches the requested redirect URL path, query = location_headers[0].split("?", 1) self.assertEqual(path, "https://x") -- cgit 1.5.1