diff --git a/synapse/http/client.py b/synapse/http/client.py
index e5b13593f2..29f40ddf5f 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -125,7 +125,7 @@ def _make_scheduler(reactor):
return _scheduler
-class IPBlacklistingResolver:
+class _IPBlacklistingResolver:
"""
A proxy for reactor.nameResolver which only produces non-blacklisted IP
addresses, preventing DNS rebinding attacks on URL preview.
@@ -199,6 +199,35 @@ class IPBlacklistingResolver:
return r
+@implementer(IReactorPluggableNameResolver)
+class BlacklistingReactorWrapper:
+ """
+ A Reactor wrapper which will prevent DNS resolution to blacklisted IP
+ addresses, to prevent DNS rebinding.
+ """
+
+ def __init__(
+ self,
+ reactor: IReactorPluggableNameResolver,
+ ip_whitelist: Optional[IPSet],
+ ip_blacklist: IPSet,
+ ):
+ self._reactor = reactor
+
+ # We need to use a DNS resolver which filters out blacklisted IP
+ # addresses, to prevent DNS rebinding.
+ self._nameResolver = _IPBlacklistingResolver(
+ self._reactor, ip_whitelist, ip_blacklist
+ )
+
+ def __getattr__(self, attr: str) -> Any:
+ # Passthrough to the real reactor except for the DNS resolver.
+ if attr == "nameResolver":
+ return self._nameResolver
+ else:
+ return getattr(self._reactor, attr)
+
+
class BlacklistingAgentWrapper(Agent):
"""
An Agent wrapper which will prevent access to IP addresses being accessed
@@ -292,22 +321,11 @@ class SimpleHttpClient:
self.user_agent = self.user_agent.encode("ascii")
if self._ip_blacklist:
- real_reactor = hs.get_reactor()
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
- nameResolver = IPBlacklistingResolver(
- real_reactor, self._ip_whitelist, self._ip_blacklist
+ self.reactor = BlacklistingReactorWrapper(
+ hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
)
-
- @implementer(IReactorPluggableNameResolver)
- class Reactor:
- def __getattr__(_self, attr):
- if attr == "nameResolver":
- return nameResolver
- else:
- return getattr(real_reactor, attr)
-
- self.reactor = Reactor()
else:
self.reactor = hs.get_reactor()
@@ -702,11 +720,14 @@ class SimpleHttpClient:
try:
length = await make_deferred_yieldable(
- readBodyToFile(response, output_stream, max_size)
+ read_body_with_max_size(response, output_stream, max_size)
+ )
+ except BodyExceededMaxSize:
+ SynapseError(
+ 502,
+ "Requested file is too large > %r bytes" % (max_size,),
+ Codes.TOO_LARGE,
)
- except SynapseError:
- # This can happen e.g. because the body is too large.
- raise
except Exception as e:
raise SynapseError(502, ("Failed to download remote body: %s" % e)) from e
@@ -730,7 +751,11 @@ def _timeout_to_request_timed_out_error(f: Failure):
return f
-class _ReadBodyToFileProtocol(protocol.Protocol):
+class BodyExceededMaxSize(Exception):
+ """The maximum allowed size of the HTTP body was exceeded."""
+
+
+class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
def __init__(
self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int]
):
@@ -743,13 +768,7 @@ class _ReadBodyToFileProtocol(protocol.Protocol):
self.stream.write(data)
self.length += len(data)
if self.max_size is not None and self.length >= self.max_size:
- self.deferred.errback(
- SynapseError(
- 502,
- "Requested file is too large > %r bytes" % (self.max_size,),
- Codes.TOO_LARGE,
- )
- )
+ self.deferred.errback(BodyExceededMaxSize())
self.deferred = defer.Deferred()
self.transport.loseConnection()
@@ -764,12 +783,15 @@ class _ReadBodyToFileProtocol(protocol.Protocol):
self.deferred.errback(reason)
-def readBodyToFile(
+def read_body_with_max_size(
response: IResponse, stream: BinaryIO, max_size: Optional[int]
) -> defer.Deferred:
"""
Read a HTTP response body to a file-object. Optionally enforcing a maximum file size.
+ If the maximum file size is reached, the returned Deferred will resolve to a
+ Failure with a BodyExceededMaxSize exception.
+
Args:
response: The HTTP response to read from.
stream: The file-object to write to.
@@ -780,7 +802,7 @@ def readBodyToFile(
"""
d = defer.Deferred()
- response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size))
+ response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size))
return d
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index e77f9587d0..3b756a7dc2 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -16,7 +16,7 @@ import logging
import urllib.parse
from typing import List, Optional
-from netaddr import AddrFormatError, IPAddress
+from netaddr import AddrFormatError, IPAddress, IPSet
from zope.interface import implementer
from twisted.internet import defer
@@ -31,6 +31,7 @@ from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IAgentEndpointFactory, IBodyProducer
from synapse.crypto.context_factory import FederationPolicyForHTTPS
+from synapse.http.client import BlacklistingAgentWrapper
from synapse.http.federation.srv_resolver import Server, SrvResolver
from synapse.http.federation.well_known_resolver import WellKnownResolver
from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -70,6 +71,7 @@ class MatrixFederationAgent:
reactor: IReactorCore,
tls_client_options_factory: Optional[FederationPolicyForHTTPS],
user_agent: bytes,
+ ip_blacklist: IPSet,
_srv_resolver: Optional[SrvResolver] = None,
_well_known_resolver: Optional[WellKnownResolver] = None,
):
@@ -90,12 +92,18 @@ class MatrixFederationAgent:
self.user_agent = user_agent
if _well_known_resolver is None:
+ # Note that the name resolver has already been wrapped in a
+ # IPBlacklistingResolver by MatrixFederationHttpClient.
_well_known_resolver = WellKnownResolver(
self._reactor,
- agent=Agent(
+ agent=BlacklistingAgentWrapper(
+ Agent(
+ self._reactor,
+ pool=self._pool,
+ contextFactory=tls_client_options_factory,
+ ),
self._reactor,
- pool=self._pool,
- contextFactory=tls_client_options_factory,
+ ip_blacklist=ip_blacklist,
),
user_agent=self.user_agent,
)
diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py
index 5e08ef1664..b3b6dbcab0 100644
--- a/synapse/http/federation/well_known_resolver.py
+++ b/synapse/http/federation/well_known_resolver.py
@@ -15,17 +15,19 @@
import logging
import random
import time
+from io import BytesIO
from typing import Callable, Dict, Optional, Tuple
import attr
from twisted.internet import defer
from twisted.internet.interfaces import IReactorTime
-from twisted.web.client import RedirectAgent, readBody
+from twisted.web.client import RedirectAgent
from twisted.web.http import stringToDatetime
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IResponse
+from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size
from synapse.logging.context import make_deferred_yieldable
from synapse.util import Clock, json_decoder
from synapse.util.caches.ttlcache import TTLCache
@@ -53,6 +55,9 @@ WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600
# lower bound for .well-known cache period
WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60
+# The maximum size (in bytes) to allow a well-known file to be.
+WELL_KNOWN_MAX_SIZE = 50 * 1024 # 50 KiB
+
# Attempt to refetch a cached well-known N% of the TTL before it expires.
# e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then
# we'll start trying to refetch 1 minute before it expires.
@@ -229,6 +234,9 @@ class WellKnownResolver:
server_name: name of the server, from the requested url
retry: Whether to retry the request if it fails.
+ Raises:
+ _FetchWellKnownFailure if we fail to lookup a result
+
Returns:
Returns the response object and body. Response may be a non-200 response.
"""
@@ -250,7 +258,11 @@ class WellKnownResolver:
b"GET", uri, headers=Headers(headers)
)
)
- body = await make_deferred_yieldable(readBody(response))
+ body_stream = BytesIO()
+ await make_deferred_yieldable(
+ read_body_with_max_size(response, body_stream, WELL_KNOWN_MAX_SIZE)
+ )
+ body = body_stream.getvalue()
if 500 <= response.code < 600:
raise Exception("Non-200 response %s" % (response.code,))
@@ -259,6 +271,15 @@ class WellKnownResolver:
except defer.CancelledError:
# Bail if we've been cancelled
raise
+ except BodyExceededMaxSize:
+ # If the well-known file was too large, do not keep attempting
+ # to download it, but consider it a temporary error.
+ logger.warning(
+ "Requested .well-known file for %s is too large > %r bytes",
+ server_name.decode("ascii"),
+ WELL_KNOWN_MAX_SIZE,
+ )
+ raise _FetchWellKnownFailure(temporary=True)
except Exception as e:
if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS:
logger.info("Error fetching %s: %s", uri_str, e)
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 4e27f93b7a..b261e078c4 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -26,11 +26,10 @@ import treq
from canonicaljson import encode_canonical_json
from prometheus_client import Counter
from signedjson.sign import sign_json
-from zope.interface import implementer
from twisted.internet import defer
from twisted.internet.error import DNSLookupError
-from twisted.internet.interfaces import IReactorPluggableNameResolver, IReactorTime
+from twisted.internet.interfaces import IReactorTime
from twisted.internet.task import _EPSILON, Cooperator
from twisted.web.http_headers import Headers
from twisted.web.iweb import IBodyProducer, IResponse
@@ -38,16 +37,19 @@ from twisted.web.iweb import IBodyProducer, IResponse
import synapse.metrics
import synapse.util.retryutils
from synapse.api.errors import (
+ Codes,
FederationDeniedError,
HttpResponseException,
RequestSendFailed,
+ SynapseError,
)
from synapse.http import QuieterFileBodyProducer
from synapse.http.client import (
BlacklistingAgentWrapper,
- IPBlacklistingResolver,
+ BlacklistingReactorWrapper,
+ BodyExceededMaxSize,
encode_query_args,
- readBodyToFile,
+ read_body_with_max_size,
)
from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
from synapse.logging.context import make_deferred_yieldable
@@ -221,31 +223,22 @@ class MatrixFederationHttpClient:
self.signing_key = hs.signing_key
self.server_name = hs.hostname
- real_reactor = hs.get_reactor()
-
# We need to use a DNS resolver which filters out blacklisted IP
# addresses, to prevent DNS rebinding.
- nameResolver = IPBlacklistingResolver(
- real_reactor, None, hs.config.federation_ip_range_blacklist
+ self.reactor = BlacklistingReactorWrapper(
+ hs.get_reactor(), None, hs.config.federation_ip_range_blacklist
)
- @implementer(IReactorPluggableNameResolver)
- class Reactor:
- def __getattr__(_self, attr):
- if attr == "nameResolver":
- return nameResolver
- else:
- return getattr(real_reactor, attr)
-
- self.reactor = Reactor()
-
user_agent = hs.version_string
if hs.config.user_agent_suffix:
user_agent = "%s %s" % (user_agent, hs.config.user_agent_suffix)
user_agent = user_agent.encode("ascii")
self.agent = MatrixFederationAgent(
- self.reactor, tls_client_options_factory, user_agent
+ self.reactor,
+ tls_client_options_factory,
+ user_agent,
+ hs.config.federation_ip_range_blacklist,
)
# Use a BlacklistingAgentWrapper to prevent circumventing the IP
@@ -985,9 +978,15 @@ class MatrixFederationHttpClient:
headers = dict(response.headers.getAllRawHeaders())
try:
- d = readBodyToFile(response, output_stream, max_size)
+ d = read_body_with_max_size(response, output_stream, max_size)
d.addTimeout(self.default_timeout, self.reactor)
length = await make_deferred_yieldable(d)
+ except BodyExceededMaxSize:
+ msg = "Requested file is too large > %r bytes" % (max_size,)
+ logger.warning(
+ "{%s} [%s] %s", request.txn_id, request.destination, msg,
+ )
+ SynapseError(502, msg, Codes.TOO_LARGE)
except Exception as e:
logger.warning(
"{%s} [%s] Error reading response: %s",
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 6a4e429a6c..e464bfe6c7 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -275,6 +275,10 @@ class DirectServeJsonResource(_AsyncResource):
formatting responses and errors as JSON.
"""
+ def __init__(self, canonical_json=False, extract_context=False):
+ super().__init__(extract_context)
+ self.canonical_json = canonical_json
+
def _send_response(
self, request: Request, code: int, response_object: Any,
):
@@ -318,9 +322,7 @@ class JsonResource(DirectServeJsonResource):
)
def __init__(self, hs, canonical_json=True, extract_context=False):
- super().__init__(extract_context)
-
- self.canonical_json = canonical_json
+ super().__init__(canonical_json, extract_context)
self.clock = hs.get_clock()
self.path_regexs = {}
self.hs = hs
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 5f0581dc3f..5a5790831b 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -128,8 +128,7 @@ class SynapseRequest(Request):
# create a LogContext for this request
request_id = self.get_request_id()
- logcontext = self.logcontext = LoggingContext(request_id)
- logcontext.request = request_id
+ self.logcontext = LoggingContext(request_id, request=request_id)
# override the Server header which is set by twisted
self.setHeader("Server", self.site.server_version_string)
|