diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index 59b01b812c..4bc3cb53f0 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -17,6 +17,7 @@ import re
from twisted.internet import task
from twisted.web.client import FileBodyProducer
+from twisted.web.iweb import IRequest
from synapse.api.errors import SynapseError
@@ -50,3 +51,17 @@ class QuieterFileBodyProducer(FileBodyProducer):
FileBodyProducer.stopProducing(self)
except task.TaskStopped:
pass
+
+
+def get_request_user_agent(request: IRequest, default: str = "") -> str:
+ """Return the last User-Agent header, or the given default.
+ """
+ # There could be raw utf-8 bytes in the User-Agent header.
+
+ # N.B. if you don't do this, the logger explodes cryptically
+ # with maximum recursion trying to log errors about
+ # the charset problem.
+ # c.f. https://github.com/matrix-org/synapse/issues/3471
+
+ h = request.getHeader(b"User-Agent")
+ return h.decode("ascii", "replace") if h else default
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 29f40ddf5f..37ccf5ab98 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -32,7 +32,7 @@ from typing import (
import treq
from canonicaljson import encode_canonical_json
-from netaddr import IPAddress, IPSet
+from netaddr import AddrFormatError, IPAddress, IPSet
from prometheus_client import Counter
from zope.interface import implementer, provider
@@ -261,16 +261,16 @@ class BlacklistingAgentWrapper(Agent):
try:
ip_address = IPAddress(h.hostname)
-
+ except AddrFormatError:
+ # Not an IP
+ pass
+ else:
if check_against_blacklist(
ip_address, self._ip_whitelist, self._ip_blacklist
):
logger.info("Blocking access to %s due to blacklist" % (ip_address,))
e = SynapseError(403, "IP address blocked by IP blacklist entry")
return defer.fail(Failure(e))
- except Exception:
- # Not an IP
- pass
return self._agent.request(
method, uri, headers=headers, bodyProducer=bodyProducer
@@ -341,6 +341,7 @@ class SimpleHttpClient:
self.agent = ProxyAgent(
self.reactor,
+ hs.get_reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
@@ -723,7 +724,7 @@ class SimpleHttpClient:
read_body_with_max_size(response, output_stream, max_size)
)
except BodyExceededMaxSize:
- SynapseError(
+ raise SynapseError(
502,
"Requested file is too large > %r bytes" % (max_size,),
Codes.TOO_LARGE,
@@ -765,14 +766,24 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
self.max_size = max_size
def dataReceived(self, data: bytes) -> None:
+ # If the deferred was called, bail early.
+ if self.deferred.called:
+ return
+
self.stream.write(data)
self.length += len(data)
+ # The first time the maximum size is exceeded, error and cancel the
+ # connection. dataReceived might be called again if data was received
+ # in the meantime.
if self.max_size is not None and self.length >= self.max_size:
self.deferred.errback(BodyExceededMaxSize())
- self.deferred = defer.Deferred()
self.transport.loseConnection()
def connectionLost(self, reason: Failure) -> None:
+ # If the maximum size was already exceeded, there's nothing to do.
+ if self.deferred.called:
+ return
+
if reason.check(ResponseDone):
self.deferred.callback(self.length)
elif reason.check(PotentialDataLoss):
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
deleted file mode 100644
index 92a5b606c8..0000000000
--- a/synapse/http/endpoint.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2014-2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-import re
-
-logger = logging.getLogger(__name__)
-
-
-def parse_server_name(server_name):
- """Split a server name into host/port parts.
-
- Args:
- server_name (str): server name to parse
-
- Returns:
- Tuple[str, int|None]: host/port parts.
-
- Raises:
- ValueError if the server name could not be parsed.
- """
- try:
- if server_name[-1] == "]":
- # ipv6 literal, hopefully
- return server_name, None
-
- domain_port = server_name.rsplit(":", 1)
- domain = domain_port[0]
- port = int(domain_port[1]) if domain_port[1:] else None
- return domain, port
- except Exception:
- raise ValueError("Invalid server name '%s'" % server_name)
-
-
-VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z")
-
-
-def parse_and_validate_server_name(server_name):
- """Split a server name into host/port parts and do some basic validation.
-
- Args:
- server_name (str): server name to parse
-
- Returns:
- Tuple[str, int|None]: host/port parts.
-
- Raises:
- ValueError if the server name could not be parsed.
- """
- host, port = parse_server_name(server_name)
-
- # these tests don't need to be bulletproof as we'll find out soon enough
- # if somebody is giving us invalid data. What we *do* need is to be sure
- # that nobody is sneaking IP literals in that look like hostnames, etc.
-
- # look for ipv6 literals
- if host[0] == "[":
- if host[-1] != "]":
- raise ValueError("Mismatched [...] in server name '%s'" % (server_name,))
- return host, port
-
- # otherwise it should only be alphanumerics.
- if not VALID_HOST_REGEX.match(host):
- raise ValueError(
- "Server name '%s' contains invalid characters" % (server_name,)
- )
-
- return host, port
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 3b756a7dc2..4c06a117d3 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -102,7 +102,6 @@ class MatrixFederationAgent:
pool=self._pool,
contextFactory=tls_client_options_factory,
),
- self._reactor,
ip_blacklist=ip_blacklist,
),
user_agent=self.user_agent,
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index b261e078c4..19293bf673 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -174,6 +174,16 @@ async def _handle_json_response(
d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor)
body = await make_deferred_yieldable(d)
+ except ValueError as e:
+ # The JSON content was invalid.
+ logger.warning(
+ "{%s} [%s] Failed to parse JSON response - %s %s",
+ request.txn_id,
+ request.destination,
+ request.method,
+ request.uri.decode("ascii"),
+ )
+ raise RequestSendFailed(e, can_retry=False) from e
except defer.TimeoutError as e:
logger.warning(
"{%s} [%s] Timed out reading response - %s %s",
@@ -986,7 +996,7 @@ class MatrixFederationHttpClient:
logger.warning(
"{%s} [%s] %s", request.txn_id, request.destination, msg,
)
- SynapseError(502, msg, Codes.TOO_LARGE)
+ raise SynapseError(502, msg, Codes.TOO_LARGE)
except Exception as e:
logger.warning(
"{%s} [%s] Error reading response: %s",
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index e32d3f43e0..b730d2c634 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -39,6 +39,10 @@ class ProxyAgent(_AgentBase):
reactor: twisted reactor to place outgoing
connections.
+ proxy_reactor: twisted reactor to use for connections to the proxy server
+ reactor might have some blacklisting applied (i.e. for DNS queries),
+ but we need unblocked access to the proxy.
+
contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the
verification parameters of OpenSSL. The default is to use a
`BrowserLikePolicyForHTTPS`, so unless you have special
@@ -59,6 +63,7 @@ class ProxyAgent(_AgentBase):
def __init__(
self,
reactor,
+ proxy_reactor=None,
contextFactory=BrowserLikePolicyForHTTPS(),
connectTimeout=None,
bindAddress=None,
@@ -68,6 +73,11 @@ class ProxyAgent(_AgentBase):
):
_AgentBase.__init__(self, reactor, pool)
+ if proxy_reactor is None:
+ self.proxy_reactor = reactor
+ else:
+ self.proxy_reactor = proxy_reactor
+
self._endpoint_kwargs = {}
if connectTimeout is not None:
self._endpoint_kwargs["timeout"] = connectTimeout
@@ -75,11 +85,11 @@ class ProxyAgent(_AgentBase):
self._endpoint_kwargs["bindAddress"] = bindAddress
self.http_proxy_endpoint = _http_proxy_endpoint(
- http_proxy, reactor, **self._endpoint_kwargs
+ http_proxy, self.proxy_reactor, **self._endpoint_kwargs
)
self.https_proxy_endpoint = _http_proxy_endpoint(
- https_proxy, reactor, **self._endpoint_kwargs
+ https_proxy, self.proxy_reactor, **self._endpoint_kwargs
)
self._policy_for_https = contextFactory
@@ -137,7 +147,7 @@ class ProxyAgent(_AgentBase):
request_path = uri
elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint:
endpoint = HTTPConnectProxyEndpoint(
- self._reactor,
+ self.proxy_reactor,
self.https_proxy_endpoint,
parsed_uri.host,
parsed_uri.port,
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 5a5790831b..12ec3f851f 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -20,7 +20,7 @@ from twisted.python.failure import Failure
from twisted.web.server import Request, Site
from synapse.config.server import ListenerConfig
-from synapse.http import redact_uri
+from synapse.http import get_request_user_agent, redact_uri
from synapse.http.request_metrics import RequestMetrics, requests_counter
from synapse.logging.context import LoggingContext, PreserveLoggingContext
from synapse.types import Requester
@@ -113,15 +113,6 @@ class SynapseRequest(Request):
method = self.method.decode("ascii")
return method
- def get_user_agent(self, default: str) -> str:
- """Return the last User-Agent header, or the given default.
- """
- user_agent = self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
- if user_agent is None:
- return default
-
- return user_agent.decode("ascii", "replace")
-
def render(self, resrc):
# this is called once a Resource has been found to serve the request; in our
# case the Resource in question will normally be a JsonResource.
@@ -292,12 +283,7 @@ class SynapseRequest(Request):
# and can see that we're doing something wrong.
authenticated_entity = repr(self.requester) # type: ignore[unreachable]
- # ...or could be raw utf-8 bytes in the User-Agent header.
- # N.B. if you don't do this, the logger explodes cryptically
- # with maximum recursion trying to log errors about
- # the charset problem.
- # c.f. https://github.com/matrix-org/synapse/issues/3471
- user_agent = self.get_user_agent("-")
+ user_agent = get_request_user_agent(self, "-")
code = str(self.code)
if not self.finished:
|