diff options
Diffstat (limited to 'synapse/http')
-rw-r--r-- | synapse/http/__init__.py | 15 | ||||
-rw-r--r-- | synapse/http/client.py | 25 | ||||
-rw-r--r-- | synapse/http/endpoint.py | 79 | ||||
-rw-r--r-- | synapse/http/federation/matrix_federation_agent.py | 1 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 12 | ||||
-rw-r--r-- | synapse/http/proxyagent.py | 16 | ||||
-rw-r--r-- | synapse/http/site.py | 18 |
7 files changed, 59 insertions, 107 deletions
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 59b01b812c..4bc3cb53f0 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -17,6 +17,7 @@ import re from twisted.internet import task from twisted.web.client import FileBodyProducer +from twisted.web.iweb import IRequest from synapse.api.errors import SynapseError @@ -50,3 +51,17 @@ class QuieterFileBodyProducer(FileBodyProducer): FileBodyProducer.stopProducing(self) except task.TaskStopped: pass + + +def get_request_user_agent(request: IRequest, default: str = "") -> str: + """Return the last User-Agent header, or the given default. + """ + # There could be raw utf-8 bytes in the User-Agent header. + + # N.B. if you don't do this, the logger explodes cryptically + # with maximum recursion trying to log errors about + # the charset problem. + # c.f. https://github.com/matrix-org/synapse/issues/3471 + + h = request.getHeader(b"User-Agent") + return h.decode("ascii", "replace") if h else default diff --git a/synapse/http/client.py b/synapse/http/client.py index 29f40ddf5f..37ccf5ab98 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -32,7 +32,7 @@ from typing import ( import treq from canonicaljson import encode_canonical_json -from netaddr import IPAddress, IPSet +from netaddr import AddrFormatError, IPAddress, IPSet from prometheus_client import Counter from zope.interface import implementer, provider @@ -261,16 +261,16 @@ class BlacklistingAgentWrapper(Agent): try: ip_address = IPAddress(h.hostname) - + except AddrFormatError: + # Not an IP + pass + else: if check_against_blacklist( ip_address, self._ip_whitelist, self._ip_blacklist ): logger.info("Blocking access to %s due to blacklist" % (ip_address,)) e = SynapseError(403, "IP address blocked by IP blacklist entry") return defer.fail(Failure(e)) - except Exception: - # Not an IP - pass return self._agent.request( method, uri, headers=headers, bodyProducer=bodyProducer @@ -341,6 +341,7 @@ class SimpleHttpClient: self.agent = ProxyAgent( self.reactor, + hs.get_reactor(), connectTimeout=15, contextFactory=self.hs.get_http_client_context_factory(), pool=pool, @@ -723,7 +724,7 @@ class SimpleHttpClient: read_body_with_max_size(response, output_stream, max_size) ) except BodyExceededMaxSize: - SynapseError( + raise SynapseError( 502, "Requested file is too large > %r bytes" % (max_size,), Codes.TOO_LARGE, @@ -765,14 +766,24 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): self.max_size = max_size def dataReceived(self, data: bytes) -> None: + # If the deferred was called, bail early. + if self.deferred.called: + return + self.stream.write(data) self.length += len(data) + # The first time the maximum size is exceeded, error and cancel the + # connection. dataReceived might be called again if data was received + # in the meantime. if self.max_size is not None and self.length >= self.max_size: self.deferred.errback(BodyExceededMaxSize()) - self.deferred = defer.Deferred() self.transport.loseConnection() def connectionLost(self, reason: Failure) -> None: + # If the maximum size was already exceeded, there's nothing to do. + if self.deferred.called: + return + if reason.check(ResponseDone): self.deferred.callback(self.length) elif reason.check(PotentialDataLoss): diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py deleted file mode 100644 index 92a5b606c8..0000000000 --- a/synapse/http/endpoint.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import re - -logger = logging.getLogger(__name__) - - -def parse_server_name(server_name): - """Split a server name into host/port parts. - - Args: - server_name (str): server name to parse - - Returns: - Tuple[str, int|None]: host/port parts. - - Raises: - ValueError if the server name could not be parsed. - """ - try: - if server_name[-1] == "]": - # ipv6 literal, hopefully - return server_name, None - - domain_port = server_name.rsplit(":", 1) - domain = domain_port[0] - port = int(domain_port[1]) if domain_port[1:] else None - return domain, port - except Exception: - raise ValueError("Invalid server name '%s'" % server_name) - - -VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z") - - -def parse_and_validate_server_name(server_name): - """Split a server name into host/port parts and do some basic validation. - - Args: - server_name (str): server name to parse - - Returns: - Tuple[str, int|None]: host/port parts. - - Raises: - ValueError if the server name could not be parsed. - """ - host, port = parse_server_name(server_name) - - # these tests don't need to be bulletproof as we'll find out soon enough - # if somebody is giving us invalid data. What we *do* need is to be sure - # that nobody is sneaking IP literals in that look like hostnames, etc. - - # look for ipv6 literals - if host[0] == "[": - if host[-1] != "]": - raise ValueError("Mismatched [...] in server name '%s'" % (server_name,)) - return host, port - - # otherwise it should only be alphanumerics. - if not VALID_HOST_REGEX.match(host): - raise ValueError( - "Server name '%s' contains invalid characters" % (server_name,) - ) - - return host, port diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 3b756a7dc2..4c06a117d3 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -102,7 +102,6 @@ class MatrixFederationAgent: pool=self._pool, contextFactory=tls_client_options_factory, ), - self._reactor, ip_blacklist=ip_blacklist, ), user_agent=self.user_agent, diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b261e078c4..19293bf673 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -174,6 +174,16 @@ async def _handle_json_response( d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) body = await make_deferred_yieldable(d) + except ValueError as e: + # The JSON content was invalid. + logger.warning( + "{%s} [%s] Failed to parse JSON response - %s %s", + request.txn_id, + request.destination, + request.method, + request.uri.decode("ascii"), + ) + raise RequestSendFailed(e, can_retry=False) from e except defer.TimeoutError as e: logger.warning( "{%s} [%s] Timed out reading response - %s %s", @@ -986,7 +996,7 @@ class MatrixFederationHttpClient: logger.warning( "{%s} [%s] %s", request.txn_id, request.destination, msg, ) - SynapseError(502, msg, Codes.TOO_LARGE) + raise SynapseError(502, msg, Codes.TOO_LARGE) except Exception as e: logger.warning( "{%s} [%s] Error reading response: %s", diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index e32d3f43e0..b730d2c634 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -39,6 +39,10 @@ class ProxyAgent(_AgentBase): reactor: twisted reactor to place outgoing connections. + proxy_reactor: twisted reactor to use for connections to the proxy server + reactor might have some blacklisting applied (i.e. for DNS queries), + but we need unblocked access to the proxy. + contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the verification parameters of OpenSSL. The default is to use a `BrowserLikePolicyForHTTPS`, so unless you have special @@ -59,6 +63,7 @@ class ProxyAgent(_AgentBase): def __init__( self, reactor, + proxy_reactor=None, contextFactory=BrowserLikePolicyForHTTPS(), connectTimeout=None, bindAddress=None, @@ -68,6 +73,11 @@ class ProxyAgent(_AgentBase): ): _AgentBase.__init__(self, reactor, pool) + if proxy_reactor is None: + self.proxy_reactor = reactor + else: + self.proxy_reactor = proxy_reactor + self._endpoint_kwargs = {} if connectTimeout is not None: self._endpoint_kwargs["timeout"] = connectTimeout @@ -75,11 +85,11 @@ class ProxyAgent(_AgentBase): self._endpoint_kwargs["bindAddress"] = bindAddress self.http_proxy_endpoint = _http_proxy_endpoint( - http_proxy, reactor, **self._endpoint_kwargs + http_proxy, self.proxy_reactor, **self._endpoint_kwargs ) self.https_proxy_endpoint = _http_proxy_endpoint( - https_proxy, reactor, **self._endpoint_kwargs + https_proxy, self.proxy_reactor, **self._endpoint_kwargs ) self._policy_for_https = contextFactory @@ -137,7 +147,7 @@ class ProxyAgent(_AgentBase): request_path = uri elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: endpoint = HTTPConnectProxyEndpoint( - self._reactor, + self.proxy_reactor, self.https_proxy_endpoint, parsed_uri.host, parsed_uri.port, diff --git a/synapse/http/site.py b/synapse/http/site.py index 5a5790831b..12ec3f851f 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ from twisted.python.failure import Failure from twisted.web.server import Request, Site from synapse.config.server import ListenerConfig -from synapse.http import redact_uri +from synapse.http import get_request_user_agent, redact_uri from synapse.http.request_metrics import RequestMetrics, requests_counter from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.types import Requester @@ -113,15 +113,6 @@ class SynapseRequest(Request): method = self.method.decode("ascii") return method - def get_user_agent(self, default: str) -> str: - """Return the last User-Agent header, or the given default. - """ - user_agent = self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] - if user_agent is None: - return default - - return user_agent.decode("ascii", "replace") - def render(self, resrc): # this is called once a Resource has been found to serve the request; in our # case the Resource in question will normally be a JsonResource. @@ -292,12 +283,7 @@ class SynapseRequest(Request): # and can see that we're doing something wrong. authenticated_entity = repr(self.requester) # type: ignore[unreachable] - # ...or could be raw utf-8 bytes in the User-Agent header. - # N.B. if you don't do this, the logger explodes cryptically - # with maximum recursion trying to log errors about - # the charset problem. - # c.f. https://github.com/matrix-org/synapse/issues/3471 - user_agent = self.get_user_agent("-") + user_agent = get_request_user_agent(self, "-") code = str(self.code) if not self.finished: |