summary refs log tree commit diff
path: root/synapse/http/matrixfederationclient.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--synapse/http/matrixfederationclient.py364
1 files changed, 163 insertions, 201 deletions
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py

index c23a4d7c0c..4e27f93b7a 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py
@@ -17,8 +17,9 @@ import cgi import logging import random import sys -import urllib +import urllib.parse from io import BytesIO +from typing import Callable, Dict, List, Optional, Tuple, Union import attr import treq @@ -27,25 +28,27 @@ from prometheus_client import Counter from signedjson.sign import sign_json from zope.interface import implementer -from twisted.internet import defer, protocol +from twisted.internet import defer from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import IReactorPluggableNameResolver, IReactorTime from twisted.internet.task import _EPSILON, Cooperator -from twisted.web._newclient import ResponseDone from twisted.web.http_headers import Headers -from twisted.web.iweb import IResponse +from twisted.web.iweb import IBodyProducer, IResponse import synapse.metrics import synapse.util.retryutils from synapse.api.errors import ( - Codes, FederationDeniedError, HttpResponseException, RequestSendFailed, - SynapseError, ) from synapse.http import QuieterFileBodyProducer -from synapse.http.client import BlacklistingAgentWrapper, IPBlacklistingResolver +from synapse.http.client import ( + BlacklistingAgentWrapper, + IPBlacklistingResolver, + encode_query_args, + readBodyToFile, +) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging.context import make_deferred_yieldable from synapse.logging.opentracing import ( @@ -54,6 +57,7 @@ from synapse.logging.opentracing import ( start_active_span, tags, ) +from synapse.types import JsonDict from synapse.util import json_decoder from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -76,47 +80,44 @@ MAXINT = sys.maxsize _next_id = 1 +QueryArgs = Dict[str, Union[str, List[str]]] + + @attr.s(slots=True, frozen=True) class MatrixFederationRequest: - method = attr.ib() + method = attr.ib(type=str) """HTTP method - :type: str """ - path = attr.ib() + path = attr.ib(type=str) """HTTP path - :type: str """ - destination = attr.ib() + destination = attr.ib(type=str) """The remote server to send the HTTP request to. - :type: str""" + """ - json = attr.ib(default=None) + json = attr.ib(default=None, type=Optional[JsonDict]) """JSON to send in the body. - :type: dict|None """ - json_callback = attr.ib(default=None) + json_callback = attr.ib(default=None, type=Optional[Callable[[], JsonDict]]) """A callback to generate the JSON. - :type: func|None """ - query = attr.ib(default=None) + query = attr.ib(default=None, type=Optional[dict]) """Query arguments. - :type: dict|None """ - txn_id = attr.ib(default=None) + txn_id = attr.ib(default=None, type=Optional[str]) """Unique ID for this request (for logging) - :type: str|None """ uri = attr.ib(init=False, type=bytes) """The URI of this request """ - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: global _next_id txn_id = "%s-O-%s" % (self.method, _next_id) _next_id = (_next_id + 1) % (MAXINT - 1) @@ -136,7 +137,7 @@ class MatrixFederationRequest: ) object.__setattr__(self, "uri", uri) - def get_json(self): + def get_json(self) -> Optional[JsonDict]: if self.json_callback: return self.json_callback() return self.json @@ -148,7 +149,7 @@ async def _handle_json_response( request: MatrixFederationRequest, response: IResponse, start_ms: int, -): +) -> JsonDict: """ Reads the JSON body of a response, with a timeout @@ -160,7 +161,7 @@ async def _handle_json_response( start_ms: Timestamp when request was made Returns: - dict: parsed JSON response + The parsed JSON response """ try: check_content_type_is_json(response.headers) @@ -250,9 +251,7 @@ class MatrixFederationHttpClient: # Use a BlacklistingAgentWrapper to prevent circumventing the IP # blacklist via IP literals in server names self.agent = BlacklistingAgentWrapper( - self.agent, - self.reactor, - ip_blacklist=hs.config.federation_ip_range_blacklist, + self.agent, ip_blacklist=hs.config.federation_ip_range_blacklist, ) self.clock = hs.get_clock() @@ -266,27 +265,29 @@ class MatrixFederationHttpClient: self._cooperator = Cooperator(scheduler=schedule) async def _send_request_with_optional_trailing_slash( - self, request, try_trailing_slash_on_400=False, **send_request_args - ): + self, + request: MatrixFederationRequest, + try_trailing_slash_on_400: bool = False, + **send_request_args + ) -> IResponse: """Wrapper for _send_request which can optionally retry the request upon receiving a combination of a 400 HTTP response code and a 'M_UNRECOGNIZED' errcode. This is a workaround for Synapse <= v0.99.3 due to #3622. Args: - request (MatrixFederationRequest): details of request to be sent - try_trailing_slash_on_400 (bool): Whether on receiving a 400 + request: details of request to be sent + try_trailing_slash_on_400: Whether on receiving a 400 'M_UNRECOGNIZED' from the server to retry the request with a trailing slash appended to the request path. - send_request_args (Dict): A dictionary of arguments to pass to - `_send_request()`. + send_request_args: A dictionary of arguments to pass to `_send_request()`. Raises: HttpResponseException: If we get an HTTP response code >= 300 (except 429). Returns: - Dict: Parsed JSON response body. + Parsed JSON response body. """ try: response = await self._send_request(request, **send_request_args) @@ -313,24 +314,26 @@ class MatrixFederationHttpClient: async def _send_request( self, - request, - retry_on_dns_fail=True, - timeout=None, - long_retries=False, - ignore_backoff=False, - backoff_on_404=False, - ): + request: MatrixFederationRequest, + retry_on_dns_fail: bool = True, + timeout: Optional[int] = None, + long_retries: bool = False, + ignore_backoff: bool = False, + backoff_on_404: bool = False, + ) -> IResponse: """ Sends a request to the given server. Args: - request (MatrixFederationRequest): details of request to be sent + request: details of request to be sent + + retry_on_dns_fail: true if the request should be retied on DNS failures - timeout (int|None): number of milliseconds to wait for the response headers + timeout: number of milliseconds to wait for the response headers (including connecting to the server), *for each attempt*. 60s by default. - long_retries (bool): whether to use the long retry algorithm. + long_retries: whether to use the long retry algorithm. The regular retry algorithm makes 4 attempts, with intervals [0.5s, 1s, 2s]. @@ -346,14 +349,13 @@ class MatrixFederationHttpClient: NB: the long retry algorithm takes over 20 minutes to complete, with a default timeout of 60s! - ignore_backoff (bool): true to ignore the historical backoff data + ignore_backoff: true to ignore the historical backoff data and try the request anyway. - backoff_on_404 (bool): Back off if we get a 404 + backoff_on_404: Back off if we get a 404 Returns: - twisted.web.client.Response: resolves with the HTTP - response object on success. + Resolves with the HTTP response object on success. Raises: HttpResponseException: If we get an HTTP response code >= 300 @@ -404,7 +406,7 @@ class MatrixFederationHttpClient: ) # Inject the span into the headers - headers_dict = {} + headers_dict = {} # type: Dict[bytes, List[bytes]] inject_active_span_byte_dict(headers_dict, request.destination) headers_dict[b"User-Agent"] = [self.version_string_bytes] @@ -435,7 +437,7 @@ class MatrixFederationHttpClient: data = encode_canonical_json(json) producer = QuieterFileBodyProducer( BytesIO(data), cooperator=self._cooperator - ) + ) # type: Optional[IBodyProducer] else: producer = None auth_headers = self.build_auth_headers( @@ -524,14 +526,16 @@ class MatrixFederationHttpClient: ) body = None - e = HttpResponseException(response.code, response_phrase, body) + exc = HttpResponseException( + response.code, response_phrase, body + ) # Retry if the error is a 429 (Too Many Requests), # otherwise just raise a standard HttpResponseException if response.code == 429: - raise RequestSendFailed(e, can_retry=True) from e + raise RequestSendFailed(exc, can_retry=True) from exc else: - raise e + raise exc break except RequestSendFailed as e: @@ -582,22 +586,27 @@ class MatrixFederationHttpClient: return response def build_auth_headers( - self, destination, method, url_bytes, content=None, destination_is=None - ): + self, + destination: Optional[bytes], + method: bytes, + url_bytes: bytes, + content: Optional[JsonDict] = None, + destination_is: Optional[bytes] = None, + ) -> List[bytes]: """ Builds the Authorization headers for a federation request Args: - destination (bytes|None): The desination homeserver of the request. + destination: The destination homeserver of the request. May be None if the destination is an identity server, in which case destination_is must be non-None. - method (bytes): The HTTP method of the request - url_bytes (bytes): The URI path of the request - content (object): The body of the request - destination_is (bytes): As 'destination', but if the destination is an + method: The HTTP method of the request + url_bytes: The URI path of the request + content: The body of the request + destination_is: As 'destination', but if the destination is an identity server Returns: - list[bytes]: a list of headers to be added as "Authorization:" headers + A list of headers to be added as "Authorization:" headers """ request = { "method": method.decode("ascii"), @@ -629,33 +638,32 @@ class MatrixFederationHttpClient: async def put_json( self, - destination, - path, - args={}, - data={}, - json_data_callback=None, - long_retries=False, - timeout=None, - ignore_backoff=False, - backoff_on_404=False, - try_trailing_slash_on_400=False, - ): - """ Sends the specifed json data using PUT + destination: str, + path: str, + args: Optional[QueryArgs] = None, + data: Optional[JsonDict] = None, + json_data_callback: Optional[Callable[[], JsonDict]] = None, + long_retries: bool = False, + timeout: Optional[int] = None, + ignore_backoff: bool = False, + backoff_on_404: bool = False, + try_trailing_slash_on_400: bool = False, + ) -> Union[JsonDict, list]: + """ Sends the specified json data using PUT Args: - destination (str): The remote server to send the HTTP request - to. - path (str): The HTTP path. - args (dict): query params - data (dict): A dict containing the data that will be used as + destination: The remote server to send the HTTP request to. + path: The HTTP path. + args: query params + data: A dict containing the data that will be used as the request body. This will be encoded as JSON. - json_data_callback (callable): A callable returning the dict to + json_data_callback: A callable returning the dict to use as the request body. - long_retries (bool): whether to use the long retry algorithm. See + long_retries: whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response. + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. Note that we may make several attempts to send the request; this @@ -663,19 +671,19 @@ class MatrixFederationHttpClient: *each* attempt (including connection time) as well as the time spent reading the response body after a 200 response. - ignore_backoff (bool): true to ignore the historical backoff data + ignore_backoff: true to ignore the historical backoff data and try the request anyway. - backoff_on_404 (bool): True if we should count a 404 response as + backoff_on_404: True if we should count a 404 response as a failure of the server (and should therefore back off future requests). - try_trailing_slash_on_400 (bool): True if on a 400 M_UNRECOGNIZED + try_trailing_slash_on_400: True if on a 400 M_UNRECOGNIZED response we should try appending a trailing slash to the end of the request. Workaround for #3622 in Synapse <= v0.99.3. This will be attempted before backing off if backing off has been enabled. Returns: - dict|list: Succeeds when we get a 2xx HTTP response. The + Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. Raises: @@ -721,29 +729,28 @@ class MatrixFederationHttpClient: async def post_json( self, - destination, - path, - data={}, - long_retries=False, - timeout=None, - ignore_backoff=False, - args={}, - ): - """ Sends the specifed json data using POST + destination: str, + path: str, + data: Optional[JsonDict] = None, + long_retries: bool = False, + timeout: Optional[int] = None, + ignore_backoff: bool = False, + args: Optional[QueryArgs] = None, + ) -> Union[JsonDict, list]: + """ Sends the specified json data using POST Args: - destination (str): The remote server to send the HTTP request - to. + destination: The remote server to send the HTTP request to. - path (str): The HTTP path. + path: The HTTP path. - data (dict): A dict containing the data that will be used as + data: A dict containing the data that will be used as the request body. This will be encoded as JSON. - long_retries (bool): whether to use the long retry algorithm. See + long_retries: whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response. + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. Note that we may make several attempts to send the request; this @@ -751,10 +758,10 @@ class MatrixFederationHttpClient: *each* attempt (including connection time) as well as the time spent reading the response body after a 200 response. - ignore_backoff (bool): true to ignore the historical backoff data and + ignore_backoff: true to ignore the historical backoff data and try the request anyway. - args (dict): query params + args: query params Returns: dict|list: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. @@ -795,26 +802,25 @@ class MatrixFederationHttpClient: async def get_json( self, - destination, - path, - args=None, - retry_on_dns_fail=True, - timeout=None, - ignore_backoff=False, - try_trailing_slash_on_400=False, - ): + destination: str, + path: str, + args: Optional[QueryArgs] = None, + retry_on_dns_fail: bool = True, + timeout: Optional[int] = None, + ignore_backoff: bool = False, + try_trailing_slash_on_400: bool = False, + ) -> Union[JsonDict, list]: """ GETs some json from the given host homeserver and path Args: - destination (str): The remote server to send the HTTP request - to. + destination: The remote server to send the HTTP request to. - path (str): The HTTP path. + path: The HTTP path. - args (dict|None): A dictionary used to create query strings, defaults to + args: A dictionary used to create query strings, defaults to None. - timeout (int|None): number of milliseconds to wait for the response. + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. Note that we may make several attempts to send the request; this @@ -822,14 +828,14 @@ class MatrixFederationHttpClient: *each* attempt (including connection time) as well as the time spent reading the response body after a 200 response. - ignore_backoff (bool): true to ignore the historical backoff data + ignore_backoff: true to ignore the historical backoff data and try the request anyway. - try_trailing_slash_on_400 (bool): True if on a 400 M_UNRECOGNIZED + try_trailing_slash_on_400: True if on a 400 M_UNRECOGNIZED response we should try appending a trailing slash to the end of the request. Workaround for #3622 in Synapse <= v0.99.3. Returns: - dict|list: Succeeds when we get a 2xx HTTP response. The + Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. Raises: @@ -870,24 +876,23 @@ class MatrixFederationHttpClient: async def delete_json( self, - destination, - path, - long_retries=False, - timeout=None, - ignore_backoff=False, - args={}, - ): + destination: str, + path: str, + long_retries: bool = False, + timeout: Optional[int] = None, + ignore_backoff: bool = False, + args: Optional[QueryArgs] = None, + ) -> Union[JsonDict, list]: """Send a DELETE request to the remote expecting some json response Args: - destination (str): The remote server to send the HTTP request - to. - path (str): The HTTP path. + destination: The remote server to send the HTTP request to. + path: The HTTP path. - long_retries (bool): whether to use the long retry algorithm. See + long_retries: whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response. + timeout: number of milliseconds to wait for the response. self._default_timeout (60s) by default. Note that we may make several attempts to send the request; this @@ -895,12 +900,12 @@ class MatrixFederationHttpClient: *each* attempt (including connection time) as well as the time spent reading the response body after a 200 response. - ignore_backoff (bool): true to ignore the historical backoff data and + ignore_backoff: true to ignore the historical backoff data and try the request anyway. - args (dict): query params + args: query params Returns: - dict|list: Succeeds when we get a 2xx HTTP response. The + Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. Raises: @@ -938,25 +943,25 @@ class MatrixFederationHttpClient: async def get_file( self, - destination, - path, + destination: str, + path: str, output_stream, - args={}, - retry_on_dns_fail=True, - max_size=None, - ignore_backoff=False, - ): + args: Optional[QueryArgs] = None, + retry_on_dns_fail: bool = True, + max_size: Optional[int] = None, + ignore_backoff: bool = False, + ) -> Tuple[int, Dict[bytes, List[bytes]]]: """GETs a file from a given homeserver Args: - destination (str): The remote server to send the HTTP request to. - path (str): The HTTP path to GET. - output_stream (file): File to write the response body to. - args (dict): Optional dictionary used to create the query string. - ignore_backoff (bool): true to ignore the historical backoff data + destination: The remote server to send the HTTP request to. + path: The HTTP path to GET. + output_stream: File to write the response body to. + args: Optional dictionary used to create the query string. + ignore_backoff: true to ignore the historical backoff data and try the request anyway. Returns: - tuple[int, dict]: Resolves with an (int,dict) tuple of + Resolves with an (int,dict) tuple of the file length and a dict of the response headers. Raises: @@ -980,7 +985,7 @@ class MatrixFederationHttpClient: headers = dict(response.headers.getAllRawHeaders()) try: - d = _readBodyToFile(response, output_stream, max_size) + d = readBodyToFile(response, output_stream, max_size) d.addTimeout(self.default_timeout, self.reactor) length = await make_deferred_yieldable(d) except Exception as e: @@ -1004,40 +1009,6 @@ class MatrixFederationHttpClient: return (length, headers) -class _ReadBodyToFileProtocol(protocol.Protocol): - def __init__(self, stream, deferred, max_size): - self.stream = stream - self.deferred = deferred - self.length = 0 - self.max_size = max_size - - def dataReceived(self, data): - self.stream.write(data) - self.length += len(data) - if self.max_size is not None and self.length >= self.max_size: - self.deferred.errback( - SynapseError( - 502, - "Requested file is too large > %r bytes" % (self.max_size,), - Codes.TOO_LARGE, - ) - ) - self.deferred = defer.Deferred() - self.transport.loseConnection() - - def connectionLost(self, reason): - if reason.check(ResponseDone): - self.deferred.callback(self.length) - else: - self.deferred.errback(reason) - - -def _readBodyToFile(response, stream, max_size): - d = defer.Deferred() - response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) - return d - - def _flatten_response_never_received(e): if hasattr(e, "reasons"): reasons = ", ".join( @@ -1049,13 +1020,13 @@ def _flatten_response_never_received(e): return repr(e) -def check_content_type_is_json(headers): +def check_content_type_is_json(headers: Headers) -> None: """ Check that a set of HTTP headers have a Content-Type header, and that it is application/json. Args: - headers (twisted.web.http_headers.Headers): headers to check + headers: headers to check Raises: RequestSendFailed: if the Content-Type header is missing or isn't JSON @@ -1063,27 +1034,18 @@ def check_content_type_is_json(headers): """ c_type = headers.getRawHeaders(b"Content-Type") if c_type is None: - raise RequestSendFailed(RuntimeError("No Content-Type header"), can_retry=False) + raise RequestSendFailed( + RuntimeError("No Content-Type header received from remote server"), + can_retry=False, + ) c_type = c_type[0].decode("ascii") # only the first header val, options = cgi.parse_header(c_type) if val != "application/json": raise RequestSendFailed( - RuntimeError("Content-Type not application/json: was '%s'" % c_type), + RuntimeError( + "Remote server sent Content-Type header of '%s', not 'application/json'" + % c_type, + ), can_retry=False, ) - - -def encode_query_args(args): - if args is None: - return b"" - - encoded_args = {} - for k, vs in args.items(): - if isinstance(vs, str): - vs = [vs] - encoded_args[k] = [v.encode("UTF-8") for v in vs] - - query_bytes = urllib.parse.urlencode(encoded_args, True) - - return query_bytes.encode("utf8")