diff options
author | Patrick Cloke <clokep@users.noreply.github.com> | 2023-11-29 14:03:42 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-29 19:03:42 +0000 |
commit | d6c3b7584fc46571e65226793304df35d7081534 (patch) | |
tree | 663afeec9b90d9a47718c33a090c03ab6cbd7004 /synapse | |
parent | Reduce DB load when forget on leave setting is disabled (#16668) (diff) | |
download | synapse-d6c3b7584fc46571e65226793304df35d7081534.tar.xz |
Request & follow redirects for /media/v3/download (#16701)
Implement MSC3860 to follow redirects for federated media downloads. Note that the Client-Server API doesn't support this (yet) since the media repository in Synapse doesn't have a way of supporting redirects.
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/federation/federation_client.py | 38 | ||||
-rw-r--r-- | synapse/federation/transport/client.py | 53 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 77 | ||||
-rw-r--r-- | synapse/media/media_repository.py | 17 |
4 files changed, 152 insertions, 33 deletions
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 1a7fa175ec..0ba03b0d05 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -21,6 +21,7 @@ from typing import ( TYPE_CHECKING, AbstractSet, Awaitable, + BinaryIO, Callable, Collection, Container, @@ -1862,6 +1863,43 @@ class FederationClient(FederationBase): return filtered_statuses, filtered_failures + async def download_media( + self, + destination: str, + media_id: str, + output_stream: BinaryIO, + max_size: int, + max_timeout_ms: int, + ) -> Tuple[int, Dict[bytes, List[bytes]]]: + try: + return await self.transport_layer.download_media_v3( + destination, + media_id, + output_stream=output_stream, + max_size=max_size, + max_timeout_ms=max_timeout_ms, + ) + except HttpResponseException as e: + # If an error is received that is due to an unrecognised endpoint, + # fallback to the r0 endpoint. Otherwise, consider it a legitimate error + # and raise. + if not is_unknown_endpoint(e): + raise + + logger.debug( + "Couldn't download media %s/%s with the v3 API, falling back to the r0 API", + destination, + media_id, + ) + + return await self.transport_layer.download_media_r0( + destination, + media_id, + output_stream=output_stream, + max_size=max_size, + max_timeout_ms=max_timeout_ms, + ) + @attr.s(frozen=True, slots=True, auto_attribs=True) class TimestampToEventResponse: diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index fab4800717..5e36638b0a 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -18,6 +18,7 @@ import urllib from typing import ( TYPE_CHECKING, Any, + BinaryIO, Callable, Collection, Dict, @@ -804,6 +805,58 @@ class TransportLayerClient: destination=destination, path=path, data={"user_ids": user_ids} ) + async def download_media_r0( + self, + destination: str, + media_id: str, + output_stream: BinaryIO, + max_size: int, + max_timeout_ms: int, + ) -> Tuple[int, Dict[bytes, List[bytes]]]: + path = f"/_matrix/media/r0/download/{destination}/{media_id}" + + return await self.client.get_file( + destination, + path, + output_stream=output_stream, + max_size=max_size, + args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + "timeout_ms": str(max_timeout_ms), + }, + ) + + async def download_media_v3( + self, + destination: str, + media_id: str, + output_stream: BinaryIO, + max_size: int, + max_timeout_ms: int, + ) -> Tuple[int, Dict[bytes, List[bytes]]]: + path = f"/_matrix/media/v3/download/{destination}/{media_id}" + + return await self.client.get_file( + destination, + path, + output_stream=output_stream, + max_size=max_size, + args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + "timeout_ms": str(max_timeout_ms), + # Matrix 1.7 allows for this to redirect to another URL, this should + # just be ignored for an old homeserver, so always provide it. + "allow_redirect": "true", + }, + follow_redirects=True, + ) + def _create_path(federation_prefix: str, path: str, *args: str) -> str: """ diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index d5013e8e97..cc1db763ae 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -153,12 +153,18 @@ class MatrixFederationRequest: """Query arguments. """ - txn_id: Optional[str] = None - """Unique ID for this request (for logging) + txn_id: str = attr.ib(init=False) + """Unique ID for this request (for logging), this is autogenerated. """ - uri: bytes = attr.ib(init=False) - """The URI of this request + uri: bytes = b"" + """The URI of this request, usually generated from the above information. + """ + + _generate_uri: bool = True + """True to automatically generate the uri field based on the above information. + + Set to False if manually configuring the URI. """ def __attrs_post_init__(self) -> None: @@ -168,22 +174,23 @@ class MatrixFederationRequest: object.__setattr__(self, "txn_id", txn_id) - destination_bytes = self.destination.encode("ascii") - path_bytes = self.path.encode("ascii") - query_bytes = encode_query_args(self.query) - - # The object is frozen so we can pre-compute this. - uri = urllib.parse.urlunparse( - ( - b"matrix-federation", - destination_bytes, - path_bytes, - None, - query_bytes, - b"", + if self._generate_uri: + destination_bytes = self.destination.encode("ascii") + path_bytes = self.path.encode("ascii") + query_bytes = encode_query_args(self.query) + + # The object is frozen so we can pre-compute this. + uri = urllib.parse.urlunparse( + ( + b"matrix-federation", + destination_bytes, + path_bytes, + None, + query_bytes, + b"", + ) ) - ) - object.__setattr__(self, "uri", uri) + object.__setattr__(self, "uri", uri) def get_json(self) -> Optional[JsonDict]: if self.json_callback: @@ -513,6 +520,7 @@ class MatrixFederationHttpClient: ignore_backoff: bool = False, backoff_on_404: bool = False, backoff_on_all_error_codes: bool = False, + follow_redirects: bool = False, ) -> IResponse: """ Sends a request to the given server. @@ -555,6 +563,9 @@ class MatrixFederationHttpClient: backoff_on_404: Back off if we get a 404 backoff_on_all_error_codes: Back off if we get any error response + follow_redirects: True to follow the Location header of 307/308 redirect + responses. This does not recurse. + Returns: Resolves with the HTTP response object on success. @@ -714,6 +725,26 @@ class MatrixFederationHttpClient: response.code, response_phrase, ) + elif ( + response.code in (307, 308) + and follow_redirects + and response.headers.hasHeader("Location") + ): + # The Location header *might* be relative so resolve it. + location = response.headers.getRawHeaders(b"Location")[0] + new_uri = urllib.parse.urljoin(request.uri, location) + + return await self._send_request( + attr.evolve(request, uri=new_uri, generate_uri=False), + retry_on_dns_fail, + timeout, + long_retries, + ignore_backoff, + backoff_on_404, + backoff_on_all_error_codes, + # Do not continue following redirects. + follow_redirects=False, + ) else: logger.info( "{%s} [%s] Got response headers: %d %s", @@ -1383,6 +1414,7 @@ class MatrixFederationHttpClient: retry_on_dns_fail: bool = True, max_size: Optional[int] = None, ignore_backoff: bool = False, + follow_redirects: bool = False, ) -> Tuple[int, Dict[bytes, List[bytes]]]: """GETs a file from a given homeserver Args: @@ -1392,6 +1424,8 @@ class MatrixFederationHttpClient: args: Optional dictionary used to create the query string. ignore_backoff: true to ignore the historical backoff data and try the request anyway. + follow_redirects: True to follow the Location header of 307/308 redirect + responses. This does not recurse. Returns: Resolves with an (int,dict) tuple of @@ -1412,7 +1446,10 @@ class MatrixFederationHttpClient: ) response = await self._send_request( - request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff + request, + retry_on_dns_fail=retry_on_dns_fail, + ignore_backoff=ignore_backoff, + follow_redirects=follow_redirects, ) headers = dict(response.headers.getAllRawHeaders()) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index bf976b9e7c..d62af22adb 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -77,7 +77,7 @@ class MediaRepository: def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() - self.client = hs.get_federation_http_client() + self.client = hs.get_federation_client() self.clock = hs.get_clock() self.server_name = hs.hostname self.store = hs.get_datastores().main @@ -644,22 +644,13 @@ class MediaRepository: file_info = FileInfo(server_name=server_name, file_id=file_id) with self.media_storage.store_into_file(file_info) as (f, fname, finish): - request_path = "/".join( - ("/_matrix/media/r0/download", server_name, media_id) - ) try: - length, headers = await self.client.get_file( + length, headers = await self.client.download_media( server_name, - request_path, + media_id, output_stream=f, max_size=self.max_upload_size, - args={ - # tell the remote server to 404 if it doesn't - # recognise the server_name, to make sure we don't - # end up with a routing loop. - "allow_remote": "false", - "timeout_ms": str(max_timeout_ms), - }, + max_timeout_ms=max_timeout_ms, ) except RequestSendFailed as e: logger.warning( |