From 170ccc9de5c09a543a60a7d9eada2e02ba9c9980 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 13 Mar 2017 13:50:16 +0000 Subject: Fix routing loop when fetching remote media When we proxy a media request to a remote server, add a query-param, which will tell the remote server to 404 if it doesn't recognise the server_name. This should fix a routing loop where the server keeps forwarding back to itself. Also improves the error handling on remote media fetches, so that we don't always return a rather obscure 502. --- synapse/rest/media/v1/download_resource.py | 12 ++++++++++++ synapse/rest/media/v1/media_repository.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'synapse/rest') diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index dfb87ffd15..6788375e85 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import synapse.http.servlet from ._base import parse_media_id, respond_with_file, respond_404 from twisted.web.resource import Resource @@ -81,6 +82,17 @@ class DownloadResource(Resource): @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): + # don't forward requests for remote media if allow_remote is false + allow_remote = synapse.http.servlet.parse_boolean( + request, "allow_remote", default=True) + if not allow_remote: + logger.info( + "Rejecting request for remote media %s/%s due to allow_remote", + server_name, media_id, + ) + respond_404(request) + return + media_info = yield self.media_repo.get_remote_media(server_name, media_id) media_type = media_info["media_type"] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 481ffee200..66464cfe66 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import twisted.internet.error from .upload_resource import UploadResource from .download_resource import DownloadResource @@ -26,7 +27,8 @@ from .thumbnailer import Thumbnailer from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string -from synapse.api.errors import SynapseError +from synapse.api.errors import SynapseError, HttpResponseException, \ + NotFoundError from twisted.internet import defer, threads @@ -157,11 +159,31 @@ class MediaRepository(object): try: length, headers = yield self.client.get_file( server_name, request_path, output_stream=f, - max_size=self.max_upload_size, + max_size=self.max_upload_size, args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + } ) + except twisted.internet.error.DNSLookupError as e: + logger.warn("HTTP error fetching remote media %s/%s: %r", + server_name, media_id, e) + raise NotFoundError() + + except HttpResponseException as e: + logger.warn("HTTP error fetching remote media %s/%s: %s", + server_name, media_id, e.response) + raise SynapseError.from_http_response_exception(e) + except Exception as e: - logger.warn("Failed to fetch remoted media %r", e) - raise SynapseError(502, "Failed to fetch remoted media") + logger.warn("Failed to fetch remote media %s/%s", + server_name, media_id, + exc_info=True) + if isinstance(e, SynapseError): + raise e + else: + raise SynapseError(502, "Failed to fetch remote media") media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() -- cgit 1.4.1 From 1d09586599a495e01bfb6b887b1a59419673600a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 14 Mar 2017 13:36:06 +0000 Subject: Address review comments - don't blindly proxy all HTTPRequestExceptions - log unexpected exceptions at error - avoid `isinstance` - improve docs on `from_http_response_exception` --- synapse/api/errors.py | 19 +++++++++++++----- synapse/rest/media/v1/media_repository.py | 32 ++++++++++++++++--------------- 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'synapse/rest') diff --git a/synapse/api/errors.py b/synapse/api/errors.py index d5391a80cd..6fbd5d6876 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -94,6 +94,17 @@ class SynapseError(CodeMessageException): def from_http_response_exception(cls, err): """Make a SynapseError based on an HTTPResponseException + This is useful when a proxied request has failed, and we need to + decide how to map the failure onto a matrix error to send back to the + client. + + An attempt is made to parse the body of the http response as a matrix + error. If that succeeds, the errcode and error message from the body + are used as the errcode and error message in the new synapse error. + + Otherwise, the errcode is set to M_UNKNOWN, and the error message is + set to the reason code from the HTTP response. + Args: err (HttpResponseException): @@ -137,13 +148,11 @@ class UnrecognizedRequestError(SynapseError): class NotFoundError(SynapseError): """An error indicating we can't find the thing you asked for""" - def __init__(self, *args, **kwargs): - if "errcode" not in kwargs: - kwargs["errcode"] = Codes.NOT_FOUND + def __init__(self, msg="Not found", errcode=Codes.NOT_FOUND): super(NotFoundError, self).__init__( 404, - "Not found", - **kwargs + msg, + errcode=errcode ) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 66464cfe66..c43b185e08 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -12,7 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from twisted.internet import defer, threads import twisted.internet.error +import twisted.web.http +from twisted.web.resource import Resource from .upload_resource import UploadResource from .download_resource import DownloadResource @@ -20,9 +24,6 @@ from .thumbnail_resource import ThumbnailResource from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths - -from twisted.web.resource import Resource - from .thumbnailer import Thumbnailer from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -30,8 +31,6 @@ from synapse.util.stringutils import random_string from synapse.api.errors import SynapseError, HttpResponseException, \ NotFoundError -from twisted.internet import defer, threads - from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii from synapse.util.logcontext import preserve_context_over_fn @@ -174,16 +173,19 @@ class MediaRepository(object): except HttpResponseException as e: logger.warn("HTTP error fetching remote media %s/%s: %s", server_name, media_id, e.response) - raise SynapseError.from_http_response_exception(e) - - except Exception as e: - logger.warn("Failed to fetch remote media %s/%s", - server_name, media_id, - exc_info=True) - if isinstance(e, SynapseError): - raise e - else: - raise SynapseError(502, "Failed to fetch remote media") + if e.code == twisted.web.http.NOT_FOUND: + raise SynapseError.from_http_response_exception(e) + raise SynapseError(502, "Failed to fetch remote media") + + except SynapseError: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise + + except Exception: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise SynapseError(502, "Failed to fetch remote media") media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() -- cgit 1.4.1