summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2017-03-13 13:50:16 +0000
committerRichard van der Hoff <richard@matrix.org>2017-03-13 16:30:36 +0000
commit170ccc9de5c09a543a60a7d9eada2e02ba9c9980 (patch)
tree1dec6a895c7ec7725dbeab7014eb9057fba969da
parentMerge pull request #1978 from matrix-org/rav/refactor_received_pdu (diff)
downloadsynapse-170ccc9de5c09a543a60a7d9eada2e02ba9c9980.tar.xz
Fix routing loop when fetching remote media
When we proxy a media request to a remote server, add a query-param, which will
tell the remote server to 404 if it doesn't recognise the server_name.

This should fix a routing loop where the server keeps forwarding back to
itself.

Also improves the error handling on remote media fetches, so that we don't
always return a rather obscure 502.
Diffstat (limited to '')
-rw-r--r--synapse/api/errors.py59
-rw-r--r--synapse/http/matrixfederationclient.py15
-rw-r--r--synapse/rest/media/v1/download_resource.py12
-rw-r--r--synapse/rest/media/v1/media_repository.py30
4 files changed, 102 insertions, 14 deletions
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 921c457738..014bd60b9d 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -15,6 +15,7 @@
 
 """Contains exceptions and error codes."""
 
+import json
 import logging
 
 logger = logging.getLogger(__name__)
@@ -50,12 +51,15 @@ class Codes(object):
 
 
 class CodeMessageException(RuntimeError):
-    """An exception with integer code and message string attributes."""
+    """An exception with integer code and message string attributes.
 
-    def __init__(self, code, msg):
-        super(CodeMessageException, self).__init__("%d: %s" % (code, msg))
+    Attributes:
+        code (int): HTTP error code
+        response_code_message (str): HTTP reason phrase. None for the default.
+    """
+    def __init__(self, code):
+        super(CodeMessageException, self).__init__("%d" % code)
         self.code = code
-        self.msg = msg
         self.response_code_message = None
 
     def error_dict(self):
@@ -70,17 +74,44 @@ class SynapseError(CodeMessageException):
         Args:
             code (int): The integer error code (an HTTP response code)
             msg (str): The human-readable error message.
-            err (str): The error code e.g 'M_FORBIDDEN'
+            errcode (str): The synapse error code e.g 'M_FORBIDDEN'
         """
-        super(SynapseError, self).__init__(code, msg)
+        super(SynapseError, self).__init__(code)
+        self.msg = msg
         self.errcode = errcode
 
+    def __str__(self):
+        return "%d: %s %s" % (self.code, self.errcode, self.msg)
+
     def error_dict(self):
         return cs_error(
             self.msg,
             self.errcode,
         )
 
+    @classmethod
+    def from_http_response_exception(cls, err):
+        """Make a SynapseError based on an HTTPResponseException
+
+        Args:
+            err (HttpResponseException):
+
+        Returns:
+            SynapseError:
+        """
+        # try to parse the body as json, to get better errcode/msg, but
+        # default to M_UNKNOWN with the HTTP status as the error text
+        try:
+            j = json.loads(err.response)
+        except ValueError:
+            j = {}
+        errcode = j.get('errcode', Codes.UNKNOWN)
+        errmsg = j.get('error', err.response_code_message)
+
+        res = SynapseError(err.code, errmsg, errcode)
+        res.response_code_message = err.response_code_message
+        return res
+
 
 class RegistrationError(SynapseError):
     """An error raised when a registration event fails."""
@@ -243,6 +274,20 @@ class FederationError(RuntimeError):
 
 
 class HttpResponseException(CodeMessageException):
+    """
+    Represents an HTTP-level failure of an outbound request
+
+    Attributes:
+        response (str): body of response
+    """
     def __init__(self, code, msg, response):
+        """
+
+        Args:
+            code (int): HTTP status code
+            msg (str): reason phrase from HTTP response status line
+            response (str): body of response
+        """
+        super(HttpResponseException, self).__init__(code)
+        self.response_code_message = msg
         self.response = response
-        super(HttpResponseException, self).__init__(code, msg)
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 78b92cef36..82586e3dea 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -108,6 +108,12 @@ class MatrixFederationHttpClient(object):
                         query_bytes=b"", retry_on_dns_fail=True,
                         timeout=None, long_retries=False):
         """ Creates and sends a request to the given url
+
+        Returns:
+            Deferred: resolves with the http response object on success.
+
+            Fails with ``HTTPRequestException``: if we get an HTTP response
+            code >= 300.
         """
         headers_dict[b"User-Agent"] = [self.version_string]
         headers_dict[b"Host"] = [destination]
@@ -408,8 +414,11 @@ class MatrixFederationHttpClient(object):
             output_stream (file): File to write the response body to.
             args (dict): Optional dictionary used to create the query string.
         Returns:
-            A (int,dict) tuple of the file length and a dict of the response
-            headers.
+            Deferred: resolves with an (int,dict) tuple of the file length and
+            a dict of the response headers.
+
+            Fails with ``HTTPRequestException`` if we get an HTTP response code
+            >= 300
         """
 
         encoded_args = {}
@@ -419,7 +428,7 @@ class MatrixFederationHttpClient(object):
             encoded_args[k] = [v.encode("UTF-8") for v in vs]
 
         query_bytes = urllib.urlencode(encoded_args, True)
-        logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
+        logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail)
 
         def body_callback(method, url_bytes, headers_dict):
             self.sign_request(destination, method, url_bytes, headers_dict)
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
index dfb87ffd15..6788375e85 100644
--- a/synapse/rest/media/v1/download_resource.py
+++ b/synapse/rest/media/v1/download_resource.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import synapse.http.servlet
 
 from ._base import parse_media_id, respond_with_file, respond_404
 from twisted.web.resource import Resource
@@ -81,6 +82,17 @@ class DownloadResource(Resource):
 
     @defer.inlineCallbacks
     def _respond_remote_file(self, request, server_name, media_id, name):
+        # don't forward requests for remote media if allow_remote is false
+        allow_remote = synapse.http.servlet.parse_boolean(
+            request, "allow_remote", default=True)
+        if not allow_remote:
+            logger.info(
+                "Rejecting request for remote media %s/%s due to allow_remote",
+                server_name, media_id,
+            )
+            respond_404(request)
+            return
+
         media_info = yield self.media_repo.get_remote_media(server_name, media_id)
 
         media_type = media_info["media_type"]
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 481ffee200..66464cfe66 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import twisted.internet.error
 
 from .upload_resource import UploadResource
 from .download_resource import DownloadResource
@@ -26,7 +27,8 @@ from .thumbnailer import Thumbnailer
 
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.util.stringutils import random_string
-from synapse.api.errors import SynapseError
+from synapse.api.errors import SynapseError, HttpResponseException, \
+    NotFoundError
 
 from twisted.internet import defer, threads
 
@@ -157,11 +159,31 @@ class MediaRepository(object):
                 try:
                     length, headers = yield self.client.get_file(
                         server_name, request_path, output_stream=f,
-                        max_size=self.max_upload_size,
+                        max_size=self.max_upload_size, args={
+                            # tell the remote server to 404 if it doesn't
+                            # recognise the server_name, to make sure we don't
+                            # end up with a routing loop.
+                            "allow_remote": "false",
+                        }
                     )
+                except twisted.internet.error.DNSLookupError as e:
+                    logger.warn("HTTP error fetching remote media %s/%s: %r",
+                                server_name, media_id, e)
+                    raise NotFoundError()
+
+                except HttpResponseException as e:
+                    logger.warn("HTTP error fetching remote media %s/%s: %s",
+                                server_name, media_id, e.response)
+                    raise SynapseError.from_http_response_exception(e)
+
                 except Exception as e:
-                    logger.warn("Failed to fetch remoted media %r", e)
-                    raise SynapseError(502, "Failed to fetch remoted media")
+                    logger.warn("Failed to fetch remote media %s/%s",
+                                server_name, media_id,
+                                exc_info=True)
+                    if isinstance(e, SynapseError):
+                        raise e
+                    else:
+                        raise SynapseError(502, "Failed to fetch remote media")
 
             media_type = headers["Content-Type"][0]
             time_now_ms = self.clock.time_msec()