summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- synapse/http/client.py 11
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 5
2 files changed, 11 insertions, 5 deletions
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 1b6f7cb795..b21bf17378 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -23,7 +23,8 @@ from canonicaljson import encode_canonical_json
 
 from twisted.internet import defer, reactor, ssl, protocol
 from twisted.web.client import (
-    BrowserLikeRedirectAgent, Agent, readBody, FileBodyProducer, PartialDownloadError,
+    BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent,
+    readBody, FileBodyProducer, PartialDownloadError,
 )
 from twisted.web.http import PotentialDataLoss
 from twisted.web.http_headers import Headers
@@ -269,6 +270,10 @@ class SimpleHttpClient(object):
             # XXX: do we want to explicitly drop the connection here somehow? if so, how?
             raise # what should we be raising here?
 
+        if response.code > 299:
+            logger.warn("Got %d when downloading %s" % (response.code, url))
+            raise
+
         # TODO: if our Content-Type is HTML or something, just read the first
         # N bytes into RAM rather than saving it all to disk only to read it
         # straight back in again
@@ -366,11 +371,11 @@ class SpiderHttpClient(SimpleHttpClient):
     def __init__(self, hs):
         SimpleHttpClient.__init__(self, hs)
         # clobber the base class's agent and UA:
-        self.agent = BrowserLikeRedirectAgent(Agent(
+        self.agent = ContentDecoderAgent(BrowserLikeRedirectAgent(Agent(
             reactor,
             connectTimeout=15,
             contextFactory=hs.get_http_client_context_factory()
-        ))
+        )), [('gzip', GzipDecoder)])
         # Look like Chrome for now
         #self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) Chrome Safari" % hs.version_string)
 
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 3ffdafce09..162e09ba71 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -200,7 +200,7 @@ class PreviewUrlResource(BaseMediaResource):
                     og["og:image:height"] = dims['height']
                 else:
                     logger.warn("Couldn't get dims for %s" % og["og:image"])
-                    
+
                 og["og:image"] = "mxc://%s/%s" % (self.server_name, image_info['filesystem_id'])
                 og["og:image:type"] = image_info['media_type']
             else:
@@ -259,7 +259,8 @@ class PreviewUrlResource(BaseMediaResource):
                 length, headers, uri = yield self.client.get_file(
                     url, output_stream=f, max_size=self.max_spider_size,
                 )
-                # FIXME: handle 404s sanely - don't spider an error page
+                # FIXME: pass through 404s and other error messages nicely
+
             media_type = headers["Content-Type"][0]
             time_now_ms = self.clock.time_msec()