diff --git a/synapse/http/client.py b/synapse/http/client.py
index 71b2e3375e..30f31a915d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -23,8 +23,9 @@ from canonicaljson import encode_canonical_json
from twisted.internet import defer, reactor, ssl, protocol
from twisted.web.client import (
- RedirectAgent, Agent, readBody, FileBodyProducer, PartialDownloadError,
+ BrowserLikeRedirectAgent, Agent, readBody, FileBodyProducer, PartialDownloadError,
)
+from twisted.web.http import PotentialDataLoss
from twisted.web.http_headers import Headers
from twisted.web._newclient import ResponseDone
@@ -59,11 +60,11 @@ class SimpleHttpClient(object):
# The default context factory in Twisted 14.0.0 (which we require) is
# BrowserLikePolicyForHTTPS which will do regular cert validation
# 'like a browser'
- self.agent = RedirectAgent(Agent(
+ self.agent = Agent(
reactor,
connectTimeout=15,
contextFactory=hs.get_http_client_context_factory()
- ))
+ )
self.user_agent = hs.version_string
if hs.config.user_agent_suffix:
self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,)
@@ -253,10 +254,6 @@ class SimpleHttpClient(object):
headers.
"""
- def body_callback(method, url_bytes, headers_dict):
- self.sign_request(destination, method, url_bytes, headers_dict)
- return None
-
response = yield self.request(
"GET",
url.encode("ascii"),
@@ -309,6 +306,10 @@ class _ReadBodyToFileProtocol(protocol.Protocol):
def connectionLost(self, reason):
if reason.check(ResponseDone):
self.deferred.callback(self.length)
+ elif reason.check(PotentialDataLoss):
+ # stolen from https://github.com/twisted/treq/pull/49/files
+ # http://twistedmatrix.com/trac/ticket/4840
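+ # (PotentialDataLoss just means the response had no Content-Length or chunked
+ # framing, so Twisted cannot prove the body is complete; accept it anyway.)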
+ self.deferred.callback(self.length)
else:
self.deferred.errback(reason)
@@ -350,6 +351,24 @@ class CaptchaServerHttpClient(SimpleHttpClient):
# twisted dislikes google's response, no content length.
defer.returnValue(e.response)
+class SpiderHttpClient(SimpleHttpClient):
+ """
+ Separate HTTP client for spidering arbitrary URLs.
+ Special in that it follows redirects and has a UA that looks
+ like a browser.
+
+ Used by the preview_url endpoint in the content repo.
+ """
+ def __init__(self, hs):
+ SimpleHttpClient.__init__(self, hs)
+ # clobber the base class's agent and UA:
+ self.agent = BrowserLikeRedirectAgent(Agent(
+ reactor,
+ connectTimeout=15,
+ contextFactory=hs.get_http_client_context_factory()
+ ))
+ # Look like Chrome for now
+ #self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) Chrome Safari" % hs.version_string)
def encode_urlencode_args(args):
return {k: encode_urlencode_arg(v) for k, v in args.items()}
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index b999944e86..ca2529cc10 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -19,7 +19,7 @@ from twisted.web.server import NOT_DONE_YET
from twisted.internet import defer
from lxml import html
from synapse.util.stringutils import random_string
-from synapse.http.client import SimpleHttpClient
+from synapse.http.client import SpiderHttpClient
from synapse.http.server import request_handler, respond_with_json, respond_with_json_bytes
import os
@@ -33,7 +33,7 @@ class PreviewUrlResource(BaseMediaResource):
def __init__(self, hs, filepaths):
BaseMediaResource.__init__(self, hs, filepaths)
- self.client = SimpleHttpClient(hs)
+ self.client = SpiderHttpClient(hs)
def render_GET(self, request):
self._async_render_GET(request)
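For reference, a minimal sketch of the behaviour the new SpiderHttpClient agent provides: BrowserLikeRedirectAgent wraps a plain Agent and follows 3xx redirects the way a browser would, so the previewer ends up reading the final page rather than an intermediate redirect. Only Twisted classes already imported in the diff are used; the fetch_for_preview helper and the User-Agent string are illustrative assumptions, not code from this patch.

    from twisted.internet import defer, reactor
    from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody
    from twisted.web.http_headers import Headers

    @defer.inlineCallbacks
    def fetch_for_preview(url_bytes):
        # Hypothetical helper: a browser-like GET that follows redirects and
        # then reads the whole body into memory.
        agent = BrowserLikeRedirectAgent(Agent(reactor, connectTimeout=15))
        response = yield agent.request(
            b"GET",
            url_bytes,
            Headers({b"User-Agent": [b"Mozilla/5.0 (Synapse URL preview)"]}),
            None,  # GET request: no body producer
        )
        body = yield readBody(response)
        defer.returnValue((response.code, body))

Note that after this change SimpleHttpClient itself no longer follows redirects (the RedirectAgent wrapper is removed above); only the spidering client does.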