diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index d61e525e62..8810079848 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -100,8 +100,13 @@ class ContentRepositoryConfig(Config):
"to work"
)
- if "url_preview_url_blacklist" in config:
- self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
+ self.url_preview_ip_range_whitelist = IPSet(
+ config.get("url_preview_ip_range_whitelist", ())
+ )
+
+ self.url_preview_url_blacklist = config.get(
+ "url_preview_url_blacklist", ()
+ )
def default_config(self, **kwargs):
media_store = self.default_path("media_store")
@@ -162,6 +167,15 @@ class ContentRepositoryConfig(Config):
# - '10.0.0.0/8'
# - '172.16.0.0/12'
# - '192.168.0.0/16'
+ #
+ # List of IP address CIDR ranges that the URL preview spider is allowed
+ # to access even if they fall within url_preview_ip_range_blacklist.
+ # This is useful for specifying exceptions to wide-ranging blacklisted
+ # target IP ranges - e.g. for enabling URL previews for a specific private
+ # website that is only visible within your network.
+ #
+ # url_preview_ip_range_whitelist:
+ # - '192.168.1.1'
# Optional list of URL matches that the URL preview spider is
# denied from accessing. You should use url_preview_ip_range_blacklist
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 902ae7a203..c7fa692435 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -380,13 +380,14 @@ class CaptchaServerHttpClient(SimpleHttpClient):
class SpiderEndpointFactory(object):
def __init__(self, hs):
self.blacklist = hs.config.url_preview_ip_range_blacklist
+ self.whitelist = hs.config.url_preview_ip_range_whitelist
self.policyForHTTPS = hs.get_http_client_context_factory()
def endpointForURI(self, uri):
logger.info("Getting endpoint for %s", uri.toBytes())
if uri.scheme == "http":
return SpiderEndpoint(
- reactor, uri.host, uri.port, self.blacklist,
+ reactor, uri.host, uri.port, self.blacklist, self.whitelist,
endpoint=TCP4ClientEndpoint,
endpoint_kw_args={
'timeout': 15
@@ -395,7 +396,7 @@ class SpiderEndpointFactory(object):
elif uri.scheme == "https":
tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
return SpiderEndpoint(
- reactor, uri.host, uri.port, self.blacklist,
+ reactor, uri.host, uri.port, self.blacklist, self.whitelist,
endpoint=SSL4ClientEndpoint,
endpoint_kw_args={
'sslContextFactory': tlsPolicy,
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index a456dc19da..442696d393 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -79,12 +79,13 @@ class SpiderEndpoint(object):
"""An endpoint which refuses to connect to blacklisted IP addresses
Implements twisted.internet.interfaces.IStreamClientEndpoint.
"""
- def __init__(self, reactor, host, port, blacklist,
+ def __init__(self, reactor, host, port, blacklist, whitelist,
endpoint=TCP4ClientEndpoint, endpoint_kw_args={}):
self.reactor = reactor
self.host = host
self.port = port
self.blacklist = blacklist
+ self.whitelist = whitelist
self.endpoint = endpoint
self.endpoint_kw_args = endpoint_kw_args
@@ -93,10 +94,13 @@ class SpiderEndpoint(object):
address = yield self.reactor.resolve(self.host)
from netaddr import IPAddress
- if IPAddress(address) in self.blacklist:
- raise ConnectError(
- "Refusing to spider blacklisted IP address %s" % address
- )
+ ip_address = IPAddress(address)
+
+ if ip_address in self.blacklist:
+ if self.whitelist is None or ip_address not in self.whitelist:
+ raise ConnectError(
+ "Refusing to spider blacklisted IP address %s" % address
+ )
logger.info("Connecting to %s:%s", address, self.port)
endpoint = self.endpoint(
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index dc1e5fbdb3..37dd1de899 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -56,8 +56,7 @@ class PreviewUrlResource(Resource):
self.client = SpiderHttpClient(hs)
self.media_repo = media_repo
- if hasattr(hs.config, "url_preview_url_blacklist"):
- self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
+ self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
# simple memory cache mapping urls to OG metadata
self.cache = ExpiringCache(
@@ -86,39 +85,37 @@ class PreviewUrlResource(Resource):
else:
ts = self.clock.time_msec()
- # impose the URL pattern blacklist
- if hasattr(self, "url_preview_url_blacklist"):
- url_tuple = urlparse.urlsplit(url)
- for entry in self.url_preview_url_blacklist:
- match = True
- for attrib in entry:
- pattern = entry[attrib]
- value = getattr(url_tuple, attrib)
- logger.debug((
- "Matching attrib '%s' with value '%s' against"
- " pattern '%s'"
- ) % (attrib, value, pattern))
-
- if value is None:
+ url_tuple = urlparse.urlsplit(url)
+ for entry in self.url_preview_url_blacklist:
+ match = True
+ for attrib in entry:
+ pattern = entry[attrib]
+ value = getattr(url_tuple, attrib)
+ logger.debug((
+ "Matching attrib '%s' with value '%s' against"
+ " pattern '%s'"
+ ) % (attrib, value, pattern))
+
+ if value is None:
+ match = False
+ continue
+
+ if pattern.startswith('^'):
+ if not re.match(pattern, getattr(url_tuple, attrib)):
match = False
continue
-
- if pattern.startswith('^'):
- if not re.match(pattern, getattr(url_tuple, attrib)):
- match = False
- continue
- else:
- if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
- match = False
- continue
- if match:
- logger.warn(
- "URL %s blocked by url_blacklist entry %s", url, entry
- )
- raise SynapseError(
- 403, "URL blocked by url pattern blacklist entry",
- Codes.UNKNOWN
- )
+ else:
+ if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
+ match = False
+ continue
+ if match:
+ logger.warn(
+ "URL %s blocked by url_blacklist entry %s", url, entry
+ )
+ raise SynapseError(
+ 403, "URL blocked by url pattern blacklist entry",
+ Codes.UNKNOWN
+ )
# first check the memory cache - good to handle all the clients on this
# HS thundering away to preview the same URL at the same time.
|