author    | Matthew Hodgson <matthew@arasphere.net> | 2016-05-16 13:13:26 +0100
committer | Matthew Hodgson <matthew@arasphere.net> | 2016-05-16 13:13:26 +0100
commit    | 2d98c960ecfe50faae2eaedff45eebe8ba54cf6e (patch)
tree      | 7f7cff56157068180daef95e1994af8d22f107d7 /synapse
parent    | fix logo (diff)
parent    | Clean up the blacklist/whitelist handling. (diff)
download  | synapse-2d98c960ecfe50faae2eaedff45eebe8ba54cf6e.tar.xz
Merge pull request #760 from matrix-org/matthew/preview_url_ip_whitelist
add a url_preview_ip_range_whitelist config param
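In practical terms, the new parameter carves exceptions out of url_preview_ip_range_blacklist: a resolved IP that falls in the blacklist is still refused unless it is also covered by the whitelist. Below is a minimal, illustrative Python sketch of that behaviour, not Synapse's actual code; the names `blacklist`, `whitelist`, and `may_spider` are made up, and the example ranges mirror the ones in the default config.

```python
# Minimal sketch of the whitelist-overrides-blacklist semantics this PR adds,
# using netaddr (which the Synapse config code also uses). Illustrative only.
from netaddr import IPAddress, IPSet

# Hypothetical config values, echoing the examples in the default config.
blacklist = IPSet(['10.0.0.0/8', '172.16.0.0/12', '192.168.0.0/16'])
whitelist = IPSet(['192.168.1.1'])


def may_spider(resolved_ip):
    """Return True if the URL preview spider may connect to this resolved IP."""
    ip = IPAddress(resolved_ip)
    # Blacklisted addresses are refused unless the whitelist carves them out.
    return ip not in blacklist or ip in whitelist


assert may_spider('192.168.1.1')        # whitelisted exception to the blacklist
assert not may_spider('192.168.1.2')    # still blacklisted
assert may_spider('93.184.216.34')      # public address, never blacklisted
```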
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/config/repository.py                  | 18
-rw-r--r-- | synapse/http/client.py                        |  5
-rw-r--r-- | synapse/http/endpoint.py                      | 14
-rw-r--r-- | synapse/rest/media/v1/preview_url_resource.py | 63
4 files changed, 58 insertions, 42 deletions
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index d61e525e62..8810079848 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -100,8 +100,13 @@ class ContentRepositoryConfig(Config):
                 "to work"
             )
 
-        if "url_preview_url_blacklist" in config:
-            self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
+        self.url_preview_ip_range_whitelist = IPSet(
+            config.get("url_preview_ip_range_whitelist", ())
+        )
+
+        self.url_preview_url_blacklist = config.get(
+            "url_preview_url_blacklist", ()
+        )
 
     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")
@@ -162,6 +167,15 @@ class ContentRepositoryConfig(Config):
         # - '10.0.0.0/8'
         # - '172.16.0.0/12'
         # - '192.168.0.0/16'
+        #
+        # List of IP address CIDR ranges that the URL preview spider is allowed
+        # to access even if they are specified in url_preview_ip_range_blacklist.
+        # This is useful for specifying exceptions to wide-ranging blacklisted
+        # target IP ranges - e.g. for enabling URL previews for a specific private
+        # website only visible in your network.
+        #
+        # url_preview_ip_range_whitelist:
+        # - '192.168.1.1'
 
         # Optional list of URL matches that the URL preview spider is
         # denied from accessing.  You should use url_preview_ip_range_blacklist
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 902ae7a203..c7fa692435 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -380,13 +380,14 @@ class CaptchaServerHttpClient(SimpleHttpClient):
 class SpiderEndpointFactory(object):
     def __init__(self, hs):
         self.blacklist = hs.config.url_preview_ip_range_blacklist
+        self.whitelist = hs.config.url_preview_ip_range_whitelist
         self.policyForHTTPS = hs.get_http_client_context_factory()
 
     def endpointForURI(self, uri):
         logger.info("Getting endpoint for %s", uri.toBytes())
         if uri.scheme == "http":
             return SpiderEndpoint(
-                reactor, uri.host, uri.port, self.blacklist,
+                reactor, uri.host, uri.port, self.blacklist, self.whitelist,
                 endpoint=TCP4ClientEndpoint,
                 endpoint_kw_args={
                     'timeout': 15
@@ -395,7 +396,7 @@ class SpiderEndpointFactory(object):
         elif uri.scheme == "https":
             tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
             return SpiderEndpoint(
-                reactor, uri.host, uri.port, self.blacklist,
+                reactor, uri.host, uri.port, self.blacklist, self.whitelist,
                 endpoint=SSL4ClientEndpoint,
                 endpoint_kw_args={
                     'sslContextFactory': tlsPolicy,
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index a456dc19da..442696d393 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -79,12 +79,13 @@ class SpiderEndpoint(object):
     """An endpoint which refuses to connect to blacklisted IP addresses
     Implements twisted.internet.interfaces.IStreamClientEndpoint.
     """
-    def __init__(self, reactor, host, port, blacklist,
+    def __init__(self, reactor, host, port, blacklist, whitelist,
                  endpoint=TCP4ClientEndpoint, endpoint_kw_args={}):
         self.reactor = reactor
         self.host = host
         self.port = port
         self.blacklist = blacklist
+        self.whitelist = whitelist
         self.endpoint = endpoint
         self.endpoint_kw_args = endpoint_kw_args
 
@@ -93,10 +94,13 @@ class SpiderEndpoint(object):
         address = yield self.reactor.resolve(self.host)
 
         from netaddr import IPAddress
-        if IPAddress(address) in self.blacklist:
-            raise ConnectError(
-                "Refusing to spider blacklisted IP address %s" % address
-            )
+        ip_address = IPAddress(address)
+
+        if ip_address in self.blacklist:
+            if self.whitelist is None or ip_address not in self.whitelist:
+                raise ConnectError(
+                    "Refusing to spider blacklisted IP address %s" % address
+                )
 
         logger.info("Connecting to %s:%s", address, self.port)
         endpoint = self.endpoint(
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index dc1e5fbdb3..37dd1de899 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -56,8 +56,7 @@ class PreviewUrlResource(Resource):
         self.client = SpiderHttpClient(hs)
         self.media_repo = media_repo
 
-        if hasattr(hs.config, "url_preview_url_blacklist"):
-            self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
+        self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
 
         # simple memory cache mapping urls to OG metadata
         self.cache = ExpiringCache(
@@ -86,39 +85,37 @@ class PreviewUrlResource(Resource):
         else:
             ts = self.clock.time_msec()
 
-        # impose the URL pattern blacklist
-        if hasattr(self, "url_preview_url_blacklist"):
-            url_tuple = urlparse.urlsplit(url)
-            for entry in self.url_preview_url_blacklist:
-                match = True
-                for attrib in entry:
-                    pattern = entry[attrib]
-                    value = getattr(url_tuple, attrib)
-                    logger.debug((
-                        "Matching attrib '%s' with value '%s' against"
-                        " pattern '%s'"
-                    ) % (attrib, value, pattern))
-
-                    if value is None:
+        url_tuple = urlparse.urlsplit(url)
+        for entry in self.url_preview_url_blacklist:
+            match = True
+            for attrib in entry:
+                pattern = entry[attrib]
+                value = getattr(url_tuple, attrib)
+                logger.debug((
+                    "Matching attrib '%s' with value '%s' against"
+                    " pattern '%s'"
+                ) % (attrib, value, pattern))
+
+                if value is None:
+                    match = False
+                    continue
+
+                if pattern.startswith('^'):
+                    if not re.match(pattern, getattr(url_tuple, attrib)):
                         match = False
                         continue
-
-                    if pattern.startswith('^'):
-                        if not re.match(pattern, getattr(url_tuple, attrib)):
-                            match = False
-                            continue
-                    else:
-                        if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
-                            match = False
-                            continue
-                if match:
-                    logger.warn(
-                        "URL %s blocked by url_blacklist entry %s", url, entry
-                    )
-                    raise SynapseError(
-                        403, "URL blocked by url pattern blacklist entry",
-                        Codes.UNKNOWN
-                    )
+                else:
+                    if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
+                        match = False
+                        continue
+            if match:
+                logger.warn(
+                    "URL %s blocked by url_blacklist entry %s", url, entry
+                )
+                raise SynapseError(
+                    403, "URL blocked by url pattern blacklist entry",
+                    Codes.UNKNOWN
+                )
 
         # first check the memory cache - good to handle all the clients on this
         # HS thundering away to preview the same URL at the same time.