summary refs log tree commit diff
path: root/synapse/config/repository.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/config/repository.py')
-rw-r--r--synapse/config/repository.py121
1 files changed, 118 insertions, 3 deletions
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 2e96c09013..8810079848 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -13,9 +13,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ._base import Config
+from ._base import Config, ConfigError
 from collections import namedtuple
 
+
+MISSING_NETADDR = (
+    "Missing netaddr library. This is required for URL preview API."
+)
+
+MISSING_LXML = (
+    """Missing lxml library. This is required for URL preview API.
+
+    Install by running:
+        pip install lxml
+
+    Requires libxslt1-dev system package.
+    """
+)
+
+
 ThumbnailRequirement = namedtuple(
     "ThumbnailRequirement", ["width", "height", "method", "media_type"]
 )
@@ -23,7 +39,7 @@ ThumbnailRequirement = namedtuple(
 
 def parse_thumbnail_requirements(thumbnail_sizes):
     """ Takes a list of dictionaries with "width", "height", and "method" keys
-    and creates a map from image media types to the thumbnail size, thumnailing
+    and creates a map from image media types to the thumbnail size, thumbnailing
     method, and thumbnail media type to precalculate
 
     Args:
@@ -53,12 +69,44 @@ class ContentRepositoryConfig(Config):
     def read_config(self, config):
         self.max_upload_size = self.parse_size(config["max_upload_size"])
         self.max_image_pixels = self.parse_size(config["max_image_pixels"])
+        self.max_spider_size = self.parse_size(config["max_spider_size"])
         self.media_store_path = self.ensure_directory(config["media_store_path"])
         self.uploads_path = self.ensure_directory(config["uploads_path"])
         self.dynamic_thumbnails = config["dynamic_thumbnails"]
         self.thumbnail_requirements = parse_thumbnail_requirements(
             config["thumbnail_sizes"]
         )
+        self.url_preview_enabled = config.get("url_preview_enabled", False)
+        if self.url_preview_enabled:
+            try:
+                import lxml
+                lxml  # To stop unused lint.
+            except ImportError:
+                raise ConfigError(MISSING_LXML)
+
+            try:
+                from netaddr import IPSet
+            except ImportError:
+                raise ConfigError(MISSING_NETADDR)
+
+            if "url_preview_ip_range_blacklist" in config:
+                self.url_preview_ip_range_blacklist = IPSet(
+                    config["url_preview_ip_range_blacklist"]
+                )
+            else:
+                raise ConfigError(
+                    "For security, you must specify an explicit target IP address "
+                    "blacklist in url_preview_ip_range_blacklist for url previewing "
+                    "to work"
+                )
+
+            self.url_preview_ip_range_whitelist = IPSet(
+                config.get("url_preview_ip_range_whitelist", ())
+            )
+
+            self.url_preview_url_blacklist = config.get(
+                "url_preview_url_blacklist", ()
+            )
 
     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")
@@ -80,7 +128,7 @@ class ContentRepositoryConfig(Config):
         # the resolution requested by the client. If true then whenever
         # a new resolution is requested by the client the server will
         # generate a new thumbnail. If false the server will pick a thumbnail
-        # from a precalcualted list.
+        # from a precalculated list.
         dynamic_thumbnails: false
 
         # List of thumbnail to precalculate when an image is uploaded.
@@ -100,4 +148,71 @@ class ContentRepositoryConfig(Config):
         - width: 800
           height: 600
           method: scale
+
+        # Is the preview URL API enabled?  If enabled, you *must* specify
+        # an explicit url_preview_ip_range_blacklist of IPs that the spider is
+        # denied from accessing.
+        url_preview_enabled: False
+
+        # List of IP address CIDR ranges that the URL preview spider is denied
+        # from accessing.  There are no defaults: you must explicitly
+        # specify a list for URL previewing to work.  You should specify any
+        # internal services in your network that you do not want synapse to try
+        # to connect to, otherwise anyone in any Matrix room could cause your
+        # synapse to issue arbitrary GET requests to your internal services,
+        # causing serious security issues.
+        #
+        # url_preview_ip_range_blacklist:
+        # - '127.0.0.0/8'
+        # - '10.0.0.0/8'
+        # - '172.16.0.0/12'
+        # - '192.168.0.0/16'
+        #
+        # List of IP address CIDR ranges that the URL preview spider is allowed
+        # to access even if they are specified in url_preview_ip_range_blacklist.
+        # This is useful for specifying exceptions to wide-ranging blacklisted
+        # target IP ranges - e.g. for enabling URL previews for a specific private
+        # website only visible in your network.
+        #
+        # url_preview_ip_range_whitelist:
+        # - '192.168.1.1'
+
+        # Optional list of URL matches that the URL preview spider is
+        # denied from accessing.  You should use url_preview_ip_range_blacklist
+        # in preference to this, otherwise someone could define a public DNS
+        # entry that points to a private IP address and circumvent the blacklist.
+        # This is more useful if you know there is an entire shape of URL that
+        # you know that will never want synapse to try to spider.
+        #
+        # Each list entry is a dictionary of url component attributes as returned
+        # by urlparse.urlsplit as applied to the absolute form of the URL.  See
+        # https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit
+        # The values of the dictionary are treated as an filename match pattern
+        # applied to that component of URLs, unless they start with a ^ in which
+        # case they are treated as a regular expression match.  If all the
+        # specified component matches for a given list item succeed, the URL is
+        # blacklisted.
+        #
+        # url_preview_url_blacklist:
+        # # blacklist any URL with a username in its URI
+        # - username: '*'
+        #
+        # # blacklist all *.google.com URLs
+        # - netloc: 'google.com'
+        # - netloc: '*.google.com'
+        #
+        # # blacklist all plain HTTP URLs
+        # - scheme: 'http'
+        #
+        # # blacklist http(s)://www.acme.com/foo
+        # - netloc: 'www.acme.com'
+        #   path: '/foo'
+        #
+        # # blacklist any URL with a literal IPv4 address
+        # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
+
+        # The largest allowed URL preview spidering size in bytes
+        max_spider_size: "10M"
+
+
         """ % locals()