summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erik@matrix.org> 2016-04-14 09:59:29 +0100
committer Erik Johnston <erik@matrix.org> 2016-04-14 09:59:29 +0100
commit ceeb5b909f38839c4561399737d764484780de41 (patch)
tree b3afc767a983adb0a72d3f9f2f429d025cca0010
parent Merge pull request #722 from matrix-org/dbkr/only_unread_event_actions (diff)
parent Give install requirements (diff)
download synapse-ceeb5b909f38839c4561399737d764484780de41.tar.xz
Merge pull request #721 from matrix-org/erikj/spider
Sanitize the optional dependencies for spider API
-rw-r--r-- synapse/config/repository.py 48
-rw-r--r-- synapse/python_dependencies.py 1
-rw-r--r-- synapse/rest/media/v1/media_repository.py 6
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 24
4 files changed, 41 insertions, 38 deletions
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 49922c6d03..d61e525e62 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -13,10 +13,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ._base import Config
+from ._base import Config, ConfigError
 from collections import namedtuple
 
-import sys
+
+MISSING_NETADDR = (
+    "Missing netaddr library. This is required for URL preview API."
+)
+
+MISSING_LXML = (
+    """Missing lxml library. This is required for URL preview API.
+
+    Install by running:
+        pip install lxml
+
+    Requires libxslt1-dev system package.
+    """
+)
+
 
 ThumbnailRequirement = namedtuple(
     "ThumbnailRequirement", ["width", "height", "method", "media_type"]
@@ -62,18 +76,32 @@ class ContentRepositoryConfig(Config):
         self.thumbnail_requirements = parse_thumbnail_requirements(
             config["thumbnail_sizes"]
         )
-        self.url_preview_enabled = config["url_preview_enabled"]
+        self.url_preview_enabled = config.get("url_preview_enabled", False)
         if self.url_preview_enabled:
             try:
+                import lxml
+                lxml  # To stop unused lint.
+            except ImportError:
+                raise ConfigError(MISSING_LXML)
+
+            try:
                 from netaddr import IPSet
-                if "url_preview_ip_range_blacklist" in config:
-                    self.url_preview_ip_range_blacklist = IPSet(
-                        config["url_preview_ip_range_blacklist"]
-                    )
-                if "url_preview_url_blacklist" in config:
-                    self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
             except ImportError:
-                sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n")
+                raise ConfigError(MISSING_NETADDR)
+
+            if "url_preview_ip_range_blacklist" in config:
+                self.url_preview_ip_range_blacklist = IPSet(
+                    config["url_preview_ip_range_blacklist"]
+                )
+            else:
+                raise ConfigError(
+                    "For security, you must specify an explicit target IP address "
+                    "blacklist in url_preview_ip_range_blacklist for url previewing "
+                    "to work"
+                )
+
+            if "url_preview_url_blacklist" in config:
+                self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
 
     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 1adbdd9421..b25b736493 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = {
         "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
     },
     "preview_url": {
-        "lxml>=3.6.0": ["lxml"],
         "netaddr>=0.7.18": ["netaddr"],
     },
 }
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 97b7e84af9..77fb0313c5 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource):
         self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
         self.putChild("identicon", IdenticonResource())
         if hs.config.url_preview_enabled:
-            try:
-                self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
-            except Exception as e:
-                logger.warn("Failed to mount preview_url")
-                logger.exception(e)
+            self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 4dd97ac0e3..8e1cf6e2fb 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -40,33 +40,11 @@ import ujson as json
 import logging
 logger = logging.getLogger(__name__)
 
-try:
-    from lxml import html
-except ImportError:
-    pass
-
 
 class PreviewUrlResource(BaseMediaResource):
     isLeaf = True
 
     def __init__(self, hs, filepaths):
-        try:
-            if html:
-                pass
-        except:
-            raise RuntimeError("Disabling PreviewUrlResource as lxml not available")
-
-        if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
-            logger.warn(
-                "For security, you must specify an explicit target IP address "
-                "blacklist in url_preview_ip_range_blacklist for url previewing "
-                "to work"
-            )
-            raise RuntimeError(
-                "Disabling PreviewUrlResource as "
-                "url_preview_ip_range_blacklist not specified"
-            )
-
         BaseMediaResource.__init__(self, hs, filepaths)
         self.client = SpiderHttpClient(hs)
         if hasattr(hs.config, "url_preview_url_blacklist"):
@@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource):
         elif self._is_html(media_info['media_type']):
             # TODO: somehow stop a big HTML tree from exploding synapse's RAM
 
+            from lxml import html
+
             try:
                 tree = html.parse(media_info['filename'])
                 og = yield self._calc_og(tree, media_info, requester)