From 0cb169900ebd39b6f46dbff1b1909cc5b3c17493 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 11 Aug 2020 16:08:10 +0100
Subject: Implement login blocking based on SAML attributes (#8052)

Hopefully this mostly speaks for itself. I also did a bit of cleaning up of
the error handling.

Fixes #8047
---
 synapse/config/_util.py        | 49 +++++++++++++++++++++++++++++++++++++++
 synapse/config/saml2_config.py | 50 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 synapse/config/_util.py

diff --git a/synapse/config/_util.py b/synapse/config/_util.py
new file mode 100644
index 0000000000..cd31b1c3c9
--- /dev/null
+++ b/synapse/config/_util.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, List
+
+import jsonschema
+
+from synapse.config._base import ConfigError
+from synapse.types import JsonDict
+
+
+def validate_config(json_schema: JsonDict, config: Any, config_path: List[str]) -> None:
+    """Validates a config setting against a JsonSchema definition
+
+    This can be used to validate a section of the config file against a schema
+    definition. If the validation fails, a ConfigError is raised with a textual
+    description of the problem.
+
+    Args:
+        json_schema: the schema to validate against
+        config: the configuration value to be validated
+        config_path: the path within the config file. This will be used as a
+            basis for the error message.
+    """
+    try:
+        jsonschema.validate(config, json_schema)
+    except jsonschema.ValidationError as e:
+        # copy `config_path` before modifying it.
+        path = list(config_path)
+        for p in list(e.path):
+            if isinstance(p, int):
+                path.append("<item %i>" % p)
+            else:
+                path.append(str(p))
+
+        raise ConfigError(
+            "Unable to parse configuration: %s at %s" % (e.message, ".".join(path))
+        )
diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py
index 293643b2de..9277b5f342 100644
--- a/synapse/config/saml2_config.py
+++ b/synapse/config/saml2_config.py
@@ -15,7 +15,9 @@
 # limitations under the License.

 import logging
+from typing import Any, List

+import attr
 import jinja2
 import pkg_resources

@@ -23,6 +25,7 @@ from synapse.python_dependencies import DependencyException, check_requirements
 from synapse.util.module_loader import load_module, load_python_module

 from ._base import Config, ConfigError
+from ._util import validate_config

 logger = logging.getLogger(__name__)

@@ -80,6 +83,11 @@ class SAML2Config(Config):

         self.saml2_enabled = True

+        attribute_requirements = saml2_config.get("attribute_requirements") or []
+        self.attribute_requirements = _parse_attribute_requirements_def(
+            attribute_requirements
+        )
+
         self.saml2_grandfathered_mxid_source_attribute = saml2_config.get(
             "grandfathered_mxid_source_attribute", "uid"
         )
@@ -341,6 +349,17 @@ class SAML2Config(Config):
         #
         #grandfathered_mxid_source_attribute: upn

+        # It is possible to configure Synapse to only allow logins if SAML attributes
+        # match particular values. The requirements can be listed under
+        # `attribute_requirements` as shown below. All of the listed attributes must
+        # match for the login to be permitted.
+        #
+        #attribute_requirements:
+        #  - attribute: userGroup
+        #    value: "staff"
+        #  - attribute: department
+        #    value: "sales"
+
         # Directory in which Synapse will try to find the template files below.
         # If not set, default templates from within the Synapse package will be used.
         #
@@ -368,3 +387,34 @@ class SAML2Config(Config):
         """ % {
             "config_dir_path": config_dir_path
         }
+
+
+@attr.s(frozen=True)
+class SamlAttributeRequirement:
+    """Object describing a single requirement for SAML attributes."""
+
+    attribute = attr.ib(type=str)
+    value = attr.ib(type=str)
+
+    JSON_SCHEMA = {
+        "type": "object",
+        "properties": {"attribute": {"type": "string"}, "value": {"type": "string"}},
+        "required": ["attribute", "value"],
+    }
+
+
+ATTRIBUTE_REQUIREMENTS_SCHEMA = {
+    "type": "array",
+    "items": SamlAttributeRequirement.JSON_SCHEMA,
+}
+
+
+def _parse_attribute_requirements_def(
+    attribute_requirements: Any,
+) -> List[SamlAttributeRequirement]:
+    validate_config(
+        ATTRIBUTE_REQUIREMENTS_SCHEMA,
+        attribute_requirements,
+        config_path=["saml2_config", "attribute_requirements"],
+    )
+    return [SamlAttributeRequirement(**x) for x in attribute_requirements]
--
cgit 1.5.1
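For reference, the following is a rough, standalone sketch of how the `validate_config` helper added above behaves for the new `attribute_requirements` option. It is not part of the patch: `ConfigError` is stubbed, the schema is inlined, and only the `jsonschema` package is assumed.

    # Standalone sketch, not part of the patch above. ConfigError is a stub and
    # the schema mirrors ATTRIBUTE_REQUIREMENTS_SCHEMA from saml2_config.py.
    from typing import Any, List

    import jsonschema


    class ConfigError(Exception):
        """Stand-in for synapse.config._base.ConfigError."""


    ATTRIBUTE_REQUIREMENTS_SCHEMA = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": {"attribute": {"type": "string"}, "value": {"type": "string"}},
            "required": ["attribute", "value"],
        },
    }


    def validate_config(json_schema: dict, config: Any, config_path: List[str]) -> None:
        """Turn a schema violation into a ConfigError naming the config path."""
        try:
            jsonschema.validate(config, json_schema)
        except jsonschema.ValidationError as e:
            path = list(config_path)
            for p in e.path:
                path.append("<item %i>" % p if isinstance(p, int) else str(p))
            raise ConfigError(
                "Unable to parse configuration: %s at %s" % (e.message, ".".join(path))
            )


    # A well-formed requirements list passes silently...
    validate_config(
        ATTRIBUTE_REQUIREMENTS_SCHEMA,
        [{"attribute": "userGroup", "value": "staff"}],
        ["saml2_config", "attribute_requirements"],
    )

    # ...while a malformed entry (missing "value") raises a ConfigError such as:
    #   Unable to parse configuration: 'value' is a required property
    #   at saml2_config.attribute_requirements.<item 0>
    try:
        validate_config(
            ATTRIBUTE_REQUIREMENTS_SCHEMA,
            [{"attribute": "userGroup"}],
            ["saml2_config", "attribute_requirements"],
        )
    except ConfigError as e:
        print(e)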
From db131b6b22350ea73128ac69ea82a48a6cf78a20 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 11 Aug 2020 18:09:46 +0100
Subject: Change the default log config to reduce disk I/O and storage (#8040)

* Change default log config to buffer by default.

  This batches up writes to the filesystem, which is more efficient for
  disk I/O.

  This means that it can take some time for logs to get written to disk.
  Note that ERROR logs (and above) immediately flush the buffer.

  This only affects new installs, as we only write the log config if
  started with `--generate-config` (in the same way we do for generating
  signing keys).

* Default to keeping last 4 days of logs.

  This hopefully reduces the amount of logs kept for new servers. Keeping
  the last 1GB of logs is likely overkill for new servers, but equally may
  not be enough for busy ones.

  Instead, we keep the last four days worth of logs, enough so that admins
  can investigate any problems that happened over e.g. a long weekend.
---
 changelog.d/8040.misc       |  1 +
 docs/sample_log_config.yaml | 41 ++++++++++++++++++++++++++++++++++++-----
 synapse/config/logger.py    | 41 ++++++++++++++++++++++++++++++++++++-----
 3 files changed, 73 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/8040.misc

diff --git a/changelog.d/8040.misc b/changelog.d/8040.misc
new file mode 100644
index 0000000000..a126151392
--- /dev/null
+++ b/changelog.d/8040.misc
@@ -0,0 +1 @@
+Change the default log config to reduce disk I/O and storage for new servers.
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index 1a2739455e..403ac005ee 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -18,13 +18,29 @@ filters:

 handlers:
     file:
-        class: logging.handlers.RotatingFileHandler
+        class: logging.handlers.TimedRotatingFileHandler
         formatter: precise
         filename: /var/log/matrix-synapse/homeserver.log
-        maxBytes: 104857600
-        backupCount: 10
-        filters: [context]
+        when: midnight
+        backupCount: 3  # Does not include the current log file.
         encoding: utf8
+
+    # Default to buffering writes to log file for efficiency. This means that
+    # there will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR
+    # logs will still be flushed immediately.
+    buffer:
+        class: logging.handlers.MemoryHandler
+        filters: [context]
+        target: file
+        # The capacity is the number of log lines that are buffered before
+        # being written to disk. Increasing this will lead to better
+        # performance, at the expense of it taking longer for log lines to
+        # be written to disk.
+        capacity: 10
+        flushLevel: 30  # Flush for WARNING logs as well
+
+    # A handler that writes logs to stderr. Unused by default, but can be used
+    # instead of "buffer" and "file" in the logger handlers.
     console:
         class: logging.StreamHandler
         formatter: precise
@@ -36,8 +52,23 @@ loggers:
         # information such as access tokens.
         level: INFO

+    twisted:
+        # We send the twisted logging directly to the file handler,
+        # to work around https://github.com/matrix-org/synapse/issues/3471
+        # when using the "buffer" handler. Use "console" to log to stderr instead.
+        handlers: [file]
+        propagate: false
+
 root:
     level: INFO
-    handlers: [file, console]
+
+    # Write logs to the `buffer` handler, which will buffer them together in memory,
+    # then write them to a file.
+    #
+    # Replace "buffer" with "console" to log to stderr instead. (Note that you'll
+    # also need to update the configuration for the `twisted` logger above, in
+    # this case.)
+    #
+    handlers: [buffer]

 disable_existing_loggers: false
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index dd775a97e8..493e98462d 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -62,13 +62,29 @@ filters:

 handlers:
     file:
-        class: logging.handlers.RotatingFileHandler
+        class: logging.handlers.TimedRotatingFileHandler
         formatter: precise
         filename: ${log_file}
-        maxBytes: 104857600
-        backupCount: 10
-        filters: [context]
+        when: midnight
+        backupCount: 3  # Does not include the current log file.
         encoding: utf8
+
+    # Default to buffering writes to log file for efficiency. This means that
+    # there will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR
+    # logs will still be flushed immediately.
+    buffer:
+        class: logging.handlers.MemoryHandler
+        filters: [context]
+        target: file
+        # The capacity is the number of log lines that are buffered before
+        # being written to disk. Increasing this will lead to better
+        # performance, at the expense of it taking longer for log lines to
+        # be written to disk.
+        capacity: 10
+        flushLevel: 30  # Flush for WARNING logs as well
+
+    # A handler that writes logs to stderr. Unused by default, but can be used
+    # instead of "buffer" and "file" in the logger handlers.
     console:
         class: logging.StreamHandler
         formatter: precise
@@ -80,9 +96,24 @@ loggers:
         # information such as access tokens.
         level: INFO

+    twisted:
+        # We send the twisted logging directly to the file handler,
+        # to work around https://github.com/matrix-org/synapse/issues/3471
+        # when using the "buffer" handler. Use "console" to log to stderr instead.
+        handlers: [file]
+        propagate: false
+
 root:
     level: INFO
-    handlers: [file, console]
+
+    # Write logs to the `buffer` handler, which will buffer them together in memory,
+    # then write them to a file.
+    #
+    # Replace "buffer" with "console" to log to stderr instead. (Note that you'll
+    # also need to update the configuration for the `twisted` logger above, in
+    # this case.)
+    #
+    handlers: [buffer]

 disable_existing_loggers: false
 """
--
cgit 1.5.1
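For reference, here is a minimal standalone sketch of the buffered logging arrangement that the new default config describes: a MemoryHandler in front of a TimedRotatingFileHandler, flushing on WARNING or above. It is not part of the patch; the log path and the simplified format string are illustrative, and it assumes a recent Python 3 whose dictConfig can resolve the MemoryHandler `target` by handler name.

    # Standalone sketch, not part of the patch. The filename and format string
    # are illustrative; capacity, flushLevel and backupCount match the new defaults.
    import logging
    import logging.config

    LOG_CONFIG = {
        "version": 1,
        "formatters": {
            "precise": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"}
        },
        "handlers": {
            "file": {
                "class": "logging.handlers.TimedRotatingFileHandler",
                "formatter": "precise",
                "filename": "./homeserver.log",  # illustrative path
                "when": "midnight",
                "backupCount": 3,  # does not include the current log file
                "encoding": "utf8",
            },
            # Records are buffered in memory and only written out when the
            # buffer fills up or a WARNING-or-higher record arrives.
            "buffer": {
                "class": "logging.handlers.MemoryHandler",
                "target": "file",  # resolved to the handler above by dictConfig
                "capacity": 10,
                "flushLevel": logging.WARNING,  # 30, as in the sample config
            },
        },
        "root": {"level": "INFO", "handlers": ["buffer"]},
    }

    logging.config.dictConfig(LOG_CONFIG)

    log = logging.getLogger("demo")
    log.info("held in the in-memory buffer for now")
    log.error("ERROR is above flushLevel, so the whole buffer is flushed to disk")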
From 0304ad0c3d79e44e78f9658e71f1e1533e3aa4e2 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Fri, 7 Aug 2020 11:39:29 +0100
Subject: Move setting of Filter into code.

We do this to prevent foot guns. The default config uses a MemoryHandler,
but users are free to change to logging to files directly. If they do, then
they have to make sure to set `filters: [context]` on the right handler,
otherwise records get written with the wrong context.

Instead we move the logic to happen when we generate a record, which is when
we *log* rather than *handle*.

(It's possible to add filters to loggers in the config, however they don't
apply to descendant loggers and so they have to be manually set on *every*
logger used in the code base)
---
 docker/conf/log.config      |  6 ------
 docs/sample_log_config.yaml |  7 -------
 synapse/config/logger.py    | 24 ++++++++++++++++--------
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/docker/conf/log.config b/docker/conf/log.config
index ed418a57cd..491bbcc87a 100644
--- a/docker/conf/log.config
+++ b/docker/conf/log.config
@@ -4,16 +4,10 @@ formatters:
     precise:
         format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'

-filters:
-    context:
-        (): synapse.logging.context.LoggingContextFilter
-        request: ""
-
 handlers:
     console:
         class: logging.StreamHandler
         formatter: precise
-        filters: [context]

 loggers:
     synapse.storage.SQL:
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index 403ac005ee..55a48a9ed6 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -11,11 +11,6 @@ formatters:
     precise:
         format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'

-filters:
-    context:
-        (): synapse.logging.context.LoggingContextFilter
-        request: ""
-
 handlers:
     file:
         class: logging.handlers.TimedRotatingFileHandler
@@ -30,7 +25,6 @@ handlers:
     # logs will still be flushed immediately.
     buffer:
         class: logging.handlers.MemoryHandler
-        filters: [context]
         target: file
         # The capacity is the number of log lines that are buffered before
         # being written to disk. Increasing this will lead to better
@@ -44,7 +38,6 @@ handlers:
     console:
         class: logging.StreamHandler
         formatter: precise
-        filters: [context]

 loggers:
     synapse.storage.SQL:
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 493e98462d..c96e6ef62a 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -55,11 +55,6 @@ formatters:
        format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - \
%(request)s - %(message)s'

-filters:
-    context:
-        (): synapse.logging.context.LoggingContextFilter
-        request: ""
-
 handlers:
     file:
         class: logging.handlers.TimedRotatingFileHandler
@@ -74,7 +69,6 @@ handlers:
     # logs will still be flushed immediately.
     buffer:
         class: logging.handlers.MemoryHandler
-        filters: [context]
         target: file
         # The capacity is the number of log lines that are buffered before
         # being written to disk. Increasing this will lead to better
@@ -88,7 +82,6 @@ handlers:
     console:
         class: logging.StreamHandler
         formatter: precise
-        filters: [context]

 loggers:
     synapse.storage.SQL:
@@ -199,11 +192,26 @@ def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner):

         handler = logging.StreamHandler()
         handler.setFormatter(formatter)
-        handler.addFilter(LoggingContextFilter(request=""))
         logger.addHandler(handler)
     else:
         logging.config.dictConfig(log_config)

+    # We add a log record factory that runs all messages through the
+    # LoggingContextFilter so that we get the context *at the time we log*
+    # rather than when we write to a handler. This can be done in config using
+    # filter options, but care must be taken when using e.g. MemoryHandler to
+    # buffer writes.
+
+    log_filter = LoggingContextFilter(request="")
+    old_factory = logging.getLogRecordFactory()
+
+    def factory(*args, **kwargs):
+        record = old_factory(*args, **kwargs)
+        log_filter.filter(record)
+        return record
+
+    logging.setLogRecordFactory(factory)
+
     # Route Twisted's native logging through to the standard library logging
     # system.
     observer = STDLibLogObserver()
--
cgit 1.5.1
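To make the final change concrete, here is a rough standalone sketch of the record-factory approach (not part of the patch): a replacement log record factory stamps every record as it is created, so handlers (including a buffering MemoryHandler) no longer need `filters: [context]` configured on them. `RequestFilter` and the fixed "dummy-request" value are illustrative stand-ins for Synapse's `LoggingContextFilter`.

    # Standalone sketch, not part of the patch. RequestFilter and the fixed
    # "dummy-request" value stand in for synapse.logging.context.LoggingContextFilter.
    import logging


    class RequestFilter(logging.Filter):
        """Adds a `request` attribute to every record it sees."""

        def filter(self, record: logging.LogRecord) -> bool:
            # A real implementation would read the active request/logging context.
            record.request = "dummy-request"
            return True


    log_filter = RequestFilter()
    old_factory = logging.getLogRecordFactory()


    def factory(*args, **kwargs):
        # Run every record through the filter as it is *created* (i.e. at log
        # time), instead of relying on each handler having `filters: [context]`.
        record = old_factory(*args, **kwargs)
        log_filter.filter(record)
        return record


    logging.setLogRecordFactory(factory)

    logging.basicConfig(format="%(asctime)s - %(name)s - %(request)s - %(message)s")
    logging.getLogger("demo").warning("this record carries a request attribute")

The trade-off, as the commit message explains, is that the context is captured when the record is created rather than when a handler eventually writes it, which is what you want when writes are buffered.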