author    Andrew Morgan <andrew@amorgan.xyz>  2020-03-18 17:22:33 +0000
committer Andrew Morgan <andrew@amorgan.xyz>  2020-03-18 17:22:33 +0000
commit    b8344cabcd52fdeb4cf9f18c9c6adb1e5df0ee84 (patch)
tree      7ed3171bcbc8ae713a984744bc256f3cb4b7d86b
parent    Fix startup error when http proxy is defined. (#6421) (diff)
parent    Merge pull request #6358 from matrix-org/babolivier/message_retention (diff)
download  synapse-b8344cabcd52fdeb4cf9f18c9c6adb1e5df0ee84.tar.xz
Merge pull request #6358 from matrix-org/babolivier/message_retention
* commit 'd31f69afa':
  clean up buildkite output
  Don't restrict the tests to v1 rooms
  Fix worker mode
  Fix 3PID invite exchange
  Lint again
  Lint again
  Lint
  Don't apply retention policy based filtering on state events
  Implement per-room message retention policies
-rwxr-xr-x  .buildkite/merge_base_branch.sh            |  4
-rw-r--r--  docs/sample_config.yaml                    | 63
-rw-r--r--  synapse/config/server.py                   | 67
-rw-r--r--  synapse/handlers/pagination.py             | 10
-rw-r--r--  synapse/storage/data_stores/main/room.py   |  4
-rw-r--r--  tests/rest/client/test_retention.py        |  6
6 files changed, 141 insertions, 13 deletions
diff --git a/.buildkite/merge_base_branch.sh b/.buildkite/merge_base_branch.sh

index eb7219a56d..361440fd1a 100755
--- a/.buildkite/merge_base_branch.sh
+++ b/.buildkite/merge_base_branch.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-set -ex
+set -e
 
 if [[ "$BUILDKITE_BRANCH" =~ ^(develop|master|dinsic|shhs|release-.*)$ ]]; then
     echo "Not merging forward, as this is a release branch"
@@ -18,6 +18,8 @@ else
     GITBASE=$BUILDKITE_PULL_REQUEST_BASE_BRANCH
 fi
 
+echo "--- merge_base_branch $GITBASE"
+
 # Show what we are before
 git --no-pager show -s
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index c370d07e54..f8d3efa241 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -403,6 +403,69 @@ retention:
 #
 #user_ips_max_age: 14d
 
+# Message retention policy at the server level.
+#
+# Room admins and mods can define a retention period for their rooms using the
+# 'm.room.retention' state event, and server admins can cap this period by setting
+# the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options.
+#
+# If this feature is enabled, Synapse will regularly look for and purge events
+# which are older than the room's maximum retention period. Synapse will also
+# filter events received over federation so that events that should have been
+# purged are ignored and not stored again.
+#
+retention:
+  # The message retention policies feature is disabled by default. Uncomment the
+  # following line to enable it.
+  #
+  #enabled: true
+
+  # Default retention policy. If set, Synapse will apply it to rooms that lack the
+  # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't
+  # matter much because Synapse doesn't take it into account yet.
+  #
+  #default_policy:
+  #  min_lifetime: 1d
+  #  max_lifetime: 1y
+
+  # Retention policy limits. If set, a user won't be able to send a
+  # 'm.room.retention' event which features a 'min_lifetime' or a 'max_lifetime'
+  # that's not within this range. This is especially useful in closed federations,
+  # in which server admins can make sure every federating server applies the same
+  # rules.
+  #
+  #allowed_lifetime_min: 1d
+  #allowed_lifetime_max: 1y
+
+  # Server admins can define the settings of the background jobs purging the
+  # events whose lifetime has expired under the 'purge_jobs' section.
+  #
+  # If no configuration is provided, a single job will be set up to delete expired
+  # events in every room daily.
+  #
+  # Each job's configuration defines which range of message lifetimes the job
+  # takes care of. For example, if 'shortest_max_lifetime' is '2d' and
+  # 'longest_max_lifetime' is '3d', the job will handle purging expired events in
+  # rooms whose state defines a 'max_lifetime' that's both higher than 2 days and
+  # lower than or equal to 3 days. Both the minimum and the maximum value of a
+  # range are optional, e.g. a job with no 'shortest_max_lifetime' and a
+  # 'longest_max_lifetime' of '3d' will handle every room with a retention policy
+  # whose 'max_lifetime' is lower than or equal to three days.
+  #
+  # The rationale for this per-job configuration is that some rooms might have a
+  # retention policy with a low 'max_lifetime', where history needs to be purged
+  # of outdated messages on a very frequent basis (e.g. every 5 minutes), and
+  # performing that purge with a job that iterates over every room it knows of
+  # would be quite heavy on the server.
+  #
+  #purge_jobs:
+  #  - shortest_max_lifetime: 1d
+  #    longest_max_lifetime: 3d
+  #    interval: 5m
+  #  - shortest_max_lifetime: 3d
+  #    longest_max_lifetime: 1y
+  #    interval: 24h
+
 
 ## TLS ##
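To make the range semantics above concrete, here is a short, hypothetical Python sketch (not part of this commit; the helper name and the millisecond constant are invented for illustration). It encodes the rule spelled out in the comments: a job covers a room when the room's 'max_lifetime' is strictly greater than 'shortest_max_lifetime' and lower than or equal to 'longest_max_lifetime', with either bound optional:

    from typing import Optional

    DAY_MS = 24 * 60 * 60 * 1000  # one day in milliseconds

    def job_handles_room(
        shortest_max_lifetime: Optional[int],  # exclusive lower bound, in ms
        longest_max_lifetime: Optional[int],   # inclusive upper bound, in ms
        room_max_lifetime: int,                # the room's 'max_lifetime', in ms
    ) -> bool:
        """Return True if a purge job with this range covers the room."""
        if shortest_max_lifetime is not None and room_max_lifetime <= shortest_max_lifetime:
            return False
        if longest_max_lifetime is not None and room_max_lifetime > longest_max_lifetime:
            return False
        return True

    # With the two jobs from the sample config: a 2.5-day policy falls to the
    # first job, a 30-day policy to the second; an open bound matches anything.
    assert job_handles_room(1 * DAY_MS, 3 * DAY_MS, round(2.5 * DAY_MS))
    assert job_handles_room(3 * DAY_MS, 365 * DAY_MS, 30 * DAY_MS)
    assert job_handles_room(None, 3 * DAY_MS, 1 * DAY_MS)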
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 6bf6f6f9b5..b12b208760 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -19,7 +19,7 @@ import logging
 import os.path
 import re
 from textwrap import indent
-from typing import List
+from typing import Dict, List, Optional
 
 import attr
 import yaml
@@ -326,7 +326,7 @@ class ServerConfig(Config):
                     " greater than 'allowed_lifetime_max'"
                 )
 
-            self.retention_purge_jobs = []
+            self.retention_purge_jobs = []  # type: List[Dict[str, Optional[int]]]
 
             for purge_job_config in retention_config.get("purge_jobs", []):
                 interval_config = purge_job_config.get("interval")
@@ -966,6 +966,69 @@ class ServerConfig(Config):
         # Defaults to `28d`. Set to `null` to disable clearing out of old rows.
         #
         #user_ips_max_age: 14d
+
+        # Message retention policy at the server level.
+        #
+        # Room admins and mods can define a retention period for their rooms using the
+        # 'm.room.retention' state event, and server admins can cap this period by setting
+        # the 'allowed_lifetime_min' and 'allowed_lifetime_max' config options.
+        #
+        # If this feature is enabled, Synapse will regularly look for and purge events
+        # which are older than the room's maximum retention period. Synapse will also
+        # filter events received over federation so that events that should have been
+        # purged are ignored and not stored again.
+        #
+        retention:
+          # The message retention policies feature is disabled by default. Uncomment the
+          # following line to enable it.
+          #
+          #enabled: true
+
+          # Default retention policy. If set, Synapse will apply it to rooms that lack the
+          # 'm.room.retention' state event. Currently, the value of 'min_lifetime' doesn't
+          # matter much because Synapse doesn't take it into account yet.
+          #
+          #default_policy:
+          #  min_lifetime: 1d
+          #  max_lifetime: 1y
+
+          # Retention policy limits. If set, a user won't be able to send a
+          # 'm.room.retention' event which features a 'min_lifetime' or a 'max_lifetime'
+          # that's not within this range. This is especially useful in closed federations,
+          # in which server admins can make sure every federating server applies the same
+          # rules.
+          #
+          #allowed_lifetime_min: 1d
+          #allowed_lifetime_max: 1y
+
+          # Server admins can define the settings of the background jobs purging the
+          # events whose lifetime has expired under the 'purge_jobs' section.
+          #
+          # If no configuration is provided, a single job will be set up to delete expired
+          # events in every room daily.
+          #
+          # Each job's configuration defines which range of message lifetimes the job
+          # takes care of. For example, if 'shortest_max_lifetime' is '2d' and
+          # 'longest_max_lifetime' is '3d', the job will handle purging expired events in
+          # rooms whose state defines a 'max_lifetime' that's both higher than 2 days and
+          # lower than or equal to 3 days. Both the minimum and the maximum value of a
+          # range are optional, e.g. a job with no 'shortest_max_lifetime' and a
+          # 'longest_max_lifetime' of '3d' will handle every room with a retention policy
+          # whose 'max_lifetime' is lower than or equal to three days.
+          #
+          # The rationale for this per-job configuration is that some rooms might have a
+          # retention policy with a low 'max_lifetime', where history needs to be purged
+          # of outdated messages on a very frequent basis (e.g. every 5 minutes), and
+          # performing that purge with a job that iterates over every room it knows of
+          # would be quite heavy on the server.
+          #
+          #purge_jobs:
+          #  - shortest_max_lifetime: 1d
+          #    longest_max_lifetime: 3d
+          #    interval: 5m
+          #  - shortest_max_lifetime: 3d
+          #    longest_max_lifetime: 1y
+          #    interval: 24h
         """
             % locals()
         )
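The new type annotation above pins 'retention_purge_jobs' to List[Dict[str, Optional[int]]]. The following self-contained sketch shows how 'purge_jobs' entries could be normalised into that shape; the 'parse_duration' helper is a hypothetical stand-in for Synapse's own duration parsing, not the code from this commit:

    import re
    from typing import Dict, List, Optional

    _UNIT_MS = {"s": 1000, "m": 60 * 1000, "h": 3600 * 1000,
                "d": 86400 * 1000, "y": 365 * 86400 * 1000}

    def parse_duration(value: str) -> int:
        """Parse a duration such as '1d' or '24h' into milliseconds."""
        match = re.fullmatch(r"(\d+)([smhdy])", value)
        if match is None:
            raise ValueError("invalid duration: %r" % (value,))
        return int(match.group(1)) * _UNIT_MS[match.group(2)]

    def parse_purge_jobs(retention_config: dict) -> List[Dict[str, Optional[int]]]:
        purge_jobs = []  # type: List[Dict[str, Optional[int]]]
        for job in retention_config.get("purge_jobs", []):
            parsed = {}  # type: Dict[str, Optional[int]]
            for key in ("shortest_max_lifetime", "longest_max_lifetime"):
                # A missing bound is kept as None, i.e. the range is open-ended.
                parsed[key] = parse_duration(job[key]) if key in job else None
            purge_jobs.append(parsed)
        return purge_jobs

    # Example: the second job from the sample config above.
    jobs = parse_purge_jobs(
        {"purge_jobs": [{"shortest_max_lifetime": "3d", "longest_max_lifetime": "1y"}]}
    )
    assert jobs == [{"shortest_max_lifetime": 3 * 86400 * 1000,
                     "longest_max_lifetime": 365 * 86400 * 1000}]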
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 7d5decba94..8514ddc600 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -154,12 +154,10 @@ class PaginationHandler(object):
             # Figure out what token we should start purging at.
             ts = self.clock.time_msec() - max_lifetime
 
-            stream_ordering = (yield self.store.find_first_stream_ordering_after_ts(ts))
+            stream_ordering = yield self.store.find_first_stream_ordering_after_ts(ts)
 
-            r = (
-                yield self.store.get_room_event_after_stream_ordering(
-                    room_id, stream_ordering
-                )
+            r = yield self.store.get_room_event_after_stream_ordering(
+                room_id, stream_ordering,
             )
 
             if not r:
                 logger.warning(
@@ -185,7 +183,7 @@ class PaginationHandler(object):
             # the background so that it's not blocking any other operation apart from
             # other purges in the same room.
             run_as_background_process(
-                "_purge_history", self._purge_history, purge_id, room_id, token, True
+                "_purge_history", self._purge_history, purge_id, room_id, token, True,
             )
 
     def start_purge_history(self, room_id, token, delete_local_events=False):
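For orientation, the purge flow in the hunk above works backwards from wall-clock time: it subtracts the room's 'max_lifetime' from the current time, then asks the store for the first stream position after that timestamp and purges up to it. A minimal, hypothetical sketch of the cutoff arithmetic (the constant and function names are invented, not from this commit):

    DAY_MS = 24 * 60 * 60 * 1000

    def purge_cutoff_ts(now_ms: int, max_lifetime_ms: int) -> int:
        """Timestamp before which events have outlived the room's policy."""
        return now_ms - max_lifetime_ms

    # A room with a 7-day policy, checked at time T, purges anything older
    # than T minus 7 days; the stream-ordering lookup then turns that
    # timestamp into a purge token.
    T = 1_584_000_000_000  # an arbitrary time in ms since the epoch
    assert purge_cutoff_ts(T, 7 * DAY_MS) == T - 7 * DAY_MS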
diff --git a/synapse/storage/data_stores/main/room.py b/synapse/storage/data_stores/main/room.py
index edc8b10ab1..2d19b1967e 100644
--- a/synapse/storage/data_stores/main/room.py
+++ b/synapse/storage/data_stores/main/room.py
@@ -390,7 +390,7 @@ class RoomStore(RoomWorkerStore, SearchStore):
         self.config = hs.config
 
         self.register_background_update_handler(
-            "insert_room_retention", self._background_insert_retention
+            "insert_room_retention", self._background_insert_retention,
         )
 
     @defer.inlineCallbacks
@@ -453,7 +453,7 @@ class RoomStore(RoomWorkerStore, SearchStore):
             return False
 
         end = yield self.runInteraction(
-            "insert_room_retention", _background_insert_retention_txn
+            "insert_room_retention", _background_insert_retention_txn,
        )
 
         if end:
diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py
index 87055bba87..9e549d8a91 100644
--- a/tests/rest/client/test_retention.py
+++ b/tests/rest/client/test_retention.py
@@ -205,12 +205,14 @@ class RetentionNoDefaultPolicyTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor, clock):
         config = self.default_config()
         config["default_room_version"] = "1"
-        config["retention"] = {"enabled": True}
+        config["retention"] = {
+            "enabled": True,
+        }
 
         mock_federation_client = Mock(spec=["backfill"])
 
         self.hs = self.setup_test_homeserver(
-            config=config, federation_client=mock_federation_client
+            config=config, federation_client=mock_federation_client,
         )
 
         return self.hs
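The test above enables retention with the smallest possible config dict. A hypothetical variant that also exercises a default policy (the millisecond values are illustrative, not taken from this commit) might look like:

    ONE_DAY_MS = 24 * 60 * 60 * 1000

    config["retention"] = {
        "enabled": True,
        # Applied to rooms that have no 'm.room.retention' state event.
        "default_policy": {
            "min_lifetime": ONE_DAY_MS,
            "max_lifetime": ONE_DAY_MS * 35,
        },
    }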