From 420484a334a79b31e689bdcca2e57d9a23f7e3d4 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 24 Aug 2020 18:21:04 +0100 Subject: Allow capping a room's retention policy (#8104) --- synapse/config/server.py | 22 ++++++++++------ synapse/events/validator.py | 59 +++--------------------------------------- synapse/handlers/pagination.py | 36 +++++++++++++++++++++----- 3 files changed, 46 insertions(+), 71 deletions(-) (limited to 'synapse') diff --git a/synapse/config/server.py b/synapse/config/server.py index ed66f3eba1..526a90b26a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -961,11 +961,10 @@ class ServerConfig(Config): # min_lifetime: 1d # max_lifetime: 1y - # Retention policy limits. If set, a user won't be able to send a - # 'm.room.retention' event which features a 'min_lifetime' or a 'max_lifetime' - # that's not within this range. This is especially useful in closed federations, - # in which server admins can make sure every federating server applies the same - # rules. + # Retention policy limits. If set, and the state of a room contains a + # 'm.room.retention' event in its state which contains a 'min_lifetime' or a + # 'max_lifetime' that's out of these bounds, Synapse will cap the room's policy + # to these limits when running purge jobs. # #allowed_lifetime_min: 1d #allowed_lifetime_max: 1y @@ -991,12 +990,19 @@ class ServerConfig(Config): # (e.g. every 12h), but not want that purge to be performed by a job that's # iterating over every room it knows, which could be heavy on the server. # + # If any purge job is configured, it is strongly recommended to have at least + # a single job with neither 'shortest_max_lifetime' nor 'longest_max_lifetime' + # set, or one job without 'shortest_max_lifetime' and one job without + # 'longest_max_lifetime' set. Otherwise some rooms might be ignored, even if + # 'allowed_lifetime_min' and 'allowed_lifetime_max' are set, because capping a + # room's policy to these values is done after the policies are retrieved from + # Synapse's database (which is done using the range specified in a purge job's + # configuration). + # #purge_jobs: - # - shortest_max_lifetime: 1d - # longest_max_lifetime: 3d + # - longest_max_lifetime: 3d # interval: 12h # - shortest_max_lifetime: 3d - # longest_max_lifetime: 1y # interval: 1d # Inhibits the /requestToken endpoints from returning an error that might leak diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 588d222f36..5ce3874fba 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -74,15 +74,14 @@ class EventValidator(object): ) if event.type == EventTypes.Retention: - self._validate_retention(event, config) + self._validate_retention(event) - def _validate_retention(self, event, config): + def _validate_retention(self, event): """Checks that an event that defines the retention policy for a room respects the - boundaries imposed by the server's administrator. + format enforced by the spec. Args: event (FrozenEvent): The event to validate. - config (Config): The homeserver's configuration. """ min_lifetime = event.content.get("min_lifetime") max_lifetime = event.content.get("max_lifetime") @@ -95,32 +94,6 @@ class EventValidator(object): errcode=Codes.BAD_JSON, ) - if ( - config.retention_allowed_lifetime_min is not None - and min_lifetime < config.retention_allowed_lifetime_min - ): - raise SynapseError( - code=400, - msg=( - "'min_lifetime' can't be lower than the minimum allowed" - " value enforced by the server's administrator" - ), - errcode=Codes.BAD_JSON, - ) - - if ( - config.retention_allowed_lifetime_max is not None - and min_lifetime > config.retention_allowed_lifetime_max - ): - raise SynapseError( - code=400, - msg=( - "'min_lifetime' can't be greater than the maximum allowed" - " value enforced by the server's administrator" - ), - errcode=Codes.BAD_JSON, - ) - if max_lifetime is not None: if not isinstance(max_lifetime, int): raise SynapseError( @@ -129,32 +102,6 @@ class EventValidator(object): errcode=Codes.BAD_JSON, ) - if ( - config.retention_allowed_lifetime_min is not None - and max_lifetime < config.retention_allowed_lifetime_min - ): - raise SynapseError( - code=400, - msg=( - "'max_lifetime' can't be lower than the minimum allowed value" - " enforced by the server's administrator" - ), - errcode=Codes.BAD_JSON, - ) - - if ( - config.retention_allowed_lifetime_max is not None - and max_lifetime > config.retention_allowed_lifetime_max - ): - raise SynapseError( - code=400, - msg=( - "'max_lifetime' can't be greater than the maximum allowed" - " value enforced by the server's administrator" - ), - errcode=Codes.BAD_JSON, - ) - if ( min_lifetime is not None and max_lifetime is not None diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 487420bb5d..ac3418d69d 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -82,6 +82,9 @@ class PaginationHandler(object): self._retention_default_max_lifetime = hs.config.retention_default_max_lifetime + self._retention_allowed_lifetime_min = hs.config.retention_allowed_lifetime_min + self._retention_allowed_lifetime_max = hs.config.retention_allowed_lifetime_max + if hs.config.retention_enabled: # Run the purge jobs described in the configuration file. for job in hs.config.retention_purge_jobs: @@ -111,7 +114,7 @@ class PaginationHandler(object): the range to handle (inclusive). If None, it means that the range has no upper limit. """ - # We want the storage layer to to include rooms with no retention policy in its + # We want the storage layer to include rooms with no retention policy in its # return value only if a default retention policy is defined in the server's # configuration and that policy's 'max_lifetime' is either lower (or equal) than # max_ms or higher than min_ms (or both). @@ -152,13 +155,32 @@ class PaginationHandler(object): ) continue - max_lifetime = retention_policy["max_lifetime"] + # If max_lifetime is None, it means that the room has no retention policy. + # Given we only retrieve such rooms when there's a default retention policy + # defined in the server's configuration, we can safely assume that's the + # case and use it for this room. + max_lifetime = ( + retention_policy["max_lifetime"] or self._retention_default_max_lifetime + ) - if max_lifetime is None: - # If max_lifetime is None, it means that include_null equals True, - # therefore we can safely assume that there is a default policy defined - # in the server's configuration. - max_lifetime = self._retention_default_max_lifetime + # Cap the effective max_lifetime to be within the range allowed in the + # config. + # We do this in two steps: + # 1. Make sure it's higher or equal to the minimum allowed value, and if + # it's not replace it with that value. This is because the server + # operator can be required to not delete information before a given + # time, e.g. to comply with freedom of information laws. + # 2. Make sure the resulting value is lower or equal to the maximum allowed + # value, and if it's not replace it with that value. This is because the + # server operator can be required to delete any data after a specific + # amount of time. + if self._retention_allowed_lifetime_min is not None: + max_lifetime = max(self._retention_allowed_lifetime_min, max_lifetime) + + if self._retention_allowed_lifetime_max is not None: + max_lifetime = min(max_lifetime, self._retention_allowed_lifetime_max) + + logger.debug("[purge] max_lifetime for room %s: %s", room_id, max_lifetime) # Figure out what token we should start purging at. ts = self.clock.time_msec() - max_lifetime -- cgit 1.4.1