summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2020-05-23 01:20:10 +0100
committerGitHub <noreply@github.com>2020-05-23 01:20:10 +0100
commitd14c4d6b6d02e8c55a6647ce0f0b3b44356d6173 (patch)
treefeb4f5e004543b85dd5096bd85f07e9ec9e8a356
parentOptimise some references to hs.config (#7546) (diff)
downloadsynapse-d14c4d6b6d02e8c55a6647ce0f0b3b44356d6173.tar.xz
Simplify reap_monthly_active_users (#7558)
we can use `make_in_list_sql_clause` rather than doing our own half-baked
equivalent, which has the benefit of working just fine with empty lists.

(This has quite a lot of tests, so I think it's pretty safe)
-rw-r--r--changelog.d/7558.misc1
-rw-r--r--synapse/storage/data_stores/main/monthly_active_users.py100
2 files changed, 42 insertions, 59 deletions
diff --git a/changelog.d/7558.misc b/changelog.d/7558.misc
new file mode 100644
index 0000000000..f8f4111136
--- /dev/null
+++ b/changelog.d/7558.misc
@@ -0,0 +1 @@
+Simplify `reap_monthly_active_users`.
diff --git a/synapse/storage/data_stores/main/monthly_active_users.py b/synapse/storage/data_stores/main/monthly_active_users.py
index a42c52f323..1310d39069 100644
--- a/synapse/storage/data_stores/main/monthly_active_users.py
+++ b/synapse/storage/data_stores/main/monthly_active_users.py
@@ -17,7 +17,7 @@ import logging
 from twisted.internet import defer
 
 from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import Database
+from synapse.storage.database import Database, make_in_list_sql_clause
 from synapse.util.caches.descriptors import cached
 
 logger = logging.getLogger(__name__)
@@ -187,75 +187,57 @@ class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
             """
 
             thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
-            query_args = [thirty_days_ago]
-            base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"
-
-            # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
-            # when len(reserved_users) == 0. Works fine on sqlite.
-            if len(reserved_users) > 0:
-                # questionmarks is a hack to overcome sqlite not supporting
-                # tuples in 'WHERE IN %s'
-                question_marks = ",".join("?" * len(reserved_users))
-
-                query_args.extend(reserved_users)
-                sql = base_sql + " AND user_id NOT IN ({})".format(question_marks)
-            else:
-                sql = base_sql
 
-            txn.execute(sql, query_args)
+            in_clause, in_clause_args = make_in_list_sql_clause(
+                self.database_engine, "user_id", reserved_users
+            )
+
+            txn.execute(
+                "DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
+                % (in_clause,),
+                [thirty_days_ago] + in_clause_args,
+            )
 
             if self._limit_usage_by_mau:
                 # If MAU user count still exceeds the MAU threshold, then delete on
                 # a least recently active basis.
                 # Note it is not possible to write this query using OFFSET due to
                 # incompatibilities in how sqlite and postgres support the feature.
-                # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
-                # While Postgres does not require 'LIMIT', but also does not support
+                # Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
+                # while Postgres does not require 'LIMIT', but also does not support
                 # negative LIMIT values. So there is no way to write it that both can
                 # support
-                if len(reserved_users) == 0:
-                    sql = """
-                        DELETE FROM monthly_active_users
-                        WHERE user_id NOT IN (
-                            SELECT user_id FROM monthly_active_users
-                            ORDER BY timestamp DESC
-                            LIMIT ?
-                        )
-                        """
-                    txn.execute(sql, ((self._max_mau_value),))
-                # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
-                # when len(reserved_users) == 0. Works fine on sqlite.
-                else:
-                    # Must be >= 0 for postgres
-                    num_of_non_reserved_users_to_remove = max(
-                        self._max_mau_value - len(reserved_users), 0
-                    )
 
-                    # It is important to filter reserved users twice to guard
-                    # against the case where the reserved user is present in the
-                    # SELECT, meaning that a legitmate mau is deleted.
-                    sql = """
-                        DELETE FROM monthly_active_users
-                        WHERE user_id NOT IN (
-                            SELECT user_id FROM monthly_active_users
-                            WHERE user_id NOT IN ({})
-                            ORDER BY timestamp DESC
-                            LIMIT ?
-                        )
-                        AND user_id NOT IN ({})
-                    """.format(
-                        question_marks, question_marks
+                # Limit must be >= 0 for postgres
+                num_of_non_reserved_users_to_remove = max(
+                    self._max_mau_value - len(reserved_users), 0
+                )
+
+                # It is important to filter reserved users twice to guard
+                # against the case where the reserved user is present in the
+                # SELECT, meaning that a legitimate mau is deleted.
+                sql = """
+                    DELETE FROM monthly_active_users
+                    WHERE user_id NOT IN (
+                        SELECT user_id FROM monthly_active_users
+                        WHERE NOT %s
+                        ORDER BY timestamp DESC
+                        LIMIT ?
                     )
-
-                    query_args = [
-                        *reserved_users,
-                        num_of_non_reserved_users_to_remove,
-                        *reserved_users,
-                    ]
-
-                    txn.execute(sql, query_args)
-
-            # It seems poor to invalidate the whole cache, Postgres supports
+                    AND NOT %s
+                """ % (
+                    in_clause,
+                    in_clause,
+                )
+
+                query_args = (
+                    in_clause_args
+                    + [num_of_non_reserved_users_to_remove]
+                    + in_clause_args
+                )
+                txn.execute(sql, query_args)
+
+            # It seems poor to invalidate the whole cache. Postgres supports
             # 'Returning' which would allow me to invalidate only the
             # specific users, but sqlite has no way to do this and instead
             # I would need to SELECT and the DELETE which without locking