From 32a59a6495f8d463f82ae52283159359a9961c25 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 23 Nov 2023 12:35:37 +0000 Subject: Keep track of `user_ips` and `monthly_active_users` when delegating auth (#16672) * Describe `insert_client_ip` * Pull out client_ips and MAU tracking to BaseAuth * Define HAS_AUTHLIB once in tests sick of copypasting * Track ips and token usage when delegating auth * Test that we track MAU and user_ips * Don't track `__oidc_admin` --- synapse/storage/databases/main/client_ips.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'synapse/storage/databases/main/client_ips.py') diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index c006129625..d4b14aaebe 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -589,6 +589,27 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke device_id: Optional[str], now: Optional[int] = None, ) -> None: + """Record that `user_id` used `access_token` from this `ip` address. + + This method does two things. + + 1. It queues up a row to be upserted into the `client_ips` table. These happen + periodically; see _update_client_ips_batch. + 2. It immediately records this user as having taken action for the purposes of + MAU tracking. + + Any DB writes take place on the background tasks worker, falling back to the + main process. If we're not that worker, this method emits a replication payload + to run this logic on that worker. + + Two caveats to note: + + - We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the + DB with writes. + - Requests using the sliding-sync proxy's user agent are excluded, as its + requests are not directly driven by end-users. This is a hack and we're not + very proud of it. + """ # The sync proxy continuously triggers /sync even if the user is not # present so should be excluded from user_ips entries. if user_agent == "sync-v3-proxy-": -- cgit 1.5.1 From df366966b4f16d22330f1a3783a6e4bee8aa22a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Nov 2023 11:54:42 +0000 Subject: Speed up pruning of `user_ips` table (#16667) Silly query planner --- changelog.d/16667.misc | 1 + synapse/storage/databases/main/client_ips.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 10 deletions(-) create mode 100644 changelog.d/16667.misc (limited to 'synapse/storage/databases/main/client_ips.py') diff --git a/changelog.d/16667.misc b/changelog.d/16667.misc new file mode 100644 index 0000000000..51aeca9243 --- /dev/null +++ b/changelog.d/16667.misc @@ -0,0 +1 @@ +Reduce database load of pruning old `user_ips`. diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index d4b14aaebe..1df7731050 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -465,18 +465,15 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke # # This works by finding the max last_seen that is less than the given # time, but has no more than N rows before it, deleting all rows with - # a lesser last_seen time. (We COALESCE so that the sub-SELECT always - # returns exactly one row). + # a lesser last_seen time. (We use an `IN` clause to force postgres to + # use the index, otherwise it tends to do a seq scan). sql = """ DELETE FROM user_ips - WHERE last_seen <= ( - SELECT COALESCE(MAX(last_seen), -1) - FROM ( - SELECT last_seen FROM user_ips - WHERE last_seen <= ? - ORDER BY last_seen ASC - LIMIT 5000 - ) AS u + WHERE last_seen IN ( + SELECT last_seen FROM user_ips + WHERE last_seen <= ? + ORDER BY last_seen ASC + LIMIT 5000 ) """ -- cgit 1.5.1