From 2ade05dca3d6da67e35c3a8ccdd278221f2566ed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 14:16:10 +0100 Subject: Add last seen info to devices table. This allows us to purge old user_ips entries without having to preserve the latest last seen info for active devices. --- synapse/storage/client_ips.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'synapse/storage/client_ips.py') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 6db8c54077..4db2e7f481 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -354,6 +354,21 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): }, lock=False, ) + + # Technically an access token might not be associated with + # a device so we need to check. + if device_id: + self._simple_upsert_txn( + txn, + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + values={ + "user_agent": user_agent, + "last_seen": last_seen, + "ip": ip, + }, + lock=False, + ) except Exception as e: # Failed to upsert, log and continue logger.error("Failed to insert client IP %r: %r", entry, e) -- cgit 1.5.1 From ed80231ade20ce7881bb2026692fe3a6252f1c02 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 15:59:43 +0100 Subject: Add BG update to populate devices last seen info --- synapse/storage/client_ips.py | 52 ++++++++++++++++++++++ .../storage/schema/delta/56/devices_last_seen.sql | 3 ++ 2 files changed, 55 insertions(+) (limited to 'synapse/storage/client_ips.py') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 4db2e7f481..8839562269 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -85,6 +85,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique ) + # Update the last seen info in devices. + self.register_background_update_handler( + "devices_last_seen", self._devices_last_seen_update + ) + # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) self._batch_row_update = {} @@ -485,3 +490,50 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } for (access_token, ip), (user_agent, last_seen) in iteritems(results) ) + + @defer.inlineCallbacks + def _devices_last_seen_update(self, progress, batch_size): + """Background update to insert last seen info into devices table + """ + + last_user_id = progress.get("last_user_id", "") + last_device_id = progress.get("last_device_id", "") + + def _devices_last_seen_update_txn(txn): + sql = """ + SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE user_id > ? OR (user_id = ? AND device_id > ?) + ORDER BY user_id ASC, device_id ASC + LIMIT ? + """ + txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size)) + + rows = txn.fetchall() + if not rows: + return 0 + + sql = """ + UPDATE devices + SET last_seen = ?, ip = ?, user_agent = ? + WHERE user_id = ? AND device_id = ? + """ + txn.execute_batch(sql, rows) + + _, _, _, user_id, device_id = rows[-1] + self._background_update_progress_txn( + txn, + "devices_last_seen", + {"last_user_id": user_id, "last_device_id": device_id}, + ) + + return len(rows) + + updated = yield self.runInteraction( + "_devices_last_seen_update", _devices_last_seen_update_txn + ) + + if not updated: + yield self._end_background_update("devices_last_seen") + + return updated diff --git a/synapse/storage/schema/delta/56/devices_last_seen.sql b/synapse/storage/schema/delta/56/devices_last_seen.sql index 8818eeeb7e..dfa902d0ba 100644 --- a/synapse/storage/schema/delta/56/devices_last_seen.sql +++ b/synapse/storage/schema/delta/56/devices_last_seen.sql @@ -19,3 +19,6 @@ ALTER TABLE devices ADD COLUMN last_seen BIGINT; ALTER TABLE devices ADD COLUMN ip TEXT; ALTER TABLE devices ADD COLUMN user_agent TEXT; + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('devices_last_seen', '{}'); -- cgit 1.5.1 From 51d28272e20d799b2e35a8a14b3c1d9d5f555d10 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Sep 2019 16:00:18 +0100 Subject: Query devices table for last seen info. This is a) simpler than querying user_ips directly and b) means we can purge older entries from user_ips without losing the required info. The storage functions now no longer return the access_token, since it was unused. --- synapse/storage/client_ips.py | 57 ++++++---------------------------------- tests/storage/test_client_ips.py | 1 - 2 files changed, 8 insertions(+), 50 deletions(-) (limited to 'synapse/storage/client_ips.py') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 8839562269..a4e6d9dbe7 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -392,19 +392,14 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): keys giving the column names """ - res = yield self.runInteraction( - "get_last_client_ip_by_device", - self._get_last_client_ip_by_device_txn, - user_id, - device_id, - retcols=( - "user_id", - "access_token", - "ip", - "user_agent", - "device_id", - "last_seen", - ), + keyvalues = {"user_id": user_id} + if device_id: + keyvalues["device_id"] = device_id + + res = yield self._simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), ) ret = {(d["user_id"], d["device_id"]): d for d in res} @@ -423,42 +418,6 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): } return ret - @classmethod - def _get_last_client_ip_by_device_txn(cls, txn, user_id, device_id, retcols): - where_clauses = [] - bindings = [] - if device_id is None: - where_clauses.append("user_id = ?") - bindings.extend((user_id,)) - else: - where_clauses.append("(user_id = ? AND device_id = ?)") - bindings.extend((user_id, device_id)) - - if not where_clauses: - return [] - - inner_select = ( - "SELECT MAX(last_seen) mls, user_id, device_id FROM user_ips " - "WHERE %(where)s " - "GROUP BY user_id, device_id" - ) % {"where": " OR ".join(where_clauses)} - - sql = ( - "SELECT %(retcols)s FROM user_ips " - "JOIN (%(inner_select)s) ips ON" - " user_ips.last_seen = ips.mls AND" - " user_ips.user_id = ips.user_id AND" - " (user_ips.device_id = ips.device_id OR" - " (user_ips.device_id IS NULL AND ips.device_id IS NULL)" - " )" - ) % { - "retcols": ",".join("user_ips." + c for c in retcols), - "inner_select": inner_select, - } - - txn.execute(sql, bindings) - return cls.cursor_to_dict(txn) - @defer.inlineCallbacks def get_user_ip_and_agents(self, user): user_id = user.to_string() diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 09305c3bf1..6ac4654085 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -55,7 +55,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): { "user_id": user_id, "device_id": "device_id", - "access_token": "access_token", "ip": "ip", "user_agent": "user_agent", "last_seen": 12345678000, -- cgit 1.5.1 From 242017db8b7b57be28a019ecbba1619d75d54889 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 Sep 2019 15:20:40 +0100 Subject: Prune rows in user_ips older than configured period Defaults to pruning everything older than 28d. --- docs/sample_config.yaml | 6 +++++ synapse/config/server.py | 13 +++++++++ synapse/storage/client_ips.py | 62 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 8 deletions(-) (limited to 'synapse/storage/client_ips.py') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 61d9f09a99..cc6035c838 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -313,6 +313,12 @@ listeners: # redaction_retention_period: 7d +# How long to track users' last seen time and IPs in the database. +# +# Defaults to `28d`. Set to `null` to disable. +# +#user_ips_max_age: 14d + ## TLS ## diff --git a/synapse/config/server.py b/synapse/config/server.py index 7f8d315954..655e7487a4 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -172,6 +172,13 @@ class ServerConfig(Config): else: self.redaction_retention_period = None + # How long to keep entries in the `users_ips` table. + user_ips_max_age = config.get("user_ips_max_age", "28d") + if user_ips_max_age is not None: + self.user_ips_max_age = self.parse_duration(user_ips_max_age) + else: + self.user_ips_max_age = None + # Options to disable HS self.hs_disabled = config.get("hs_disabled", False) self.hs_disabled_message = config.get("hs_disabled_message", "") @@ -735,6 +742,12 @@ class ServerConfig(Config): # Defaults to `7d`. Set to `null` to disable. # redaction_retention_period: 7d + + # How long to track users' last seen time and IPs in the database. + # + # Defaults to `28d`. Set to `null` to disable. + # + #user_ips_max_age: 14d """ % locals() ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe7..176c812b1f 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -19,7 +19,7 @@ from six import iteritems from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.util.caches import CACHE_SIZE_FACTOR from . import background_updates @@ -42,6 +42,8 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): super(ClientIpStore, self).__init__(db_conn, hs) + self.user_ips_max_age = hs.config.user_ips_max_age + self.register_background_index_update( "user_ips_device_index", index_name="user_ips_device_id", @@ -100,6 +102,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): "before", "shutdown", self._update_client_ips_batch ) + if self.user_ips_max_age: + self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) + @defer.inlineCallbacks def _remove_user_ip_nonunique(self, progress, batch_size): def f(conn): @@ -319,20 +324,19 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._batch_row_update[key] = (user_agent, device_id, now) + @wrap_as_background_process("update_client_ips") def _update_client_ips_batch(self): # If the DB pool has already terminated, don't try updating if not self.hs.get_db_pool().running: return - def update(): - to_update = self._batch_row_update - self._batch_row_update = {} - return self.runInteraction( - "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update - ) + to_update = self._batch_row_update + self._batch_row_update = {} - return run_as_background_process("update_client_ips", update) + return self.runInteraction( + "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update + ) def _update_client_ips_batch_txn(self, txn, to_update): if "user_ips" in self._unsafe_to_upsert_tables or ( @@ -496,3 +500,45 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): yield self._end_background_update("devices_last_seen") return updated + + @wrap_as_background_process("prune_old_user_ips") + async def _prune_old_user_ips(self): + """Removes entries in user IPs older than the configured period. + """ + + if not self.user_ips_max_age: + # Nothing to do + return + + if not await self.has_completed_background_update("devices_last_seen"): + # Only start pruning if we have finished populating the devices + # last seen info. + return + + # We do a slightly funky SQL delete to ensure we don't try and delete + # too much at once (as the table may be very large from before we + # started pruning). + # + # This works by finding the max last_seen that is less than the given + # time, but has no more than N rows before it, deleting all rows with + # a lesser last_seen time. (We COALESCE so that the sub-SELECT always + # returns exactly one row). + sql = """ + DELETE FROM user_ips + WHERE last_seen <= ( + SELECT COALESCE(MAX(last_seen), -1) + FROM ( + SELECT last_seen FROM user_ips + WHERE last_seen <= ? + ORDER BY last_seen ASC + LIMIT 5000 + ) AS u + ) + """ + + timestamp = self.clock.time_msec() - self.user_ips_max_age + + def _prune_old_user_ips_txn(txn): + txn.execute(sql, (timestamp,)) + + await self.runInteraction("_prune_old_user_ips", _prune_old_user_ips_txn) -- cgit 1.5.1 From 50572db837f3e6a0869e9ec573e02d4af72548ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:00:23 +0100 Subject: Use if `is not None` Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/client_ips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/storage/client_ips.py') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index a4e6d9dbe7..8996689744 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -393,7 +393,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """ keyvalues = {"user_id": user_id} - if device_id: + if device_id is not None: keyvalues["device_id"] = device_id res = yield self._simple_select_list( -- cgit 1.5.1 From 39b50ad42a8cf784e627959e9652589338121ccd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Sep 2019 17:22:33 +0100 Subject: Review comments --- docs/sample_config.yaml | 2 +- synapse/config/server.py | 2 +- synapse/storage/background_updates.py | 5 +---- synapse/storage/client_ips.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'synapse/storage/client_ips.py') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index cc6035c838..7902d9ed6f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -315,7 +315,7 @@ redaction_retention_period: 7d # How long to track users' last seen time and IPs in the database. # -# Defaults to `28d`. Set to `null` to disable. +# Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d diff --git a/synapse/config/server.py b/synapse/config/server.py index 655e7487a4..f8b7b4bef9 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -745,7 +745,7 @@ class ServerConfig(Config): # How long to track users' last seen time and IPs in the database. # - # Defaults to `28d`. Set to `null` to disable. + # Defaults to `28d`. Set to `null` to disable clearing out of old rows. # #user_ips_max_age: 14d """ diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 3fc25cd828..30788137a8 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -148,11 +148,8 @@ class BackgroundUpdateStore(SQLBaseStore): return False - async def has_completed_background_update(self, update_name): + async def has_completed_background_update(self, update_name) -> bool: """Check if the given background update has finished running. - - Returns: - Deferred[bool] """ if self._all_done: diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 176c812b1f..a4d40dfa1e 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -506,7 +506,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): """Removes entries in user IPs older than the configured period. """ - if not self.user_ips_max_age: + if self.user_ips_max_age is None: # Nothing to do return -- cgit 1.5.1 From 9267741a5f7732d7d16f8445edc68bc68b730601 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 30 Sep 2019 11:58:36 +0100 Subject: Fix `devices_last_seen` background update. Fixes #6134. --- synapse/storage/client_ips.py | 46 +++++++++++++++++++++++++++++++------ synapse/storage/engines/postgres.py | 7 ++++++ synapse/storage/engines/sqlite.py | 8 +++++++ 3 files changed, 54 insertions(+), 7 deletions(-) (limited to 'synapse/storage/client_ips.py') diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 539584288d..bb135166ce 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -463,14 +463,46 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): last_device_id = progress.get("last_device_id", "") def _devices_last_seen_update_txn(txn): + # This consists of two queries: + # + # 1. The sub-query searches for the next N devices and joins + # against user_ips to find the max last_seen associated with + # that device. + # 2. The outer query then joins again against user_ips on + # user/device/last_seen. This *should* hopefully only + # return one row, but if it does return more than one then + # we'll just end up updating the same device row multiple + # times, which is fine. + + if self.database_engine.supports_tuple_comparison: + where_clause = "(user_id, device_id) > (?, ?)" + where_args = [last_user_id, last_device_id] + else: + # We explicitly do a `user_id >= ? AND (...)` here to ensure + # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)` + # makes it hard for query optimiser to tell that it can use the + # index on user_id + where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)" + where_args = [last_user_id, last_user_id, last_device_id] + sql = """ - SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices - INNER JOIN user_ips AS u USING (user_id, device_id) - WHERE user_id > ? OR (user_id = ? AND device_id > ?) - ORDER BY user_id ASC, device_id ASC - LIMIT ? - """ - txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size)) + SELECT + last_seen, ip, user_agent, user_id, device_id + FROM ( + SELECT + user_id, device_id, MAX(u.last_seen) AS last_seen + FROM devices + INNER JOIN user_ips AS u USING (user_id, device_id) + WHERE %(where_clause)s + GROUP BY user_id, device_id + ORDER BY user_id ASC, device_id ASC + LIMIT ? + ) c + INNER JOIN user_ips AS u USING (user_id, device_id, last_seen) + """ % { + "where_clause": where_clause + } + txn.execute(sql, where_args + [batch_size]) rows = txn.fetchall() if not rows: diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 289b6bc281..601617b21e 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -72,6 +72,13 @@ class PostgresEngine(object): """ return True + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? + """ + return True + def is_deadlock(self, error): if isinstance(error, self.module.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index e9b9caa49a..ac92109366 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -38,6 +38,14 @@ class Sqlite3Engine(object): """ return self.module.sqlite_version_info >= (3, 24, 0) + @property + def supports_tuple_comparison(self): + """ + Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires + SQLite 3.15+. + """ + return self.module.sqlite_version_info >= (3, 15, 0) + def check_database(self, txn): pass -- cgit 1.5.1