summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2019-10-01 10:17:24 +0100
committerErik Johnston <erik@matrix.org>2019-10-01 10:17:24 +0100
commit1d349fb159f4e5e6c4c593a83571d0da83f67101 (patch)
treef0a2bd387b5f45fd37bdda1abdf878a92636d3be
parentDrop unused tables (#6115) (diff)
parentNewsfile (diff)
downloadsynapse-1d349fb159f4e5e6c4c593a83571d0da83f67101.tar.xz
Merge branch 'erikj/fixup_devices_last_seen_query' of github.com:matrix-org/synapse into develop
-rw-r--r--CHANGES.md4
-rw-r--r--changelog.d/6117.misc1
-rw-r--r--changelog.d/6135.bugfix1
-rw-r--r--docs/sample_config.yaml2
-rw-r--r--synapse/config/server.py2
-rw-r--r--synapse/storage/client_ips.py46
-rw-r--r--synapse/storage/engines/postgres.py7
-rw-r--r--synapse/storage/engines/sqlite.py8
8 files changed, 60 insertions, 11 deletions
diff --git a/CHANGES.md b/CHANGES.md
index addc4c4b56..0a0d0b3439 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,7 +2,7 @@ Synapse 1.4.0rc1 (2019-09-26)
 =============================
 
 Note that this release includes significant changes around 3pid
-verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst##upgrading-to-v140).
+verification. Administrators are reminded to review the [upgrade notes](UPGRADE.rst#upgrading-to-v140).
 
 Features
 --------
@@ -48,7 +48,7 @@ Features
 - Let synctl accept a directory of config files. ([\#5904](https://github.com/matrix-org/synapse/issues/5904))
 - Increase max display name size to 256. ([\#5906](https://github.com/matrix-org/synapse/issues/5906))
 - Add admin API endpoint for getting whether or not a user is a server administrator. ([\#5914](https://github.com/matrix-org/synapse/issues/5914))
-- Redact events in the database that have been redacted for a month. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
+- Redact events in the database that have been redacted for a week. ([\#5934](https://github.com/matrix-org/synapse/issues/5934))
 - New prometheus metrics:
   - `synapse_federation_known_servers`: represents the total number of servers your server knows about (i.e. is in rooms with), including itself. Enable by setting `metrics_flags.known_servers` to True in the configuration.([\#5981](https://github.com/matrix-org/synapse/issues/5981))
   - `synapse_build_info`: exposes the Python version, OS version, and Synapse version of the running server. ([\#6005](https://github.com/matrix-org/synapse/issues/6005))
diff --git a/changelog.d/6117.misc b/changelog.d/6117.misc
new file mode 100644
index 0000000000..f8bdb58f41
--- /dev/null
+++ b/changelog.d/6117.misc
@@ -0,0 +1 @@
+Fix up sample config entry for `redaction_retention_period` option.
diff --git a/changelog.d/6135.bugfix b/changelog.d/6135.bugfix
new file mode 100644
index 0000000000..5f9f010cb1
--- /dev/null
+++ b/changelog.d/6135.bugfix
@@ -0,0 +1 @@
+Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 254e1b17b4..43893399ad 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -314,7 +314,7 @@ listeners:
 #
 # Defaults to `7d`. Set to `null` to disable.
 #
-redaction_retention_period: 7d
+#redaction_retention_period: 28d
 
 # How long to track users' last seen time and IPs in the database.
 #
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 5ad7ee911d..536ee7f29c 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -742,7 +742,7 @@ class ServerConfig(Config):
         #
         # Defaults to `7d`. Set to `null` to disable.
         #
-        redaction_retention_period: 7d
+        #redaction_retention_period: 28d
 
         # How long to track users' last seen time and IPs in the database.
         #
diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 539584288d..bb135166ce 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -463,14 +463,46 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
         last_device_id = progress.get("last_device_id", "")
 
         def _devices_last_seen_update_txn(txn):
+            # This consists of two queries:
+            #
+            #   1. The sub-query searches for the next N devices and joins
+            #      against user_ips to find the max last_seen associated with
+            #      that device.
+            #   2. The outer query then joins again against user_ips on
+            #      user/device/last_seen. This *should* hopefully only
+            #      return one row, but if it does return more than one then
+            #      we'll just end up updating the same device row multiple
+            #      times, which is fine.
+
+            if self.database_engine.supports_tuple_comparison:
+                where_clause = "(user_id, device_id) > (?, ?)"
+                where_args = [last_user_id, last_device_id]
+            else:
+                # We explicitly do a `user_id >= ? AND (...)` here to ensure
+                # that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
+                # makes it hard for query optimiser to tell that it can use the
+                # index on user_id
+                where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
+                where_args = [last_user_id, last_user_id, last_device_id]
+
             sql = """
-                SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices
-                INNER JOIN user_ips AS u USING (user_id, device_id)
-                WHERE user_id > ? OR (user_id = ? AND device_id > ?)
-                ORDER BY user_id ASC, device_id ASC
-                LIMIT ?
-            """
-            txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size))
+                SELECT
+                    last_seen, ip, user_agent, user_id, device_id
+                FROM (
+                    SELECT
+                        user_id, device_id, MAX(u.last_seen) AS last_seen
+                    FROM devices
+                    INNER JOIN user_ips AS u USING (user_id, device_id)
+                    WHERE %(where_clause)s
+                    GROUP BY user_id, device_id
+                    ORDER BY user_id ASC, device_id ASC
+                    LIMIT ?
+                ) c
+                INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
+            """ % {
+                "where_clause": where_clause
+            }
+            txn.execute(sql, where_args + [batch_size])
 
             rows = txn.fetchall()
             if not rows:
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 289b6bc281..601617b21e 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -72,6 +72,13 @@ class PostgresEngine(object):
         """
         return True
 
+    @property
+    def supports_tuple_comparison(self):
+        """
+        Do we support comparing tuples, i.e. `(a, b) > (c, d)`?
+        """
+        return True
+
     def is_deadlock(self, error):
         if isinstance(error, self.module.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index e9b9caa49a..ac92109366 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -38,6 +38,14 @@ class Sqlite3Engine(object):
         """
         return self.module.sqlite_version_info >= (3, 24, 0)
 
+    @property
+    def supports_tuple_comparison(self):
+        """
+        Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires
+        SQLite 3.15+.
+        """
+        return self.module.sqlite_version_info >= (3, 15, 0)
+
     def check_database(self, txn):
         pass