summary refs log tree commit diff
path: root/synapse/storage/schema
diff options
context:
space:
mode:
author Richard van der Hoff <1389908+richvdh@users.noreply.github.com> 2022-06-15 12:29:42 +0100
committer GitHub <noreply@github.com> 2022-06-15 12:29:42 +0100
commit 75fb10ee45950a175ee286b36fb5a46f123d7db5 (patch)
tree 6e8283206657ac86a16cd19f1ddc1f3bf61fd301 /synapse/storage/schema
parent Fix typechecks against twisted trunk (#13061) (diff)
download synapse-75fb10ee45950a175ee286b36fb5a46f123d7db5.tar.xz
Clean up schema for `event_edges` (#12893)
* Remove redundant references to `event_edges.room_id`

We don't need to care about the room_id here, because we are already checking
the event id.

* Clean up the event_edges table

We make a number of changes to `event_edges`:

 * We give the `room_id` and `is_state` columns defaults (null and false
   respectively) so that we can stop populating them.
 * We drop any rows that have `is_state` set true - they should no longer
   exist.
 * We drop any rows that do not exist in `events` - these should not exist
   either.
 * We drop the old unique constraint on all the columns, which wasn't much use.
 * We create a new unique index on `(event_id, prev_event_id)`.
 * We add a foreign key constraint to `events`.

These happen rather differently depending on whether we are on Postgres or
SQLite. For SQLite, we just rebuild the whole table, copying only the rows we
want to keep. For Postgres, we try to do things in the background as much as
possible.

* Stop populating `event_edges.room_id` and `is_state`

We can just rely on the defaults.
Diffstat (limited to 'synapse/storage/schema')
-rw-r--r-- synapse/storage/schema/__init__.py 12
-rw-r--r-- synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.postgres 43
-rw-r--r-- synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.sqlite 47
3 files changed, 97 insertions, 5 deletions
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 5843fae605..dc237e3032 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 71  # remember to update the list below when updating
+SCHEMA_VERSION = 72  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -71,14 +71,16 @@ Changes in SCHEMA_VERSION = 70:
 Changes in SCHEMA_VERSION = 71:
     - event_edges.room_id is no longer read from.
     - Tables related to groups are no longer accessed.
+
+Changes in SCHEMA_VERSION = 72:
+    - event_edges.(room_id, is_state) are no longer written to.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    # We now assume that `device_lists_changes_in_room` has been filled out for
-    # recent device_list_updates.
-    # ... and that `application_services_state.last_txn` is not used.
-    69
+    # We no longer maintain `event_edges.room_id`, so synapses with SCHEMA_VERSION < 71
+    # will break.
+    71
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.postgres b/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.postgres
new file mode 100644
index 0000000000..f32f445858
--- /dev/null
+++ b/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.postgres
@@ -0,0 +1,43 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We're going to stop populating event_edges.room_id and event_edges.is_state,
+-- which means we now need to give them defaults.
+
+-- We also drop the existing unique constraint which spans all four columns. Frankly
+-- it's not doing much, and there are other indexes on event_id and prev_event_id.
+-- Later on we introduce a proper unique constraint on (event_id, prev_event_id).
+--
+-- We also add a foreign key constraint (which will be enforced for new rows), but
+-- don't yet validate it for existing rows (since that's slow, and we haven't yet
+-- checked that all the rows are valid)
+
+ALTER TABLE event_edges
+   ALTER room_id DROP NOT NULL,  -- room_id becomes nullable so writers can stop supplying it
+   ALTER is_state SET DEFAULT FALSE,  -- is_state falls back to FALSE when writers omit it
+   DROP CONSTRAINT IF EXISTS event_edges_event_id_prev_event_id_room_id_is_state_key,  -- old four-column unique constraint
+   ADD CONSTRAINT event_edges_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id) NOT VALID;  -- NOT VALID: applies to new rows only; existing rows are not checked here
+
+-- In the background, we drop any rows with is_state=True. These may have been
+-- added a long time ago, but they are no longer used.
+--
+-- We also drop rows that do not correspond to entries in `events`, and finally
+-- validate the foreign key.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7101, 'event_edges_drop_invalid_rows', '{}');  -- registered with an empty progress_json
+
+-- We'll then create a new unique index on (event_id, prev_event_id).
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7101, 'event_edges_replace_index', '{}', 'event_edges_drop_invalid_rows');  -- depends_on names the update above; presumably this one waits for it to finish (confirm against the background updater)
diff --git a/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.sqlite b/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.sqlite
new file mode 100644
index 0000000000..0bb86edd2a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/71/01rebuild_event_edges.sql.sqlite
@@ -0,0 +1,47 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We're going to stop populating event_edges.room_id and event_edges.is_state,
+-- which means we now need to give them defaults.
+--
+-- We also take the opportunity to:
+--   - drop any rows with is_state=True (these were populated a long time ago, but
+--     are no longer used.)
+--   - drop any rows which do not correspond to entries in `events`
+--   - tighten the unique index so that it applies just to (event_id, prev_event_id)
+--   - drop the "ev_edges_id" index, which is redundant to the above.
+--   - add a foreign key constraint from event_id to `events`
+
+CREATE TABLE new_event_edges (  -- replacement table; renamed to event_edges once populated
+  event_id TEXT NOT NULL,
+  prev_event_id TEXT NOT NULL,
+  room_id TEXT NULL,  -- nullable: this column is no longer going to be populated
+  is_state BOOL NOT NULL DEFAULT 0,  -- defaults to false: this column is no longer going to be populated
+  FOREIGN KEY(event_id) REFERENCES events(event_id)  -- new constraint tying each edge to a row in events
+);
+
+INSERT INTO new_event_edges  -- no column list: relies on the SELECT order matching the table definition
+    SELECT ee.event_id, ee.prev_event_id, ee.room_id, ee.is_state
+    FROM event_edges ee JOIN events ev USING (event_id)  -- inner join: edges with no matching row in events are not copied
+    WHERE NOT ee.is_state;  -- rows with is_state set are not copied
+
+DROP TABLE event_edges;  -- discard the old table; the rows worth keeping were copied into new_event_edges
+
+ALTER TABLE new_event_edges RENAME TO event_edges;  -- the rebuilt table takes over the original name
+
+CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx  -- uniqueness now covers just the (event_id, prev_event_id) pair
+  ON event_edges (event_id, prev_event_id);
+
+CREATE INDEX ev_edges_prev_id ON event_edges (prev_event_id);  -- lookup index on prev_event_id for the rebuilt table