summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erikj@matrix.org>2023-08-23 16:24:30 +0100
committerGitHub <noreply@github.com>2023-08-23 16:24:30 +0100
commit18279631e9555bd9032b993074e62c7af886d9cd (patch)
tree1f8209b821216965468ef727e9e791c70221a2e6
parentSwitch `devenv` dependency in the nix development environment to the latest r... (diff)
downloadsynapse-18279631e9555bd9032b993074e62c7af886d9cd.tar.xz
Fix rare deadlock when using read/write locks (#16169) github/release-v1.91.0 release-v1.91.0
-rw-r--r--changelog.d/16169.bugfix1
-rw-r--r--synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres37
-rw-r--r--synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres71
3 files changed, 72 insertions, 37 deletions
diff --git a/changelog.d/16169.bugfix b/changelog.d/16169.bugfix
new file mode 100644
index 0000000000..ed8830692f
--- /dev/null
+++ b/changelog.d/16169.bugfix
@@ -0,0 +1 @@
+Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0.
diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
deleted file mode 100644
index 401c42e18a..0000000000
--- a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- To avoid the possibility of a deadlock, lock the
--- `worker_read_write_locks_mode` table so that we serialize inserts/deletes
--- for a specific lock name/key.
-
-CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$
-BEGIN
-    -- `PERFORM` is a `SELECT` which discards the rows.
-    PERFORM * FROM worker_read_write_locks_mode
-        WHERE
-            lock_name = OLD.lock_name
-            AND lock_key = OLD.lock_key
-        FOR UPDATE;
-
-    RETURN OLD;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks;
-CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE delete_read_write_lock_parent_before();
diff --git a/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres
new file mode 100644
index 0000000000..0eb459c0b9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres
@@ -0,0 +1,71 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Remove a previous attempt to avoid deadlocks
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks;
+DROP FUNCTION IF EXISTS delete_read_write_lock_parent_before;
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+    mode_row_token TEXT;
+BEGIN
+    -- Only update the token in `_mode` if its our token. This prevents
+    -- deadlocks.
+    --
+    -- We shove the token into `mode_row_token`, as otherwise postgres complains
+    -- we're not using the returned data.
+    SELECT token INTO mode_row_token FROM worker_read_write_locks_mode
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+            AND token = OLD.token
+        FOR UPDATE;
+
+    IF NOT FOUND THEN
+        RETURN NEW;
+    END IF;
+
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        LIMIT 1 FOR UPDATE SKIP LOCKED;
+
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();