diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index 7270ef09da..c89b4f7919 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -25,6 +25,7 @@ from synapse.storage.database import (
LoggingDatabaseConnection,
LoggingTransaction,
)
+from synapse.storage.engines import PostgresEngine
from synapse.util import Clock
from synapse.util.stringutils import random_string
@@ -68,12 +69,20 @@ class LockStore(SQLBaseStore):
self._reactor = hs.get_reactor()
self._instance_name = hs.get_instance_id()
- # A map from `(lock_name, lock_key)` to the token of any locks that we
- # think we currently hold.
- self._live_tokens: WeakValueDictionary[
+ # A map from `(lock_name, lock_key)` to lock that we think we
+ # currently hold.
+ self._live_lock_tokens: WeakValueDictionary[
Tuple[str, str], Lock
] = WeakValueDictionary()
+ # A map from `(lock_name, lock_key, token)` to read/write lock that we
+ # think we currently hold. For a given lock_name/lock_key, there can be
+ # multiple read locks at a time but only one write lock (no mixing read
+ # and write locks at the same time).
+ self._live_read_write_lock_tokens: WeakValueDictionary[
+ Tuple[str, str, str], Lock
+ ] = WeakValueDictionary()
+
# When we shut down we want to remove the locks. Technically this can
# lead to a race, as we may drop the lock while we are still processing.
# However, a) it should be a small window, b) the lock is best effort
@@ -91,11 +100,13 @@ class LockStore(SQLBaseStore):
"""Called when the server is shutting down"""
logger.info("Dropping held locks due to shutdown")
- # We need to take a copy of the tokens dict as dropping the locks will
- # cause the dictionary to change.
- locks = dict(self._live_tokens)
+ # We need to take a copy of the locks as dropping the locks will cause
+ # the dictionary to change.
+ locks = list(self._live_lock_tokens.values()) + list(
+ self._live_read_write_lock_tokens.values()
+ )
- for lock in locks.values():
+ for lock in locks:
await lock.release()
logger.info("Dropped locks due to shutdown")
@@ -122,7 +133,7 @@ class LockStore(SQLBaseStore):
"""
# Check if this process has taken out a lock and if it's still valid.
- lock = self._live_tokens.get((lock_name, lock_key))
+ lock = self._live_lock_tokens.get((lock_name, lock_key))
if lock and await lock.is_still_valid():
return None
@@ -176,61 +187,111 @@ class LockStore(SQLBaseStore):
self._reactor,
self._clock,
self,
+ read_write=False,
lock_name=lock_name,
lock_key=lock_key,
token=token,
)
- self._live_tokens[(lock_name, lock_key)] = lock
+ self._live_lock_tokens[(lock_name, lock_key)] = lock
return lock
- async def _is_lock_still_valid(
- self, lock_name: str, lock_key: str, token: str
- ) -> bool:
- """Checks whether this instance still holds the lock."""
- last_renewed_ts = await self.db_pool.simple_select_one_onecol(
- table="worker_locks",
- keyvalues={
- "lock_name": lock_name,
- "lock_key": lock_key,
- "token": token,
- },
- retcol="last_renewed_ts",
- allow_none=True,
- desc="is_lock_still_valid",
- )
- return (
- last_renewed_ts is not None
- and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
- )
+ async def try_acquire_read_write_lock(
+ self,
+ lock_name: str,
+ lock_key: str,
+ write: bool,
+ ) -> Optional["Lock"]:
+ """Try to acquire a lock for the given name/key. Will return an async
+ context manager if the lock is successfully acquired, which *must* be
+ used (otherwise the lock will leak).
+ """
- async def _renew_lock(self, lock_name: str, lock_key: str, token: str) -> None:
- """Attempt to renew the lock if we still hold it."""
- await self.db_pool.simple_update(
- table="worker_locks",
- keyvalues={
- "lock_name": lock_name,
- "lock_key": lock_key,
- "token": token,
- },
- updatevalues={"last_renewed_ts": self._clock.time_msec()},
- desc="renew_lock",
- )
+ now = self._clock.time_msec()
+ token = random_string(6)
- async def _drop_lock(self, lock_name: str, lock_key: str, token: str) -> None:
- """Attempt to drop the lock, if we still hold it"""
- await self.db_pool.simple_delete(
- table="worker_locks",
- keyvalues={
- "lock_name": lock_name,
- "lock_key": lock_key,
- "token": token,
- },
- desc="drop_lock",
+ def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None:
+ # We attempt to acquire the lock by inserting into
+ # `worker_read_write_locks` and seeing if that fails any
+ # constraints. If it doesn't then we have acquired the lock,
+ # otherwise we haven't.
+ #
+ # Before that though we clear the table of any stale locks.
+
+ delete_sql = """
+ DELETE FROM worker_read_write_locks
+ WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
+ """
+
+ insert_sql = """
+ INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
+ VALUES (?, ?, ?, ?, ?, ?)
+ """
+
+ if isinstance(self.database_engine, PostgresEngine):
+ # For Postgres we can send these queries at the same time.
+ txn.execute(
+ delete_sql + ";" + insert_sql,
+ (
+ # DELETE args
+ now - _LOCK_TIMEOUT_MS,
+ lock_name,
+ lock_key,
+ # UPSERT args
+ lock_name,
+ lock_key,
+ write,
+ self._instance_name,
+ token,
+ now,
+ ),
+ )
+ else:
+ # For SQLite these need to be two queries.
+ txn.execute(
+ delete_sql,
+ (
+ now - _LOCK_TIMEOUT_MS,
+ lock_name,
+ lock_key,
+ ),
+ )
+ txn.execute(
+ insert_sql,
+ (
+ lock_name,
+ lock_key,
+ write,
+ self._instance_name,
+ token,
+ now,
+ ),
+ )
+
+ return
+
+ try:
+ await self.db_pool.runInteraction(
+ "try_acquire_read_write_lock",
+ _try_acquire_read_write_lock_txn,
+ )
+ except self.database_engine.module.IntegrityError:
+ return None
+
+ lock = Lock(
+ self._reactor,
+ self._clock,
+ self,
+ read_write=True,
+ lock_name=lock_name,
+ lock_key=lock_key,
+ token=token,
)
- self._live_tokens.pop((lock_name, lock_key), None)
+ self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock
+
+ return lock
class Lock:
@@ -259,6 +320,7 @@ class Lock:
reactor: IReactorCore,
clock: Clock,
store: LockStore,
+ read_write: bool,
lock_name: str,
lock_key: str,
token: str,
@@ -266,13 +328,23 @@ class Lock:
self._reactor = reactor
self._clock = clock
self._store = store
+ self._read_write = read_write
self._lock_name = lock_name
self._lock_key = lock_key
self._token = token
+ self._table = "worker_read_write_locks" if read_write else "worker_locks"
+
self._looping_call = clock.looping_call(
- self._renew, _RENEWAL_INTERVAL_MS, store, lock_name, lock_key, token
+ self._renew,
+ _RENEWAL_INTERVAL_MS,
+ store,
+ clock,
+ read_write,
+ lock_name,
+ lock_key,
+ token,
)
self._dropped = False
@@ -281,6 +353,8 @@ class Lock:
@wrap_as_background_process("Lock._renew")
async def _renew(
store: LockStore,
+ clock: Clock,
+ read_write: bool,
lock_name: str,
lock_key: str,
token: str,
@@ -291,12 +365,34 @@ class Lock:
don't end up with a reference to `self` in the reactor, which would stop
this from being cleaned up if we dropped the context manager.
"""
- await store._renew_lock(lock_name, lock_key, token)
+ table = "worker_read_write_locks" if read_write else "worker_locks"
+ await store.db_pool.simple_update(
+ table=table,
+ keyvalues={
+ "lock_name": lock_name,
+ "lock_key": lock_key,
+ "token": token,
+ },
+ updatevalues={"last_renewed_ts": clock.time_msec()},
+ desc="renew_lock",
+ )
async def is_still_valid(self) -> bool:
"""Check if the lock is still held by us"""
- return await self._store._is_lock_still_valid(
- self._lock_name, self._lock_key, self._token
+ last_renewed_ts = await self._store.db_pool.simple_select_one_onecol(
+ table=self._table,
+ keyvalues={
+ "lock_name": self._lock_name,
+ "lock_key": self._lock_key,
+ "token": self._token,
+ },
+ retcol="last_renewed_ts",
+ allow_none=True,
+ desc="is_lock_still_valid",
+ )
+ return (
+ last_renewed_ts is not None
+ and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
)
async def __aenter__(self) -> None:
@@ -325,7 +421,23 @@ class Lock:
if self._looping_call.running:
self._looping_call.stop()
- await self._store._drop_lock(self._lock_name, self._lock_key, self._token)
+ await self._store.db_pool.simple_delete(
+ table=self._table,
+ keyvalues={
+ "lock_name": self._lock_name,
+ "lock_key": self._lock_key,
+ "token": self._token,
+ },
+ desc="drop_lock",
+ )
+
+ if self._read_write:
+ self._store._live_read_write_lock_tokens.pop(
+ (self._lock_name, self._lock_key, self._token), None
+ )
+ else:
+ self._store._live_lock_tokens.pop((self._lock_name, self._lock_key), None)
+
self._dropped = True
def __del__(self) -> None:
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..e1a41be9c9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,152 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We implement read/write style locks by using two tables with mutual foreign
+-- key constraints. Note that this implementation is vulnerable to starving
+-- writers if read locks repeatedly get acquired.
+--
+-- The first table (`worker_read_write_locks_mode`) indicates that a given lock
+-- has either been acquired in read mode *or* write mode, but not both. This is
+-- enforced by the unique constraint. Each instance of a lock being acquired is
+-- associated with a random `token`.
+--
+-- The second table (`worker_read_write_locks`) tracks who has currently
+-- acquired a given lock. For a given lock_name/lock_key, there can be multiple
+-- read locks at a time but only one write lock (no mixing read and write locks
+-- at the same time).
+--
+-- The foreign key from the second to first table enforces that for any given
+-- lock the second table cannot have a mix of rows with read or write.
+--
+-- The foreign key from the first to second table enforces that we don't have a
+-- row for a lock in the first table if not in the second table.
+--
+--
+-- Furthermore, we add some triggers to automatically keep the first table up to
+-- date when inserting/deleting from the second table. This reduces the number
+-- of round trips needed to acquire and release locks, as those operations
+-- simply become an INSERT or DELETE. These triggers are added in a separate
+-- delta due to database specific syntax.
+
+
+-- A table to track whether a lock is currently acquired, and if so whether its
+-- in read or write mode.
+CREATE TABLE worker_read_write_locks_mode (
+ lock_name TEXT NOT NULL,
+ lock_key TEXT NOT NULL,
+ -- Whether this lock is in read (false) or write (true) mode
+ write_lock BOOLEAN NOT NULL,
+ -- A token that has currently acquired the lock. We need this so that we can
+ -- add a foreign constraint from this table to `worker_read_write_locks`.
+ token TEXT NOT NULL
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE worker_read_write_locks (
+ lock_name TEXT NOT NULL,
+ lock_key TEXT NOT NULL,
+ -- We write the instance name to ease manual debugging, we don't ever read
+ -- from it.
+ -- Note: instance names aren't guarenteed to be unique.
+ instance_name TEXT NOT NULL,
+ -- Whether the process has taken out a "read" or a "write" lock.
+ write_lock BOOLEAN NOT NULL,
+ -- A random string generated each time an instance takes out a lock. Used by
+ -- the instance to tell whether the lock is still held by it (e.g. in the
+ -- case where the process stalls for a long time the lock may time out and
+ -- be taken out by another instance, at which point the original instance
+ -- can tell it no longer holds the lock as the tokens no longer match).
+ token TEXT NOT NULL,
+ last_renewed_ts BIGINT NOT NULL,
+
+ -- This constraint ensures that a given lock has only been acquired in read
+ -- xor write mode, but not both.
+ FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
+
+
+-- Add a foreign key constraint to ensure that if a lock is in
+-- `worker_read_write_locks_mode` then there must be a corresponding row in
+-- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+-- `worker_read_write_locks_mode` when the lock is not currently acquired).
+--
+-- We only add to PostgreSQL as SQLite does not support adding constraints
+-- after table creation, and so doesn't support "circular" foreign key
+-- constraints.
+ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
+ FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
+
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+ INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+ VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+ ON CONFLICT (lock_name, lock_key)
+ DO NOTHING;
+ RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+ FOR EACH ROW
+ EXECUTE PROCEDURE upsert_read_write_lock_parent();
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+ new_token TEXT;
+BEGIN
+ SELECT token INTO new_token FROM worker_read_write_locks
+ WHERE
+ lock_name = OLD.lock_name
+ AND lock_key = OLD.lock_key;
+
+ IF NOT FOUND THEN
+ DELETE FROM worker_read_write_locks_mode
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+ ELSE
+ UPDATE worker_read_write_locks_mode
+ SET token = new_token
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+ END IF;
+
+ RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+ FOR EACH ROW
+ EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..be2dfbbb8a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,119 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- c.f. the postgres version for context. The tables and constraints are the
+-- same, however they need to be defined slightly differently to work around how
+-- each database handles circular foreign key references.
+
+
+
+-- A table to track whether a lock is currently acquired, and if so whether its
+-- in read or write mode.
+CREATE TABLE worker_read_write_locks_mode (
+ lock_name TEXT NOT NULL,
+ lock_key TEXT NOT NULL,
+ -- Whether this lock is in read (false) or write (true) mode
+ write_lock BOOLEAN NOT NULL,
+ -- A token that has currently acquired the lock. We need this so that we can
+ -- add a foreign constraint from this table to `worker_read_write_locks`.
+ token TEXT NOT NULL,
+ -- Add a foreign key constraint to ensure that if a lock is in
+ -- `worker_read_write_locks_mode` then there must be a corresponding row in
+ -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+ -- `worker_read_write_locks_mode` when the lock is not currently acquired).
+ FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE worker_read_write_locks (
+ lock_name TEXT NOT NULL,
+ lock_key TEXT NOT NULL,
+ -- We write the instance name to ease manual debugging, we don't ever read
+ -- from it.
+ -- Note: instance names aren't guarenteed to be unique.
+ instance_name TEXT NOT NULL,
+ -- Whether the process has taken out a "read" or a "write" lock.
+ write_lock BOOLEAN NOT NULL,
+ -- A random string generated each time an instance takes out a lock. Used by
+ -- the instance to tell whether the lock is still held by it (e.g. in the
+ -- case where the process stalls for a long time the lock may time out and
+ -- be taken out by another instance, at which point the original instance
+ -- can tell it no longer holds the lock as the tokens no longer match).
+ token TEXT NOT NULL,
+ last_renewed_ts BIGINT NOT NULL,
+
+ -- This constraint ensures that a given lock has only been acquired in read
+ -- xor write mode, but not both.
+ FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
+
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
+BEFORE INSERT ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+ -- First ensure that `worker_read_write_locks_mode` doesn't have stale
+ -- entries in it, as on SQLite we don't have the foreign key constraint to
+ -- enforce this.
+ DELETE FROM worker_read_write_locks_mode
+ WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+ AND NOT EXISTS (
+ SELECT 1 FROM worker_read_write_locks
+ WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+ );
+
+ INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+ VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+ ON CONFLICT (lock_name, lock_key)
+ DO NOTHING;
+END;
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
+AFTER DELETE ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+ DELETE FROM worker_read_write_locks_mode
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+ AND NOT EXISTS (
+ SELECT 1 FROM worker_read_write_locks
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+ );
+
+ UPDATE worker_read_write_locks_mode
+ SET token = (
+ SELECT token FROM worker_read_write_locks
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+ )
+ WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+END;
|