summary refs log tree commit diff
path: root/synapse/storage/databases/main
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2023-07-05 17:25:00 +0100
committerGitHub <noreply@github.com>2023-07-05 17:25:00 +0100
commit39d131b016673bbd4d3c28095c8838b8c6dc0953 (patch)
treee0f4ff47dd5a09ba7c058b8eb90a6d99e30912bd /synapse/storage/databases/main
parentAdd tracing to media `/upload` endpoint (#15850) (diff)
downloadsynapse-39d131b016673bbd4d3c28095c8838b8c6dc0953.tar.xz
Add basic read/write lock (#15782)
Diffstat (limited to 'synapse/storage/databases/main')
-rw-r--r--synapse/storage/databases/main/lock.py224
1 files changed, 168 insertions, 56 deletions
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index 7270ef09da..c89b4f7919 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -25,6 +25,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
+from synapse.storage.engines import PostgresEngine
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -68,12 +69,20 @@ class LockStore(SQLBaseStore):
         self._reactor = hs.get_reactor()
         self._instance_name = hs.get_instance_id()
 
-        # A map from `(lock_name, lock_key)` to the token of any locks that we
-        # think we currently hold.
-        self._live_tokens: WeakValueDictionary[
+        # A map from `(lock_name, lock_key)` to lock that we think we
+        # currently hold.
+        self._live_lock_tokens: WeakValueDictionary[
             Tuple[str, str], Lock
         ] = WeakValueDictionary()
 
+        # A map from `(lock_name, lock_key, token)` to read/write lock that we
+        # think we currently hold. For a given lock_name/lock_key, there can be
+        # multiple read locks at a time but only one write lock (no mixing read
+        # and write locks at the same time).
+        self._live_read_write_lock_tokens: WeakValueDictionary[
+            Tuple[str, str, str], Lock
+        ] = WeakValueDictionary()
+
         # When we shut down we want to remove the locks. Technically this can
         # lead to a race, as we may drop the lock while we are still processing.
         # However, a) it should be a small window, b) the lock is best effort
@@ -91,11 +100,13 @@ class LockStore(SQLBaseStore):
         """Called when the server is shutting down"""
         logger.info("Dropping held locks due to shutdown")
 
-        # We need to take a copy of the tokens dict as dropping the locks will
-        # cause the dictionary to change.
-        locks = dict(self._live_tokens)
+        # We need to take a copy of the locks as dropping the locks will cause
+        # the dictionary to change.
+        locks = list(self._live_lock_tokens.values()) + list(
+            self._live_read_write_lock_tokens.values()
+        )
 
-        for lock in locks.values():
+        for lock in locks:
             await lock.release()
 
         logger.info("Dropped locks due to shutdown")
@@ -122,7 +133,7 @@ class LockStore(SQLBaseStore):
         """
 
         # Check if this process has taken out a lock and if it's still valid.
-        lock = self._live_tokens.get((lock_name, lock_key))
+        lock = self._live_lock_tokens.get((lock_name, lock_key))
         if lock and await lock.is_still_valid():
             return None
 
@@ -176,61 +187,111 @@ class LockStore(SQLBaseStore):
             self._reactor,
             self._clock,
             self,
+            read_write=False,
             lock_name=lock_name,
             lock_key=lock_key,
             token=token,
         )
 
-        self._live_tokens[(lock_name, lock_key)] = lock
+        self._live_lock_tokens[(lock_name, lock_key)] = lock
 
         return lock
 
-    async def _is_lock_still_valid(
-        self, lock_name: str, lock_key: str, token: str
-    ) -> bool:
-        """Checks whether this instance still holds the lock."""
-        last_renewed_ts = await self.db_pool.simple_select_one_onecol(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            retcol="last_renewed_ts",
-            allow_none=True,
-            desc="is_lock_still_valid",
-        )
-        return (
-            last_renewed_ts is not None
-            and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
-        )
+    async def try_acquire_read_write_lock(
+        self,
+        lock_name: str,
+        lock_key: str,
+        write: bool,
+    ) -> Optional["Lock"]:
+        """Try to acquire a lock for the given name/key. Will return an async
+        context manager if the lock is successfully acquired, which *must* be
+        used (otherwise the lock will leak).
+        """
 
-    async def _renew_lock(self, lock_name: str, lock_key: str, token: str) -> None:
-        """Attempt to renew the lock if we still hold it."""
-        await self.db_pool.simple_update(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            updatevalues={"last_renewed_ts": self._clock.time_msec()},
-            desc="renew_lock",
-        )
+        now = self._clock.time_msec()
+        token = random_string(6)
 
-    async def _drop_lock(self, lock_name: str, lock_key: str, token: str) -> None:
-        """Attempt to drop the lock, if we still hold it"""
-        await self.db_pool.simple_delete(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            desc="drop_lock",
+        def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None:
+            # We attempt to acquire the lock by inserting into
+            # `worker_read_write_locks` and seeing if that fails any
+            # constraints. If it doesn't then we have acquired the lock,
+            # otherwise we haven't.
+            #
+            # Before that though we clear the table of any stale locks.
+
+            delete_sql = """
+                DELETE FROM worker_read_write_locks
+                    WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
+            """
+
+            insert_sql = """
+                INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """
+
+            if isinstance(self.database_engine, PostgresEngine):
+                # For Postgres we can send these queries at the same time.
+                txn.execute(
+                    delete_sql + ";" + insert_sql,
+                    (
+                        # DELETE args
+                        now - _LOCK_TIMEOUT_MS,
+                        lock_name,
+                        lock_key,
+                        # UPSERT args
+                        lock_name,
+                        lock_key,
+                        write,
+                        self._instance_name,
+                        token,
+                        now,
+                    ),
+                )
+            else:
+                # For SQLite these need to be two queries.
+                txn.execute(
+                    delete_sql,
+                    (
+                        now - _LOCK_TIMEOUT_MS,
+                        lock_name,
+                        lock_key,
+                    ),
+                )
+                txn.execute(
+                    insert_sql,
+                    (
+                        lock_name,
+                        lock_key,
+                        write,
+                        self._instance_name,
+                        token,
+                        now,
+                    ),
+                )
+
+            return
+
+        try:
+            await self.db_pool.runInteraction(
+                "try_acquire_read_write_lock",
+                _try_acquire_read_write_lock_txn,
+            )
+        except self.database_engine.module.IntegrityError:
+            return None
+
+        lock = Lock(
+            self._reactor,
+            self._clock,
+            self,
+            read_write=True,
+            lock_name=lock_name,
+            lock_key=lock_key,
+            token=token,
         )
 
-        self._live_tokens.pop((lock_name, lock_key), None)
+        self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock
+
+        return lock
 
 
 class Lock:
@@ -259,6 +320,7 @@ class Lock:
         reactor: IReactorCore,
         clock: Clock,
         store: LockStore,
+        read_write: bool,
         lock_name: str,
         lock_key: str,
         token: str,
@@ -266,13 +328,23 @@ class Lock:
         self._reactor = reactor
         self._clock = clock
         self._store = store
+        self._read_write = read_write
         self._lock_name = lock_name
         self._lock_key = lock_key
 
         self._token = token
 
+        self._table = "worker_read_write_locks" if read_write else "worker_locks"
+
         self._looping_call = clock.looping_call(
-            self._renew, _RENEWAL_INTERVAL_MS, store, lock_name, lock_key, token
+            self._renew,
+            _RENEWAL_INTERVAL_MS,
+            store,
+            clock,
+            read_write,
+            lock_name,
+            lock_key,
+            token,
         )
 
         self._dropped = False
@@ -281,6 +353,8 @@ class Lock:
     @wrap_as_background_process("Lock._renew")
     async def _renew(
         store: LockStore,
+        clock: Clock,
+        read_write: bool,
         lock_name: str,
         lock_key: str,
         token: str,
@@ -291,12 +365,34 @@ class Lock:
         don't end up with a reference to `self` in the reactor, which would stop
         this from being cleaned up if we dropped the context manager.
         """
-        await store._renew_lock(lock_name, lock_key, token)
+        table = "worker_read_write_locks" if read_write else "worker_locks"
+        await store.db_pool.simple_update(
+            table=table,
+            keyvalues={
+                "lock_name": lock_name,
+                "lock_key": lock_key,
+                "token": token,
+            },
+            updatevalues={"last_renewed_ts": clock.time_msec()},
+            desc="renew_lock",
+        )
 
     async def is_still_valid(self) -> bool:
         """Check if the lock is still held by us"""
-        return await self._store._is_lock_still_valid(
-            self._lock_name, self._lock_key, self._token
+        last_renewed_ts = await self._store.db_pool.simple_select_one_onecol(
+            table=self._table,
+            keyvalues={
+                "lock_name": self._lock_name,
+                "lock_key": self._lock_key,
+                "token": self._token,
+            },
+            retcol="last_renewed_ts",
+            allow_none=True,
+            desc="is_lock_still_valid",
+        )
+        return (
+            last_renewed_ts is not None
+            and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
         )
 
     async def __aenter__(self) -> None:
@@ -325,7 +421,23 @@ class Lock:
         if self._looping_call.running:
             self._looping_call.stop()
 
-        await self._store._drop_lock(self._lock_name, self._lock_key, self._token)
+        await self._store.db_pool.simple_delete(
+            table=self._table,
+            keyvalues={
+                "lock_name": self._lock_name,
+                "lock_key": self._lock_key,
+                "token": self._token,
+            },
+            desc="drop_lock",
+        )
+
+        if self._read_write:
+            self._store._live_read_write_lock_tokens.pop(
+                (self._lock_name, self._lock_key, self._token), None
+            )
+        else:
+            self._store._live_lock_tokens.pop((self._lock_name, self._lock_key), None)
+
         self._dropped = True
 
     def __del__(self) -> None: