diff options
author | Erik Johnston <erikj@matrix.org> | 2023-08-30 14:18:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-30 14:18:42 +0100 |
commit | a2e0d4cd6024462f0067c56f83c2fe5b67da2109 (patch) | |
tree | 72d3d05c778b11ec08b3533df51a5d33bf5122c8 | |
parent | Merge branch 'master' into develop (diff) | |
download | synapse-a2e0d4cd6024462f0067c56f83c2fe5b67da2109.tar.xz |
Fix rare bug that broke looping calls (#16210)
* Fix rare bug that broke looping calls We can't interact with the reactor from the main thread via looping call. Introduced in v1.90.0 / #15791. * Newsfile
-rw-r--r-- | changelog.d/16210.bugfix | 1 | ||||
-rw-r--r-- | synapse/storage/databases/main/lock.py | 36 | ||||
-rw-r--r-- | tests/storage/databases/main/test_lock.py | 2 |
3 files changed, 25 insertions, 14 deletions
diff --git a/changelog.d/16210.bugfix b/changelog.d/16210.bugfix new file mode 100644 index 0000000000..39c35a1fe1 --- /dev/null +++ b/changelog.d/16210.bugfix @@ -0,0 +1 @@ +Fix rare bug that broke looping calls, which could lead to e.g. linearly increasing memory usage. Introduced in v1.90.0. diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py index 54d40e7a3a..5a01ec2137 100644 --- a/synapse/storage/databases/main/lock.py +++ b/synapse/storage/databases/main/lock.py @@ -17,7 +17,7 @@ from types import TracebackType from typing import TYPE_CHECKING, Collection, Optional, Set, Tuple, Type from weakref import WeakValueDictionary -from twisted.internet.interfaces import IReactorCore +from twisted.internet.task import LoopingCall from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore @@ -26,6 +26,7 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) +from synapse.types import ISynapseReactor from synapse.util import Clock from synapse.util.stringutils import random_string @@ -358,7 +359,7 @@ class Lock: def __init__( self, - reactor: IReactorCore, + reactor: ISynapseReactor, clock: Clock, store: LockStore, read_write: bool, @@ -377,19 +378,25 @@ class Lock: self._table = "worker_read_write_locks" if read_write else "worker_locks" - self._looping_call = clock.looping_call( + # We might be called from a non-main thread, so we defer setting up the + # looping call. + self._looping_call: Optional[LoopingCall] = None + reactor.callFromThread(self._setup_looping_call) + + self._dropped = False + + def _setup_looping_call(self) -> None: + self._looping_call = self._clock.looping_call( self._renew, _RENEWAL_INTERVAL_MS, - store, - clock, - read_write, - lock_name, - lock_key, - token, + self._store, + self._clock, + self._read_write, + self._lock_name, + self._lock_key, + self._token, ) - self._dropped = False - @staticmethod @wrap_as_background_process("Lock._renew") async def _renew( @@ -459,7 +466,7 @@ class Lock: if self._dropped: return - if self._looping_call.running: + if self._looping_call and self._looping_call.running: self._looping_call.stop() await self._store.db_pool.simple_delete( @@ -486,8 +493,9 @@ class Lock: # We should not be dropped without the lock being released (unless # we're shutting down), but if we are then let's at least stop # renewing the lock. - if self._looping_call.running: - self._looping_call.stop() + if self._looping_call and self._looping_call.running: + # We might be called from a non-main thread. + self._reactor.callFromThread(self._looping_call.stop) if self._reactor.running: logger.error( diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py index f541f1d6be..650b4941ba 100644 --- a/tests/storage/databases/main/test_lock.py +++ b/tests/storage/databases/main/test_lock.py @@ -132,6 +132,7 @@ class LockTestCase(unittest.HomeserverTestCase): # We simulate the process getting stuck by cancelling the looping call # that keeps the lock active. + assert lock._looping_call lock._looping_call.stop() # Wait for the lock to timeout. @@ -403,6 +404,7 @@ class ReadWriteLockTestCase(unittest.HomeserverTestCase): # We simulate the process getting stuck by cancelling the looping call # that keeps the lock active. + assert lock._looping_call lock._looping_call.stop() # Wait for the lock to timeout. |