Fix `HomeServer`s leaking during `trial` test runs (#15630)
This change fixes two memory leaks during `trial` test runs.
Garbage collection is disabled during each test case and a gen-0 GC is
run at the end of each test. However, when the gen-0 GC is run, the
`TestCase` object usually still holds references to the `HomeServer`
used during the test. As a result, the `HomeServer` gets promoted to
gen-1 and then never garbage collected.
Fix this by periodically running full GCs.
Additionally, fix `HomeServer`s leaking after tests that touch inbound
federation due to `FederationRateLimiter`s adding themselves to a global
set, by turning the set into a `WeakSet`.
Resolves #15622.
Signed-off-by: Sean Quah <seanq@matrix.org>
3 files changed, 15 insertions, 3 deletions
diff --git a/changelog.d/15630.misc b/changelog.d/15630.misc
new file mode 100644
index 0000000000..a30304bfd6
--- /dev/null
+++ b/changelog.d/15630.misc
@@ -0,0 +1 @@
+Fix two memory leaks in `trial` test runs.
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index f262bf95a0..2ad55ac13e 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -25,10 +25,12 @@ from typing import (
Iterator,
List,
Mapping,
+ MutableSet,
Optional,
Set,
Tuple,
)
+from weakref import WeakSet
from prometheus_client.core import Counter
from typing_extensions import ContextManager
@@ -86,7 +88,9 @@ queue_wait_timer = Histogram(
)
-_rate_limiter_instances: Set["FederationRateLimiter"] = set()
+# This must be a `WeakSet`, otherwise we indirectly hold on to entire `HomeServer`s
+# during trial test runs and leak a lot of memory.
+_rate_limiter_instances: MutableSet["FederationRateLimiter"] = WeakSet()
# Protects the _rate_limiter_instances set from concurrent access
_rate_limiter_instances_lock = threading.Lock()
diff --git a/tests/unittest.py b/tests/unittest.py
index b6fdf69635..623c5a75a2 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -229,13 +229,20 @@ class TestCase(unittest.TestCase):
#
# The easiest way to do this would be to do a full GC after each test
# run, but that is very expensive. Instead, we disable GC (above) for
- # the duration of the test so that we only need to run a gen-0 GC, which
- # is a lot quicker.
+ # the duration of the test and only run a gen-0 GC, which is a lot
+ # quicker. This doesn't clean up everything, since the TestCase
+ # instance still holds references to objects created during the test,
+ # such as HomeServers, so we do a full GC every so often.
@around(self)
def tearDown(orig: Callable[[], R]) -> R:
ret = orig()
gc.collect(0)
+ # Run a full GC every 50 gen-0 GCs.
+ gen0_stats = gc.get_stats()[0]
+ gen0_collections = gen0_stats["collections"]
+ if gen0_collections % 50 == 0:
+ gc.collect()
gc.enable()
set_current_context(SENTINEL_CONTEXT)
|