diff --git a/changelog.d/16174.bugfix b/changelog.d/16174.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16174.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 80190838b7..a4b05b72e7 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -155,6 +155,8 @@ LAST_ACTIVE_GRANULARITY = 60 * 1000
# How long to wait until a new /events or /sync request before assuming
# the client has gone.
SYNC_ONLINE_TIMEOUT = 30 * 1000
+# Busy status waits longer, but does eventually go offline.
+BUSY_ONLINE_TIMEOUT = 60 * 60 * 1000
# How long to wait before marking the user as idle. Compared against last active
IDLE_TIMER = 5 * 60 * 1000
@@ -2066,7 +2068,15 @@ def handle_timeout(
device_state.last_sync_ts, device_state.last_active_ts
)
- if now - sync_or_active > SYNC_ONLINE_TIMEOUT:
+ # Implementations aren't meant to timeout a device with a busy
+ # state, but it needs to timeout *eventually* or else the user
+ # will be stuck in that state.
+ online_timeout = (
+ BUSY_ONLINE_TIMEOUT
+ if device_state.state == PresenceState.BUSY
+ else SYNC_ONLINE_TIMEOUT
+ )
+ if now - sync_or_active > online_timeout:
# Mark the device as going offline.
offline_devices.append(device_id)
device_changed = True
@@ -2166,6 +2176,13 @@ def handle_update(
new_state = new_state.copy_and_replace(last_federation_update_ts=now)
federation_ping = True
+ if new_state.state == PresenceState.BUSY:
+ wheel_timer.insert(
+ now=now,
+ obj=user_id,
+ then=new_state.last_user_sync_ts + BUSY_ONLINE_TIMEOUT,
+ )
+
else:
wheel_timer.insert(
now=now,
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 914415740a..638787b029 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -26,6 +26,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.events.builder import EventBuilder
from synapse.federation.sender import FederationSender
from synapse.handlers.presence import (
+ BUSY_ONLINE_TIMEOUT,
EXTERNAL_PROCESS_EXPIRY,
FEDERATION_PING_INTERVAL,
FEDERATION_TIMEOUT,
@@ -913,6 +914,13 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
# If both devices have the same state, online should eventually idle.
# Otherwise, the state doesn't change.
(
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
PresenceState.ONLINE,
PresenceState.ONLINE,
PresenceState.ONLINE,
@@ -933,7 +941,29 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
PresenceState.OFFLINE,
PresenceState.OFFLINE,
),
- # If the second device has a "lower" state it should fallback to it.
+ # If the second device has a "lower" state it should fallback to it,
+ # except for "busy" which overrides.
+ (
+ PresenceState.BUSY,
+ PresenceState.ONLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.BUSY,
+ PresenceState.UNAVAILABLE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.BUSY,
+ PresenceState.OFFLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
(
PresenceState.ONLINE,
PresenceState.UNAVAILABLE,
@@ -957,6 +987,27 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
),
# If the second device has a "higher" state it should override.
(
+ PresenceState.ONLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.UNAVAILABLE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.OFFLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
PresenceState.UNAVAILABLE,
PresenceState.ONLINE,
PresenceState.ONLINE,
@@ -1115,6 +1166,12 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
for cases in [
# If both devices have the same state, nothing exciting should happen.
(
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
PresenceState.ONLINE,
PresenceState.ONLINE,
PresenceState.ONLINE,
@@ -1132,7 +1189,26 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
PresenceState.OFFLINE,
PresenceState.OFFLINE,
),
- # If the second device has a "lower" state it should fallback to it.
+ # If the second device has a "lower" state it should fallback to it,
+ # except for "busy" which overrides.
+ (
+ PresenceState.BUSY,
+ PresenceState.ONLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.BUSY,
+ PresenceState.UNAVAILABLE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.BUSY,
+ PresenceState.OFFLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
(
PresenceState.ONLINE,
PresenceState.UNAVAILABLE,
@@ -1153,6 +1229,24 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
),
# If the second device has a "higher" state it should override.
(
+ PresenceState.ONLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.UNAVAILABLE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
+ PresenceState.OFFLINE,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ PresenceState.BUSY,
+ ),
+ (
PresenceState.UNAVAILABLE,
PresenceState.ONLINE,
PresenceState.ONLINE,
@@ -1266,7 +1360,11 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
# 8. Advance such that the second device should be discarded (the sync timeout),
# then pump so _handle_timeouts function to called.
- self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000)
+ if dev_1_state == PresenceState.BUSY or dev_2_state == PresenceState.BUSY:
+ timeout = BUSY_ONLINE_TIMEOUT
+ else:
+ timeout = SYNC_ONLINE_TIMEOUT
+ self.reactor.advance(timeout / 1000)
self.reactor.pump([5])
# 9. There are no more devices, should be offline.
|