summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2023-09-05 10:39:38 -0400
committerGitHub <noreply@github.com>2023-09-05 10:39:38 -0400
commit8b5013dcbc5db16f0f771898da493e812be6fc8a (patch)
tree3dbeb578580d48c386490734d00ee30090ebc4a5
parentTrack presence state per-device and combine to a user state. (#16066) (diff)
downloadsynapse-8b5013dcbc5db16f0f771898da493e812be6fc8a.tar.xz
Time out busy presence status & test multi-device busy (#16174)
Add a (long) timeout to when a "busy" device is considered not online.
This does *not* match MSC3026, but is a reasonable thing for an
implementation to do.

Expands tests for the (unstable) busy presence with multiple devices.
-rw-r--r--changelog.d/16174.bugfix1
-rw-r--r--synapse/handlers/presence.py19
-rw-r--r--tests/handlers/test_presence.py104
3 files changed, 120 insertions, 4 deletions
diff --git a/changelog.d/16174.bugfix b/changelog.d/16174.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16174.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 80190838b7..a4b05b72e7 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -155,6 +155,8 @@ LAST_ACTIVE_GRANULARITY = 60 * 1000
 # How long to wait until a new /events or /sync request before assuming
 # the client has gone.
 SYNC_ONLINE_TIMEOUT = 30 * 1000
+# Busy status waits longer, but does eventually go offline.
+BUSY_ONLINE_TIMEOUT = 60 * 60 * 1000
 
 # How long to wait before marking the user as idle. Compared against last active
 IDLE_TIMER = 5 * 60 * 1000
@@ -2066,7 +2068,15 @@ def handle_timeout(
                     device_state.last_sync_ts, device_state.last_active_ts
                 )
 
-                if now - sync_or_active > SYNC_ONLINE_TIMEOUT:
+                # Implementations aren't meant to timeout a device with a busy
+                # state, but it needs to timeout *eventually* or else the user
+                # will be stuck in that state.
+                online_timeout = (
+                    BUSY_ONLINE_TIMEOUT
+                    if device_state.state == PresenceState.BUSY
+                    else SYNC_ONLINE_TIMEOUT
+                )
+                if now - sync_or_active > online_timeout:
                     # Mark the device as going offline.
                     offline_devices.append(device_id)
                     device_changed = True
@@ -2166,6 +2176,13 @@ def handle_update(
                 new_state = new_state.copy_and_replace(last_federation_update_ts=now)
                 federation_ping = True
 
+        if new_state.state == PresenceState.BUSY:
+            wheel_timer.insert(
+                now=now,
+                obj=user_id,
+                then=new_state.last_user_sync_ts + BUSY_ONLINE_TIMEOUT,
+            )
+
     else:
         wheel_timer.insert(
             now=now,
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 914415740a..638787b029 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -26,6 +26,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events.builder import EventBuilder
 from synapse.federation.sender import FederationSender
 from synapse.handlers.presence import (
+    BUSY_ONLINE_TIMEOUT,
     EXTERNAL_PROCESS_EXPIRY,
     FEDERATION_PING_INTERVAL,
     FEDERATION_TIMEOUT,
@@ -913,6 +914,13 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                 # If both devices have the same state, online should eventually idle.
                 # Otherwise, the state doesn't change.
                 (
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -933,7 +941,29 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.OFFLINE,
                     PresenceState.OFFLINE,
                 ),
-                # If the second device has a "lower" state it should fallback to it.
+                # If the second device has a "lower" state it should fallback to it,
+                # except for "busy" which overrides.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.UNAVAILABLE,
@@ -957,6 +987,27 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                 ),
                 # If the second device has a "higher" state it should override.
                 (
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
                     PresenceState.UNAVAILABLE,
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -1115,6 +1166,12 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             for cases in [
                 # If both devices have the same state, nothing exciting should happen.
                 (
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -1132,7 +1189,26 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.OFFLINE,
                     PresenceState.OFFLINE,
                 ),
-                # If the second device has a "lower" state it should fallback to it.
+                # If the second device has a "lower" state it should fallback to it,
+                # except for "busy" which overrides.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.UNAVAILABLE,
@@ -1153,6 +1229,24 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                 ),
                 # If the second device has a "higher" state it should override.
                 (
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
                     PresenceState.UNAVAILABLE,
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -1266,7 +1360,11 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
         # 8. Advance such that the second device should be discarded (the sync timeout),
         # then pump so _handle_timeouts function to called.
-        self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000)
+        if dev_1_state == PresenceState.BUSY or dev_2_state == PresenceState.BUSY:
+            timeout = BUSY_ONLINE_TIMEOUT
+        else:
+            timeout = SYNC_ONLINE_TIMEOUT
+        self.reactor.advance(timeout / 1000)
         self.reactor.pump([5])
 
         # 9. There are no more devices, should be offline.