summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/4397.bugfix1
-rw-r--r--synapse/handlers/device.py19
2 files changed, 20 insertions, 0 deletions
diff --git a/changelog.d/4397.bugfix b/changelog.d/4397.bugfix
new file mode 100644
index 0000000000..e7526d4454
--- /dev/null
+++ b/changelog.d/4397.bugfix
@@ -0,0 +1 @@
+Fix high CPU usage due to remote devicelist updates
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9e017116a9..8955cde4ed 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -532,6 +532,25 @@ class DeviceListEduUpdater(object):
 
                 stream_id = result["stream_id"]
                 devices = result["devices"]
+
+                # If the remote server has more than ~1000 devices for this user
+                # we assume that something is going horribly wrong (e.g. a bot
+                # that logs in and creates a new device every time it tries to
+                # send a message).  Maintaining lots of devices per user in the
+                # cache can cause serious performance issues as if this request
+                # takes more than 60s to complete, internal replication from the
+                # inbound federation worker to the synapse master may time out
+                # causing the inbound federation to fail and causing the remote
+                # server to retry, causing a DoS.  So in this scenario we give
+                # up on storing the total list of devices and only handle the
+                # delta instead.
+                if len(devices) > 1000:
+                    logger.warn(
+                        "Ignoring device list snapshot for %s as it has >1K devs (%d)",
+                        user_id, len(devices)
+                    )
+                    devices = []
+
                 yield self.store.update_remote_device_list_cache(
                     user_id, devices, stream_id,
                 )