summary refs log tree commit diff
diff options
context:
space:
mode:
author: Matthew Hodgson <matthew@matrix.org> 2019-01-15 21:38:07 +0000
committer: Matthew Hodgson <matthew@matrix.org> 2019-01-15 21:38:07 +0000
commit: 482d06774ac456943fb7e519a78431c82da305ca (patch)
tree: 523b61a1a02c7ef02104fd9f2c850c7cc23a5781
parent: limit remote device lists to 1000 entries per user (diff)
download: synapse-482d06774ac456943fb7e519a78431c82da305ca.tar.xz
don't store remote device lists if they have more than 10K devices
-rw-r--r-- synapse/handlers/device.py 25
1 files changed, 13 insertions, 12 deletions
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 6f80a7dce9..5bca62418e 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -533,18 +533,19 @@ class DeviceListEduUpdater(object):
                 stream_id = result["stream_id"]
                 devices = result["devices"]
 
-                # Emergency hack to prevent DoS from
-                # @bot:oliviervandertoorn.nl and @bot:matrix-beta.igalia.com
-                # on Jan 15 2019: only store the most recent 1000 devices for
-                # a given user.  (We assume we receive them in chronological
-                # order, which is dubious given _get_e2e_device_keys_txn does
-                # not explicitly order its results).  Otherwise it can take
-                # longer than 60s to persist the >100K devices, at which point
-                # the internal replication request to handle the
-                # m.device_list_update EDU times out, causing the remote
-                # server to retry the transaction and thus DoS synapse master
-                # CPU and DB.
-                devices = devices[-1000:]
+                # If the remote server has more than ~10000 devices for this user
+                # we assume that something is going horribly wrong (e.g. a bot
+                # that logs in and creates a new device every time it tries to
+                # send a message).  Maintaining lots of devices per user in the
+                # cache can cause serious performance issues as if this request
+                # takes more than 60s to complete, internal replication from the
+                # inbound federation worker to the synapse master may time out
+                # causing the inbound federation to fail and causing the remote
+                # server to retry, causing a DoS.  So in this scenario we give
+                # up on storing the total list of devices and only handle the
+                # delta instead.
+                if len(devices) > 10000:
+                    devices = []
 
                 yield self.store.update_remote_device_list_cache(
                     user_id, devices, stream_id,