summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2024-07-30 09:51:24 +0100
committerGitHub <noreply@github.com>2024-07-30 09:51:24 +0100
commit808dab0699c2e5f52e9153650323ed93b0bc4c25 (patch)
tree9613e54a03963fd99e12f7911cce58dfda10eca5
parentOnly send rooms with updates down sliding sync (#17479) (diff)
downloadsynapse-808dab0699c2e5f52e9153650323ed93b0bc4c25.tar.xz
Fix `failures` property in `/keys/query` (#17499)
Fixes: https://github.com/element-hq/synapse/issues/17498
Fixes: https://github.com/element-hq/element-web/issues/27867
-rw-r--r--changelog.d/17499.bugfix1
-rw-r--r--synapse/handlers/e2e_keys.py26
-rw-r--r--tests/handlers/test_e2e_keys.py59
3 files changed, 75 insertions, 11 deletions
diff --git a/changelog.d/17499.bugfix b/changelog.d/17499.bugfix
new file mode 100644
index 0000000000..5cb7b3c30e
--- /dev/null
+++ b/changelog.d/17499.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.110.0 which caused `/keys/query` to return incomplete results, leading to high network activity and CPU usage on Matrix clients.
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 668cec513b..f78e66ad0a 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -291,13 +291,20 @@ class E2eKeysHandler:
 
             # Only try and fetch keys for destinations that are not marked as
             # down.
-            filtered_destinations = await filter_destinations_by_retry_limiter(
-                remote_queries_not_in_cache.keys(),
-                self.clock,
-                self.store,
-                # Let's give an arbitrary grace period for those hosts that are
-                # only recently down
-                retry_due_within_ms=60 * 1000,
+            unfiltered_destinations = remote_queries_not_in_cache.keys()
+            filtered_destinations = set(
+                await filter_destinations_by_retry_limiter(
+                    unfiltered_destinations,
+                    self.clock,
+                    self.store,
+                    # Let's give an arbitrary grace period for those hosts that are
+                    # only recently down
+                    retry_due_within_ms=60 * 1000,
+                )
+            )
+            failures.update(
+                (dest, _NOT_READY_FOR_RETRY_FAILURE)
+                for dest in (unfiltered_destinations - filtered_destinations)
             )
 
             await concurrently_execute(
@@ -1641,6 +1648,9 @@ def _check_device_signature(
         raise SynapseError(400, "Invalid signature", Codes.INVALID_SIGNATURE)
 
 
+_NOT_READY_FOR_RETRY_FAILURE = {"status": 503, "message": "Not ready for retry"}
+
+
 def _exception_to_failure(e: Exception) -> JsonDict:
     if isinstance(e, SynapseError):
         return {"status": e.code, "errcode": e.errcode, "message": str(e)}
@@ -1649,7 +1659,7 @@ def _exception_to_failure(e: Exception) -> JsonDict:
         return {"status": e.code, "message": str(e)}
 
     if isinstance(e, NotRetryingDestination):
-        return {"status": 503, "message": "Not ready for retry"}
+        return _NOT_READY_FOR_RETRY_FAILURE
 
     # include ConnectionRefused and other errors
     #
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 0e6352ff4b..8a3dfdcf75 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -43,9 +43,7 @@ from tests.unittest import override_config
 class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         self.appservice_api = mock.AsyncMock()
-        return self.setup_test_homeserver(
-            federation_client=mock.Mock(), application_service_api=self.appservice_api
-        )
+        return self.setup_test_homeserver(application_service_api=self.appservice_api)
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = hs.get_e2e_keys_handler()
@@ -1224,6 +1222,61 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             },
         )
 
+    def test_query_devices_remote_down(self) -> None:
+        """Tests that querying keys for a remote user on an unreachable server returns
+        results in the "failures" property
+        """
+
+        remote_user_id = "@test:other"
+        local_user_id = "@test:test"
+
+        # The backoff code treats time zero as special
+        self.reactor.advance(5)
+
+        self.hs.get_federation_http_client().agent.request = mock.AsyncMock(  # type: ignore[method-assign]
+            side_effect=Exception("boop")
+        )
+
+        e2e_handler = self.hs.get_e2e_keys_handler()
+
+        query_result = self.get_success(
+            e2e_handler.query_devices(
+                {
+                    "device_keys": {remote_user_id: []},
+                },
+                timeout=10,
+                from_user_id=local_user_id,
+                from_device_id="some_device_id",
+            )
+        )
+
+        self.assertEqual(
+            query_result["failures"],
+            {
+                "other": {
+                    "message": "Failed to send request: Exception: boop",
+                    "status": 503,
+                }
+            },
+        )
+
+        # Do it again: we should hit the backoff
+        query_result = self.get_success(
+            e2e_handler.query_devices(
+                {
+                    "device_keys": {remote_user_id: []},
+                },
+                timeout=10,
+                from_user_id=local_user_id,
+                from_device_id="some_device_id",
+            )
+        )
+
+        self.assertEqual(
+            query_result["failures"],
+            {"other": {"message": "Not ready for retry", "status": 503}},
+        )
+
     @parameterized.expand(
         [
             # The remote homeserver's response indicates that this user has 0/1/2 devices.