diff --git a/changelog.d/18363.bugfix b/changelog.d/18363.bugfix
new file mode 100644
index 0000000000..bfa336d52f
--- /dev/null
+++ b/changelog.d/18363.bugfix
@@ -0,0 +1 @@
+Fix longstanding bug where Synapse would immediately retry a failing push endpoint when a new event is received, ignoring any backoff timers.
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 69790ecab5..7df8a128c9 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -205,6 +205,12 @@ class HttpPusher(Pusher):
if self._is_processing:
return
+ # Check if we are trying, but failing, to contact the pusher. If so, we
+ # don't try and start processing immediately and instead wait for the
+ # retry loop to try again later (which is controlled by the timer).
+ if self.failing_since and self.timed_call and self.timed_call.active():
+ return
+
run_as_background_process("httppush.process", self._process)
async def _process(self) -> None:
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index 5c235bbe53..b42fd284b6 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -1167,3 +1167,81 @@ class HTTPPusherTests(HomeserverTestCase):
self.assertEqual(
self.push_attempts[0][2]["notification"]["counts"]["unread"], 1
)
+
+ def test_push_backoff(self) -> None:
+ """
+ The HTTP pusher will backoff correctly if it fails to contact the pusher.
+ """
+
+ # Register the user who gets notified
+ user_id = self.register_user("user", "pass")
+ access_token = self.login("user", "pass")
+
+ # Register the user who sends the message
+ other_user_id = self.register_user("otheruser", "pass")
+ other_access_token = self.login("otheruser", "pass")
+
+ # Register the pusher
+ user_tuple = self.get_success(
+ self.hs.get_datastores().main.get_user_by_access_token(access_token)
+ )
+ assert user_tuple is not None
+ device_id = user_tuple.device_id
+
+ self.get_success(
+ self.hs.get_pusherpool().add_or_update_pusher(
+ user_id=user_id,
+ device_id=device_id,
+ kind="http",
+ app_id="m.http",
+ app_display_name="HTTP Push Notifications",
+ device_display_name="pushy push",
+ pushkey="a@example.com",
+ lang=None,
+ data={"url": "http://example.com/_matrix/push/v1/notify"},
+ )
+ )
+
+ # Create a room with the other user
+ room = self.helper.create_room_as(user_id, tok=access_token)
+ self.helper.join(room=room, user=other_user_id, tok=other_access_token)
+
+ # The other user sends some messages
+ self.helper.send(room, body="Message 1", tok=other_access_token)
+
+ # One push was attempted to be sent
+ self.assertEqual(len(self.push_attempts), 1)
+ self.assertEqual(
+ self.push_attempts[0][1], "http://example.com/_matrix/push/v1/notify"
+ )
+ self.assertEqual(
+ self.push_attempts[0][2]["notification"]["content"]["body"], "Message 1"
+ )
+ self.push_attempts[0][0].callback({})
+ self.pump()
+
+ # Send another message, this time it fails
+ self.helper.send(room, body="Message 2", tok=other_access_token)
+ self.assertEqual(len(self.push_attempts), 2)
+ self.push_attempts[1][0].errback(Exception("couldn't connect"))
+ self.pump()
+
+ # Sending yet another message doesn't trigger a push immediately
+ self.helper.send(room, body="Message 3", tok=other_access_token)
+ self.pump()
+ self.assertEqual(len(self.push_attempts), 2)
+
+ # .. but waiting for a bit will cause more pushes
+ self.reactor.advance(10)
+ self.assertEqual(len(self.push_attempts), 3)
+ self.assertEqual(
+ self.push_attempts[2][2]["notification"]["content"]["body"], "Message 2"
+ )
+ self.push_attempts[2][0].callback({})
+ self.pump()
+
+ self.assertEqual(len(self.push_attempts), 4)
+ self.assertEqual(
+ self.push_attempts[3][2]["notification"]["content"]["body"], "Message 3"
+ )
+ self.push_attempts[3][0].callback({})
|