summary refs log tree commit diff
diff options
context:
space:
mode:
authorreivilibre <oliverw@matrix.org>2022-09-29 10:00:02 +0000
committerGitHub <noreply@github.com>2022-09-29 10:00:02 +0000
commit73ecff7e9ed456c64368296858d17d4b393c9f9a (patch)
tree0d652dfb98d5d1531481fcc04e25091bbbcf3219
parentHandle local device list updates during partial join (#13934) (diff)
downloadsynapse-73ecff7e9ed456c64368296858d17d4b393c9f9a.tar.xz
Improve backfill robustness by trying more servers. (#13890)
Co-authored-by: Eric Eastwood <erice@element.io>
-rw-r--r--changelog.d/13890.misc1
-rw-r--r--synapse/handlers/federation.py33
2 files changed, 32 insertions, 2 deletions
diff --git a/changelog.d/13890.misc b/changelog.d/13890.misc
new file mode 100644
index 0000000000..bf76cf7be7
--- /dev/null
+++ b/changelog.d/13890.misc
@@ -0,0 +1 @@
+Improve backfill robustness by trying more servers when we get a `4xx` error back.
\ No newline at end of file
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 500c1c16d0..b866258298 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -417,6 +417,15 @@ class FederationHandler:
 
         async def try_backfill(domains: Collection[str]) -> bool:
             # TODO: Should we try multiple of these at a time?
+
+            # Number of contacted remote homeservers that have denied our backfill
+            # request with a 4xx code.
+            denied_count = 0
+
+            # Maximum number of contacted remote homeservers that can deny our
+            # backfill request with 4xx codes before we give up.
+            max_denied_count = 5
+
             for dom in domains:
                 # We don't want to ask our own server for information we don't have
                 if dom == self.server_name:
@@ -435,13 +444,33 @@ class FederationHandler:
                     continue
                 except HttpResponseException as e:
                     if 400 <= e.code < 500:
-                        raise e.to_synapse_error()
+                        logger.warning(
+                            "Backfill denied from %s because %s [%d/%d]",
+                            dom,
+                            e,
+                            denied_count,
+                            max_denied_count,
+                        )
+                        denied_count += 1
+                        if denied_count >= max_denied_count:
+                            return False
+                        continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue
                 except CodeMessageException as e:
                     if 400 <= e.code < 500:
-                        raise
+                        logger.warning(
+                            "Backfill denied from %s because %s [%d/%d]",
+                            dom,
+                            e,
+                            denied_count,
+                            max_denied_count,
+                        )
+                        denied_count += 1
+                        if denied_count >= max_denied_count:
+                            return False
+                        continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue