diff options
author | Amber Brown <hawkowl@atleastfornow.net> | 2018-09-19 17:57:48 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-19 17:57:48 +1000 |
commit | f773ecbd61f16823d182ff1cb308c62c5f74a104 (patch) | |
tree | 16a6024d4b40e24b47c0700bac2de8ac479689ff | |
parent | Use directory server for room joins (#3899) (diff) | |
parent | changelog (diff) | |
download | synapse-f773ecbd61f16823d182ff1cb308c62c5f74a104.tar.xz |
Merge pull request #3903 from matrix-org/rav/increase_get_missing_events_timeout
Bump timeout on get_missing_events request
-rw-r--r-- | changelog.d/3903.misc | 1 | ||||
-rw-r--r-- | synapse/handlers/federation.py | 31 |
2 files changed, 31 insertions, 1 deletions
diff --git a/changelog.d/3903.misc b/changelog.d/3903.misc new file mode 100644 index 0000000000..49b64bf333 --- /dev/null +++ b/changelog.d/3903.misc @@ -0,0 +1 @@ +Increase the timeout when filling missing events in federation requests \ No newline at end of file diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0c68e8a472..f10b46414b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -360,6 +360,35 @@ class FederationHandler(BaseHandler): # apparently. # # see https://github.com/matrix-org/synapse/pull/1744 + # + # ---- + # + # Update richvdh 2018/09/18: There are a number of problems with timing this + # request out aggressively on the client side: + # + # - it plays badly with the server-side rate-limiter, which starts tarpitting you + # if you send too many requests at once, so you end up with the server carefully + # working through the backlog of your requests, which you have already timed + # out. + # + # - for this request in particular, we now (as of + # https://github.com/matrix-org/synapse/pull/3456) reject any PDUs where the + # server can't produce a plausible-looking set of prev_events - so we become + # much more likely to reject the event. + # + # - contrary to what it says above, we do *not* fall back to fetching fresh state + # for the room if get_missing_events times out. Rather, we give up processing + # the PDU whose prevs we are missing, which then makes it much more likely that + # we'll end up back here for the *next* PDU in the list, which exacerbates the + # problem. + # + # - the aggressive 10s timeout was introduced to deal with incoming federation + # requests taking 8 hours to process. It's not entirely clear why that was going + # on; certainly there were other issues causing traffic storms which are now + # resolved, and I think in any case we may be more sensible about our locking + # now. We're *certainly* more sensible about our logging. 
+ # + # All that said: Let's try increasing the timeout to 60s and see what happens. missing_events = yield self.federation_client.get_missing_events( origin, @@ -368,7 +397,7 @@ class FederationHandler(BaseHandler): latest_events=[pdu], limit=10, min_depth=min_depth, - timeout=10000, + timeout=60000, ) logger.info( |