diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index c840da834c..3d676e7d8b 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -24,6 +24,7 @@ from synapse.http.servlet import (
)
from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.versionstring import get_version_string
+from synapse.util.logcontext import preserve_fn
from synapse.types import ThirdPartyInstanceID
import functools
@@ -79,6 +80,7 @@ class Authenticator(object):
def __init__(self, hs):
self.keyring = hs.get_keyring()
self.server_name = hs.hostname
+ self.store = hs.get_datastore()
# A method just so we can pass 'self' as the authenticator to the Servlets
@defer.inlineCallbacks
@@ -138,6 +140,13 @@ class Authenticator(object):
logger.info("Request from %s", origin)
request.authenticated_entity = origin
+ # If we get a valid signed request from the other side, it's probably
+ # alive
+ retry_timings = yield self.store.get_destination_retry_timings(origin)
+ if retry_timings and retry_timings["retry_last_ts"]:
+ logger.info("Marking origin %r as up", origin)
+ preserve_fn(self.store.set_destination_retry_timings)(origin, 0, 0)
+
defer.returnValue(origin)
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index c22f65ce5d..982cda3edf 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -17,6 +17,7 @@ from synapse.api.constants import EventTypes
from synapse.util import stringutils
from synapse.util.async import Linearizer
from synapse.util.caches.expiringcache import ExpiringCache
+from synapse.util.retryutils import NotRetryingDestination
from synapse.util.metrics import measure_func
from synapse.types import get_domain_from_id, RoomStreamToken
from twisted.internet import defer
@@ -425,12 +426,38 @@ class DeviceListEduUpdater(object):
# This can happen since we batch updates
return
+ # Given a list of updates we check if we need to resync. This
+ # happens if we've missed updates.
resync = yield self._need_to_do_resync(user_id, pending_updates)
if resync:
# Fetch all devices for the user.
origin = get_domain_from_id(user_id)
- result = yield self.federation.query_user_devices(origin, user_id)
+ try:
+ result = yield self.federation.query_user_devices(origin, user_id)
+ except NotRetryingDestination:
+ # TODO: Remember that we are now out of sync and try again
+ # later
+ logger.warn(
+ "Failed to handle device list update for %s,"
+ " we're not retrying the remote",
+ user_id,
+ )
+ # We abort on exceptions rather than accepting the update
+ # as otherwise synapse will 'forget' that its device list
+ # is out of date. If we bail then we will retry the resync
+ # next time we get a device list update for this user_id.
+ # This makes it more likely that the device lists will
+ # eventually become consistent.
+ return
+ except Exception:
+ # TODO: Remember that we are now out of sync and try again
+ # later
+ logger.exception(
+ "Failed to handle device list update for %s", user_id
+ )
+ return
+
stream_id = result["stream_id"]
devices = result["devices"]
yield self.store.update_remote_device_list_cache(
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 2af9849ed0..52d97dfbf3 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -380,13 +380,6 @@ class FederationHandler(BaseHandler):
affected=event.event_id,
)
- # if we're receiving valid events from an origin,
- # it's probably a good idea to mark it as not in retry-state
- # for sending (although this is a bit of a leap)
- retry_timings = yield self.store.get_destination_retry_timings(origin)
- if retry_timings and retry_timings["retry_last_ts"]:
- self.store.set_destination_retry_timings(origin, 0, 0)
-
room = yield self.store.get_room(event.room_id)
if not room:
|