Diffstat (limited to 'synapse/federation')
-rw-r--r--  synapse/federation/federation_base.py                166
-rw-r--r--  synapse/federation/federation_client.py              768
-rw-r--r--  synapse/federation/federation_server.py              562
-rw-r--r--  synapse/federation/persistence.py                      4
-rw-r--r--  synapse/federation/send_queue.py                     141
-rw-r--r--  synapse/federation/sender/__init__.py                137
-rw-r--r--  synapse/federation/sender/per_destination_queue.py   107
-rw-r--r--  synapse/federation/sender/transaction_manager.py      26
-rw-r--r--  synapse/federation/transport/__init__.py               4
-rw-r--r--  synapse/federation/transport/client.py               184
-rw-r--r--  synapse/federation/transport/server.py               121
-rw-r--r--  synapse/federation/units.py                           23
12 files changed, 1157 insertions, 1086 deletions
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 5a1e23a145..c0012c6872 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,27 +15,28 @@
 # limitations under the License.
 import logging
 from collections import namedtuple
+from typing import Iterable, List
 
 import six
 
 from twisted.internet import defer
-from twisted.internet.defer import DeferredList
+from twisted.internet.defer import Deferred, DeferredList
+from twisted.python.failure import Failure
 
 from synapse.api.constants import MAX_DEPTH, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
-from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, EventFormatVersions
+from synapse.api.room_versions import EventFormatVersions, RoomVersion
 from synapse.crypto.event_signing import check_event_content_hash
-from synapse.events import event_type_from_format_version
-from synapse.events.utils import prune_event
+from synapse.crypto.keyring import Keyring
+from synapse.events import EventBase, make_event_from_dict
+from synapse.events.utils import prune_event, validate_canonicaljson
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
-    LoggingContext,
     PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
-    preserve_fn,
 )
-from synapse.types import get_domain_from_id
-from synapse.util import unwrapFirstError
+from synapse.types import JsonDict, get_domain_from_id
 
 logger = logging.getLogger(__name__)
 
@@ -49,92 +51,25 @@ class FederationBase(object):
         self.store = hs.get_datastore()
         self._clock = hs.get_clock()
 
-    @defer.inlineCallbacks
-    def _check_sigs_and_hash_and_fetch(
-        self, origin, pdus, room_version, outlier=False, include_none=False
-    ):
-        """Takes a list of PDUs and checks the signatures and hashs of each
-        one. If a PDU fails its signature check then we check if we have it in
-        the database and if not then request if from the originating server of
-        that PDU.
-
-        If a PDU fails its content hash check then it is redacted.
-
-        The given list of PDUs are not modified, instead the function returns
-        a new list.
-
-        Args:
-            origin (str)
-            pdu (list)
-            room_version (str)
-            outlier (bool): Whether the events are outliers or not
-            include_none (str): Whether to include None in the returned list
-                for events that have failed their checks
-
-        Returns:
-            Deferred : A list of PDUs that have valid signatures and hashes.
-        """
-        deferreds = self._check_sigs_and_hashes(room_version, pdus)
-
-        @defer.inlineCallbacks
-        def handle_check_result(pdu, deferred):
-            try:
-                res = yield make_deferred_yieldable(deferred)
-            except SynapseError:
-                res = None
-
-            if not res:
-                # Check local db.
-                res = yield self.store.get_event(
-                    pdu.event_id, allow_rejected=True, allow_none=True
-                )
-
-            if not res and pdu.origin != origin:
-                try:
-                    res = yield self.get_pdu(
-                        destinations=[pdu.origin],
-                        event_id=pdu.event_id,
-                        room_version=room_version,
-                        outlier=outlier,
-                        timeout=10000,
-                    )
-                except SynapseError:
-                    pass
-
-            if not res:
-                logger.warn(
-                    "Failed to find copy of %s with valid signature", pdu.event_id
-                )
-
-            return res
-
-        handle = preserve_fn(handle_check_result)
-        deferreds2 = [handle(pdu, deferred) for pdu, deferred in zip(pdus, deferreds)]
-
-        valid_pdus = yield make_deferred_yieldable(
-            defer.gatherResults(deferreds2, consumeErrors=True)
-        ).addErrback(unwrapFirstError)
-
-        if include_none:
-            return valid_pdus
-        else:
-            return [p for p in valid_pdus if p]
-
-    def _check_sigs_and_hash(self, room_version, pdu):
+    def _check_sigs_and_hash(
+        self, room_version: RoomVersion, pdu: EventBase
+    ) -> Deferred:
         return make_deferred_yieldable(
             self._check_sigs_and_hashes(room_version, [pdu])[0]
         )
 
-    def _check_sigs_and_hashes(self, room_version, pdus):
+    def _check_sigs_and_hashes(
+        self, room_version: RoomVersion, pdus: List[EventBase]
+    ) -> List[Deferred]:
         """Checks that each of the received events is correctly signed by the
         sending server.
 
         Args:
-            room_version (str): The room version of the PDUs
-            pdus (list[FrozenEvent]): the events to be checked
+            room_version: The room version of the PDUs
+            pdus: the events to be checked
 
         Returns:
-            list[Deferred]: for each input event, a deferred which:
+            For each input event, a deferred which:
               * returns the original event if the checks pass
               * returns a redacted version of the event (if the signature
                 matched but the hash did not)
@@ -143,9 +78,9 @@ class FederationBase(object):
         """
         deferreds = _check_sigs_on_pdus(self.keyring, room_version, pdus)
 
-        ctx = LoggingContext.current_context()
+        ctx = current_context()
 
-        def callback(_, pdu):
+        def callback(_, pdu: EventBase):
             with PreserveLoggingContext(ctx):
                 if not check_event_content_hash(pdu):
                     # let's try to distinguish between failures because the event was
@@ -173,7 +108,7 @@ class FederationBase(object):
                     return redacted_event
 
                 if self.spam_checker.check_event_for_spam(pdu):
-                    logger.warn(
+                    logger.warning(
                         "Event contains spam, redacting %s: %s",
                         pdu.event_id,
                         pdu.get_pdu_json(),
@@ -182,10 +117,10 @@ class FederationBase(object):
 
                 return pdu
 
-        def errback(failure, pdu):
+        def errback(failure: Failure, pdu: EventBase):
             failure.trap(SynapseError)
             with PreserveLoggingContext(ctx):
-                logger.warn(
+                logger.warning(
                     "Signature check failed for %s: %s",
                     pdu.event_id,
                     failure.getErrorMessage(),
@@ -208,16 +143,18 @@ class PduToCheckSig(
     pass
 
 
-def _check_sigs_on_pdus(keyring, room_version, pdus):
+def _check_sigs_on_pdus(
+    keyring: Keyring, room_version: RoomVersion, pdus: Iterable[EventBase]
+) -> List[Deferred]:
     """Check that the given events are correctly signed
 
     Args:
-        keyring (synapse.crypto.Keyring): keyring object to do the checks
-        room_version (str): the room version of the PDUs
-        pdus (Collection[EventBase]): the events to be checked
+        keyring: keyring object to do the checks
+        room_version: the room version of the PDUs
+        pdus: the events to be checked
 
     Returns:
-        List[Deferred]: a Deferred for each event in pdus, which will either succeed if
+        A Deferred for each event in pdus, which will either succeed if
            the signatures are valid, or fail (with a SynapseError) if not.
     """
 
@@ -252,10 +189,6 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
         for p in pdus
     ]
 
-    v = KNOWN_ROOM_VERSIONS.get(room_version)
-    if not v:
-        raise RuntimeError("Unrecognized room version %s" % (room_version,))
-
     # First we check that the sender event is signed by the sender's domain
     # (except if its a 3pid invite, in which case it may be sent by any server)
     pdus_to_check_sender = [p for p in pdus_to_check if not _is_invite_via_3pid(p.pdu)]
@@ -265,7 +198,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
             (
                 p.sender_domain,
                 p.redacted_pdu_json,
-                p.pdu.origin_server_ts if v.enforce_key_validity else 0,
+                p.pdu.origin_server_ts if room_version.enforce_key_validity else 0,
                 p.pdu.event_id,
             )
             for p in pdus_to_check_sender
@@ -278,9 +211,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
             pdu_to_check.sender_domain,
             e.getErrorMessage(),
         )
-        # XX not really sure if these are the right codes, but they are what
-        # we've done for ages
-        raise SynapseError(400, errmsg, Codes.UNAUTHORIZED)
+        raise SynapseError(403, errmsg, Codes.FORBIDDEN)
 
     for p, d in zip(pdus_to_check_sender, more_deferreds):
         d.addErrback(sender_err, p)
@@ -290,7 +221,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
     # event id's domain (normally only the case for joins/leaves), and add additional
     # checks. Only do this if the room version has a concept of event ID domain
     # (ie, the room version uses old-style non-hash event IDs).
-    if v.event_format == EventFormatVersions.V1:
+    if room_version.event_format == EventFormatVersions.V1:
         pdus_to_check_event_id = [
             p
             for p in pdus_to_check
@@ -302,7 +233,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
                 (
                     get_domain_from_id(p.pdu.event_id),
                     p.redacted_pdu_json,
-                    p.pdu.origin_server_ts if v.enforce_key_validity else 0,
+                    p.pdu.origin_server_ts if room_version.enforce_key_validity else 0,
                     p.pdu.event_id,
                 )
                 for p in pdus_to_check_event_id
@@ -314,8 +245,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
                 "event id %s: unable to verify signature for event id domain: %s"
                 % (pdu_to_check.pdu.event_id, e.getErrorMessage())
             )
-            # XX as above: not really sure if these are the right codes
-            raise SynapseError(400, errmsg, Codes.UNAUTHORIZED)
+            raise SynapseError(403, errmsg, Codes.FORBIDDEN)
 
         for p, d in zip(pdus_to_check_event_id, more_deferreds):
             d.addErrback(event_err, p)
@@ -325,7 +255,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus):
     return [_flatten_deferred_list(p.deferreds) for p in pdus_to_check]
 
 
-def _flatten_deferred_list(deferreds):
+def _flatten_deferred_list(deferreds: List[Deferred]) -> Deferred:
     """Given a list of deferreds, either return the single deferred,
     combine into a DeferredList, or return an already resolved deferred.
     """
@@ -337,7 +267,7 @@ def _flatten_deferred_list(deferreds):
         return defer.succeed(None)
 
 
-def _is_invite_via_3pid(event):
+def _is_invite_via_3pid(event: EventBase) -> bool:
     return (
         event.type == EventTypes.Member
         and event.membership == Membership.INVITE
@@ -345,16 +275,15 @@ def _is_invite_via_3pid(event):
     )
 
 
-def event_from_pdu_json(pdu_json, event_format_version, outlier=False):
-    """Construct a FrozenEvent from an event json received over federation
+def event_from_pdu_json(
+    pdu_json: JsonDict, room_version: RoomVersion, outlier: bool = False
+) -> EventBase:
+    """Construct an EventBase from an event json received over federation
 
     Args:
-        pdu_json (object): pdu as received over federation
-        event_format_version (int): The event format version
-        outlier (bool): True to mark this event as an outlier
-
-    Returns:
-        FrozenEvent
+        pdu_json: pdu as received over federation
+        room_version: The version of the room this event belongs to
+        outlier: True to mark this event as an outlier
 
     Raises:
         SynapseError: if the pdu is missing required fields or is otherwise
@@ -373,8 +302,11 @@ def event_from_pdu_json(pdu_json, event_format_version, outlier=False):
     elif depth > MAX_DEPTH:
         raise SynapseError(400, "Depth too large", Codes.BAD_JSON)
 
-    event = event_type_from_format_version(event_format_version)(pdu_json)
+    # Validate that the JSON conforms to the specification.
+    if room_version.strict_canonicaljson:
+        validate_canonicaljson(pdu_json)
 
+    event = make_event_from_dict(pdu_json, room_version)
     event.internal_metadata.outlier = outlier
 
     return event
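
Taken together, the federation_base.py changes mean event_from_pdu_json now receives a full RoomVersion object and, for room versions with strict_canonicaljson set, validates the incoming JSON before the event object is built. As a rough illustration only (this sketch is not part of the commit and merely stands in for Synapse's validate_canonicaljson, which performs further checks), one part of that validation amounts to walking the JSON and rejecting integers outside the range canonical JSON permits:

# Illustrative stand-in for the strict canonical-JSON gate; Matrix canonical
# JSON requires integers to lie in [-(2**53) + 1, 2**53 - 1].
CANONICAL_MIN = -(2 ** 53) + 1
CANONICAL_MAX = 2 ** 53 - 1


def ints_are_canonical(value) -> bool:
    """Return True if every integer nested inside `value` is within range."""
    if isinstance(value, bool):
        return True
    if isinstance(value, int):
        return CANONICAL_MIN <= value <= CANONICAL_MAX
    if isinstance(value, dict):
        return all(ints_are_canonical(v) for v in value.values())
    if isinstance(value, (list, tuple)):
        return all(ints_are_canonical(v) for v in value)
    return True


assert ints_are_canonical({"depth": 12, "content": {"scores": [1, 2, 3]}})
assert not ints_are_canonical({"depth": 2 ** 60})
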
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 6ee6216660..687cd841ac 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -17,12 +17,23 @@
 import copy
 import itertools
 import logging
-
-from six.moves import range
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+)
 
 from prometheus_client import Counter
 
 from twisted.internet import defer
+from twisted.internet.defer import Deferred
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import (
@@ -31,16 +42,19 @@ from synapse.api.errors import (
     FederationDeniedError,
     HttpResponseException,
     SynapseError,
+    UnsupportedRoomVersionError,
 )
 from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
     EventFormatVersions,
+    RoomVersion,
     RoomVersions,
 )
-from synapse.events import builder, room_version_to_event_format
+from synapse.events import EventBase, builder
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
-from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.logging.context import make_deferred_yieldable, preserve_fn
 from synapse.logging.utils import log_function
+from synapse.types import JsonDict
 from synapse.util import unwrapFirstError
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.retryutils import NotRetryingDestination
@@ -52,6 +66,8 @@ sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["t
 
 PDU_RETRY_TIME_MS = 1 * 60 * 1000
 
+T = TypeVar("T")
+
 
 class InvalidResponseError(RuntimeError):
     """Helper for _try_destination_list: indicates that the server returned a response
@@ -170,56 +186,54 @@ class FederationClient(FederationBase):
         sent_queries_counter.labels("client_one_time_keys").inc()
         return self.transport_layer.claim_client_keys(destination, content, timeout)
 
-    @defer.inlineCallbacks
-    @log_function
-    def backfill(self, dest, room_id, limit, extremities):
-        """Requests some more historic PDUs for the given context from the
+    async def backfill(
+        self, dest: str, room_id: str, limit: int, extremities: Iterable[str]
+    ) -> Optional[List[EventBase]]:
+        """Requests some more historic PDUs for the given room from the
         given destination server.
 
         Args:
-            dest (str): The remote home server to ask.
+            dest (str): The remote homeserver to ask.
             room_id (str): The room_id to backfill.
-            limit (int): The maximum number of PDUs to return.
-            extremities (list): List of PDU id and origins of the first pdus
-                we have seen from the context
-
-        Returns:
-            Deferred: Results in the received PDUs.
+            limit (int): The maximum number of events to return.
+            extremities (list): our current backwards extremities, to backfill from
         """
         logger.debug("backfill extrem=%s", extremities)
 
-        # If there are no extremeties then we've (probably) reached the start.
+        # If there are no extremities then we've (probably) reached the start.
         if not extremities:
-            return
+            return None
 
-        transaction_data = yield self.transport_layer.backfill(
+        transaction_data = await self.transport_layer.backfill(
             dest, room_id, extremities, limit
         )
 
-        logger.debug("backfill transaction_data=%s", repr(transaction_data))
+        logger.debug("backfill transaction_data=%r", transaction_data)
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
+        room_version = await self.store.get_room_version(room_id)
 
         pdus = [
-            event_from_pdu_json(p, format_ver, outlier=False)
+            event_from_pdu_json(p, room_version, outlier=False)
             for p in transaction_data["pdus"]
         ]
 
         # FIXME: We should handle signature failures more gracefully.
-        pdus[:] = yield make_deferred_yieldable(
+        pdus[:] = await make_deferred_yieldable(
             defer.gatherResults(
-                self._check_sigs_and_hashes(room_version, pdus), consumeErrors=True
+                self._check_sigs_and_hashes(room_version, pdus), consumeErrors=True,
             ).addErrback(unwrapFirstError)
         )
 
         return pdus
 
-    @defer.inlineCallbacks
-    @log_function
-    def get_pdu(
-        self, destinations, event_id, room_version, outlier=False, timeout=None
-    ):
+    async def get_pdu(
+        self,
+        destinations: Iterable[str],
+        event_id: str,
+        room_version: RoomVersion,
+        outlier: bool = False,
+        timeout: Optional[int] = None,
+    ) -> Optional[EventBase]:
         """Requests the PDU with given origin and ID from the remote home
         servers.
 
@@ -227,18 +241,17 @@ class FederationClient(FederationBase):
         one succeeds.
 
         Args:
-            destinations (list): Which home servers to query
-            event_id (str): event to fetch
-            room_version (str): version of the room
-            outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if
+            destinations: Which homeservers to query
+            event_id: event to fetch
+            room_version: version of the room
+            outlier: Indicates whether the PDU is an `outlier`, i.e. if
                 it's from an arbitary point in the context as opposed to part
                 of the current block of PDUs. Defaults to `False`
-            timeout (int): How long to try (in ms) each destination for before
+            timeout: How long to try (in ms) each destination for before
                 moving to the next destination. None indicates no timeout.
 
         Returns:
-            Deferred: Results in the requested PDU, or None if we were unable to find
-               it.
+            The requested PDU, or None if we were unable to find it.
         """
 
         # TODO: Rate limit the number of times we try and get the same event.
@@ -249,8 +262,6 @@ class FederationClient(FederationBase):
 
         pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {})
 
-        format_ver = room_version_to_event_format(room_version)
-
         signed_pdu = None
         for destination in destinations:
             now = self._clock.time_msec()
@@ -259,7 +270,7 @@ class FederationClient(FederationBase):
                 continue
 
             try:
-                transaction_data = yield self.transport_layer.get_event(
+                transaction_data = await self.transport_layer.get_event(
                     destination, event_id, timeout=timeout
                 )
 
@@ -271,15 +282,15 @@ class FederationClient(FederationBase):
                 )
 
                 pdu_list = [
-                    event_from_pdu_json(p, format_ver, outlier=outlier)
+                    event_from_pdu_json(p, room_version, outlier=outlier)
                     for p in transaction_data["pdus"]
-                ]
+                ]  # type: List[EventBase]
 
                 if pdu_list and pdu_list[0]:
                     pdu = pdu_list[0]
 
                     # Check signatures are correct.
-                    signed_pdu = yield self._check_sigs_and_hash(room_version, pdu)
+                    signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
 
                     break
 
@@ -309,177 +320,117 @@ class FederationClient(FederationBase):
 
         return signed_pdu
 
-    @defer.inlineCallbacks
-    @log_function
-    def get_state_for_room(self, destination, room_id, event_id):
-        """Requests all of the room state at a given event from a remote home server.
-
-        Args:
-            destination (str): The remote homeserver to query for the state.
-            room_id (str): The id of the room we're interested in.
-            event_id (str): The id of the event we want the state at.
+    async def get_room_state_ids(
+        self, destination: str, room_id: str, event_id: str
+    ) -> Tuple[List[str], List[str]]:
+        """Calls the /state_ids endpoint to fetch the state at a particular point
+        in the room, and the auth events for the given event
 
         Returns:
-            Deferred[Tuple[List[EventBase], List[EventBase]]]:
-                A list of events in the state, and a list of events in the auth chain
-                for the given event.
+            a tuple of (state event_ids, auth event_ids)
         """
-        try:
-            # First we try and ask for just the IDs, as thats far quicker if
-            # we have most of the state and auth_chain already.
-            # However, this may 404 if the other side has an old synapse.
-            result = yield self.transport_layer.get_room_state_ids(
-                destination, room_id, event_id=event_id
-            )
-
-            state_event_ids = result["pdu_ids"]
-            auth_event_ids = result.get("auth_chain_ids", [])
-
-            fetched_events, failed_to_fetch = yield self.get_events_from_store_or_dest(
-                destination, room_id, set(state_event_ids + auth_event_ids)
-            )
-
-            if failed_to_fetch:
-                logger.warning(
-                    "Failed to fetch missing state/auth events for %s: %s",
-                    room_id,
-                    failed_to_fetch,
-                )
-
-            event_map = {ev.event_id: ev for ev in fetched_events}
-
-            pdus = [event_map[e_id] for e_id in state_event_ids if e_id in event_map]
-            auth_chain = [
-                event_map[e_id] for e_id in auth_event_ids if e_id in event_map
-            ]
-
-            auth_chain.sort(key=lambda e: e.depth)
-
-            return pdus, auth_chain
-        except HttpResponseException as e:
-            if e.code == 400 or e.code == 404:
-                logger.info("Failed to use get_room_state_ids API, falling back")
-            else:
-                raise e
-
-        result = yield self.transport_layer.get_room_state(
+        result = await self.transport_layer.get_room_state_ids(
             destination, room_id, event_id=event_id
         )
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
+        state_event_ids = result["pdu_ids"]
+        auth_event_ids = result.get("auth_chain_ids", [])
 
-        pdus = [
-            event_from_pdu_json(p, format_ver, outlier=True) for p in result["pdus"]
-        ]
+        if not isinstance(state_event_ids, list) or not isinstance(
+            auth_event_ids, list
+        ):
+            raise Exception("invalid response from /state_ids")
 
-        auth_chain = [
-            event_from_pdu_json(p, format_ver, outlier=True)
-            for p in result.get("auth_chain", [])
-        ]
-
-        seen_events = yield self.store.get_events(
-            [ev.event_id for ev in itertools.chain(pdus, auth_chain)]
-        )
-
-        signed_pdus = yield self._check_sigs_and_hash_and_fetch(
-            destination,
-            [p for p in pdus if p.event_id not in seen_events],
-            outlier=True,
-            room_version=room_version,
-        )
-        signed_pdus.extend(
-            seen_events[p.event_id] for p in pdus if p.event_id in seen_events
-        )
-
-        signed_auth = yield self._check_sigs_and_hash_and_fetch(
-            destination,
-            [p for p in auth_chain if p.event_id not in seen_events],
-            outlier=True,
-            room_version=room_version,
-        )
-        signed_auth.extend(
-            seen_events[p.event_id] for p in auth_chain if p.event_id in seen_events
-        )
-
-        signed_auth.sort(key=lambda e: e.depth)
+        return state_event_ids, auth_event_ids
 
-        return signed_pdus, signed_auth
-
-    @defer.inlineCallbacks
-    def get_events_from_store_or_dest(self, destination, room_id, event_ids):
-        """Fetch events from a remote destination, checking if we already have them.
+    async def _check_sigs_and_hash_and_fetch(
+        self,
+        origin: str,
+        pdus: List[EventBase],
+        room_version: RoomVersion,
+        outlier: bool = False,
+        include_none: bool = False,
+    ) -> List[EventBase]:
+        """Takes a list of PDUs and checks the signatures and hashs of each
+        one. If a PDU fails its signature check then we check if we have it in
+        the database and if not then request it from the originating server of
+        that PDU.
+
+        If a PDU fails its content hash check then it is redacted.
+
+        The given list of PDUs are not modified, instead the function returns
+        a new list.
 
         Args:
-            destination (str)
-            room_id (str)
-            event_ids (list)
+            origin
+            pdus
+            room_version
+            outlier: Whether the events are outliers or not
+            include_none: Whether to include None in the returned list
+                for events that have failed their checks
 
         Returns:
-            Deferred: A deferred resolving to a 2-tuple where the first is a list of
-            events and the second is a list of event ids that we failed to fetch.
+            A list of PDUs that have valid signatures and hashes.
         """
-        seen_events = yield self.store.get_events(event_ids, allow_rejected=True)
-        signed_events = list(seen_events.values())
-
-        failed_to_fetch = set()
+        deferreds = self._check_sigs_and_hashes(room_version, pdus)
 
-        missing_events = set(event_ids)
-        for k in seen_events:
-            missing_events.discard(k)
-
-        if not missing_events:
-            return signed_events, failed_to_fetch
-
-        logger.debug(
-            "Fetching unknown state/auth events %s for room %s",
-            missing_events,
-            event_ids,
-        )
-
-        room_version = yield self.store.get_room_version(room_id)
+        @defer.inlineCallbacks
+        def handle_check_result(pdu: EventBase, deferred: Deferred):
+            try:
+                res = yield make_deferred_yieldable(deferred)
+            except SynapseError:
+                res = None
+
+            if not res:
+                # Check local db.
+                res = yield self.store.get_event(
+                    pdu.event_id, allow_rejected=True, allow_none=True
+                )
 
-        batch_size = 20
-        missing_events = list(missing_events)
-        for i in range(0, len(missing_events), batch_size):
-            batch = set(missing_events[i : i + batch_size])
+            if not res and pdu.origin != origin:
+                try:
+                    res = yield defer.ensureDeferred(
+                        self.get_pdu(
+                            destinations=[pdu.origin],
+                            event_id=pdu.event_id,
+                            room_version=room_version,
+                            outlier=outlier,
+                            timeout=10000,
+                        )
+                    )
+                except SynapseError:
+                    pass
 
-            deferreds = [
-                run_in_background(
-                    self.get_pdu,
-                    destinations=[destination],
-                    event_id=e_id,
-                    room_version=room_version,
+            if not res:
+                logger.warning(
+                    "Failed to find copy of %s with valid signature", pdu.event_id
                 )
-                for e_id in batch
-            ]
 
-            res = yield make_deferred_yieldable(
-                defer.DeferredList(deferreds, consumeErrors=True)
-            )
-            for success, result in res:
-                if success and result:
-                    signed_events.append(result)
-                    batch.discard(result.event_id)
+            return res
 
-            # We removed all events we successfully fetched from `batch`
-            failed_to_fetch.update(batch)
+        handle = preserve_fn(handle_check_result)
+        deferreds2 = [handle(pdu, deferred) for pdu, deferred in zip(pdus, deferreds)]
 
-        return signed_events, failed_to_fetch
+        valid_pdus = await make_deferred_yieldable(
+            defer.gatherResults(deferreds2, consumeErrors=True)
+        ).addErrback(unwrapFirstError)
 
-    @defer.inlineCallbacks
-    @log_function
-    def get_event_auth(self, destination, room_id, event_id):
-        res = yield self.transport_layer.get_event_auth(destination, room_id, event_id)
+        if include_none:
+            return valid_pdus
+        else:
+            return [p for p in valid_pdus if p]
+
+    async def get_event_auth(self, destination, room_id, event_id):
+        res = await self.transport_layer.get_event_auth(destination, room_id, event_id)
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
+        room_version = await self.store.get_room_version(room_id)
 
         auth_chain = [
-            event_from_pdu_json(p, format_ver, outlier=True) for p in res["auth_chain"]
+            event_from_pdu_json(p, room_version, outlier=True)
+            for p in res["auth_chain"]
         ]
 
-        signed_auth = yield self._check_sigs_and_hash_and_fetch(
+        signed_auth = await self._check_sigs_and_hash_and_fetch(
             destination, auth_chain, outlier=True, room_version=room_version
         )
 
@@ -487,17 +438,21 @@ class FederationClient(FederationBase):
 
         return signed_auth
 
-    @defer.inlineCallbacks
-    def _try_destination_list(self, description, destinations, callback):
+    async def _try_destination_list(
+        self,
+        description: str,
+        destinations: Iterable[str],
+        callback: Callable[[str], Awaitable[T]],
+    ) -> T:
         """Try an operation on a series of servers, until it succeeds
 
         Args:
-            description (unicode): description of the operation we're doing, for logging
+            description: description of the operation we're doing, for logging
 
-            destinations (Iterable[unicode]): list of server_names to try
+            destinations: list of server_names to try
 
-            callback (callable):  Function to run for each server. Passed a single
-                argument: the server_name to try. May return a deferred.
+            callback:  Function to run for each server. Passed a single
+                argument: the server_name to try.
 
                 If the callback raises a CodeMessageException with a 300/400 code,
                 attempts to perform the operation stop immediately and the exception is
@@ -508,7 +463,7 @@ class FederationClient(FederationBase):
                 suppressed if the exception is an InvalidResponseError.
 
         Returns:
-            The [Deferred] result of callback, if it succeeds
+            The result of callback, if it succeeds
 
         Raises:
             SynapseError if the chosen remote server returns a 300/400 code, or
@@ -519,15 +474,17 @@ class FederationClient(FederationBase):
                 continue
 
             try:
-                res = yield callback(destination)
+                res = await callback(destination)
                 return res
             except InvalidResponseError as e:
-                logger.warn("Failed to %s via %s: %s", description, destination, e)
+                logger.warning("Failed to %s via %s: %s", description, destination, e)
+            except UnsupportedRoomVersionError:
+                raise
             except HttpResponseException as e:
                 if not 500 <= e.code < 600:
                     raise e.to_synapse_error()
                 else:
-                    logger.warn(
+                    logger.warning(
                         "Failed to %s via %s: %i %s",
                         description,
                         destination,
@@ -535,13 +492,21 @@ class FederationClient(FederationBase):
                         e.args[0],
                     )
             except Exception:
-                logger.warn("Failed to %s via %s", description, destination, exc_info=1)
+                logger.warning(
+                    "Failed to %s via %s", description, destination, exc_info=True
+                )
 
         raise SynapseError(502, "Failed to %s via any server" % (description,))
 
-    def make_membership_event(
-        self, destinations, room_id, user_id, membership, content, params
-    ):
+    async def make_membership_event(
+        self,
+        destinations: Iterable[str],
+        room_id: str,
+        user_id: str,
+        membership: str,
+        content: dict,
+        params: Dict[str, str],
+    ) -> Tuple[str, EventBase, RoomVersion]:
         """
         Creates an m.room.member event, with context, without participating in the room.
 
@@ -553,26 +518,28 @@ class FederationClient(FederationBase):
         Note that this does not append any events to any graphs.
 
         Args:
-            destinations (str): Candidate homeservers which are probably
+            destinations: Candidate homeservers which are probably
                 participating in the room.
-            room_id (str): The room in which the event will happen.
-            user_id (str): The user whose membership is being evented.
-            membership (str): The "membership" property of the event. Must be
-                one of "join" or "leave".
-            content (dict): Any additional data to put into the content field
-                of the event.
-            params (dict[str, str|Iterable[str]]): Query parameters to include in the
-                request.
-        Return:
-            Deferred[tuple[str, FrozenEvent, int]]: resolves to a tuple of
-            `(origin, event, event_format)` where origin is the remote
-            homeserver which generated the event, and event_format is one of
-            `synapse.api.room_versions.EventFormatVersions`.
-
-            Fails with a ``SynapseError`` if the chosen remote server
-            returns a 300/400 code.
-
-            Fails with a ``RuntimeError`` if no servers were reachable.
+            room_id: The room in which the event will happen.
+            user_id: The user whose membership is being evented.
+            membership: The "membership" property of the event. Must be one of
+                "join" or "leave".
+            content: Any additional data to put into the content field of the
+                event.
+            params: Query parameters to include in the request.
+
+        Returns:
+            `(origin, event, room_version)` where origin is the remote
+            homeserver which generated the event, and room_version is the
+            version of the room.
+
+        Raises:
+            UnsupportedRoomVersionError: if remote responds with
+                a room version we don't understand.
+
+            SynapseError: if the chosen remote server returns a 300/400 code.
+
+            RuntimeError: if no servers were reachable.
         """
         valid_memberships = {Membership.JOIN, Membership.LEAVE}
         if membership not in valid_memberships:
@@ -581,16 +548,17 @@ class FederationClient(FederationBase):
                 % (membership, ",".join(valid_memberships))
             )
 
-        @defer.inlineCallbacks
-        def send_request(destination):
-            ret = yield self.transport_layer.make_membership_event(
+        async def send_request(destination: str) -> Tuple[str, EventBase, RoomVersion]:
+            ret = await self.transport_layer.make_membership_event(
                 destination, room_id, user_id, membership, params
             )
 
             # Note: If not supplied, the room version may be either v1 or v2,
             # however either way the event format version will be v1.
-            room_version = ret.get("room_version", RoomVersions.V1.identifier)
-            event_format = room_version_to_event_format(room_version)
+            room_version_id = ret.get("room_version", RoomVersions.V1.identifier)
+            room_version = KNOWN_ROOM_VERSIONS.get(room_version_id)
+            if not room_version:
+                raise UnsupportedRoomVersionError()
 
             pdu_dict = ret.get("event", None)
             if not isinstance(pdu_dict, dict):
@@ -610,94 +578,83 @@ class FederationClient(FederationBase):
                 self._clock,
                 self.hostname,
                 self.signing_key,
-                format_version=event_format,
+                room_version=room_version,
                 event_dict=pdu_dict,
             )
 
-            return (destination, ev, event_format)
+            return destination, ev, room_version
 
-        return self._try_destination_list(
+        return await self._try_destination_list(
             "make_" + membership, destinations, send_request
         )
 
-    def send_join(self, destinations, pdu, event_format_version):
+    async def send_join(
+        self, destinations: Iterable[str], pdu: EventBase, room_version: RoomVersion
+    ) -> Dict[str, Any]:
         """Sends a join event to one of a list of homeservers.
 
         Doing so will cause the remote server to add the event to the graph,
         and send the event out to the rest of the federation.
 
         Args:
-            destinations (str): Candidate homeservers which are probably
+            destinations: Candidate homeservers which are probably
                 participating in the room.
-            pdu (BaseEvent): event to be sent
-            event_format_version (int): The event format version
+            pdu: event to be sent
+            room_version: the version of the room (according to the server that
+                did the make_join)
 
-        Return:
-            Deferred: resolves to a dict with members ``origin`` (a string
-            giving the serer the event was sent to, ``state`` (?) and
+        Returns:
+            a dict with members ``origin`` (a string
+            giving the server the event was sent to), ``state`` (?) and
             ``auth_chain``.
 
-            Fails with a ``SynapseError`` if the chosen remote server
-            returns a 300/400 code.
+        Raises:
+            SynapseError: if the chosen remote server returns a 300/400 code.
 
-            Fails with a ``RuntimeError`` if no servers were reachable.
+            RuntimeError: if no servers were reachable.
         """
 
-        def check_authchain_validity(signed_auth_chain):
-            for e in signed_auth_chain:
-                if e.type == EventTypes.Create:
-                    create_event = e
-                    break
-            else:
-                raise InvalidResponseError("no %s in auth chain" % (EventTypes.Create,))
-
-            # the room version should be sane.
-            room_version = create_event.content.get("room_version", "1")
-            if room_version not in KNOWN_ROOM_VERSIONS:
-                # This shouldn't be possible, because the remote server should have
-                # rejected the join attempt during make_join.
-                raise InvalidResponseError(
-                    "room appears to have unsupported version %s" % (room_version,)
-                )
-
-        @defer.inlineCallbacks
-        def send_request(destination):
-            time_now = self._clock.time_msec()
-            _, content = yield self.transport_layer.send_join(
-                destination=destination,
-                room_id=pdu.room_id,
-                event_id=pdu.event_id,
-                content=pdu.get_pdu_json(time_now),
-            )
+        async def send_request(destination) -> Dict[str, Any]:
+            content = await self._do_send_join(destination, pdu)
 
             logger.debug("Got content: %s", content)
 
             state = [
-                event_from_pdu_json(p, event_format_version, outlier=True)
+                event_from_pdu_json(p, room_version, outlier=True)
                 for p in content.get("state", [])
             ]
 
             auth_chain = [
-                event_from_pdu_json(p, event_format_version, outlier=True)
+                event_from_pdu_json(p, room_version, outlier=True)
                 for p in content.get("auth_chain", [])
             ]
 
             pdus = {p.event_id: p for p in itertools.chain(state, auth_chain)}
 
-            room_version = None
+            create_event = None
             for e in state:
                 if (e.type, e.state_key) == (EventTypes.Create, ""):
-                    room_version = e.content.get(
-                        "room_version", RoomVersions.V1.identifier
-                    )
+                    create_event = e
                     break
 
-            if room_version is None:
+            if create_event is None:
                 # If the state doesn't have a create event then the room is
                 # invalid, and it would fail auth checks anyway.
                 raise SynapseError(400, "No create event in state")
 
-            valid_pdus = yield self._check_sigs_and_hash_and_fetch(
+            # the room version should be sane.
+            create_room_version = create_event.content.get(
+                "room_version", RoomVersions.V1.identifier
+            )
+            if create_room_version != room_version.identifier:
+                # either the server that fulfilled the make_join, or the server that is
+                # handling the send_join, is lying.
+                raise InvalidResponseError(
+                    "Unexpected room version %s in create event"
+                    % (create_room_version,)
+                )
+
+            valid_pdus = await self._check_sigs_and_hash_and_fetch(
                 destination,
                 list(pdus.values()),
                 outlier=True,
@@ -725,7 +682,17 @@ class FederationClient(FederationBase):
             for s in signed_state:
                 s.internal_metadata = copy.deepcopy(s.internal_metadata)
 
-            check_authchain_validity(signed_auth)
+            # double-check that the same create event has ended up in the auth chain
+            auth_chain_create_events = [
+                e.event_id
+                for e in signed_auth
+                if (e.type, e.state_key) == (EventTypes.Create, "")
+            ]
+            if auth_chain_create_events != [create_event.event_id]:
+                raise InvalidResponseError(
+                    "Unexpected create event(s) in auth chain: %s"
+                    % (auth_chain_create_events,)
+                )
 
             return {
                 "state": signed_state,
@@ -733,53 +700,84 @@ class FederationClient(FederationBase):
                 "origin": destination,
             }
 
-        return self._try_destination_list("send_join", destinations, send_request)
+        return await self._try_destination_list("send_join", destinations, send_request)
 
-    @defer.inlineCallbacks
-    def send_invite(self, destination, room_id, event_id, pdu):
-        room_version = yield self.store.get_room_version(room_id)
+    async def _do_send_join(self, destination: str, pdu: EventBase):
+        time_now = self._clock.time_msec()
+
+        try:
+            content = await self.transport_layer.send_join_v2(
+                destination=destination,
+                room_id=pdu.room_id,
+                event_id=pdu.event_id,
+                content=pdu.get_pdu_json(time_now),
+            )
+
+            return content
+        except HttpResponseException as e:
+            if e.code in [400, 404]:
+                err = e.to_synapse_error()
+
+                # If we receive an error response that isn't a generic error, or an
+                # unrecognised endpoint error, we assume that the remote understands
+                # the v2 send_join API and this is a legitimate error.
+                if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]:
+                    raise err
+            else:
+                raise e.to_synapse_error()
 
-        content = yield self._do_send_invite(destination, pdu, room_version)
+        logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API")
+
+        resp = await self.transport_layer.send_join_v1(
+            destination=destination,
+            room_id=pdu.room_id,
+            event_id=pdu.event_id,
+            content=pdu.get_pdu_json(time_now),
+        )
+
+        # We expect the v1 API to respond with [200, content], so we only return the
+        # content.
+        return resp[1]
+
+    async def send_invite(
+        self, destination: str, room_id: str, event_id: str, pdu: EventBase,
+    ) -> EventBase:
+        room_version = await self.store.get_room_version(room_id)
+
+        content = await self._do_send_invite(destination, pdu, room_version)
 
         pdu_dict = content["event"]
 
         logger.debug("Got response to send_invite: %s", pdu_dict)
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
-
-        pdu = event_from_pdu_json(pdu_dict, format_ver)
+        pdu = event_from_pdu_json(pdu_dict, room_version)
 
         # Check signatures are correct.
-        pdu = yield self._check_sigs_and_hash(room_version, pdu)
+        pdu = await self._check_sigs_and_hash(room_version, pdu)
 
         # FIXME: We should handle signature failures more gracefully.
 
         return pdu
 
-    @defer.inlineCallbacks
-    def _do_send_invite(self, destination, pdu, room_version):
+    async def _do_send_invite(
+        self, destination: str, pdu: EventBase, room_version: RoomVersion
+    ) -> JsonDict:
         """Actually sends the invite, first trying v2 API and falling back to
         v1 API if necessary.
 
-        Args:
-            destination (str): Target server
-            pdu (FrozenEvent)
-            room_version (str)
-
         Returns:
-            dict: The event as a dict as returned by the remote server
+            The event as a dict as returned by the remote server
         """
         time_now = self._clock.time_msec()
 
         try:
-            content = yield self.transport_layer.send_invite_v2(
+            content = await self.transport_layer.send_invite_v2(
                 destination=destination,
                 room_id=pdu.room_id,
                 event_id=pdu.event_id,
                 content={
                     "event": pdu.get_pdu_json(time_now),
-                    "room_version": room_version,
+                    "room_version": room_version.identifier,
                     "invite_room_state": pdu.unsigned.get("invite_room_state", []),
                 },
             )
@@ -797,8 +795,7 @@ class FederationClient(FederationBase):
                 # Otherwise, we assume that the remote server doesn't understand
                 # the v2 invite API. That's ok provided the room uses old-style event
                 # IDs.
-                v = KNOWN_ROOM_VERSIONS.get(room_version)
-                if v.event_format != EventFormatVersions.V1:
+                if room_version.event_format != EventFormatVersions.V1:
                     raise SynapseError(
                         400,
                         "User's homeserver does not support this room version",
@@ -812,7 +809,7 @@ class FederationClient(FederationBase):
         # Didn't work, try v1 API.
         # Note the v1 API returns a tuple of `(200, content)`
 
-        _, content = yield self.transport_layer.send_invite_v1(
+        _, content = await self.transport_layer.send_invite_v1(
             destination=destination,
             room_id=pdu.room_id,
             event_id=pdu.event_id,
@@ -820,7 +817,7 @@ class FederationClient(FederationBase):
         )
         return content
 
-    def send_leave(self, destinations, pdu):
+    async def send_leave(self, destinations: Iterable[str], pdu: EventBase) -> None:
         """Sends a leave event to one of a list of homeservers.
 
         Doing so will cause the remote server to add the event to the graph,
@@ -829,48 +826,94 @@ class FederationClient(FederationBase):
         This is mostly useful to reject received invites.
 
         Args:
-            destinations (str): Candidate homeservers which are probably
+            destinations: Candidate homeservers which are probably
                 participating in the room.
-            pdu (BaseEvent): event to be sent
+            pdu: event to be sent
 
-        Return:
-            Deferred: resolves to None.
-
-            Fails with a ``SynapseError`` if the chosen remote server
-            returns a 300/400 code.
+        Raises:
+            SynapseError if the chosen remote server returns a 300/400 code.
 
-            Fails with a ``RuntimeError`` if no servers were reachable.
+            RuntimeError if no servers were reachable.
         """
 
-        @defer.inlineCallbacks
-        def send_request(destination):
-            time_now = self._clock.time_msec()
-            _, content = yield self.transport_layer.send_leave(
+        async def send_request(destination: str) -> None:
+            content = await self._do_send_leave(destination, pdu)
+            logger.debug("Got content: %s", content)
+
+        return await self._try_destination_list(
+            "send_leave", destinations, send_request
+        )
+
+    async def _do_send_leave(self, destination, pdu):
+        time_now = self._clock.time_msec()
+
+        try:
+            content = await self.transport_layer.send_leave_v2(
                 destination=destination,
                 room_id=pdu.room_id,
                 event_id=pdu.event_id,
                 content=pdu.get_pdu_json(time_now),
             )
 
-            logger.debug("Got content: %s", content)
-            return None
+            return content
+        except HttpResponseException as e:
+            if e.code in [400, 404]:
+                err = e.to_synapse_error()
 
-        return self._try_destination_list("send_leave", destinations, send_request)
+                # If we receive an error response that isn't a generic error, or an
+                # unrecognised endpoint error, we assume that the remote understands
+                # the v2 send_leave API and this is a legitimate error.
+                if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]:
+                    raise err
+            else:
+                raise e.to_synapse_error()
+
+        logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API")
+
+        resp = await self.transport_layer.send_leave_v1(
+            destination=destination,
+            room_id=pdu.room_id,
+            event_id=pdu.event_id,
+            content=pdu.get_pdu_json(time_now),
+        )
+
+        # We expect the v1 API to respond with [200, content], so we only return the
+        # content.
+        return resp[1]
 
     def get_public_rooms(
         self,
-        destination,
-        limit=None,
-        since_token=None,
-        search_filter=None,
-        include_all_networks=False,
-        third_party_instance_id=None,
+        remote_server: str,
+        limit: Optional[int] = None,
+        since_token: Optional[str] = None,
+        search_filter: Optional[Dict] = None,
+        include_all_networks: bool = False,
+        third_party_instance_id: Optional[str] = None,
     ):
-        if destination == self.server_name:
-            return
+        """Get the list of public rooms from a remote homeserver
+
+        Args:
+            remote_server: The name of the remote server
+            limit: Maximum amount of rooms to return
+            since_token: Used for result pagination
+            search_filter: A filter dictionary to send the remote homeserver
+                and filter the result set
+            include_all_networks: Whether to include results from all third party instances
+            third_party_instance_id: Restrict results to a specific third party
+                instance, if given
+
+        Returns:
+            Deferred[Dict[str, Any]]: The response from the remote server, or None if
+            `remote_server` is the same as the local server_name
+
+        Raises:
+            HttpResponseException: There was an exception returned from the remote server
+            SynapseError: M_FORBIDDEN when the remote server has disallowed publicRoom
+                requests over federation
 
+        """
         return self.transport_layer.get_public_rooms(
-            destination,
+            remote_server,
             limit,
             since_token,
             search_filter,
@@ -878,72 +921,33 @@ class FederationClient(FederationBase):
             third_party_instance_id=third_party_instance_id,
         )
 
-    @defer.inlineCallbacks
-    def query_auth(self, destination, room_id, event_id, local_auth):
-        """
-        Params:
-            destination (str)
-            event_it (str)
-            local_auth (list)
-        """
-        time_now = self._clock.time_msec()
-
-        send_content = {"auth_chain": [e.get_pdu_json(time_now) for e in local_auth]}
-
-        code, content = yield self.transport_layer.send_query_auth(
-            destination=destination,
-            room_id=room_id,
-            event_id=event_id,
-            content=send_content,
-        )
-
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
-
-        auth_chain = [event_from_pdu_json(e, format_ver) for e in content["auth_chain"]]
-
-        signed_auth = yield self._check_sigs_and_hash_and_fetch(
-            destination, auth_chain, outlier=True, room_version=room_version
-        )
-
-        signed_auth.sort(key=lambda e: e.depth)
-
-        ret = {
-            "auth_chain": signed_auth,
-            "rejects": content.get("rejects", []),
-            "missing": content.get("missing", []),
-        }
-
-        return ret
-
-    @defer.inlineCallbacks
-    def get_missing_events(
+    async def get_missing_events(
         self,
-        destination,
-        room_id,
-        earliest_events_ids,
-        latest_events,
-        limit,
-        min_depth,
-        timeout,
-    ):
+        destination: str,
+        room_id: str,
+        earliest_events_ids: Sequence[str],
+        latest_events: Iterable[EventBase],
+        limit: int,
+        min_depth: int,
+        timeout: int,
+    ) -> List[EventBase]:
         """Tries to fetch events we are missing. This is called when we receive
         an event without having received all of its ancestors.
 
         Args:
-            destination (str)
-            room_id (str)
-            earliest_events_ids (list): List of event ids. Effectively the
+            destination
+            room_id
+            earliest_events_ids: List of event ids. Effectively the
                 events we expected to receive, but haven't. `get_missing_events`
                 should only return events that didn't happen before these.
-            latest_events (list): List of events we have received that we don't
+            latest_events: List of events we have received that we don't
                 have all previous events for.
-            limit (int): Maximum number of events to return.
-            min_depth (int): Minimum depth of events tor return.
-            timeout (int): Max time to wait in ms
+            limit: Maximum number of events to return.
+            min_depth: Minimum depth of events to return.
+            timeout: Max time to wait in ms
         """
         try:
-            content = yield self.transport_layer.get_missing_events(
+            content = await self.transport_layer.get_missing_events(
                 destination=destination,
                 room_id=room_id,
                 earliest_events=earliest_events_ids,
@@ -953,14 +957,13 @@ class FederationClient(FederationBase):
                 timeout=timeout,
             )
 
-            room_version = yield self.store.get_room_version(room_id)
-            format_ver = room_version_to_event_format(room_version)
+            room_version = await self.store.get_room_version(room_id)
 
             events = [
-                event_from_pdu_json(e, format_ver) for e in content.get("events", [])
+                event_from_pdu_json(e, room_version) for e in content.get("events", [])
             ]
 
-            signed_events = yield self._check_sigs_and_hash_and_fetch(
+            signed_events = await self._check_sigs_and_hash_and_fetch(
                 destination, events, outlier=False, room_version=room_version
             )
         except HttpResponseException as e:
@@ -973,14 +976,13 @@ class FederationClient(FederationBase):
 
         return signed_events
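
As a sketch of a call site for the method above (values are illustrative;
`federation_client` is an assumed FederationClient instance, `new_event` is an
event we just received and `missing_ids` are the prev_events we do not yet
have):

    missing = await federation_client.get_missing_events(
        destination="remote.example.org",
        room_id=new_event.room_id,
        earliest_events_ids=missing_ids,
        latest_events=[new_event],
        limit=10,
        min_depth=0,
        timeout=10000,  # milliseconds
    )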
 
-    @defer.inlineCallbacks
-    def forward_third_party_invite(self, destinations, room_id, event_dict):
+    async def forward_third_party_invite(self, destinations, room_id, event_dict):
         for destination in destinations:
             if destination == self.server_name:
                 continue
 
             try:
-                yield self.transport_layer.exchange_third_party_invite(
+                await self.transport_layer.exchange_third_party_invite(
                     destination=destination, room_id=room_id, event_dict=event_dict
                 )
                 return None
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index da06ab379d..32a8a2ee46 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright 2015, 2016 OpenMarket Ltd
 # Copyright 2018 New Vector Ltd
+# Copyright 2019 Matrix.org Federation C.I.C
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from typing import Any, Callable, Dict, List, Match, Optional, Tuple, Union
 
 import six
 from six import iteritems
@@ -36,21 +38,24 @@ from synapse.api.errors import (
     UnsupportedRoomVersionError,
 )
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
-from synapse.crypto.event_signing import compute_event_signature
-from synapse.events import room_version_to_event_format
+from synapse.events import EventBase
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
 from synapse.federation.persistence import TransactionActions
 from synapse.federation.units import Edu, Transaction
 from synapse.http.endpoint import parse_server_name
-from synapse.logging.context import nested_logging_context
+from synapse.logging.context import (
+    make_deferred_yieldable,
+    nested_logging_context,
+    run_in_background,
+)
 from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace
 from synapse.logging.utils import log_function
 from synapse.replication.http.federation import (
     ReplicationFederationSendEduRestServlet,
     ReplicationGetQueryRestServlet,
 )
-from synapse.types import get_domain_from_id
-from synapse.util import glob_to_regex
+from synapse.types import JsonDict, get_domain_from_id
+from synapse.util import glob_to_regex, unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.caches.response_cache import ResponseCache
 
@@ -75,6 +80,9 @@ class FederationServer(FederationBase):
 
         self.auth = hs.get_auth()
         self.handler = hs.get_handlers().federation_handler
+        self.state = hs.get_state_handler()
+
+        self.device_handler = hs.get_device_handler()
 
         self._server_linearizer = Linearizer("fed_server")
         self._transaction_linearizer = Linearizer("fed_txn_handler")
@@ -87,14 +95,14 @@ class FederationServer(FederationBase):
         # come in waves.
         self._state_resp_cache = ResponseCache(hs, "state_resp", timeout_ms=30000)
 
-    @defer.inlineCallbacks
-    @log_function
-    def on_backfill_request(self, origin, room_id, versions, limit):
-        with (yield self._server_linearizer.queue((origin, room_id))):
+    async def on_backfill_request(
+        self, origin: str, room_id: str, versions: List[str], limit: int
+    ) -> Tuple[int, Dict[str, Any]]:
+        with (await self._server_linearizer.queue((origin, room_id))):
             origin_host, _ = parse_server_name(origin)
-            yield self.check_server_matches_acl(origin_host, room_id)
+            await self.check_server_matches_acl(origin_host, room_id)
 
-            pdus = yield self.handler.on_backfill_request(
+            pdus = await self.handler.on_backfill_request(
                 origin, room_id, versions, limit
             )
 
@@ -102,76 +110,114 @@ class FederationServer(FederationBase):
 
         return 200, res
 
-    @defer.inlineCallbacks
-    @log_function
-    def on_incoming_transaction(self, origin, transaction_data):
+    async def on_incoming_transaction(
+        self, origin: str, transaction_data: JsonDict
+    ) -> Tuple[int, Dict[str, Any]]:
         # keep this as early as possible to make the calculated origin ts as
         # accurate as possible.
         request_time = self._clock.time_msec()
 
         transaction = Transaction(**transaction_data)
 
-        if not transaction.transaction_id:
+        if not transaction.transaction_id:  # type: ignore
             raise Exception("Transaction missing transaction_id")
 
-        logger.debug("[%s] Got transaction", transaction.transaction_id)
+        logger.debug("[%s] Got transaction", transaction.transaction_id)  # type: ignore
 
         # use a linearizer to ensure that we don't process the same transaction
         # multiple times in parallel.
         with (
-            yield self._transaction_linearizer.queue(
-                (origin, transaction.transaction_id)
+            await self._transaction_linearizer.queue(
+                (origin, transaction.transaction_id)  # type: ignore
             )
         ):
-            result = yield self._handle_incoming_transaction(
+            result = await self._handle_incoming_transaction(
                 origin, transaction, request_time
             )
 
         return result
 
-    @defer.inlineCallbacks
-    def _handle_incoming_transaction(self, origin, transaction, request_time):
+    async def _handle_incoming_transaction(
+        self, origin: str, transaction: Transaction, request_time: int
+    ) -> Tuple[int, Dict[str, Any]]:
         """ Process an incoming transaction and return the HTTP response
 
         Args:
-            origin (unicode): the server making the request
-            transaction (Transaction): incoming transaction
-            request_time (int): timestamp that the HTTP request arrived at
+            origin: the server making the request
+            transaction: incoming transaction
+            request_time: timestamp that the HTTP request arrived at
 
         Returns:
-            Deferred[(int, object)]: http response code and body
+            HTTP response code and body
         """
-        response = yield self.transaction_actions.have_responded(origin, transaction)
+        response = await self.transaction_actions.have_responded(origin, transaction)
 
         if response:
             logger.debug(
                 "[%s] We've already responded to this request",
-                transaction.transaction_id,
+                transaction.transaction_id,  # type: ignore
             )
             return response
 
-        logger.debug("[%s] Transaction is new", transaction.transaction_id)
+        logger.debug("[%s] Transaction is new", transaction.transaction_id)  # type: ignore
 
-        # Reject if PDU count > 50 and EDU count > 100
-        if len(transaction.pdus) > 50 or (
-            hasattr(transaction, "edus") and len(transaction.edus) > 100
+        # Reject if PDU count > 50 or EDU count > 100
+        if len(transaction.pdus) > 50 or (  # type: ignore
+            hasattr(transaction, "edus") and len(transaction.edus) > 100  # type: ignore
         ):
 
             logger.info("Transaction PDU or EDU count too large. Returning 400")
 
             response = {}
-            yield self.transaction_actions.set_response(
+            await self.transaction_actions.set_response(
                 origin, transaction, 400, response
             )
             return 400, response
 
-        received_pdus_counter.inc(len(transaction.pdus))
+        # We process PDUs and EDUs in parallel. This is important as we don't
+        # want to block things like to-device messages from reaching clients
+        # behind the potentially expensive handling of PDUs.
+        pdu_results, _ = await make_deferred_yieldable(
+            defer.gatherResults(
+                [
+                    run_in_background(
+                        self._handle_pdus_in_txn, origin, transaction, request_time
+                    ),
+                    run_in_background(self._handle_edus_in_txn, origin, transaction),
+                ],
+                consumeErrors=True,
+            ).addErrback(unwrapFirstError)
+        )
+
+        response = {"pdus": pdu_results}
+
+        logger.debug("Returning: %s", str(response))
+
+        await self.transaction_actions.set_response(origin, transaction, 200, response)
+        return 200, response
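
For reference, the body returned to the sending server has roughly the shape
below (event IDs and error text are invented): an empty dict per event means
the PDU was accepted, while an "error" entry reports a per-event failure.

    response = {
        "pdus": {
            "$accepted_event:remote.example.org": {},
            "$rejected_event:remote.example.org": {"error": "Signature check failed"},
        }
    }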
+
+    async def _handle_pdus_in_txn(
+        self, origin: str, transaction: Transaction, request_time: int
+    ) -> Dict[str, dict]:
+        """Process the PDUs in a received transaction.
+
+        Args:
+            origin: the server making the request
+            transaction: incoming transaction
+            request_time: timestamp that the HTTP request arrived at
+
+        Returns:
+            A map from event ID of a processed PDU to any errors we should
+            report back to the sending server.
+        """
+
+        received_pdus_counter.inc(len(transaction.pdus))  # type: ignore
 
         origin_host, _ = parse_server_name(origin)
 
-        pdus_by_room = {}
+        pdus_by_room = {}  # type: Dict[str, List[EventBase]]
 
-        for p in transaction.pdus:
+        for p in transaction.pdus:  # type: ignore
             if "unsigned" in p:
                 unsigned = p["unsigned"]
                 if "age" in unsigned:
@@ -196,24 +242,17 @@ class FederationServer(FederationBase):
                 continue
 
             try:
-                room_version = yield self.store.get_room_version(room_id)
+                room_version = await self.store.get_room_version(room_id)
             except NotFoundError:
                 logger.info("Ignoring PDU for unknown room_id: %s", room_id)
                 continue
-
-            try:
-                format_ver = room_version_to_event_format(room_version)
-            except UnsupportedRoomVersionError:
+            except UnsupportedRoomVersionError as e:
                 # this can happen if support for a given room version is withdrawn,
                 # so that we still get events for said room.
-                logger.info(
-                    "Ignoring PDU for room %s with unknown version %s",
-                    room_id,
-                    room_version,
-                )
+                logger.info("Ignoring PDU: %s", e)
                 continue
 
-            event = event_from_pdu_json(p, format_ver)
+            event = event_from_pdu_json(p, room_version)
             pdus_by_room.setdefault(room_id, []).append(event)
 
         pdu_results = {}
@@ -222,13 +261,12 @@ class FederationServer(FederationBase):
         # require callouts to other servers to fetch missing events), but
         # impose a limit to avoid going too crazy with ram/cpu.
 
-        @defer.inlineCallbacks
-        def process_pdus_for_room(room_id):
+        async def process_pdus_for_room(room_id: str):
             logger.debug("Processing PDUs for %s", room_id)
             try:
-                yield self.check_server_matches_acl(origin_host, room_id)
+                await self.check_server_matches_acl(origin_host, room_id)
             except AuthError as e:
-                logger.warn("Ignoring PDUs for room %s from banned server", room_id)
+                logger.warning("Ignoring PDUs for room %s from banned server", room_id)
                 for pdu in pdus_by_room[room_id]:
                     event_id = pdu.event_id
                     pdu_results[event_id] = e.error_dict()
@@ -238,10 +276,10 @@ class FederationServer(FederationBase):
                 event_id = pdu.event_id
                 with nested_logging_context(event_id):
                     try:
-                        yield self._handle_received_pdu(origin, pdu)
+                        await self._handle_received_pdu(origin, pdu)
                         pdu_results[event_id] = {}
                     except FederationError as e:
-                        logger.warn("Error handling PDU %s: %s", event_id, e)
+                        logger.warning("Error handling PDU %s: %s", event_id, e)
                         pdu_results[event_id] = {"error": str(e)}
                     except Exception as e:
                         f = failure.Failure()
@@ -252,36 +290,40 @@ class FederationServer(FederationBase):
                             exc_info=(f.type, f.value, f.getTracebackObject()),
                         )
 
-        yield concurrently_execute(
+        await concurrently_execute(
             process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT
         )
 
-        if hasattr(transaction, "edus"):
-            for edu in (Edu(**x) for x in transaction.edus):
-                yield self.received_edu(origin, edu.edu_type, edu.content)
-
-        response = {"pdus": pdu_results}
+        return pdu_results
 
-        logger.debug("Returning: %s", str(response))
+    async def _handle_edus_in_txn(self, origin: str, transaction: Transaction):
+        """Process the EDUs in a received transaction.
+        """
 
-        yield self.transaction_actions.set_response(origin, transaction, 200, response)
-        return 200, response
+        async def _process_edu(edu_dict):
+            received_edus_counter.inc()
 
-    @defer.inlineCallbacks
-    def received_edu(self, origin, edu_type, content):
-        received_edus_counter.inc()
-        yield self.registry.on_edu(edu_type, origin, content)
+            edu = Edu(
+                origin=origin,
+                destination=self.server_name,
+                edu_type=edu_dict["edu_type"],
+                content=edu_dict["content"],
+            )
+            await self.registry.on_edu(edu.edu_type, origin, edu.content)
 
-    @defer.inlineCallbacks
-    @log_function
-    def on_context_state_request(self, origin, room_id, event_id):
-        if not event_id:
-            raise NotImplementedError("Specify an event")
+        await concurrently_execute(
+            _process_edu,
+            getattr(transaction, "edus", []),
+            TRANSACTION_CONCURRENCY_LIMIT,
+        )
 
+    async def on_context_state_request(
+        self, origin: str, room_id: str, event_id: str
+    ) -> Tuple[int, Dict[str, Any]]:
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, room_id)
+        await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = yield self.auth.check_host_in_room(room_id, origin)
+        in_room = await self.auth.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
@@ -290,237 +332,196 @@ class FederationServer(FederationBase):
         # in the cache so we could return it without waiting for the linearizer
         # - but that's non-trivial to get right, and anyway somewhat defeats
         # the point of the linearizer.
-        with (yield self._server_linearizer.queue((origin, room_id))):
-            resp = yield self._state_resp_cache.wrap(
-                (room_id, event_id),
-                self._on_context_state_request_compute,
-                room_id,
-                event_id,
+        with (await self._server_linearizer.queue((origin, room_id))):
+            resp = dict(
+                await self._state_resp_cache.wrap(
+                    (room_id, event_id),
+                    self._on_context_state_request_compute,
+                    room_id,
+                    event_id,
+                )
             )
 
+        room_version = await self.store.get_room_version_id(room_id)
+        resp["room_version"] = room_version
+
         return 200, resp
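
The ResponseCache wrapping above de-duplicates concurrent requests for the same
state. In isolation the pattern is roughly the following sketch, where
`expensive_state_computation` is a stand-in for the real callback and `hs` is a
HomeServer instance:

    from synapse.util.caches.response_cache import ResponseCache

    state_cache = ResponseCache(hs, "state_resp_example", timeout_ms=30000)

    async def get_state(room_id: str, event_id: str):
        # Concurrent callers with the same (room_id, event_id) share a single
        # run of expensive_state_computation within the cache's timeout window.
        return await state_cache.wrap(
            (room_id, event_id), expensive_state_computation, room_id, event_id
        )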
 
-    @defer.inlineCallbacks
-    def on_state_ids_request(self, origin, room_id, event_id):
+    async def on_state_ids_request(
+        self, origin: str, room_id: str, event_id: str
+    ) -> Tuple[int, Dict[str, Any]]:
         if not event_id:
             raise NotImplementedError("Specify an event")
 
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, room_id)
+        await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = yield self.auth.check_host_in_room(room_id, origin)
+        in_room = await self.auth.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
-        state_ids = yield self.handler.get_state_ids_for_pdu(room_id, event_id)
-        auth_chain_ids = yield self.store.get_auth_chain_ids(state_ids)
+        state_ids = await self.handler.get_state_ids_for_pdu(room_id, event_id)
+        auth_chain_ids = await self.store.get_auth_chain_ids(state_ids)
 
         return 200, {"pdu_ids": state_ids, "auth_chain_ids": auth_chain_ids}
 
-    @defer.inlineCallbacks
-    def _on_context_state_request_compute(self, room_id, event_id):
-        pdus = yield self.handler.get_state_for_pdu(room_id, event_id)
-        auth_chain = yield self.store.get_auth_chain([pdu.event_id for pdu in pdus])
-
-        for event in auth_chain:
-            # We sign these again because there was a bug where we
-            # incorrectly signed things the first time round
-            if self.hs.is_mine_id(event.event_id):
-                event.signatures.update(
-                    compute_event_signature(
-                        event.get_pdu_json(),
-                        self.hs.hostname,
-                        self.hs.config.signing_key[0],
-                    )
-                )
+    async def _on_context_state_request_compute(
+        self, room_id: str, event_id: str
+    ) -> Dict[str, list]:
+        if event_id:
+            pdus = await self.handler.get_state_for_pdu(room_id, event_id)
+        else:
+            pdus = (await self.state.get_current_state(room_id)).values()
+
+        auth_chain = await self.store.get_auth_chain([pdu.event_id for pdu in pdus])
 
         return {
             "pdus": [pdu.get_pdu_json() for pdu in pdus],
             "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
         }
 
-    @defer.inlineCallbacks
-    @log_function
-    def on_pdu_request(self, origin, event_id):
-        pdu = yield self.handler.get_persisted_pdu(origin, event_id)
+    async def on_pdu_request(
+        self, origin: str, event_id: str
+    ) -> Tuple[int, Union[JsonDict, str]]:
+        pdu = await self.handler.get_persisted_pdu(origin, event_id)
 
         if pdu:
             return 200, self._transaction_from_pdus([pdu]).get_dict()
         else:
             return 404, ""
 
-    @defer.inlineCallbacks
-    def on_query_request(self, query_type, args):
+    async def on_query_request(
+        self, query_type: str, args: Dict[str, str]
+    ) -> Tuple[int, Dict[str, Any]]:
         received_queries_counter.labels(query_type).inc()
-        resp = yield self.registry.on_query(query_type, args)
+        resp = await self.registry.on_query(query_type, args)
         return 200, resp
 
-    @defer.inlineCallbacks
-    def on_make_join_request(self, origin, room_id, user_id, supported_versions):
+    async def on_make_join_request(
+        self, origin: str, room_id: str, user_id: str, supported_versions: List[str]
+    ) -> Dict[str, Any]:
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, room_id)
+        await self.check_server_matches_acl(origin_host, room_id)
 
-        room_version = yield self.store.get_room_version(room_id)
+        room_version = await self.store.get_room_version_id(room_id)
         if room_version not in supported_versions:
-            logger.warn("Room version %s not in %s", room_version, supported_versions)
+            logger.warning(
+                "Room version %s not in %s", room_version, supported_versions
+            )
             raise IncompatibleRoomVersionError(room_version=room_version)
 
-        pdu = yield self.handler.on_make_join_request(origin, room_id, user_id)
+        pdu = await self.handler.on_make_join_request(origin, room_id, user_id)
         time_now = self._clock.time_msec()
         return {"event": pdu.get_pdu_json(time_now), "room_version": room_version}
 
-    @defer.inlineCallbacks
-    def on_invite_request(self, origin, content, room_version):
-        if room_version not in KNOWN_ROOM_VERSIONS:
+    async def on_invite_request(
+        self, origin: str, content: JsonDict, room_version_id: str
+    ) -> Dict[str, Any]:
+        room_version = KNOWN_ROOM_VERSIONS.get(room_version_id)
+        if not room_version:
             raise SynapseError(
                 400,
                 "Homeserver does not support this room version",
                 Codes.UNSUPPORTED_ROOM_VERSION,
             )
 
-        format_ver = room_version_to_event_format(room_version)
-
-        pdu = event_from_pdu_json(content, format_ver)
+        pdu = event_from_pdu_json(content, room_version)
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, pdu.room_id)
-        ret_pdu = yield self.handler.on_invite_request(origin, pdu)
+        await self.check_server_matches_acl(origin_host, pdu.room_id)
+        pdu = await self._check_sigs_and_hash(room_version, pdu)
+        ret_pdu = await self.handler.on_invite_request(origin, pdu, room_version)
         time_now = self._clock.time_msec()
         return {"event": ret_pdu.get_pdu_json(time_now)}
 
-    @defer.inlineCallbacks
-    def on_send_join_request(self, origin, content, room_id):
+    async def on_send_join_request(
+        self, origin: str, content: JsonDict, room_id: str
+    ) -> Dict[str, Any]:
         logger.debug("on_send_join_request: content: %s", content)
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
-        pdu = event_from_pdu_json(content, format_ver)
+        room_version = await self.store.get_room_version(room_id)
+        pdu = event_from_pdu_json(content, room_version)
 
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, pdu.room_id)
+        await self.check_server_matches_acl(origin_host, pdu.room_id)
 
         logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures)
-        res_pdus = yield self.handler.on_send_join_request(origin, pdu)
+
+        pdu = await self._check_sigs_and_hash(room_version, pdu)
+
+        res_pdus = await self.handler.on_send_join_request(origin, pdu)
         time_now = self._clock.time_msec()
-        return (
-            200,
-            {
-                "state": [p.get_pdu_json(time_now) for p in res_pdus["state"]],
-                "auth_chain": [
-                    p.get_pdu_json(time_now) for p in res_pdus["auth_chain"]
-                ],
-            },
-        )
+        return {
+            "state": [p.get_pdu_json(time_now) for p in res_pdus["state"]],
+            "auth_chain": [p.get_pdu_json(time_now) for p in res_pdus["auth_chain"]],
+        }
 
-    @defer.inlineCallbacks
-    def on_make_leave_request(self, origin, room_id, user_id):
+    async def on_make_leave_request(
+        self, origin: str, room_id: str, user_id: str
+    ) -> Dict[str, Any]:
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, room_id)
-        pdu = yield self.handler.on_make_leave_request(origin, room_id, user_id)
+        await self.check_server_matches_acl(origin_host, room_id)
+        pdu = await self.handler.on_make_leave_request(origin, room_id, user_id)
 
-        room_version = yield self.store.get_room_version(room_id)
+        room_version = await self.store.get_room_version_id(room_id)
 
         time_now = self._clock.time_msec()
         return {"event": pdu.get_pdu_json(time_now), "room_version": room_version}
 
-    @defer.inlineCallbacks
-    def on_send_leave_request(self, origin, content, room_id):
+    async def on_send_leave_request(
+        self, origin: str, content: JsonDict, room_id: str
+    ) -> dict:
         logger.debug("on_send_leave_request: content: %s", content)
 
-        room_version = yield self.store.get_room_version(room_id)
-        format_ver = room_version_to_event_format(room_version)
-        pdu = event_from_pdu_json(content, format_ver)
+        room_version = await self.store.get_room_version(room_id)
+        pdu = event_from_pdu_json(content, room_version)
 
         origin_host, _ = parse_server_name(origin)
-        yield self.check_server_matches_acl(origin_host, pdu.room_id)
+        await self.check_server_matches_acl(origin_host, pdu.room_id)
 
         logger.debug("on_send_leave_request: pdu sigs: %s", pdu.signatures)
-        yield self.handler.on_send_leave_request(origin, pdu)
-        return 200, {}
 
-    @defer.inlineCallbacks
-    def on_event_auth(self, origin, room_id, event_id):
-        with (yield self._server_linearizer.queue((origin, room_id))):
-            origin_host, _ = parse_server_name(origin)
-            yield self.check_server_matches_acl(origin_host, room_id)
+        pdu = await self._check_sigs_and_hash(room_version, pdu)
 
-            time_now = self._clock.time_msec()
-            auth_pdus = yield self.handler.on_event_auth(event_id)
-            res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]}
-        return 200, res
+        await self.handler.on_send_leave_request(origin, pdu)
+        return {}
 
-    @defer.inlineCallbacks
-    def on_query_auth_request(self, origin, content, room_id, event_id):
-        """
-        Content is a dict with keys::
-            auth_chain (list): A list of events that give the auth chain.
-            missing (list): A list of event_ids indicating what the other
-              side (`origin`) think we're missing.
-            rejects (dict): A mapping from event_id to a 2-tuple of reason
-              string and a proof (or None) of why the event was rejected.
-              The keys of this dict give the list of events the `origin` has
-              rejected.
-
-        Args:
-            origin (str)
-            content (dict)
-            event_id (str)
-
-        Returns:
-            Deferred: Results in `dict` with the same format as `content`
-        """
-        with (yield self._server_linearizer.queue((origin, room_id))):
+    async def on_event_auth(
+        self, origin: str, room_id: str, event_id: str
+    ) -> Tuple[int, Dict[str, Any]]:
+        with (await self._server_linearizer.queue((origin, room_id))):
             origin_host, _ = parse_server_name(origin)
-            yield self.check_server_matches_acl(origin_host, room_id)
-
-            room_version = yield self.store.get_room_version(room_id)
-            format_ver = room_version_to_event_format(room_version)
-
-            auth_chain = [
-                event_from_pdu_json(e, format_ver) for e in content["auth_chain"]
-            ]
-
-            signed_auth = yield self._check_sigs_and_hash_and_fetch(
-                origin, auth_chain, outlier=True, room_version=room_version
-            )
-
-            ret = yield self.handler.on_query_auth(
-                origin,
-                event_id,
-                room_id,
-                signed_auth,
-                content.get("rejects", []),
-                content.get("missing", []),
-            )
+            await self.check_server_matches_acl(origin_host, room_id)
 
             time_now = self._clock.time_msec()
-            send_content = {
-                "auth_chain": [e.get_pdu_json(time_now) for e in ret["auth_chain"]],
-                "rejects": ret.get("rejects", []),
-                "missing": ret.get("missing", []),
-            }
-
-        return 200, send_content
+            auth_pdus = await self.handler.on_event_auth(event_id)
+            res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]}
+        return 200, res
 
     @log_function
-    def on_query_client_keys(self, origin, content):
-        return self.on_query_request("client_keys", content)
+    async def on_query_client_keys(
+        self, origin: str, content: Dict[str, str]
+    ) -> Tuple[int, Dict[str, Any]]:
+        return await self.on_query_request("client_keys", content)
 
-    def on_query_user_devices(self, origin, user_id):
-        return self.on_query_request("user_devices", user_id)
+    async def on_query_user_devices(
+        self, origin: str, user_id: str
+    ) -> Tuple[int, Dict[str, Any]]:
+        keys = await self.device_handler.on_federation_query_user_devices(user_id)
+        return 200, keys
 
     @trace
-    @defer.inlineCallbacks
-    @log_function
-    def on_claim_client_keys(self, origin, content):
+    async def on_claim_client_keys(
+        self, origin: str, content: JsonDict
+    ) -> Dict[str, Any]:
         query = []
         for user_id, device_keys in content.get("one_time_keys", {}).items():
             for device_id, algorithm in device_keys.items():
                 query.append((user_id, device_id, algorithm))
 
         log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
-        results = yield self.store.claim_e2e_one_time_keys(query)
+        results = await self.store.claim_e2e_one_time_keys(query)
 
-        json_result = {}
+        json_result = {}  # type: Dict[str, Dict[str, dict]]
         for user_id, device_keys in results.items():
             for device_id, keys in device_keys.items():
                 for key_id, json_bytes in keys.items():
@@ -542,16 +543,19 @@ class FederationServer(FederationBase):
 
         return {"one_time_keys": json_result}
 
-    @defer.inlineCallbacks
-    @log_function
-    def on_get_missing_events(
-        self, origin, room_id, earliest_events, latest_events, limit
-    ):
-        with (yield self._server_linearizer.queue((origin, room_id))):
+    async def on_get_missing_events(
+        self,
+        origin: str,
+        room_id: str,
+        earliest_events: List[str],
+        latest_events: List[str],
+        limit: int,
+    ) -> Dict[str, list]:
+        with (await self._server_linearizer.queue((origin, room_id))):
             origin_host, _ = parse_server_name(origin)
-            yield self.check_server_matches_acl(origin_host, room_id)
+            await self.check_server_matches_acl(origin_host, room_id)
 
-            logger.info(
+            logger.debug(
                 "on_get_missing_events: earliest_events: %r, latest_events: %r,"
                 " limit: %d",
                 earliest_events,
@@ -559,27 +563,27 @@ class FederationServer(FederationBase):
                 limit,
             )
 
-            missing_events = yield self.handler.on_get_missing_events(
+            missing_events = await self.handler.on_get_missing_events(
                 origin, room_id, earliest_events, latest_events, limit
             )
 
             if len(missing_events) < 5:
-                logger.info(
+                logger.debug(
                     "Returning %d events: %r", len(missing_events), missing_events
                 )
             else:
-                logger.info("Returning %d events", len(missing_events))
+                logger.debug("Returning %d events", len(missing_events))
 
             time_now = self._clock.time_msec()
 
         return {"events": [ev.get_pdu_json(time_now) for ev in missing_events]}
 
     @log_function
-    def on_openid_userinfo(self, token):
+    async def on_openid_userinfo(self, token: str) -> Optional[str]:
         ts_now_ms = self._clock.time_msec()
-        return self.store.get_user_id_for_open_id_token(token, ts_now_ms)
+        return await self.store.get_user_id_for_open_id_token(token, ts_now_ms)
 
-    def _transaction_from_pdus(self, pdu_list):
+    def _transaction_from_pdus(self, pdu_list: List[EventBase]) -> Transaction:
         """Returns a new Transaction containing the given PDUs suitable for
         transmission.
         """
@@ -592,8 +596,7 @@ class FederationServer(FederationBase):
             destination=None,
         )
 
-    @defer.inlineCallbacks
-    def _handle_received_pdu(self, origin, pdu):
+    async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None:
         """ Process a PDU received in a federation /send/ transaction.
 
         If the event is invalid, then this method throws a FederationError.
@@ -614,10 +617,8 @@ class FederationServer(FederationBase):
         until we try to backfill across the discontinuity.
 
         Args:
-            origin (str): server which sent the pdu
-            pdu (FrozenEvent): received pdu
-
-        Returns (Deferred): completes with None
+            origin: server which sent the pdu
+            pdu: received pdu
 
         Raises: FederationError if the signatures / hash do not match, or
             if the event was unacceptable for any other reason (eg, too large,
@@ -646,68 +647,67 @@ class FederationServer(FederationBase):
                 logger.info("Accepting join PDU %s from %s", pdu.event_id, origin)
 
         # We've already checked that we know the room version by this point
-        room_version = yield self.store.get_room_version(pdu.room_id)
+        room_version = await self.store.get_room_version(pdu.room_id)
 
         # Check signature.
         try:
-            pdu = yield self._check_sigs_and_hash(room_version, pdu)
+            pdu = await self._check_sigs_and_hash(room_version, pdu)
         except SynapseError as e:
             raise FederationError("ERROR", e.code, e.msg, affected=pdu.event_id)
 
-        yield self.handler.on_receive_pdu(origin, pdu, sent_to_us_directly=True)
+        await self.handler.on_receive_pdu(origin, pdu, sent_to_us_directly=True)
 
     def __str__(self):
         return "<ReplicationLayer(%s)>" % self.server_name
 
-    @defer.inlineCallbacks
-    def exchange_third_party_invite(
-        self, sender_user_id, target_user_id, room_id, signed
+    async def exchange_third_party_invite(
+        self, sender_user_id: str, target_user_id: str, room_id: str, signed: Dict
     ):
-        ret = yield self.handler.exchange_third_party_invite(
+        ret = await self.handler.exchange_third_party_invite(
             sender_user_id, target_user_id, room_id, signed
         )
         return ret
 
-    @defer.inlineCallbacks
-    def on_exchange_third_party_invite_request(self, room_id, event_dict):
-        ret = yield self.handler.on_exchange_third_party_invite_request(
+    async def on_exchange_third_party_invite_request(
+        self, room_id: str, event_dict: Dict
+    ):
+        ret = await self.handler.on_exchange_third_party_invite_request(
             room_id, event_dict
         )
         return ret
 
-    @defer.inlineCallbacks
-    def check_server_matches_acl(self, server_name, room_id):
+    async def check_server_matches_acl(self, server_name: str, room_id: str):
         """Check if the given server is allowed by the server ACLs in the room
 
         Args:
-            server_name (str): name of server, *without any port part*
-            room_id (str): ID of the room to check
+            server_name: name of server, *without any port part*
+            room_id: ID of the room to check
 
         Raises:
             AuthError if the server does not match the ACL
         """
-        state_ids = yield self.store.get_current_state_ids(room_id)
+        state_ids = await self.store.get_current_state_ids(room_id)
         acl_event_id = state_ids.get((EventTypes.ServerACL, ""))
 
         if not acl_event_id:
             return
 
-        acl_event = yield self.store.get_event(acl_event_id)
+        acl_event = await self.store.get_event(acl_event_id)
         if server_matches_acl_event(server_name, acl_event):
             return
 
         raise AuthError(code=403, msg="Server is banned from room")
 
 
-def server_matches_acl_event(server_name, acl_event):
+def server_matches_acl_event(server_name: str, acl_event: EventBase) -> bool:
     """Check if the given server is allowed by the ACL event
 
     Args:
-        server_name (str): name of server, without any port part
-        acl_event (EventBase): m.room.server_acl event
+        server_name: name of server, without any port part
+        acl_event: m.room.server_acl event
 
     Returns:
-        bool: True if this server is allowed by the ACLs
+        True if this server is allowed by the ACLs
     """
     logger.debug("Checking %s against acl %s", server_name, acl_event.content)
 
@@ -715,7 +715,7 @@ def server_matches_acl_event(server_name, acl_event):
     # server name is a literal IP
     allow_ip_literals = acl_event.content.get("allow_ip_literals", True)
     if not isinstance(allow_ip_literals, bool):
-        logger.warn("Ignorning non-bool allow_ip_literals flag")
+        logger.warning("Ignorning non-bool allow_ip_literals flag")
         allow_ip_literals = True
     if not allow_ip_literals:
         # check for ipv6 literals. These start with '['.
@@ -729,7 +729,7 @@ def server_matches_acl_event(server_name, acl_event):
     # next,  check the deny list
     deny = acl_event.content.get("deny", [])
     if not isinstance(deny, (list, tuple)):
-        logger.warn("Ignorning non-list deny ACL %s", deny)
+        logger.warning("Ignorning non-list deny ACL %s", deny)
         deny = []
     for e in deny:
         if _acl_entry_matches(server_name, e):
@@ -739,7 +739,7 @@ def server_matches_acl_event(server_name, acl_event):
     # then the allow list.
     allow = acl_event.content.get("allow", [])
     if not isinstance(allow, (list, tuple)):
-        logger.warn("Ignorning non-list allow ACL %s", allow)
+        logger.warning("Ignorning non-list allow ACL %s", allow)
         allow = []
     for e in allow:
         if _acl_entry_matches(server_name, e):
@@ -751,9 +751,9 @@ def server_matches_acl_event(server_name, acl_event):
     return False
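
A worked example of the checks above, using a made-up m.room.server_acl event
content (glob entries are matched via glob_to_regex):

    acl_content = {
        "allow_ip_literals": False,
        "deny": ["*.evil.example"],
        "allow": ["*"],
    }
    # "matrix.org"        -> allowed: not an IP literal, not denied, matches "*"
    # "spam.evil.example" -> denied:  matches the deny entry "*.evil.example"
    # "1.2.3.4"           -> denied:  IP literal while allow_ip_literals is False
    # (server names are checked without any port part, per the docstring above)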
 
 
-def _acl_entry_matches(server_name, acl_entry):
+def _acl_entry_matches(server_name: str, acl_entry: str) -> Match:
     if not isinstance(acl_entry, six.string_types):
-        logger.warn(
+        logger.warning(
             "Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)
         )
         return False
@@ -770,13 +770,13 @@ class FederationHandlerRegistry(object):
         self.edu_handlers = {}
         self.query_handlers = {}
 
-    def register_edu_handler(self, edu_type, handler):
+    def register_edu_handler(self, edu_type: str, handler: Callable[[str, dict], None]):
         """Sets the handler callable that will be used to handle an incoming
         federation EDU of the given type.
 
         Args:
-            edu_type (str): The type of the incoming EDU to register handler for
-            handler (Callable[[str, dict]]): A callable invoked on incoming EDU
+            edu_type: The type of the incoming EDU to register handler for
+            handler: A callable invoked on incoming EDU
                 of the given type. The arguments are the origin server name and
                 the EDU contents.
         """
@@ -787,14 +787,16 @@ class FederationHandlerRegistry(object):
 
         self.edu_handlers[edu_type] = handler
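
For illustration, registering a handler per the description above might look
like the following, where `registry` is a FederationHandlerRegistry instance
and the handler body is hypothetical (m.typing is a standard EDU type):

    async def on_typing_edu(origin: str, content: dict) -> None:
        # origin is the name of the sending server; content is the EDU payload.
        ...

    registry.register_edu_handler("m.typing", on_typing_edu)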
 
-    def register_query_handler(self, query_type, handler):
+    def register_query_handler(
+        self, query_type: str, handler: Callable[[dict], defer.Deferred]
+    ):
         """Sets the handler callable that will be used to handle an incoming
         federation query of the given type.
 
         Args:
-            query_type (str): Category name of the query, which should match
+            query_type: Category name of the query, which should match
                 the string used by make_query.
-            handler (Callable[[dict], Deferred[dict]]): Invoked to handle
+            handler: Invoked to handle
                 incoming queries of this type. The return value will be yielded
                 on and the result used as the response to the query request.
         """
@@ -805,24 +807,24 @@ class FederationHandlerRegistry(object):
 
         self.query_handlers[query_type] = handler
 
-    @defer.inlineCallbacks
-    def on_edu(self, edu_type, origin, content):
+    async def on_edu(self, edu_type: str, origin: str, content: dict):
         handler = self.edu_handlers.get(edu_type)
         if not handler:
-            logger.warn("No handler registered for EDU type %s", edu_type)
+            logger.warning("No handler registered for EDU type %s", edu_type)
+            return
 
         with start_active_span_from_edu(content, "handle_edu"):
             try:
-                yield handler(origin, content)
+                await handler(origin, content)
             except SynapseError as e:
                 logger.info("Failed to handle edu %r: %r", edu_type, e)
             except Exception:
                 logger.exception("Failed to handle edu %r", edu_type)
 
-    def on_query(self, query_type, args):
+    def on_query(self, query_type: str, args: dict) -> defer.Deferred:
         handler = self.query_handlers.get(query_type)
         if not handler:
-            logger.warn("No handler registered for query type %s", query_type)
+            logger.warning("No handler registered for query type %s", query_type)
             raise NotFoundError("No handler for Query type '%s'" % (query_type,))
 
         return handler(args)
@@ -846,7 +848,7 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry):
 
         super(ReplicationFederationHandlerRegistry, self).__init__()
 
-    def on_edu(self, edu_type, origin, content):
+    async def on_edu(self, edu_type: str, origin: str, content: dict):
         """Overrides FederationHandlerRegistry
         """
         if not self.config.use_presence and edu_type == "m.presence":
@@ -854,17 +856,17 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry):
 
         handler = self.edu_handlers.get(edu_type)
         if handler:
-            return super(ReplicationFederationHandlerRegistry, self).on_edu(
+            return await super(ReplicationFederationHandlerRegistry, self).on_edu(
                 edu_type, origin, content
             )
 
-        return self._send_edu(edu_type=edu_type, origin=origin, content=content)
+        return await self._send_edu(edu_type=edu_type, origin=origin, content=content)
 
-    def on_query(self, query_type, args):
+    async def on_query(self, query_type: str, args: dict):
         """Overrides FederationHandlerRegistry
         """
         handler = self.query_handlers.get(query_type)
         if handler:
-            return handler(args)
+            return await handler(args)
 
-        return self._get_query_client(query_type=query_type, args=args)
+        return await self._get_query_client(query_type=query_type, args=args)
diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py
index 44edcabed4..d68b4bd670 100644
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
@@ -44,7 +44,7 @@ class TransactionActions(object):
             response code and response body.
         """
         if not transaction.transaction_id:
-            raise RuntimeError("Cannot persist a transaction with no " "transaction_id")
+            raise RuntimeError("Cannot persist a transaction with no transaction_id")
 
         return self.store.get_received_txn_response(transaction.transaction_id, origin)
 
@@ -56,7 +56,7 @@ class TransactionActions(object):
             Deferred
         """
         if not transaction.transaction_id:
-            raise RuntimeError("Cannot persist a transaction with no " "transaction_id")
+            raise RuntimeError("Cannot persist a transaction with no transaction_id")
 
         return self.store.set_received_txn_response(
             transaction.transaction_id, origin, code, response
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 454456a52d..52f4f54215 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -31,11 +31,14 @@ Events are replicated via a separate events stream.
 
 import logging
 from collections import namedtuple
+from typing import Dict, List, Tuple, Type
 
 from six import iteritems
 
 from sortedcontainers import SortedDict
 
+from twisted.internet import defer
+
 from synapse.metrics import LaterGauge
 from synapse.storage.presence import UserPresenceState
 from synapse.util.metrics import Measure
@@ -54,23 +57,35 @@ class FederationRemoteSendQueue(object):
         self.notifier = hs.get_notifier()
         self.is_mine_id = hs.is_mine_id
 
-        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
-        self.presence_changed = SortedDict()  # Stream position -> list[user_id]
+        # Pending presence map user_id -> UserPresenceState
+        self.presence_map = {}  # type: Dict[str, UserPresenceState]
+
+        # Stream position -> list[user_id]
+        self.presence_changed = SortedDict()  # type: SortedDict[int, List[str]]
 
         # Stores the destinations we need to explicitly send presence to about a
         # given user.
         # Stream position -> (user_id, destinations)
-        self.presence_destinations = SortedDict()
+        self.presence_destinations = (
+            SortedDict()
+        )  # type: SortedDict[int, Tuple[str, List[str]]]
 
-        self.keyed_edu = {}  # (destination, key) -> EDU
-        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)
+        # (destination, key) -> EDU
+        self.keyed_edu = {}  # type: Dict[Tuple[str, tuple], Edu]
 
-        self.edus = SortedDict()  # stream position -> Edu
+        # stream position -> (destination, key)
+        self.keyed_edu_changed = (
+            SortedDict()
+        )  # type: SortedDict[int, Tuple[str, tuple]]
 
-        self.device_messages = SortedDict()  # stream position -> destination
+        self.edus = SortedDict()  # type: SortedDict[int, Edu]
 
+        # stream ID for the next entry into presence_changed/keyed_edu_changed/edus.
         self.pos = 1
-        self.pos_time = SortedDict()
+
+        # map from stream ID to the time that stream entry was generated, so that we
+        # can clear out entries after a while
+        self.pos_time = SortedDict()  # type: SortedDict[int, int]
 
         # EVERYTHING IS SAD. In particular, python only makes new scopes when
         # we make a new function, so we need to make a new function so the inner
@@ -90,7 +105,6 @@ class FederationRemoteSendQueue(object):
             "keyed_edu",
             "keyed_edu_changed",
             "edus",
-            "device_messages",
             "pos_time",
             "presence_destinations",
         ]:
@@ -130,9 +144,9 @@ class FederationRemoteSendQueue(object):
             for key in keys[:i]:
                 del self.presence_changed[key]
 
-            user_ids = set(
+            user_ids = {
                 user_id for uids in self.presence_changed.values() for user_id in uids
-            )
+            }
 
             keys = self.presence_destinations.keys()
             i = self.presence_destinations.bisect_left(position_to_delete)
@@ -159,8 +173,10 @@ class FederationRemoteSendQueue(object):
             for edu_key in self.keyed_edu_changed.values():
                 live_keys.add(edu_key)
 
-            to_del = [edu_key for edu_key in self.keyed_edu if edu_key not in live_keys]
-            for edu_key in to_del:
+            keys_to_del = [
+                edu_key for edu_key in self.keyed_edu if edu_key not in live_keys
+            ]
+            for edu_key in keys_to_del:
                 del self.keyed_edu[edu_key]
 
             # Delete things out of edu map
@@ -169,12 +185,6 @@ class FederationRemoteSendQueue(object):
             for key in keys[:i]:
                 del self.edus[key]
 
-            # Delete things out of device map
-            keys = self.device_messages.keys()
-            i = self.device_messages.bisect_left(position_to_delete)
-            for key in keys[:i]:
-                del self.device_messages[key]
-
     def notify_new_events(self, current_id):
         """As per FederationSender"""
         # We don't need to replicate this as it gets sent down a different
@@ -212,7 +222,7 @@ class FederationRemoteSendQueue(object):
             receipt (synapse.types.ReadReceipt):
         """
         # nothing to do here: the replication listener will handle it.
-        pass
+        return defer.succeed(None)
 
     def send_presence(self, states):
         """As per FederationSender
@@ -247,9 +257,8 @@ class FederationRemoteSendQueue(object):
 
     def send_device_messages(self, destination):
         """As per FederationSender"""
-        pos = self._next_pos()
-        self.device_messages[pos] = destination
-        self.notifier.on_new_replication_data()
+        # We don't need to replicate this as it gets sent down a different
+        # stream.
 
     def get_current_token(self):
         return self.pos - 1
@@ -257,18 +266,24 @@ class FederationRemoteSendQueue(object):
     def federation_ack(self, token):
         self._clear_queue_before_pos(token)
 
-    def get_replication_rows(self, from_token, to_token, limit, federation_ack=None):
+    async def get_replication_rows(
+        self, instance_name: str, from_token: int, to_token: int, target_row_count: int
+    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
         """Get rows to be sent over federation between the two tokens
 
         Args:
-            from_token (int)
-            to_token(int)
-            limit (int)
-            federation_ack (int): Optional. The position where the worker is
-                explicitly acknowledged it has handled. Allows us to drop
-                data from before that point
+            instance_name: the name of the current process
+            from_token: the previous stream token: the starting point for fetching the
+                updates
+            to_token: the new stream token: the point to get updates up to
+            target_row_count: a target for the number of rows to be returned.
+
+        Returns: a triplet `(updates, new_last_token, limited)`, where:
+           * `updates` is a list of `(token, row)` entries.
+           * `new_last_token` is the new position in the stream.
+           * `limited` is whether there are more updates to fetch.
         """
-        # TODO: Handle limit.
+        # TODO: Handle target_row_count.
 
         # To handle restarts where we wrap around
         if from_token > self.pos:
@@ -276,12 +291,7 @@ class FederationRemoteSendQueue(object):
 
         # list of tuple(int, BaseFederationRow), where the first is the position
         # of the federation stream.
-        rows = []
-
-        # There should be only one reader, so lets delete everything its
-        # acknowledged its seen.
-        if federation_ack:
-            self._clear_queue_before_pos(federation_ack)
+        rows = []  # type: List[Tuple[int, BaseFederationRow]]
 
         # Fetch changed presence
         i = self.presence_changed.bisect_right(from_token)
@@ -335,18 +345,14 @@ class FederationRemoteSendQueue(object):
         for (pos, edu) in edus:
             rows.append((pos, EduRow(edu)))
 
-        # Fetch changed device messages
-        i = self.device_messages.bisect_right(from_token)
-        j = self.device_messages.bisect_right(to_token) + 1
-        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}
-
-        for (destination, pos) in iteritems(device_messages):
-            rows.append((pos, DeviceRow(destination=destination)))
-
         # Sort rows based on pos
         rows.sort()
 
-        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
+        return (
+            [(pos, (row.TypeId, row.to_data())) for pos, row in rows],
+            to_token,
+            False,
+        )
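
For reference, the returned triplet has roughly the shape below (stream
positions and row payloads are invented; the row classes are the ones defined
later in this module):

    updates = [
        (5, (PresenceRow.TypeId, {"user_id": "@alice:example.org", "state": "online"})),
        (6, (EduRow.TypeId, {"edu_type": "m.typing", "origin": "example.org",
                             "destination": "remote.example.org", "content": {}})),
    ]
    result = (updates, 6, False)  # (updates, new_last_token, limited)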
 
 
 class BaseFederationRow(object):
@@ -355,7 +361,7 @@ class BaseFederationRow(object):
     Specifies how to identify, serialize and deserialize the different types.
     """
 
-    TypeId = None  # Unique string that ids the type. Must be overriden in sub classes.
+    TypeId = ""  # Unique string that ids the type. Must be overriden in sub classes.
 
     @staticmethod
     def from_data(data):
@@ -468,29 +474,14 @@ class EduRow(BaseFederationRow, namedtuple("EduRow", ("edu",))):  # Edu
         buff.edus.setdefault(self.edu.destination, []).append(self.edu)
 
 
-class DeviceRow(BaseFederationRow, namedtuple("DeviceRow", ("destination",))):  # str
-    """Streams the fact that either a) there is pending to device messages for
-    users on the remote, or b) a local users device has changed and needs to
-    be sent to the remote.
-    """
+_rowtypes = (
+    PresenceRow,
+    PresenceDestinationsRow,
+    KeyedEduRow,
+    EduRow,
+)  # type: Tuple[Type[BaseFederationRow], ...]
 
-    TypeId = "d"
-
-    @staticmethod
-    def from_data(data):
-        return DeviceRow(destination=data["destination"])
-
-    def to_data(self):
-        return {"destination": self.destination}
-
-    def add_to_buffer(self, buff):
-        buff.device_destinations.add(self.destination)
-
-
-TypeToRow = {
-    Row.TypeId: Row
-    for Row in (PresenceRow, PresenceDestinationsRow, KeyedEduRow, EduRow, DeviceRow)
-}
+TypeToRow = {Row.TypeId: Row for Row in _rowtypes}
 
 
 ParsedFederationStreamData = namedtuple(
@@ -500,7 +491,6 @@ ParsedFederationStreamData = namedtuple(
         "presence_destinations",  # list of tuples of UserPresenceState and destinations
         "keyed_edus",  # dict of destination -> { key -> Edu }
         "edus",  # dict of destination -> [Edu]
-        "device_destinations",  # set of destinations
     ),
 )
 
@@ -511,7 +501,7 @@ def process_rows_for_federation(transaction_queue, rows):
 
     Args:
         transaction_queue (FederationSender)
-        rows (list(synapse.replication.tcp.streams.FederationStreamRow))
+        rows (list(synapse.replication.tcp.streams.federation.FederationStream.FederationStreamRow))
     """
 
     # The federation stream contains a bunch of different types of
@@ -519,11 +509,7 @@ def process_rows_for_federation(transaction_queue, rows):
     # them into the appropriate collection and then send them off.
 
     buff = ParsedFederationStreamData(
-        presence=[],
-        presence_destinations=[],
-        keyed_edus={},
-        edus={},
-        device_destinations=set(),
+        presence=[], presence_destinations=[], keyed_edus={}, edus={},
     )
 
     # Parse the rows in the stream and add to the buffer
@@ -551,6 +537,3 @@ def process_rows_for_federation(transaction_queue, rows):
     for destination, edu_list in iteritems(buff.edus):
         for edu in edu_list:
             transaction_queue.send_edu(edu, None)
-
-    for destination in buff.device_destinations:
-        transaction_queue.send_device_messages(destination)
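The send_queue.py changes above drop the device-message rows and make the queue return its replication updates as a triple: a list of (position, (TypeId, data)) pairs, the token reached, and a final boolean (always False in this queue). A minimal standalone sketch of how such rows round-trip through TypeToRow and process_rows_for_federation, assuming a toy EduRow whose payload is a plain dict rather than Synapse's Edu class:

from collections import namedtuple


class BaseFederationRow(object):
    TypeId = ""  # unique string identifying the row type; overridden in subclasses

    @staticmethod
    def from_data(data):
        raise NotImplementedError()

    def to_data(self):
        raise NotImplementedError()


class EduRow(BaseFederationRow, namedtuple("EduRow", ("edu",))):
    TypeId = "e"

    @staticmethod
    def from_data(data):
        return EduRow(edu=data)

    def to_data(self):
        return self.edu


TypeToRow = {Row.TypeId: Row for Row in (EduRow,)}

# What the queue now hands back: (position, (TypeId, data)) updates, the token
# the stream reached, and a final boolean.
updates = [(5, (EduRow.TypeId, EduRow(edu={"edu_type": "m.typing"}).to_data()))]
to_token, limited = 5, False

# What the consuming side (process_rows_for_federation) does with each update.
for _pos, (type_id, data) in updates:
    parsed = TypeToRow[type_id].from_data(data)
    assert isinstance(parsed, EduRow) and parsed.edu["edu_type"] == "m.typing"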
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index d46f4aaeb1..d473576902 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+from typing import Dict, Hashable, Iterable, List, Optional, Set, Tuple
 
 from six import itervalues
 
@@ -21,7 +22,9 @@ from prometheus_client import Counter
 
 from twisted.internet import defer
 
+import synapse
 import synapse.metrics
+from synapse.events import EventBase
 from synapse.federation.sender.per_destination_queue import PerDestinationQueue
 from synapse.federation.sender.transaction_manager import TransactionManager
 from synapse.federation.units import Edu
@@ -38,7 +41,9 @@ from synapse.metrics import (
     events_processed_counter,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.util.metrics import measure_func
+from synapse.storage.presence import UserPresenceState
+from synapse.types import ReadReceipt
+from synapse.util.metrics import Measure, measure_func
 
 logger = logging.getLogger(__name__)
 
@@ -49,12 +54,12 @@ sent_pdus_destination_dist_count = Counter(
 
 sent_pdus_destination_dist_total = Counter(
     "synapse_federation_client_sent_pdu_destinations:total",
-    "" "Total number of PDUs queued for sending across all destinations",
+    "Total number of PDUs queued for sending across all destinations",
 )
 
 
 class FederationSender(object):
-    def __init__(self, hs):
+    def __init__(self, hs: "synapse.server.HomeServer"):
         self.hs = hs
         self.server_name = hs.hostname
 
@@ -67,7 +72,7 @@ class FederationSender(object):
         self._transaction_manager = TransactionManager(hs)
 
         # map from destination to PerDestinationQueue
-        self._per_destination_queues = {}  # type: dict[str, PerDestinationQueue]
+        self._per_destination_queues = {}  # type: Dict[str, PerDestinationQueue]
 
         LaterGauge(
             "synapse_federation_transaction_queue_pending_destinations",
@@ -83,7 +88,7 @@ class FederationSender(object):
         # Map of user_id -> UserPresenceState for all the pending presence
         # to be sent out by user_id. Entries here get processed and put in
         # pending_presence_by_dest
-        self.pending_presence = {}
+        self.pending_presence = {}  # type: Dict[str, UserPresenceState]
 
         LaterGauge(
             "synapse_federation_transaction_queue_pending_pdus",
@@ -115,20 +120,17 @@ class FederationSender(object):
         # and that there is a pending call to _flush_rrs_for_room in the system.
         self._queues_awaiting_rr_flush_by_room = (
             {}
-        )  # type: dict[str, set[PerDestinationQueue]]
+        )  # type: Dict[str, Set[PerDestinationQueue]]
 
         self._rr_txn_interval_per_room_ms = (
-            1000.0 / hs.get_config().federation_rr_transactions_per_room_per_second
+            1000.0 / hs.config.federation_rr_transactions_per_room_per_second
         )
 
-    def _get_per_destination_queue(self, destination):
+    def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue:
         """Get or create a PerDestinationQueue for the given destination
 
         Args:
-            destination (str): server_name of remote server
-
-        Returns:
-            PerDestinationQueue
+            destination: server_name of remote server
         """
         queue = self._per_destination_queues.get(destination)
         if not queue:
@@ -136,7 +138,7 @@ class FederationSender(object):
             self._per_destination_queues[destination] = queue
         return queue
 
-    def notify_new_events(self, current_id):
+    def notify_new_events(self, current_id: int) -> None:
         """This gets called when we have some new events we might want to
         send out to other servers.
         """
@@ -150,13 +152,12 @@ class FederationSender(object):
             "process_event_queue_for_federation", self._process_event_queue_loop
         )
 
-    @defer.inlineCallbacks
-    def _process_event_queue_loop(self):
+    async def _process_event_queue_loop(self) -> None:
         try:
             self._is_processing = True
             while True:
-                last_token = yield self.store.get_federation_out_pos("events")
-                next_token, events = yield self.store.get_all_new_events_stream(
+                last_token = await self.store.get_federation_out_pos("events")
+                next_token, events = await self.store.get_all_new_events_stream(
                     last_token, self._last_poked_id, limit=100
                 )
 
@@ -165,8 +166,7 @@ class FederationSender(object):
                 if not events and next_token >= self._last_poked_id:
                     break
 
-                @defer.inlineCallbacks
-                def handle_event(event):
+                async def handle_event(event: EventBase) -> None:
                     # Only send events for this server.
                     send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of()
                     is_mine = self.is_mine_id(event.sender)
@@ -183,8 +183,8 @@ class FederationSender(object):
                         # Otherwise if the last member on a server in a room is
                         # banned then it won't receive the event because it won't
                         # be in the room after the ban.
-                        destinations = yield self.state.get_current_hosts_in_room(
-                            event.room_id, latest_event_ids=event.prev_event_ids()
+                        destinations = await self.state.get_hosts_in_room_at_events(
+                            event.room_id, event_ids=event.prev_event_ids()
                         )
                     except Exception:
                         logger.exception(
@@ -205,16 +205,16 @@ class FederationSender(object):
 
                     self._send_pdu(event, destinations)
 
-                @defer.inlineCallbacks
-                def handle_room_events(events):
-                    for event in events:
-                        yield handle_event(event)
+                async def handle_room_events(events: Iterable[EventBase]) -> None:
+                    with Measure(self.clock, "handle_room_events"):
+                        for event in events:
+                            await handle_event(event)
 
-                events_by_room = {}
+                events_by_room = {}  # type: Dict[str, List[EventBase]]
                 for event in events:
                     events_by_room.setdefault(event.room_id, []).append(event)
 
-                yield make_deferred_yieldable(
+                await make_deferred_yieldable(
                     defer.gatherResults(
                         [
                             run_in_background(handle_room_events, evs)
@@ -224,11 +224,11 @@ class FederationSender(object):
                     )
                 )
 
-                yield self.store.update_federation_out_pos("events", next_token)
+                await self.store.update_federation_out_pos("events", next_token)
 
                 if events:
                     now = self.clock.time_msec()
-                    ts = yield self.store.get_received_ts(events[-1].event_id)
+                    ts = await self.store.get_received_ts(events[-1].event_id)
 
                     synapse.metrics.event_processing_lag.labels(
                         "federation_sender"
@@ -252,7 +252,7 @@ class FederationSender(object):
         finally:
             self._is_processing = False
 
-    def _send_pdu(self, pdu, destinations):
+    def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
         # We loop through all destinations to see whether we already have
         # a transaction in progress. If we do, stick it in the pending_pdus
         # table and we'll get back to it later.
@@ -274,11 +274,11 @@ class FederationSender(object):
             self._get_per_destination_queue(destination).send_pdu(pdu, order)
 
     @defer.inlineCallbacks
-    def send_read_receipt(self, receipt):
+    def send_read_receipt(self, receipt: ReadReceipt):
         """Send a RR to any other servers in the room
 
         Args:
-            receipt (synapse.types.ReadReceipt): receipt to be sent
+            receipt: receipt to be sent
         """
 
         # Some background on the rate-limiting going on here.
@@ -341,7 +341,7 @@ class FederationSender(object):
             else:
                 queue.flush_read_receipts_for_room(room_id)
 
-    def _schedule_rr_flush_for_room(self, room_id, n_domains):
+    def _schedule_rr_flush_for_room(self, room_id: str, n_domains: int) -> None:
         # that is going to cause approximately len(domains) transactions, so now back
         # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM
         backoff_ms = self._rr_txn_interval_per_room_ms * n_domains
@@ -350,7 +350,7 @@ class FederationSender(object):
         self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id)
         self._queues_awaiting_rr_flush_by_room[room_id] = set()
 
-    def _flush_rrs_for_room(self, room_id):
+    def _flush_rrs_for_room(self, room_id: str) -> None:
         queues = self._queues_awaiting_rr_flush_by_room.pop(room_id)
         logger.debug("Flushing RRs in %s to %s", room_id, queues)
 
@@ -366,14 +366,11 @@ class FederationSender(object):
 
     @preserve_fn  # the caller should not yield on this
     @defer.inlineCallbacks
-    def send_presence(self, states):
+    def send_presence(self, states: List[UserPresenceState]):
         """Send the new presence states to the appropriate destinations.
 
         This actually queues up the presence states ready for sending and
         triggers a background task to process them and send out the transactions.
-
-        Args:
-            states (list(UserPresenceState))
         """
         if not self.hs.config.use_presence:
             # No-op if presence is disabled.
@@ -410,11 +407,10 @@ class FederationSender(object):
         finally:
             self._processing_pending_presence = False
 
-    def send_presence_to_destinations(self, states, destinations):
+    def send_presence_to_destinations(
+        self, states: List[UserPresenceState], destinations: List[str]
+    ) -> None:
         """Send the given presence states to the given destinations.
-
-        Args:
-            states (list[UserPresenceState])
-            destinations (list[str])
         """
 
@@ -429,12 +425,9 @@ class FederationSender(object):
 
     @measure_func("txnqueue._process_presence")
     @defer.inlineCallbacks
-    def _process_presence_inner(self, states):
+    def _process_presence_inner(self, states: List[UserPresenceState]):
         """Given a list of states populate self.pending_presence_by_dest and
         poke to send a new transaction to each destination
-
-        Args:
-            states (list(UserPresenceState))
         """
         hosts_and_states = yield get_interested_remotes(self.store, states, self.state)
 
@@ -444,14 +437,20 @@ class FederationSender(object):
                     continue
                 self._get_per_destination_queue(destination).send_presence(states)
 
-    def build_and_send_edu(self, destination, edu_type, content, key=None):
+    def build_and_send_edu(
+        self,
+        destination: str,
+        edu_type: str,
+        content: dict,
+        key: Optional[Hashable] = None,
+    ):
         """Construct an Edu object, and queue it for sending
 
         Args:
-            destination (str): name of server to send to
-            edu_type (str): type of EDU to send
-            content (dict): content of EDU
-            key (Any|None): clobbering key for this edu
+            destination: name of server to send to
+            edu_type: type of EDU to send
+            content: content of EDU
+            key: clobbering key for this edu
         """
         if destination == self.server_name:
             logger.info("Not sending EDU to ourselves")
@@ -466,12 +465,12 @@ class FederationSender(object):
 
         self.send_edu(edu, key)
 
-    def send_edu(self, edu, key):
+    def send_edu(self, edu: Edu, key: Optional[Hashable]):
         """Queue an EDU for sending
 
         Args:
-            edu (Edu): edu to send
-            key (Any|None): clobbering key for this edu
+            edu: edu to send
+            key: clobbering key for this edu
         """
         queue = self._get_per_destination_queue(edu.destination)
         if key:
@@ -479,12 +478,36 @@ class FederationSender(object):
         else:
             queue.send_edu(edu)
 
-    def send_device_messages(self, destination):
+    def send_device_messages(self, destination: str):
         if destination == self.server_name:
-            logger.info("Not sending device update to ourselves")
+            logger.warning("Not sending device update to ourselves")
             return
 
         self._get_per_destination_queue(destination).attempt_new_transaction()
 
-    def get_current_token(self):
+    def wake_destination(self, destination: str):
+        """Called when we want to retry sending transactions to a remote.
+
+        This is mainly useful if the remote server has been down and we think it
+        might have come back.
+        """
+
+        if destination == self.server_name:
+            logger.warning("Not waking up ourselves")
+            return
+
+        self._get_per_destination_queue(destination).attempt_new_transaction()
+
+    @staticmethod
+    def get_current_token() -> int:
+        # Dummy implementation for the case where the federation sender isn't offloaded
+        # to a worker.
         return 0
+
+    @staticmethod
+    async def get_replication_rows(
+        instance_name: str, from_token: int, to_token: int, target_row_count: int
+    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
+        # Dummy implementation for the case where the federation sender isn't offloaded
+        # to a worker.
+        return [], 0, False
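The sender keeps the existing read-receipt flush backoff: _schedule_rr_flush_for_room above waits one per-room interval per destination domain, with the interval derived from federation_rr_transactions_per_room_per_second. A small worked example of that arithmetic, assuming an example config value of 50 transactions per room per second:

# Assumed example config value; the real one comes from homeserver config.
federation_rr_transactions_per_room_per_second = 50

# Mirrors self._rr_txn_interval_per_room_ms above.
rr_txn_interval_per_room_ms = 1000.0 / federation_rr_transactions_per_room_per_second


def rr_flush_backoff_ms(n_domains: int) -> float:
    # Flushing now causes roughly n_domains transactions, so back off for
    # that many per-room intervals before flushing read receipts again.
    return rr_txn_interval_per_room_ms * n_domains


assert rr_txn_interval_per_room_ms == 20.0
assert rr_flush_backoff_ms(5) == 100.0  # a room spanning 5 servers waits 100ms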
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index fad980b893..4e698981a4 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -15,11 +15,10 @@
 # limitations under the License.
 import datetime
 import logging
+from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Tuple
 
 from prometheus_client import Counter
 
-from twisted.internet import defer
-
 from synapse.api.errors import (
     FederationDeniedError,
     HttpResponseException,
@@ -30,9 +29,13 @@ from synapse.federation.units import Edu
 from synapse.handlers.presence import format_user_presence_state
 from synapse.metrics import sent_transactions_counter
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage import UserPresenceState
+from synapse.storage.presence import UserPresenceState
+from synapse.types import ReadReceipt
 from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 
+if TYPE_CHECKING:
+    import synapse.server
+
 # This is defined in the Matrix spec and enforced by the receiver.
 MAX_EDUS_PER_TRANSACTION = 100
 
@@ -55,13 +58,18 @@ class PerDestinationQueue(object):
     Manages the per-destination transmission queues.
 
     Args:
-        hs (synapse.HomeServer):
-        transaction_sender (TransactionManager):
-        destination (str): the server_name of the destination that we are managing
+        hs
+        transaction_manager
+        destination: the server_name of the destination that we are managing
             transmission for.
     """
 
-    def __init__(self, hs, transaction_manager, destination):
+    def __init__(
+        self,
+        hs: "synapse.server.HomeServer",
+        transaction_manager: "synapse.federation.sender.TransactionManager",
+        destination: str,
+    ):
         self._server_name = hs.hostname
         self._clock = hs.get_clock()
         self._store = hs.get_datastore()
@@ -71,20 +79,23 @@ class PerDestinationQueue(object):
         self.transmission_loop_running = False
 
         # a list of tuples of (pending pdu, order)
-        self._pending_pdus = []  # type: list[tuple[EventBase, int]]
-        self._pending_edus = []  # type: list[Edu]
+        self._pending_pdus = []  # type: List[Tuple[EventBase, int]]
+
+        # XXX this is never actually used: see
+        # https://github.com/matrix-org/synapse/issues/7549
+        self._pending_edus = []  # type: List[Edu]
 
         # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered
         # based on their key (e.g. typing events by room_id)
         # Map of (edu_type, key) -> Edu
-        self._pending_edus_keyed = {}  # type: dict[tuple[str, str], Edu]
+        self._pending_edus_keyed = {}  # type: Dict[Tuple[str, Hashable], Edu]
 
         # Map of user_id -> UserPresenceState of pending presence to be sent to this
         # destination
-        self._pending_presence = {}  # type: dict[str, UserPresenceState]
+        self._pending_presence = {}  # type: Dict[str, UserPresenceState]
 
         # room_id -> receipt_type -> user_id -> receipt_dict
-        self._pending_rrs = {}
+        self._pending_rrs = {}  # type: Dict[str, Dict[str, Dict[str, dict]]]
         self._rrs_pending_flush = False
 
         # stream_id of last successfully sent to-device message.
@@ -94,50 +105,50 @@ class PerDestinationQueue(object):
         # stream_id of last successfully sent device list update.
         self._last_device_list_stream_id = 0
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "PerDestinationQueue[%s]" % self._destination
 
-    def pending_pdu_count(self):
+    def pending_pdu_count(self) -> int:
         return len(self._pending_pdus)
 
-    def pending_edu_count(self):
+    def pending_edu_count(self) -> int:
         return (
             len(self._pending_edus)
             + len(self._pending_presence)
             + len(self._pending_edus_keyed)
         )
 
-    def send_pdu(self, pdu, order):
+    def send_pdu(self, pdu: EventBase, order: int) -> None:
         """Add a PDU to the queue, and start the transmission loop if neccessary
 
         Args:
-            pdu (EventBase): pdu to send
-            order (int):
+            pdu: pdu to send
+            order
         """
         self._pending_pdus.append((pdu, order))
         self.attempt_new_transaction()
 
-    def send_presence(self, states):
+    def send_presence(self, states: Iterable[UserPresenceState]) -> None:
         """Add presence updates to the queue. Start the transmission loop if neccessary.
 
         Args:
-            states (iterable[UserPresenceState]): presence to send
+            states: presence to send
         """
         self._pending_presence.update({state.user_id: state for state in states})
         self.attempt_new_transaction()
 
-    def queue_read_receipt(self, receipt):
+    def queue_read_receipt(self, receipt: ReadReceipt) -> None:
         """Add a RR to the list to be sent. Doesn't start the transmission loop yet
         (see flush_read_receipts_for_room)
 
         Args:
-            receipt (synapse.api.receipt_info.ReceiptInfo): receipt to be queued
+            receipt: receipt to be queued
         """
         self._pending_rrs.setdefault(receipt.room_id, {}).setdefault(
             receipt.receipt_type, {}
         )[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data}
 
-    def flush_read_receipts_for_room(self, room_id):
+    def flush_read_receipts_for_room(self, room_id: str) -> None:
         # if we don't have any read-receipts for this room, it may be that we've already
         # sent them out, so we don't need to flush.
         if room_id not in self._pending_rrs:
@@ -145,15 +156,15 @@ class PerDestinationQueue(object):
         self._rrs_pending_flush = True
         self.attempt_new_transaction()
 
-    def send_keyed_edu(self, edu, key):
+    def send_keyed_edu(self, edu: Edu, key: Hashable) -> None:
         self._pending_edus_keyed[(edu.edu_type, key)] = edu
         self.attempt_new_transaction()
 
-    def send_edu(self, edu):
+    def send_edu(self, edu: Edu) -> None:
         self._pending_edus.append(edu)
         self.attempt_new_transaction()
 
-    def attempt_new_transaction(self):
+    def attempt_new_transaction(self) -> None:
         """Try to start a new transaction to this destination
 
         If there is already a transaction in progress to this destination,
@@ -176,31 +187,31 @@ class PerDestinationQueue(object):
             self._transaction_transmission_loop,
         )
 
-    @defer.inlineCallbacks
-    def _transaction_transmission_loop(self):
-        pending_pdus = []
+    async def _transaction_transmission_loop(self) -> None:
+        pending_pdus = []  # type: List[Tuple[EventBase, int]]
         try:
             self.transmission_loop_running = True
 
             # This will throw if we wouldn't retry. We do this here so we fail
             # quickly, but we will later check this again in the http client,
             # hence why we throw the result away.
-            yield get_retry_limiter(self._destination, self._clock, self._store)
+            await get_retry_limiter(self._destination, self._clock, self._store)
 
             pending_pdus = []
             while True:
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2
 
-                device_update_edus, dev_list_id = (
-                    yield self._get_device_update_edus(limit)
+                device_update_edus, dev_list_id = await self._get_device_update_edus(
+                    limit
                 )
 
                 limit -= len(device_update_edus)
 
-                to_device_edus, device_stream_id = (
-                    yield self._get_to_device_message_edus(limit)
-                )
+                (
+                    to_device_edus,
+                    device_stream_id,
+                ) = await self._get_to_device_message_edus(limit)
 
                 pending_edus = device_update_edus + to_device_edus
 
@@ -267,7 +278,7 @@ class PerDestinationQueue(object):
 
                 # END CRITICAL SECTION
 
-                success = yield self._transaction_manager.send_new_transaction(
+                success = await self._transaction_manager.send_new_transaction(
                     self._destination, pending_pdus, pending_edus
                 )
                 if success:
@@ -278,7 +289,7 @@ class PerDestinationQueue(object):
                     # Remove the acknowledged device messages from the database
                     # Only bother if we actually sent some device messages
                     if to_device_edus:
-                        yield self._store.delete_device_msgs_for_remote(
+                        await self._store.delete_device_msgs_for_remote(
                             self._destination, device_stream_id
                         )
 
@@ -287,7 +298,7 @@ class PerDestinationQueue(object):
                         logger.info(
                             "Marking as sent %r %r", self._destination, dev_list_id
                         )
-                        yield self._store.mark_as_sent_devices_by_remote(
+                        await self._store.mark_as_sent_devices_by_remote(
                             self._destination, dev_list_id
                         )
 
@@ -332,7 +343,7 @@ class PerDestinationQueue(object):
             # We want to be *very* sure we clear this after we stop processing
             self.transmission_loop_running = False
 
-    def _get_rr_edus(self, force_flush):
+    def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]:
         if not self._pending_rrs:
             return
         if not force_flush and not self._rrs_pending_flush:
@@ -349,38 +360,36 @@ class PerDestinationQueue(object):
         self._rrs_pending_flush = False
         yield edu
 
-    def _pop_pending_edus(self, limit):
+    def _pop_pending_edus(self, limit: int) -> List[Edu]:
         pending_edus = self._pending_edus
         pending_edus, self._pending_edus = pending_edus[:limit], pending_edus[limit:]
         return pending_edus
 
-    @defer.inlineCallbacks
-    def _get_device_update_edus(self, limit):
+    async def _get_device_update_edus(self, limit: int) -> Tuple[List[Edu], int]:
         last_device_list = self._last_device_list_stream_id
 
         # Retrieve list of new device updates to send to the destination
-        now_stream_id, results = yield self._store.get_devices_by_remote(
+        now_stream_id, results = await self._store.get_device_updates_by_remote(
             self._destination, last_device_list, limit=limit
         )
         edus = [
             Edu(
                 origin=self._server_name,
                 destination=self._destination,
-                edu_type="m.device_list_update",
+                edu_type=edu_type,
                 content=content,
             )
-            for content in results
+            for (edu_type, content) in results
         ]
 
-        assert len(edus) <= limit, "get_devices_by_remote returned too many EDUs"
+        assert len(edus) <= limit, "get_device_updates_by_remote returned too many EDUs"
 
         return (edus, now_stream_id)
 
-    @defer.inlineCallbacks
-    def _get_to_device_message_edus(self, limit):
+    async def _get_to_device_message_edus(self, limit: int) -> Tuple[List[Edu], int]:
         last_device_stream_id = self._last_device_stream_id
         to_device_stream_id = self._store.get_to_device_stream_token()
-        contents, stream_id = yield self._store.get_new_device_msgs_for_remote(
+        contents, stream_id = await self._store.get_new_device_msgs_for_remote(
             self._destination, last_device_stream_id, to_device_stream_id, limit
         )
         edus = [
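The typing added to _pending_edus_keyed above spells out the clobbering behaviour: EDUs queued under the same (edu_type, key) replace one another, so only the latest typing notification per room survives. A minimal standalone sketch of that behaviour, with plain dicts standing in for Edu objects:

from typing import Dict, Hashable, Tuple

_pending_edus_keyed = {}  # type: Dict[Tuple[str, Hashable], dict]


def send_keyed_edu(edu: dict, key: Hashable) -> None:
    # A later EDU with the same (edu_type, key) replaces the earlier one.
    _pending_edus_keyed[(edu["edu_type"], key)] = edu


send_keyed_edu({"edu_type": "m.typing", "content": {"typing": True}}, "!room:a")
send_keyed_edu({"edu_type": "m.typing", "content": {"typing": False}}, "!room:a")

# Only the most recent typing EDU for the room is still queued.
assert len(_pending_edus_keyed) == 1
assert _pending_edus_keyed[("m.typing", "!room:a")]["content"]["typing"] is False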
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 5b6c79c51a..a2752a54a5 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -13,14 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from typing import TYPE_CHECKING, List
 
 from canonicaljson import json
 
-from twisted.internet import defer
-
 from synapse.api.errors import HttpResponseException
+from synapse.events import EventBase
 from synapse.federation.persistence import TransactionActions
-from synapse.federation.units import Transaction
+from synapse.federation.units import Edu, Transaction
 from synapse.logging.opentracing import (
     extract_text_map,
     set_tag,
@@ -30,6 +30,9 @@ from synapse.logging.opentracing import (
 )
 from synapse.util.metrics import measure_func
 
+if TYPE_CHECKING:
+    import synapse.server
+
 logger = logging.getLogger(__name__)
 
 
@@ -39,7 +42,7 @@ class TransactionManager(object):
     shared between PerDestinationQueue objects
     """
 
-    def __init__(self, hs):
+    def __init__(self, hs: "synapse.server.HomeServer"):
         self._server_name = hs.hostname
         self.clock = hs.get_clock()  # nb must be called this for @measure_func
         self._store = hs.get_datastore()
@@ -50,8 +53,9 @@ class TransactionManager(object):
         self._next_txn_id = int(self.clock.time_msec())
 
     @measure_func("_send_new_transaction")
-    @defer.inlineCallbacks
-    def send_new_transaction(self, destination, pending_pdus, pending_edus):
+    async def send_new_transaction(
+        self, destination: str, pending_pdus: List[EventBase], pending_edus: List[Edu]
+    ):
 
         # Make a transaction-sending opentracing span. This span follows on from
         # all the edus in that transaction. This needs to be done since there is
@@ -84,7 +88,7 @@ class TransactionManager(object):
             txn_id = str(self._next_txn_id)
 
             logger.debug(
-                "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)",
+                "TX [%s] {%s} Attempting new transaction (pdus: %d, edus: %d)",
                 destination,
                 txn_id,
                 len(pdus),
@@ -103,7 +107,7 @@ class TransactionManager(object):
             self._next_txn_id += 1
 
             logger.info(
-                "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)",
+                "TX [%s] {%s} Sending transaction [%s], (PDUs: %d, EDUs: %d)",
                 destination,
                 txn_id,
                 transaction.transaction_id,
@@ -127,7 +131,7 @@ class TransactionManager(object):
                 return data
 
             try:
-                response = yield self._transport_layer.send_transaction(
+                response = await self._transport_layer.send_transaction(
                     transaction, json_data_cb
                 )
                 code = 200
@@ -146,7 +150,7 @@ class TransactionManager(object):
             if code == 200:
                 for e_id, r in response.get("pdus", {}).items():
                     if "error" in r:
-                        logger.warn(
+                        logger.warning(
                             "TX [%s] {%s} Remote returned error for %s: %s",
                             destination,
                             txn_id,
@@ -155,7 +159,7 @@ class TransactionManager(object):
                         )
             else:
                 for p in pdus:
-                    logger.warn(
+                    logger.warning(
                         "TX [%s] {%s} Failed to send event %s",
                         destination,
                         txn_id,
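send_new_transaction above keeps allocating transaction ids from a counter seeded with the current time in milliseconds and incremented per transaction. A minimal sketch of that scheme; time.time() stands in for Synapse's clock, and the note about ids staying roughly monotonic across restarts is an inference from the seeding, not something the patch states:

import time


class TxnIdAllocator:
    def __init__(self):
        # Seeded from wall-clock ms, as in TransactionManager.__init__, so ids
        # normally keep increasing even after a restart.
        self._next_txn_id = int(time.time() * 1000)

    def allocate(self) -> str:
        txn_id = str(self._next_txn_id)
        self._next_txn_id += 1
        return txn_id


alloc = TxnIdAllocator()
first, second = alloc.allocate(), alloc.allocate()
assert int(second) == int(first) + 1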
diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py
index d9fcc520a0..5db733af98 100644
--- a/synapse/federation/transport/__init__.py
+++ b/synapse/federation/transport/__init__.py
@@ -14,9 +14,9 @@
 # limitations under the License.
 
 """The transport layer is responsible for both sending transactions to remote
-home servers and receiving a variety of requests from other home servers.
+homeservers and receiving a variety of requests from other homeservers.
 
-By default this is done over HTTPS (and all home servers are required to
+By default this is done over HTTPS (and all homeservers are required to
 support HTTPS), however individual pairings of servers may decide to
 communicate over a different (albeit still reliable) protocol.
 """
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 482a101c09..060bf07197 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -15,12 +15,14 @@
 # limitations under the License.
 
 import logging
+from typing import Any, Dict, Optional
 
 from six.moves import urllib
 
 from twisted.internet import defer
 
 from synapse.api.constants import Membership
+from synapse.api.errors import Codes, HttpResponseException, SynapseError
 from synapse.api.urls import (
     FEDERATION_UNSTABLE_PREFIX,
     FEDERATION_V1_PREFIX,
@@ -39,36 +41,12 @@ class TransportLayerClient(object):
         self.client = hs.get_http_client()
 
     @log_function
-    def get_room_state(self, destination, room_id, event_id):
-        """ Requests all state for a given room from the given server at the
-        given event.
-
-        Args:
-            destination (str): The host name of the remote home server we want
-                to get the state from.
-            context (str): The name of the context we want the state of
-            event_id (str): The event we want the context at.
-
-        Returns:
-            Deferred: Results in a dict received from the remote homeserver.
-        """
-        logger.debug("get_room_state dest=%s, room=%s", destination, room_id)
-
-        path = _create_v1_path("/state/%s", room_id)
-        return self.client.get_json(
-            destination,
-            path=path,
-            args={"event_id": event_id},
-            try_trailing_slash_on_400=True,
-        )
-
-    @log_function
     def get_room_state_ids(self, destination, room_id, event_id):
         """ Requests all state for a given room from the given server at the
         given event. Returns the state's event_id's
 
         Args:
-            destination (str): The host name of the remote home server we want
+            destination (str): The host name of the remote homeserver we want
                 to get the state from.
             context (str): The name of the context we want the state of
             event_id (str): The event we want the context at.
@@ -91,7 +69,7 @@ class TransportLayerClient(object):
         """ Requests the pdu with give id and origin from the given server.
 
         Args:
-            destination (str): The host name of the remote home server we want
+            destination (str): The host name of the remote homeserver we want
                 to get the state from.
             event_id (str): The id of the event being requested.
             timeout (int): How long to try (in ms) the destination for before
@@ -122,10 +100,10 @@ class TransportLayerClient(object):
             Deferred: Results in a dict received from the remote homeserver.
         """
         logger.debug(
-            "backfill dest=%s, room_id=%s, event_tuples=%s, limit=%s",
+            "backfill dest=%s, room_id=%s, event_tuples=%r, limit=%s",
             destination,
             room_id,
-            repr(event_tuples),
+            event_tuples,
             str(limit),
         )
 
@@ -267,7 +245,7 @@ class TransportLayerClient(object):
 
     @defer.inlineCallbacks
     @log_function
-    def send_join(self, destination, room_id, event_id, content):
+    def send_join_v1(self, destination, room_id, event_id, content):
         path = _create_v1_path("/send_join/%s/%s", room_id, event_id)
 
         response = yield self.client.put_json(
@@ -278,7 +256,18 @@ class TransportLayerClient(object):
 
     @defer.inlineCallbacks
     @log_function
-    def send_leave(self, destination, room_id, event_id, content):
+    def send_join_v2(self, destination, room_id, event_id, content):
+        path = _create_v2_path("/send_join/%s/%s", room_id, event_id)
+
+        response = yield self.client.put_json(
+            destination=destination, path=path, data=content
+        )
+
+        return response
+
+    @defer.inlineCallbacks
+    @log_function
+    def send_leave_v1(self, destination, room_id, event_id, content):
         path = _create_v1_path("/send_leave/%s/%s", room_id, event_id)
 
         response = yield self.client.put_json(
@@ -296,6 +285,24 @@ class TransportLayerClient(object):
 
     @defer.inlineCallbacks
     @log_function
+    def send_leave_v2(self, destination, room_id, event_id, content):
+        path = _create_v2_path("/send_leave/%s/%s", room_id, event_id)
+
+        response = yield self.client.put_json(
+            destination=destination,
+            path=path,
+            data=content,
+            # we want to do our best to send this through. The problem is
+            # that if it fails, we won't retry it later, so if the remote
+            # server was just having a momentary blip, the room will be out of
+            # sync.
+            ignore_backoff=True,
+        )
+
+        return response
+
+    @defer.inlineCallbacks
+    @log_function
     def send_invite_v1(self, destination, room_id, event_id, content):
         path = _create_v1_path("/invite/%s/%s", room_id, event_id)
 
@@ -320,18 +327,25 @@ class TransportLayerClient(object):
     @log_function
     def get_public_rooms(
         self,
-        remote_server,
-        limit,
-        since_token,
-        search_filter=None,
-        include_all_networks=False,
-        third_party_instance_id=None,
+        remote_server: str,
+        limit: Optional[int] = None,
+        since_token: Optional[str] = None,
+        search_filter: Optional[Dict] = None,
+        include_all_networks: bool = False,
+        third_party_instance_id: Optional[str] = None,
     ):
+        """Get the list of public rooms from a remote homeserver
+
+        See synapse.federation.federation_client.FederationClient.get_public_rooms for
+        more information.
+        """
         if search_filter:
             # this uses MSC2197 (Search Filtering over Federation)
             path = _create_v1_path("/publicRooms")
 
-            data = {"include_all_networks": "true" if include_all_networks else "false"}
+            data = {
+                "include_all_networks": "true" if include_all_networks else "false"
+            }  # type: Dict[str, Any]
             if third_party_instance_id:
                 data["third_party_instance_id"] = third_party_instance_id
             if limit:
@@ -341,13 +355,25 @@ class TransportLayerClient(object):
 
             data["filter"] = search_filter
 
-            response = yield self.client.post_json(
-                destination=remote_server, path=path, data=data, ignore_backoff=True
-            )
+            try:
+                response = yield self.client.post_json(
+                    destination=remote_server, path=path, data=data, ignore_backoff=True
+                )
+            except HttpResponseException as e:
+                if e.code == 403:
+                    raise SynapseError(
+                        403,
+                        "You are not allowed to view the public rooms list of %s"
+                        % (remote_server,),
+                        errcode=Codes.FORBIDDEN,
+                    )
+                raise
         else:
             path = _create_v1_path("/publicRooms")
 
-            args = {"include_all_networks": "true" if include_all_networks else "false"}
+            args = {
+                "include_all_networks": "true" if include_all_networks else "false"
+            }  # type: Dict[str, Any]
             if third_party_instance_id:
                 args["third_party_instance_id"] = (third_party_instance_id,)
             if limit:
@@ -355,9 +381,19 @@ class TransportLayerClient(object):
             if since_token:
                 args["since"] = [since_token]
 
-            response = yield self.client.get_json(
-                destination=remote_server, path=path, args=args, ignore_backoff=True
-            )
+            try:
+                response = yield self.client.get_json(
+                    destination=remote_server, path=path, args=args, ignore_backoff=True
+                )
+            except HttpResponseException as e:
+                if e.code == 403:
+                    raise SynapseError(
+                        403,
+                        "You are not allowed to view the public rooms list of %s"
+                        % (remote_server,),
+                        errcode=Codes.FORBIDDEN,
+                    )
+                raise
 
         return response
 
@@ -383,17 +419,6 @@ class TransportLayerClient(object):
 
     @defer.inlineCallbacks
     @log_function
-    def send_query_auth(self, destination, room_id, event_id, content):
-        path = _create_v1_path("/query_auth/%s/%s", room_id, event_id)
-
-        content = yield self.client.post_json(
-            destination=destination, path=path, data=content
-        )
-
-        return content
-
-    @defer.inlineCallbacks
-    @log_function
     def query_client_keys(self, destination, query_content, timeout):
         """Query the device keys for a list of user ids hosted on a remote
         server.
@@ -402,20 +427,30 @@ class TransportLayerClient(object):
             {
               "device_keys": {
                 "<user_id>": ["<device_id>"]
-            } }
+              }
+            }
 
         Response:
             {
               "device_keys": {
                 "<user_id>": {
                   "<device_id>": {...}
-            } } }
+                }
+              },
+              "master_key": {
+                "<user_id>": {...}
+                }
+              },
+              "self_signing_key": {
+                "<user_id>": {...}
+              }
+            }
 
         Args:
             destination(str): The server to query.
             query_content(dict): The user ids to query.
         Returns:
-            A dict containg the device keys.
+            A dict containing device and cross-signing keys.
         """
         path = _create_v1_path("/user/keys/query")
 
@@ -432,14 +467,30 @@ class TransportLayerClient(object):
         Response:
             {
               "stream_id": "...",
-              "devices": [ { ... } ]
+              "devices": [ { ... } ],
+              "master_key": {
+                "user_id": "<user_id>",
+                "usage": [...],
+                "keys": {...},
+                "signatures": {
+                  "<user_id>": {...}
+                }
+              },
+              "self_signing_key": {
+                "user_id": "<user_id>",
+                "usage": [...],
+                "keys": {...},
+                "signatures": {
+                  "<user_id>": {...}
+                }
+              }
             }
 
         Args:
             destination(str): The server to query.
             query_content(dict): The user ids to query.
         Returns:
-            A dict containg the device keys.
+            A dict containing device and cross-signing keys.
         """
         path = _create_v1_path("/user/devices/%s", user_id)
 
@@ -457,8 +508,10 @@ class TransportLayerClient(object):
             {
               "one_time_keys": {
                 "<user_id>": {
-                    "<device_id>": "<algorithm>"
-            } } }
+                  "<device_id>": "<algorithm>"
+                }
+              }
+            }
 
         Response:
             {
@@ -466,13 +519,16 @@ class TransportLayerClient(object):
                 "<user_id>": {
                   "<device_id>": {
                     "<algorithm>:<key_id>": "<key_base64>"
-            } } } }
+                  }
+                }
+              }
+            }
 
         Args:
             destination(str): The server to query.
             query_content(dict): The user ids to query.
         Returns:
-            A dict containg the one-time keys.
+            A dict containing the one-time keys.
         """
 
         path = _create_v1_path("/user/keys/claim")
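The get_public_rooms hunks above translate a 403 from the remote /publicRooms endpoint into a SynapseError carrying M_FORBIDDEN instead of letting the raw HTTP failure bubble up. A minimal standalone sketch of that translation pattern, with Fake* classes standing in for Synapse's HttpResponseException and SynapseError:

class FakeHttpResponseException(Exception):
    def __init__(self, code):
        super().__init__(code)
        self.code = code


class FakeSynapseError(Exception):
    def __init__(self, code, msg, errcode):
        super().__init__(msg)
        self.code = code
        self.errcode = errcode


def fetch_public_rooms(remote_server, do_request):
    # Mirrors the try/except wrapped around the HTTP call in the diff.
    try:
        return do_request()
    except FakeHttpResponseException as e:
        if e.code == 403:
            raise FakeSynapseError(
                403,
                "You are not allowed to view the public rooms list of %s"
                % (remote_server,),
                errcode="M_FORBIDDEN",
            )
        raise


def denied():
    raise FakeHttpResponseException(403)


try:
    fetch_public_rooms("remote.example.org", denied)
except FakeSynapseError as e:
    assert e.code == 403 and e.errcode == "M_FORBIDDEN"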
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 7dc696c7ae..af4595498c 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -18,6 +18,7 @@
 import functools
 import logging
 import re
+from typing import Optional, Tuple, Type
 
 from twisted.internet.defer import maybeDeferred
 
@@ -44,6 +45,7 @@ from synapse.logging.opentracing import (
     tags,
     whitelisted_homeserver,
 )
+from synapse.server import HomeServer
 from synapse.types import ThirdPartyInstanceID, get_domain_from_id
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.versionstring import get_version_string
@@ -101,12 +103,17 @@ class NoAuthenticationError(AuthenticationError):
 
 
 class Authenticator(object):
-    def __init__(self, hs):
+    def __init__(self, hs: HomeServer):
         self._clock = hs.get_clock()
         self.keyring = hs.get_keyring()
         self.server_name = hs.hostname
         self.store = hs.get_datastore()
         self.federation_domain_whitelist = hs.config.federation_domain_whitelist
+        self.notifier = hs.get_notifier()
+
+        self.replication_client = None
+        if hs.config.worker.worker_app:
+            self.replication_client = hs.get_tcp_replication()
 
     # A method just so we can pass 'self' as the authenticator to the Servlets
     async def authenticate_request(self, request, content):
@@ -151,7 +158,7 @@ class Authenticator(object):
             origin, json_request, now, "Incoming request"
         )
 
-        logger.info("Request from %s", origin)
+        logger.debug("Request from %s", origin)
         request.authenticated_entity = origin
 
         # If we get a valid signed request from the other side, its probably
@@ -165,7 +172,18 @@ class Authenticator(object):
     async def _reset_retry_timings(self, origin):
         try:
             logger.info("Marking origin %r as up", origin)
-            await self.store.set_destination_retry_timings(origin, 0, 0)
+            await self.store.set_destination_retry_timings(origin, None, 0, 0)
+
+            # Inform the relevant places that the remote server is back up.
+            self.notifier.notify_remote_server_up(origin)
+            if self.replication_client:
+                # If we're on a worker we try to inform the master about this. The
+                # replication client doesn't hook into the notifier to avoid
+                # infinite loops where we send a `REMOTE_SERVER_UP` command to
+                # the master, which then echoes it back to us, which in turn pokes
+                # the notifier.
+                self.replication_client.send_remote_server_up(origin)
+
         except Exception:
             logger.exception("Error resetting retry timings on %s", origin)
 
@@ -202,7 +220,7 @@ def _parse_auth_header(header_bytes):
         sig = strip_quotes(param_dict["sig"])
         return origin, key, sig
     except Exception as e:
-        logger.warn(
+        logger.warning(
             "Error parsing auth header '%s': %s",
             header_bytes.decode("ascii", "replace"),
             e,
@@ -250,6 +268,8 @@ class BaseFederationServlet(object):
                 returned.
     """
 
+    PATH = ""  # Overridden in subclasses, the regex to match against the path.
+
     REQUIRE_AUTH = True
 
     PREFIX = FEDERATION_V1_PREFIX  # Allows specifying the API version
@@ -287,10 +307,12 @@ class BaseFederationServlet(object):
             except NoAuthenticationError:
                 origin = None
                 if self.REQUIRE_AUTH:
-                    logger.warn("authenticate_request failed: missing authentication")
+                    logger.warning(
+                        "authenticate_request failed: missing authentication"
+                    )
                     raise
             except Exception as e:
-                logger.warn("authenticate_request failed: %s", e)
+                logger.warning("authenticate_request failed: %s", e)
                 raise
 
             request_tags = {
@@ -328,9 +350,6 @@ class BaseFederationServlet(object):
 
             return response
 
-        # Extra logic that functools.wraps() doesn't finish
-        new_func.__self__ = func.__self__
-
         return new_func
 
     def register(self, server):
@@ -419,7 +438,7 @@ class FederationEventServlet(BaseFederationServlet):
         return await self.handler.on_pdu_request(origin, event_id)
 
 
-class FederationStateServlet(BaseFederationServlet):
+class FederationStateV1Servlet(BaseFederationServlet):
     PATH = "/state/(?P<context>[^/]*)/?"
 
     # This is when someone asks for all data for a given context.
@@ -427,7 +446,7 @@ class FederationStateServlet(BaseFederationServlet):
         return await self.handler.on_context_state_request(
             origin,
             context,
-            parse_string_from_args(query, "event_id", None, required=True),
+            parse_string_from_args(query, "event_id", None, required=False),
         )
 
 
@@ -504,11 +523,21 @@ class FederationMakeLeaveServlet(BaseFederationServlet):
         return 200, content
 
 
-class FederationSendLeaveServlet(BaseFederationServlet):
+class FederationV1SendLeaveServlet(BaseFederationServlet):
     PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
 
     async def on_PUT(self, origin, content, query, room_id, event_id):
         content = await self.handler.on_send_leave_request(origin, content, room_id)
+        return 200, (200, content)
+
+
+class FederationV2SendLeaveServlet(BaseFederationServlet):
+    PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+
+    PREFIX = FEDERATION_V2_PREFIX
+
+    async def on_PUT(self, origin, content, query, room_id, event_id):
+        content = await self.handler.on_send_leave_request(origin, content, room_id)
         return 200, content
 
 
@@ -519,13 +548,25 @@ class FederationEventAuthServlet(BaseFederationServlet):
         return await self.handler.on_event_auth(origin, context, event_id)
 
 
-class FederationSendJoinServlet(BaseFederationServlet):
+class FederationV1SendJoinServlet(BaseFederationServlet):
     PATH = "/send_join/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
 
     async def on_PUT(self, origin, content, query, context, event_id):
         # TODO(paul): assert that context/event_id parsed from path actually
         #   match those given in content
         content = await self.handler.on_send_join_request(origin, content, context)
+        return 200, (200, content)
+
+
+class FederationV2SendJoinServlet(BaseFederationServlet):
+    PATH = "/send_join/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
+
+    PREFIX = FEDERATION_V2_PREFIX
+
+    async def on_PUT(self, origin, content, query, context, event_id):
+        # TODO(paul): assert that context/event_id parsed from path actually
+        #   match those given in content
+        content = await self.handler.on_send_join_request(origin, content, context)
         return 200, content
 
 
@@ -538,7 +579,7 @@ class FederationV1InviteServlet(BaseFederationServlet):
         # state resolution algorithm, and we don't use that for processing
         # invites
         content = await self.handler.on_invite_request(
-            origin, content, room_version=RoomVersions.V1.identifier
+            origin, content, room_version_id=RoomVersions.V1.identifier
         )
 
         # V1 federation API is defined to return a content of `[200, {...}]`
@@ -565,7 +606,7 @@ class FederationV2InviteServlet(BaseFederationServlet):
         event.setdefault("unsigned", {})["invite_room_state"] = invite_room_state
 
         content = await self.handler.on_invite_request(
-            origin, event, room_version=room_version
+            origin, event, room_version_id=room_version
         )
         return 200, content
 
@@ -602,17 +643,6 @@ class FederationClientKeysClaimServlet(BaseFederationServlet):
         return 200, response
 
 
-class FederationQueryAuthServlet(BaseFederationServlet):
-    PATH = "/query_auth/(?P<context>[^/]*)/(?P<event_id>[^/]*)"
-
-    async def on_POST(self, origin, content, query, context, event_id):
-        new_content = await self.handler.on_query_auth_request(
-            origin, content, context, event_id
-        )
-
-        return 200, new_content
-
-
 class FederationGetMissingEventsServlet(BaseFederationServlet):
     # TODO(paul): Why does this path alone end with "/?" optional?
     PATH = "/get_missing_events/(?P<room_id>[^/]*)/?"
@@ -712,7 +742,7 @@ class PublicRoomList(BaseFederationServlet):
 
     This API returns information in the same format as /publicRooms on the
     client API, but will only ever include local public rooms and hence is
-    intended for consumption by other home servers.
+    intended for consumption by other homeservers.
 
     GET /publicRooms HTTP/1.1
 
@@ -765,6 +795,10 @@ class PublicRoomList(BaseFederationServlet):
         else:
             network_tuple = ThirdPartyInstanceID(None, None)
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         data = await maybeDeferred(
             self.handler.get_local_public_room_list,
             limit,
@@ -779,7 +813,7 @@ class PublicRoomList(BaseFederationServlet):
         if not self.allow_access:
             raise FederationDeniedError(origin)
 
-        limit = int(content.get("limit", 100))
+        limit = int(content.get("limit", 100))  # type: Optional[int]
         since_token = content.get("since", None)
         search_filter = content.get("filter", None)
 
@@ -800,6 +834,10 @@ class PublicRoomList(BaseFederationServlet):
         if search_filter is None:
             logger.warning("Nonefilter")
 
+        if limit == 0:
+            # zero is a special value which corresponds to no limit.
+            limit = None
+
         data = await self.handler.get_local_public_room_list(
             limit=limit,
             since_token=since_token,
@@ -922,7 +960,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet):
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
-        result = await self.groups_handler.update_room_in_group(
+        result = await self.handler.update_room_in_group(
             group_id, requester_user_id, room_id, config_key, content
         )
 
@@ -1350,18 +1388,19 @@ class RoomComplexityServlet(BaseFederationServlet):
 FEDERATION_SERVLET_CLASSES = (
     FederationSendServlet,
     FederationEventServlet,
-    FederationStateServlet,
+    FederationStateV1Servlet,
     FederationStateIdsServlet,
     FederationBackfillServlet,
     FederationQueryServlet,
     FederationMakeJoinServlet,
     FederationMakeLeaveServlet,
     FederationEventServlet,
-    FederationSendJoinServlet,
-    FederationSendLeaveServlet,
+    FederationV1SendJoinServlet,
+    FederationV2SendJoinServlet,
+    FederationV1SendLeaveServlet,
+    FederationV2SendLeaveServlet,
     FederationV1InviteServlet,
     FederationV2InviteServlet,
-    FederationQueryAuthServlet,
     FederationGetMissingEventsServlet,
     FederationEventAuthServlet,
     FederationClientKeysQueryServlet,
@@ -1371,11 +1410,13 @@ FEDERATION_SERVLET_CLASSES = (
     On3pidBindServlet,
     FederationVersionServlet,
     RoomComplexityServlet,
-)
+)  # type: Tuple[Type[BaseFederationServlet], ...]
 
-OPENID_SERVLET_CLASSES = (OpenIdUserInfo,)
+OPENID_SERVLET_CLASSES = (
+    OpenIdUserInfo,
+)  # type: Tuple[Type[BaseFederationServlet], ...]
 
-ROOM_LIST_CLASSES = (PublicRoomList,)
+ROOM_LIST_CLASSES = (PublicRoomList,)  # type: Tuple[Type[PublicRoomList], ...]
 
 GROUP_SERVER_SERVLET_CLASSES = (
     FederationGroupsProfileServlet,
@@ -1396,17 +1437,19 @@ GROUP_SERVER_SERVLET_CLASSES = (
     FederationGroupsAddRoomsServlet,
     FederationGroupsAddRoomsConfigServlet,
     FederationGroupsSettingJoinPolicyServlet,
-)
+)  # type: Tuple[Type[BaseFederationServlet], ...]
 
 
 GROUP_LOCAL_SERVLET_CLASSES = (
     FederationGroupsLocalInviteServlet,
     FederationGroupsRemoveLocalUserServlet,
     FederationGroupsBulkPublicisedServlet,
-)
+)  # type: Tuple[Type[BaseFederationServlet], ...]
 
 
-GROUP_ATTESTATION_SERVLET_CLASSES = (FederationGroupsRenewAttestaionServlet,)
+GROUP_ATTESTATION_SERVLET_CLASSES = (
+    FederationGroupsRenewAttestaionServlet,
+)  # type: Tuple[Type[BaseFederationServlet], ...]
 
 DEFAULT_SERVLET_GROUPS = (
     "federation",
diff --git a/synapse/federation/units.py b/synapse/federation/units.py
index b4d743cde7..6b32e0dcbf 100644
--- a/synapse/federation/units.py
+++ b/synapse/federation/units.py
@@ -19,11 +19,15 @@ server protocol.
 
 import logging
 
+import attr
+
+from synapse.types import JsonDict
 from synapse.util.jsonobject import JsonEncodedObject
 
 logger = logging.getLogger(__name__)
 
 
+@attr.s(slots=True)
 class Edu(JsonEncodedObject):
     """ An Edu represents a piece of data sent from one homeserver to another.
 
@@ -32,11 +36,24 @@ class Edu(JsonEncodedObject):
     internal ID or previous references graph.
     """
 
-    valid_keys = ["origin", "destination", "edu_type", "content"]
+    edu_type = attr.ib(type=str)
+    content = attr.ib(type=dict)
+    origin = attr.ib(type=str)
+    destination = attr.ib(type=str)
 
-    required_keys = ["edu_type"]
+    def get_dict(self) -> JsonDict:
+        return {
+            "edu_type": self.edu_type,
+            "content": self.content,
+        }
 
-    internal_keys = ["origin", "destination"]
+    def get_internal_dict(self) -> JsonDict:
+        return {
+            "edu_type": self.edu_type,
+            "content": self.content,
+            "origin": self.origin,
+            "destination": self.destination,
+        }
 
     def get_context(self):
         return getattr(self, "content", {}).get("org.matrix.opentracing_context", "{}")
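The units.py hunk above turns Edu into an attrs class: get_dict() is the wire form carried inside a transaction (type and content only), while get_internal_dict() also keeps the routing fields. A standalone copy for illustration, assuming the attrs package is available and omitting the JsonEncodedObject base class:

import attr


@attr.s(slots=True)
class Edu(object):
    edu_type = attr.ib(type=str)
    content = attr.ib(type=dict)
    origin = attr.ib(type=str)
    destination = attr.ib(type=str)

    def get_dict(self):
        # Wire form: what gets placed inside a federation transaction.
        return {"edu_type": self.edu_type, "content": self.content}

    def get_internal_dict(self):
        # Internal form: also keeps the routing fields.
        return {
            "edu_type": self.edu_type,
            "content": self.content,
            "origin": self.origin,
            "destination": self.destination,
        }


edu = Edu(
    edu_type="m.typing",
    content={"room_id": "!room:a", "user_id": "@alice:a", "typing": True},
    origin="a.example.org",
    destination="b.example.org",
)
assert "origin" not in edu.get_dict()
assert edu.get_internal_dict()["destination"] == "b.example.org"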