summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2015-03-09 13:29:41 +0000
committerErik Johnston <erik@matrix.org>2015-03-09 13:29:41 +0000
commitf31e65ca8b3a056b81c9ee1c8e5be298e36ed495 (patch)
tree414d6a488f090cea0aff41ef8ca7346f47567a62 /synapse/federation
parentMerge branch 'develop' of github.com:matrix-org/synapse into erikj-perf (diff)
parentD'oh: underscore, not hyphen (diff)
downloadsynapse-f31e65ca8b3a056b81c9ee1c8e5be298e36ed495.tar.xz
Merge branch 'develop' of github.com:matrix-org/synapse into erikj-perf
Diffstat (limited to 'synapse/federation')
-rw-r--r--synapse/federation/federation_base.py133
-rw-r--r--synapse/federation/federation_client.py563
-rw-r--r--synapse/federation/federation_server.py474
-rw-r--r--synapse/federation/persistence.py7
-rw-r--r--synapse/federation/replication.py897
-rw-r--r--synapse/federation/transaction_queue.py359
-rw-r--r--synapse/federation/transport/__init__.py12
-rw-r--r--synapse/federation/transport/client.py67
-rw-r--r--synapse/federation/transport/server.py68
9 files changed, 1666 insertions, 914 deletions
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
new file mode 100644
index 0000000000..21a763214b
--- /dev/null
+++ b/synapse/federation/federation_base.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.internet import defer
+
+from synapse.events.utils import prune_event
+
+from syutil.jsonutil import encode_canonical_json
+
+from synapse.crypto.event_signing import check_event_content_hash
+
+from synapse.api.errors import SynapseError
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+class FederationBase(object):
+    @defer.inlineCallbacks
+    def _check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False):
+        """Takes a list of PDUs and checks the signatures and hashs of each
+        one. If a PDU fails its signature check then we check if we have it in
+        the database and if not then request if from the originating server of
+        that PDU.
+
+        If a PDU fails its content hash check then it is redacted.
+
+        The given list of PDUs are not modified, instead the function returns
+        a new list.
+
+        Args:
+            pdu (list)
+            outlier (bool)
+
+        Returns:
+            Deferred : A list of PDUs that have valid signatures and hashes.
+        """
+
+        signed_pdus = []
+
+        @defer.inlineCallbacks
+        def do(pdu):
+            try:
+                new_pdu = yield self._check_sigs_and_hash(pdu)
+                signed_pdus.append(new_pdu)
+            except SynapseError:
+                # FIXME: We should handle signature failures more gracefully.
+
+                # Check local db.
+                new_pdu = yield self.store.get_event(
+                    pdu.event_id,
+                    allow_rejected=True,
+                    allow_none=True,
+                )
+                if new_pdu:
+                    signed_pdus.append(new_pdu)
+                    return
+
+                # Check pdu.origin
+                if pdu.origin != origin:
+                    try:
+                        new_pdu = yield self.get_pdu(
+                            destinations=[pdu.origin],
+                            event_id=pdu.event_id,
+                            outlier=outlier,
+                        )
+
+                        if new_pdu:
+                            signed_pdus.append(new_pdu)
+                            return
+                    except:
+                        pass
+
+                logger.warn(
+                    "Failed to find copy of %s with valid signature",
+                    pdu.event_id,
+                )
+
+        yield defer.gatherResults(
+            [do(pdu) for pdu in pdus],
+            consumeErrors=True
+        )
+
+        defer.returnValue(signed_pdus)
+
+    @defer.inlineCallbacks
+    def _check_sigs_and_hash(self, pdu):
+        """Throws a SynapseError if the PDU does not have the correct
+        signatures.
+
+        Returns:
+            FrozenEvent: Either the given event or it redacted if it failed the
+            content hash check.
+        """
+        # Check signatures are correct.
+        redacted_event = prune_event(pdu)
+        redacted_pdu_json = redacted_event.get_pdu_json()
+
+        try:
+            yield self.keyring.verify_json_for_server(
+                pdu.origin, redacted_pdu_json
+            )
+        except SynapseError:
+            logger.warn(
+                "Signature check failed for %s redacted to %s",
+                encode_canonical_json(pdu.get_pdu_json()),
+                encode_canonical_json(redacted_pdu_json),
+            )
+            raise
+
+        if not check_event_content_hash(pdu):
+            logger.warn(
+                "Event content has been tampered, redacting %s, %s",
+                pdu.event_id, encode_canonical_json(pdu.get_dict())
+            )
+            defer.returnValue(redacted_event)
+
+        defer.returnValue(pdu)
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
new file mode 100644
index 0000000000..f131941f45
--- /dev/null
+++ b/synapse/federation/federation_client.py
@@ -0,0 +1,563 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.internet import defer
+
+from .federation_base import FederationBase
+from .units import Edu
+
+from synapse.api.errors import (
+    CodeMessageException, HttpResponseException, SynapseError,
+)
+from synapse.util.expiringcache import ExpiringCache
+from synapse.util.logutils import log_function
+from synapse.events import FrozenEvent
+
+from synapse.util.retryutils import get_retry_limiter, NotRetryingDestination
+
+import itertools
+import logging
+import random
+
+
+logger = logging.getLogger(__name__)
+
+
+class FederationClient(FederationBase):
+    def __init__(self):
+        self._get_pdu_cache = None
+
+    def start_get_pdu_cache(self):
+        self._get_pdu_cache = ExpiringCache(
+            cache_name="get_pdu_cache",
+            clock=self._clock,
+            max_len=1000,
+            expiry_ms=120*1000,
+            reset_expiry_on_get=False,
+        )
+
+        self._get_pdu_cache.start()
+
+    @log_function
+    def send_pdu(self, pdu, destinations):
+        """Informs the replication layer about a new PDU generated within the
+        home server that should be transmitted to others.
+
+        TODO: Figure out when we should actually resolve the deferred.
+
+        Args:
+            pdu (Pdu): The new Pdu.
+
+        Returns:
+            Deferred: Completes when we have successfully processed the PDU
+            and replicated it to any interested remote home servers.
+        """
+        order = self._order
+        self._order += 1
+
+        logger.debug("[%s] transaction_layer.enqueue_pdu... ", pdu.event_id)
+
+        # TODO, add errback, etc.
+        self._transaction_queue.enqueue_pdu(pdu, destinations, order)
+
+        logger.debug(
+            "[%s] transaction_layer.enqueue_pdu... done",
+            pdu.event_id
+        )
+
+    @log_function
+    def send_edu(self, destination, edu_type, content):
+        edu = Edu(
+            origin=self.server_name,
+            destination=destination,
+            edu_type=edu_type,
+            content=content,
+        )
+
+        # TODO, add errback, etc.
+        self._transaction_queue.enqueue_edu(edu)
+        return defer.succeed(None)
+
+    @log_function
+    def send_failure(self, failure, destination):
+        self._transaction_queue.enqueue_failure(failure, destination)
+        return defer.succeed(None)
+
+    @log_function
+    def make_query(self, destination, query_type, args,
+                   retry_on_dns_fail=True):
+        """Sends a federation Query to a remote homeserver of the given type
+        and arguments.
+
+        Args:
+            destination (str): Domain name of the remote homeserver
+            query_type (str): Category of the query type; should match the
+                handler name used in register_query_handler().
+            args (dict): Mapping of strings to strings containing the details
+                of the query request.
+
+        Returns:
+            a Deferred which will eventually yield a JSON object from the
+            response
+        """
+        return self.transport_layer.make_query(
+            destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail
+        )
+
+    @defer.inlineCallbacks
+    @log_function
+    def backfill(self, dest, context, limit, extremities):
+        """Requests some more historic PDUs for the given context from the
+        given destination server.
+
+        Args:
+            dest (str): The remote home server to ask.
+            context (str): The context to backfill.
+            limit (int): The maximum number of PDUs to return.
+            extremities (list): List of PDU id and origins of the first pdus
+                we have seen from the context
+
+        Returns:
+            Deferred: Results in the received PDUs.
+        """
+        logger.debug("backfill extrem=%s", extremities)
+
+        # If there are no extremeties then we've (probably) reached the start.
+        if not extremities:
+            return
+
+        transaction_data = yield self.transport_layer.backfill(
+            dest, context, extremities, limit)
+
+        logger.debug("backfill transaction_data=%s", repr(transaction_data))
+
+        pdus = [
+            self.event_from_pdu_json(p, outlier=False)
+            for p in transaction_data["pdus"]
+        ]
+
+        for i, pdu in enumerate(pdus):
+            pdus[i] = yield self._check_sigs_and_hash(pdu)
+
+            # FIXME: We should handle signature failures more gracefully.
+
+        defer.returnValue(pdus)
+
+    @defer.inlineCallbacks
+    @log_function
+    def get_pdu(self, destinations, event_id, outlier=False):
+        """Requests the PDU with given origin and ID from the remote home
+        servers.
+
+        Will attempt to get the PDU from each destination in the list until
+        one succeeds.
+
+        This will persist the PDU locally upon receipt.
+
+        Args:
+            destinations (list): Which home servers to query
+            pdu_origin (str): The home server that originally sent the pdu.
+            event_id (str)
+            outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if
+                it's from an arbitary point in the context as opposed to part
+                of the current block of PDUs. Defaults to `False`
+
+        Returns:
+            Deferred: Results in the requested PDU.
+        """
+
+        # TODO: Rate limit the number of times we try and get the same event.
+
+        if self._get_pdu_cache:
+            e = self._get_pdu_cache.get(event_id)
+            if e:
+                defer.returnValue(e)
+
+        pdu = None
+        for destination in destinations:
+            try:
+                limiter = yield get_retry_limiter(
+                    destination,
+                    self._clock,
+                    self.store,
+                )
+
+                with limiter:
+                    transaction_data = yield self.transport_layer.get_event(
+                        destination, event_id
+                    )
+
+                    logger.debug("transaction_data %r", transaction_data)
+
+                    pdu_list = [
+                        self.event_from_pdu_json(p, outlier=outlier)
+                        for p in transaction_data["pdus"]
+                    ]
+
+                    if pdu_list:
+                        pdu = pdu_list[0]
+
+                        # Check signatures are correct.
+                        pdu = yield self._check_sigs_and_hash(pdu)
+
+                        break
+
+            except SynapseError:
+                logger.info(
+                    "Failed to get PDU %s from %s because %s",
+                    event_id, destination, e,
+                )
+                continue
+            except CodeMessageException as e:
+                if 400 <= e.code < 500:
+                    raise
+
+                logger.info(
+                    "Failed to get PDU %s from %s because %s",
+                    event_id, destination, e,
+                )
+                continue
+            except NotRetryingDestination as e:
+                logger.info(e.message)
+                continue
+            except Exception as e:
+                logger.info(
+                    "Failed to get PDU %s from %s because %s",
+                    event_id, destination, e,
+                )
+                continue
+
+        if self._get_pdu_cache is not None:
+            self._get_pdu_cache[event_id] = pdu
+
+        defer.returnValue(pdu)
+
+    @defer.inlineCallbacks
+    @log_function
+    def get_state_for_room(self, destination, room_id, event_id):
+        """Requests all of the `current` state PDUs for a given room from
+        a remote home server.
+
+        Args:
+            destination (str): The remote homeserver to query for the state.
+            room_id (str): The id of the room we're interested in.
+            event_id (str): The id of the event we want the state at.
+
+        Returns:
+            Deferred: Results in a list of PDUs.
+        """
+
+        result = yield self.transport_layer.get_room_state(
+            destination, room_id, event_id=event_id,
+        )
+
+        pdus = [
+            self.event_from_pdu_json(p, outlier=True) for p in result["pdus"]
+        ]
+
+        auth_chain = [
+            self.event_from_pdu_json(p, outlier=True)
+            for p in result.get("auth_chain", [])
+        ]
+
+        signed_pdus = yield self._check_sigs_and_hash_and_fetch(
+            destination, pdus, outlier=True
+        )
+
+        signed_auth = yield self._check_sigs_and_hash_and_fetch(
+            destination, auth_chain, outlier=True
+        )
+
+        signed_auth.sort(key=lambda e: e.depth)
+
+        defer.returnValue((signed_pdus, signed_auth))
+
+    @defer.inlineCallbacks
+    @log_function
+    def get_event_auth(self, destination, room_id, event_id):
+        res = yield self.transport_layer.get_event_auth(
+            destination, room_id, event_id,
+        )
+
+        auth_chain = [
+            self.event_from_pdu_json(p, outlier=True)
+            for p in res["auth_chain"]
+        ]
+
+        signed_auth = yield self._check_sigs_and_hash_and_fetch(
+            destination, auth_chain, outlier=True
+        )
+
+        signed_auth.sort(key=lambda e: e.depth)
+
+        defer.returnValue(signed_auth)
+
+    @defer.inlineCallbacks
+    def make_join(self, destinations, room_id, user_id):
+        for destination in destinations:
+            try:
+                ret = yield self.transport_layer.make_join(
+                    destination, room_id, user_id
+                )
+
+                pdu_dict = ret["event"]
+
+                logger.debug("Got response to make_join: %s", pdu_dict)
+
+                defer.returnValue(
+                    (destination, self.event_from_pdu_json(pdu_dict))
+                )
+                break
+            except CodeMessageException:
+                raise
+            except Exception as e:
+                logger.warn(
+                    "Failed to make_join via %s: %s",
+                    destination, e.message
+                )
+
+        raise RuntimeError("Failed to send to any server.")
+
+    @defer.inlineCallbacks
+    def send_join(self, destinations, pdu):
+        for destination in destinations:
+            try:
+                time_now = self._clock.time_msec()
+                _, content = yield self.transport_layer.send_join(
+                    destination=destination,
+                    room_id=pdu.room_id,
+                    event_id=pdu.event_id,
+                    content=pdu.get_pdu_json(time_now),
+                )
+
+                logger.debug("Got content: %s", content)
+
+                state = [
+                    self.event_from_pdu_json(p, outlier=True)
+                    for p in content.get("state", [])
+                ]
+
+                auth_chain = [
+                    self.event_from_pdu_json(p, outlier=True)
+                    for p in content.get("auth_chain", [])
+                ]
+
+                signed_state = yield self._check_sigs_and_hash_and_fetch(
+                    destination, state, outlier=True
+                )
+
+                signed_auth = yield self._check_sigs_and_hash_and_fetch(
+                    destination, auth_chain, outlier=True
+                )
+
+                auth_chain.sort(key=lambda e: e.depth)
+
+                defer.returnValue({
+                    "state": signed_state,
+                    "auth_chain": signed_auth,
+                    "origin": destination,
+                })
+            except CodeMessageException:
+                raise
+            except Exception as e:
+                logger.warn(
+                    "Failed to send_join via %s: %s",
+                    destination, e.message
+                )
+
+        raise RuntimeError("Failed to send to any server.")
+
+    @defer.inlineCallbacks
+    def send_invite(self, destination, room_id, event_id, pdu):
+        time_now = self._clock.time_msec()
+        code, content = yield self.transport_layer.send_invite(
+            destination=destination,
+            room_id=room_id,
+            event_id=event_id,
+            content=pdu.get_pdu_json(time_now),
+        )
+
+        pdu_dict = content["event"]
+
+        logger.debug("Got response to send_invite: %s", pdu_dict)
+
+        pdu = self.event_from_pdu_json(pdu_dict)
+
+        # Check signatures are correct.
+        pdu = yield self._check_sigs_and_hash(pdu)
+
+        # FIXME: We should handle signature failures more gracefully.
+
+        defer.returnValue(pdu)
+
+    @defer.inlineCallbacks
+    def query_auth(self, destination, room_id, event_id, local_auth):
+        """
+        Params:
+            destination (str)
+            event_it (str)
+            local_auth (list)
+        """
+        time_now = self._clock.time_msec()
+
+        send_content = {
+            "auth_chain": [e.get_pdu_json(time_now) for e in local_auth],
+        }
+
+        code, content = yield self.transport_layer.send_query_auth(
+            destination=destination,
+            room_id=room_id,
+            event_id=event_id,
+            content=send_content,
+        )
+
+        auth_chain = [
+            self.event_from_pdu_json(e)
+            for e in content["auth_chain"]
+        ]
+
+        signed_auth = yield self._check_sigs_and_hash_and_fetch(
+            destination, auth_chain, outlier=True
+        )
+
+        signed_auth.sort(key=lambda e: e.depth)
+
+        ret = {
+            "auth_chain": signed_auth,
+            "rejects": content.get("rejects", []),
+            "missing": content.get("missing", []),
+        }
+
+        defer.returnValue(ret)
+
+    @defer.inlineCallbacks
+    def get_missing_events(self, destination, room_id, earliest_events_ids,
+                           latest_events, limit, min_depth):
+        """Tries to fetch events we are missing. This is called when we receive
+        an event without having received all of its ancestors.
+
+        Args:
+            destination (str)
+            room_id (str)
+            earliest_events_ids (list): List of event ids. Effectively the
+                events we expected to receive, but haven't. `get_missing_events`
+                should only return events that didn't happen before these.
+            latest_events (list): List of events we have received that we don't
+                have all previous events for.
+            limit (int): Maximum number of events to return.
+            min_depth (int): Minimum depth of events tor return.
+        """
+        try:
+            content = yield self.transport_layer.get_missing_events(
+                destination=destination,
+                room_id=room_id,
+                earliest_events=earliest_events_ids,
+                latest_events=[e.event_id for e in latest_events],
+                limit=limit,
+                min_depth=min_depth,
+            )
+
+            events = [
+                self.event_from_pdu_json(e)
+                for e in content.get("events", [])
+            ]
+
+            signed_events = yield self._check_sigs_and_hash_and_fetch(
+                destination, events, outlier=True
+            )
+
+            have_gotten_all_from_destination = True
+        except HttpResponseException as e:
+            if not e.code == 400:
+                raise
+
+            # We are probably hitting an old server that doesn't support
+            # get_missing_events
+            signed_events = []
+            have_gotten_all_from_destination = False
+
+        if len(signed_events) >= limit:
+            defer.returnValue(signed_events)
+
+        servers = yield self.store.get_joined_hosts_for_room(room_id)
+
+        servers = set(servers)
+        servers.discard(self.server_name)
+
+        failed_to_fetch = set()
+
+        while len(signed_events) < limit:
+            # Are we missing any?
+
+            seen_events = set(earliest_events_ids)
+            seen_events.update(e.event_id for e in signed_events)
+
+            missing_events = {}
+            for e in itertools.chain(latest_events, signed_events):
+                if e.depth > min_depth:
+                    missing_events.update({
+                        e_id: e.depth for e_id, _ in e.prev_events
+                        if e_id not in seen_events
+                        and e_id not in failed_to_fetch
+                    })
+
+            if not missing_events:
+                break
+
+            have_seen = yield self.store.have_events(missing_events)
+
+            for k in have_seen:
+                missing_events.pop(k, None)
+
+            if not missing_events:
+                break
+
+            # Okay, we haven't gotten everything yet. Lets get them.
+            ordered_missing = sorted(missing_events.items(), key=lambda x: x[0])
+
+            if have_gotten_all_from_destination:
+                servers.discard(destination)
+
+            def random_server_list():
+                srvs = list(servers)
+                random.shuffle(srvs)
+                return srvs
+
+            deferreds = [
+                self.get_pdu(
+                    destinations=random_server_list(),
+                    event_id=e_id,
+                )
+                for e_id, depth in ordered_missing[:limit - len(signed_events)]
+            ]
+
+            res = yield defer.DeferredList(deferreds, consumeErrors=True)
+            for (result, val), (e_id, _) in zip(res, ordered_missing):
+                if result:
+                    signed_events.append(val)
+                else:
+                    failed_to_fetch.add(e_id)
+
+        defer.returnValue(signed_events)
+
+    def event_from_pdu_json(self, pdu_json, outlier=False):
+        event = FrozenEvent(
+            pdu_json
+        )
+
+        event.internal_metadata.outlier = outlier
+
+        return event
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
new file mode 100644
index 0000000000..9c7dcdba96
--- /dev/null
+++ b/synapse/federation/federation_server.py
@@ -0,0 +1,474 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.internet import defer
+
+from .federation_base import FederationBase
+from .units import Transaction, Edu
+
+from synapse.util.logutils import log_function
+from synapse.util.logcontext import PreserveLoggingContext
+from synapse.events import FrozenEvent
+
+from synapse.api.errors import FederationError, SynapseError
+
+from synapse.crypto.event_signing import compute_event_signature
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+class FederationServer(FederationBase):
+    def set_handler(self, handler):
+        """Sets the handler that the replication layer will use to communicate
+        receipt of new PDUs from other home servers. The required methods are
+        documented on :py:class:`.ReplicationHandler`.
+        """
+        self.handler = handler
+
+    def register_edu_handler(self, edu_type, handler):
+        if edu_type in self.edu_handlers:
+            raise KeyError("Already have an EDU handler for %s" % (edu_type,))
+
+        self.edu_handlers[edu_type] = handler
+
+    def register_query_handler(self, query_type, handler):
+        """Sets the handler callable that will be used to handle an incoming
+        federation Query of the given type.
+
+        Args:
+            query_type (str): Category name of the query, which should match
+                the string used by make_query.
+            handler (callable): Invoked to handle incoming queries of this type
+
+        handler is invoked as:
+            result = handler(args)
+
+        where 'args' is a dict mapping strings to strings of the query
+          arguments. It should return a Deferred that will eventually yield an
+          object to encode as JSON.
+        """
+        if query_type in self.query_handlers:
+            raise KeyError(
+                "Already have a Query handler for %s" % (query_type,)
+            )
+
+        self.query_handlers[query_type] = handler
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_backfill_request(self, origin, room_id, versions, limit):
+        pdus = yield self.handler.on_backfill_request(
+            origin, room_id, versions, limit
+        )
+
+        defer.returnValue((200, self._transaction_from_pdus(pdus).get_dict()))
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_incoming_transaction(self, transaction_data):
+        transaction = Transaction(**transaction_data)
+
+        for p in transaction.pdus:
+            if "unsigned" in p:
+                unsigned = p["unsigned"]
+                if "age" in unsigned:
+                    p["age"] = unsigned["age"]
+            if "age" in p:
+                p["age_ts"] = int(self._clock.time_msec()) - int(p["age"])
+                del p["age"]
+
+        pdu_list = [
+            self.event_from_pdu_json(p) for p in transaction.pdus
+        ]
+
+        logger.debug("[%s] Got transaction", transaction.transaction_id)
+
+        response = yield self.transaction_actions.have_responded(transaction)
+
+        if response:
+            logger.debug(
+                "[%s] We've already responed to this request",
+                transaction.transaction_id
+            )
+            defer.returnValue(response)
+            return
+
+        logger.debug("[%s] Transaction is new", transaction.transaction_id)
+
+        with PreserveLoggingContext():
+            results = []
+
+            for pdu in pdu_list:
+                d = self._handle_new_pdu(transaction.origin, pdu)
+
+                try:
+                    yield d
+                    results.append({})
+                except FederationError as e:
+                    self.send_failure(e, transaction.origin)
+                    results.append({"error": str(e)})
+                except Exception as e:
+                    results.append({"error": str(e)})
+                    logger.exception("Failed to handle PDU")
+
+            if hasattr(transaction, "edus"):
+                for edu in [Edu(**x) for x in transaction.edus]:
+                    self.received_edu(
+                        transaction.origin,
+                        edu.edu_type,
+                        edu.content
+                    )
+
+            for failure in getattr(transaction, "pdu_failures", []):
+                logger.info("Got failure %r", failure)
+
+        logger.debug("Returning: %s", str(results))
+
+        response = {
+            "pdus": dict(zip(
+                (p.event_id for p in pdu_list), results
+            )),
+        }
+
+        yield self.transaction_actions.set_response(
+            transaction,
+            200, response
+        )
+        defer.returnValue((200, response))
+
+    def received_edu(self, origin, edu_type, content):
+        if edu_type in self.edu_handlers:
+            self.edu_handlers[edu_type](origin, content)
+        else:
+            logger.warn("Received EDU of type %s with no handler", edu_type)
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_context_state_request(self, origin, room_id, event_id):
+        if event_id:
+            pdus = yield self.handler.get_state_for_pdu(
+                origin, room_id, event_id,
+            )
+            auth_chain = yield self.store.get_auth_chain(
+                [pdu.event_id for pdu in pdus]
+            )
+
+            for event in auth_chain:
+                event.signatures.update(
+                    compute_event_signature(
+                        event,
+                        self.hs.hostname,
+                        self.hs.config.signing_key[0]
+                    )
+                )
+        else:
+            raise NotImplementedError("Specify an event")
+
+        defer.returnValue((200, {
+            "pdus": [pdu.get_pdu_json() for pdu in pdus],
+            "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
+        }))
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_pdu_request(self, origin, event_id):
+        pdu = yield self._get_persisted_pdu(origin, event_id)
+
+        if pdu:
+            defer.returnValue(
+                (200, self._transaction_from_pdus([pdu]).get_dict())
+            )
+        else:
+            defer.returnValue((404, ""))
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_pull_request(self, origin, versions):
+        raise NotImplementedError("Pull transactions not implemented")
+
+    @defer.inlineCallbacks
+    def on_query_request(self, query_type, args):
+        if query_type in self.query_handlers:
+            response = yield self.query_handlers[query_type](args)
+            defer.returnValue((200, response))
+        else:
+            defer.returnValue(
+                (404, "No handler for Query type '%s'" % (query_type,))
+            )
+
+    @defer.inlineCallbacks
+    def on_make_join_request(self, room_id, user_id):
+        pdu = yield self.handler.on_make_join_request(room_id, user_id)
+        time_now = self._clock.time_msec()
+        defer.returnValue({"event": pdu.get_pdu_json(time_now)})
+
+    @defer.inlineCallbacks
+    def on_invite_request(self, origin, content):
+        pdu = self.event_from_pdu_json(content)
+        ret_pdu = yield self.handler.on_invite_request(origin, pdu)
+        time_now = self._clock.time_msec()
+        defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)}))
+
+    @defer.inlineCallbacks
+    def on_send_join_request(self, origin, content):
+        logger.debug("on_send_join_request: content: %s", content)
+        pdu = self.event_from_pdu_json(content)
+        logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures)
+        res_pdus = yield self.handler.on_send_join_request(origin, pdu)
+        time_now = self._clock.time_msec()
+        defer.returnValue((200, {
+            "state": [p.get_pdu_json(time_now) for p in res_pdus["state"]],
+            "auth_chain": [
+                p.get_pdu_json(time_now) for p in res_pdus["auth_chain"]
+            ],
+        }))
+
+    @defer.inlineCallbacks
+    def on_event_auth(self, origin, room_id, event_id):
+        time_now = self._clock.time_msec()
+        auth_pdus = yield self.handler.on_event_auth(event_id)
+        defer.returnValue((200, {
+            "auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus],
+        }))
+
+    @defer.inlineCallbacks
+    def on_query_auth_request(self, origin, content, event_id):
+        """
+        Content is a dict with keys::
+            auth_chain (list): A list of events that give the auth chain.
+            missing (list): A list of event_ids indicating what the other
+              side (`origin`) think we're missing.
+            rejects (dict): A mapping from event_id to a 2-tuple of reason
+              string and a proof (or None) of why the event was rejected.
+              The keys of this dict give the list of events the `origin` has
+              rejected.
+
+        Args:
+            origin (str)
+            content (dict)
+            event_id (str)
+
+        Returns:
+            Deferred: Results in `dict` with the same format as `content`
+        """
+        auth_chain = [
+            self.event_from_pdu_json(e)
+            for e in content["auth_chain"]
+        ]
+
+        signed_auth = yield self._check_sigs_and_hash_and_fetch(
+            origin, auth_chain, outlier=True
+        )
+
+        ret = yield self.handler.on_query_auth(
+            origin,
+            event_id,
+            signed_auth,
+            content.get("rejects", []),
+            content.get("missing", []),
+        )
+
+        time_now = self._clock.time_msec()
+        send_content = {
+            "auth_chain": [
+                e.get_pdu_json(time_now)
+                for e in ret["auth_chain"]
+            ],
+            "rejects": ret.get("rejects", []),
+            "missing": ret.get("missing", []),
+        }
+
+        defer.returnValue(
+            (200, send_content)
+        )
+
+    @defer.inlineCallbacks
+    @log_function
+    def on_get_missing_events(self, origin, room_id, earliest_events,
+                              latest_events, limit, min_depth):
+        missing_events = yield self.handler.on_get_missing_events(
+            origin, room_id, earliest_events, latest_events, limit, min_depth
+        )
+
+        time_now = self._clock.time_msec()
+
+        defer.returnValue({
+            "events": [ev.get_pdu_json(time_now) for ev in missing_events],
+        })
+
+    @log_function
+    def _get_persisted_pdu(self, origin, event_id, do_auth=True):
+        """ Get a PDU from the database with given origin and id.
+
+        Returns:
+            Deferred: Results in a `Pdu`.
+        """
+        return self.handler.get_persisted_pdu(
+            origin, event_id, do_auth=do_auth
+        )
+
+    def _transaction_from_pdus(self, pdu_list):
+        """Returns a new Transaction containing the given PDUs suitable for
+        transmission.
+        """
+        time_now = self._clock.time_msec()
+        pdus = [p.get_pdu_json(time_now) for p in pdu_list]
+        return Transaction(
+            origin=self.server_name,
+            pdus=pdus,
+            origin_server_ts=int(time_now),
+            destination=None,
+        )
+
+    @defer.inlineCallbacks
+    @log_function
+    def _handle_new_pdu(self, origin, pdu, get_missing=True):
+        # We reprocess pdus when we have seen them only as outliers
+        existing = yield self._get_persisted_pdu(
+            origin, pdu.event_id, do_auth=False
+        )
+
+        # FIXME: Currently we fetch an event again when we already have it
+        # if it has been marked as an outlier.
+
+        already_seen = (
+            existing and (
+                not existing.internal_metadata.is_outlier()
+                or pdu.internal_metadata.is_outlier()
+            )
+        )
+        if already_seen:
+            logger.debug("Already seen pdu %s", pdu.event_id)
+            return
+
+        # Check signature.
+        try:
+            pdu = yield self._check_sigs_and_hash(pdu)
+        except SynapseError as e:
+            raise FederationError(
+                "ERROR",
+                e.code,
+                e.msg,
+                affected=pdu.event_id,
+            )
+
+        state = None
+
+        auth_chain = []
+
+        have_seen = yield self.store.have_events(
+            [ev for ev, _ in pdu.prev_events]
+        )
+
+        fetch_state = False
+
+        # Get missing pdus if necessary.
+        if not pdu.internal_metadata.is_outlier():
+            # We only backfill backwards to the min depth.
+            min_depth = yield self.handler.get_min_depth_for_context(
+                pdu.room_id
+            )
+
+            logger.debug(
+                "_handle_new_pdu min_depth for %s: %d",
+                pdu.room_id, min_depth
+            )
+
+            prevs = {e_id for e_id, _ in pdu.prev_events}
+            seen = set(have_seen.keys())
+
+            if min_depth and pdu.depth < min_depth:
+                # This is so that we don't notify the user about this
+                # message, to work around the fact that some events will
+                # reference really really old events we really don't want to
+                # send to the clients.
+                pdu.internal_metadata.outlier = True
+            elif min_depth and pdu.depth > min_depth:
+                if get_missing and prevs - seen:
+                    latest_tuples = yield self.store.get_latest_events_in_room(
+                        pdu.room_id
+                    )
+
+                    # We add the prev events that we have seen to the latest
+                    # list to ensure the remote server doesn't give them to us
+                    latest = set(e_id for e_id, _, _ in latest_tuples)
+                    latest |= seen
+
+                    missing_events = yield self.get_missing_events(
+                        origin,
+                        pdu.room_id,
+                        earliest_events_ids=list(latest),
+                        latest_events=[pdu],
+                        limit=10,
+                        min_depth=min_depth,
+                    )
+
+                    # We want to sort these by depth so we process them and
+                    # tell clients about them in order.
+                    missing_events.sort(key=lambda x: x.depth)
+
+                    for e in missing_events:
+                        yield self._handle_new_pdu(
+                            origin,
+                            e,
+                            get_missing=False
+                        )
+
+                    have_seen = yield self.store.have_events(
+                        [ev for ev, _ in pdu.prev_events]
+                    )
+
+            prevs = {e_id for e_id, _ in pdu.prev_events}
+            seen = set(have_seen.keys())
+            if prevs - seen:
+                fetch_state = True
+
+        if fetch_state:
+            # We need to get the state at this event, since we haven't
+            # processed all the prev events.
+            logger.debug(
+                "_handle_new_pdu getting state for %s",
+                pdu.room_id
+            )
+            try:
+                state, auth_chain = yield self.get_state_for_room(
+                    origin, pdu.room_id, pdu.event_id,
+                )
+            except:
+                logger.warn("Failed to get state for event: %s", pdu.event_id)
+
+        yield self.handler.on_receive_pdu(
+            origin,
+            pdu,
+            backfilled=False,
+            state=state,
+            auth_chain=auth_chain,
+        )
+
+    def __str__(self):
+        return "<ReplicationLayer(%s)>" % self.server_name
+
+    def event_from_pdu_json(self, pdu_json, outlier=False):
+        event = FrozenEvent(
+            pdu_json
+        )
+
+        event.internal_metadata.outlier = outlier
+
+        return event
diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py
index 85c82a4623..76a9dcd777 100644
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
@@ -23,7 +23,8 @@ from twisted.internet import defer
 
 from synapse.util.logutils import log_function
 
-import json
+from syutil.jsonutil import encode_canonical_json
+
 import logging
 
 
@@ -70,7 +71,7 @@ class TransactionActions(object):
             transaction.transaction_id,
             transaction.origin,
             code,
-            json.dumps(response)
+            encode_canonical_json(response)
         )
 
     @defer.inlineCallbacks
@@ -100,5 +101,5 @@ class TransactionActions(object):
             transaction.transaction_id,
             transaction.destination,
             response_code,
-            json.dumps(response_dict)
+            encode_canonical_json(response_dict)
         )
diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py
index 6620532a60..54a0c7ad8e 100644
--- a/synapse/federation/replication.py
+++ b/synapse/federation/replication.py
@@ -17,23 +17,20 @@
 a given transport.
 """
 
-from twisted.internet import defer
+from .federation_client import FederationClient
+from .federation_server import FederationServer
 
-from .units import Transaction, Edu
+from .transaction_queue import TransactionQueue
 
 from .persistence import TransactionActions
 
-from synapse.util.logutils import log_function
-from synapse.util.logcontext import PreserveLoggingContext
-from synapse.events import FrozenEvent
-
 import logging
 
 
 logger = logging.getLogger(__name__)
 
 
-class ReplicationLayer(object):
+class ReplicationLayer(FederationClient, FederationServer):
     """This layer is responsible for replicating with remote home servers over
     the given transport. I.e., does the sending and receiving of PDUs to
     remote home servers.
@@ -54,898 +51,28 @@ class ReplicationLayer(object):
     def __init__(self, hs, transport_layer):
         self.server_name = hs.hostname
 
+        self.keyring = hs.get_keyring()
+
         self.transport_layer = transport_layer
         self.transport_layer.register_received_handler(self)
         self.transport_layer.register_request_handler(self)
 
-        self.store = hs.get_datastore()
-        # self.pdu_actions = PduActions(self.store)
-        self.transaction_actions = TransactionActions(self.store)
+        self.federation_client = self
 
-        self._transaction_queue = _TransactionQueue(
-            hs, self.transaction_actions, transport_layer
-        )
+        self.store = hs.get_datastore()
 
         self.handler = None
         self.edu_handlers = {}
         self.query_handlers = {}
 
-        self._order = 0
-
         self._clock = hs.get_clock()
 
-        self.event_builder_factory = hs.get_event_builder_factory()
-
-    def set_handler(self, handler):
-        """Sets the handler that the replication layer will use to communicate
-        receipt of new PDUs from other home servers. The required methods are
-        documented on :py:class:`.ReplicationHandler`.
-        """
-        self.handler = handler
-
-    def register_edu_handler(self, edu_type, handler):
-        if edu_type in self.edu_handlers:
-            raise KeyError("Already have an EDU handler for %s" % (edu_type,))
-
-        self.edu_handlers[edu_type] = handler
-
-    def register_query_handler(self, query_type, handler):
-        """Sets the handler callable that will be used to handle an incoming
-        federation Query of the given type.
-
-        Args:
-            query_type (str): Category name of the query, which should match
-                the string used by make_query.
-            handler (callable): Invoked to handle incoming queries of this type
-
-        handler is invoked as:
-            result = handler(args)
-
-        where 'args' is a dict mapping strings to strings of the query
-          arguments. It should return a Deferred that will eventually yield an
-          object to encode as JSON.
-        """
-        if query_type in self.query_handlers:
-            raise KeyError(
-                "Already have a Query handler for %s" % (query_type,)
-            )
-
-        self.query_handlers[query_type] = handler
-
-    @log_function
-    def send_pdu(self, pdu, destinations):
-        """Informs the replication layer about a new PDU generated within the
-        home server that should be transmitted to others.
-
-        TODO: Figure out when we should actually resolve the deferred.
-
-        Args:
-            pdu (Pdu): The new Pdu.
-
-        Returns:
-            Deferred: Completes when we have successfully processed the PDU
-            and replicated it to any interested remote home servers.
-        """
-        order = self._order
-        self._order += 1
-
-        logger.debug("[%s] transaction_layer.enqueue_pdu... ", pdu.event_id)
-
-        # TODO, add errback, etc.
-        self._transaction_queue.enqueue_pdu(pdu, destinations, order)
-
-        logger.debug(
-            "[%s] transaction_layer.enqueue_pdu... done",
-            pdu.event_id
-        )
-
-    @log_function
-    def send_edu(self, destination, edu_type, content):
-        edu = Edu(
-            origin=self.server_name,
-            destination=destination,
-            edu_type=edu_type,
-            content=content,
-        )
-
-        # TODO, add errback, etc.
-        self._transaction_queue.enqueue_edu(edu)
-        return defer.succeed(None)
-
-    @log_function
-    def send_failure(self, failure, destination):
-        self._transaction_queue.enqueue_failure(failure, destination)
-        return defer.succeed(None)
-
-    @log_function
-    def make_query(self, destination, query_type, args,
-                   retry_on_dns_fail=True):
-        """Sends a federation Query to a remote homeserver of the given type
-        and arguments.
-
-        Args:
-            destination (str): Domain name of the remote homeserver
-            query_type (str): Category of the query type; should match the
-                handler name used in register_query_handler().
-            args (dict): Mapping of strings to strings containing the details
-                of the query request.
-
-        Returns:
-            a Deferred which will eventually yield a JSON object from the
-            response
-        """
-        return self.transport_layer.make_query(
-            destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail
-        )
-
-    @defer.inlineCallbacks
-    @log_function
-    def backfill(self, dest, context, limit, extremities):
-        """Requests some more historic PDUs for the given context from the
-        given destination server.
-
-        Args:
-            dest (str): The remote home server to ask.
-            context (str): The context to backfill.
-            limit (int): The maximum number of PDUs to return.
-            extremities (list): List of PDU id and origins of the first pdus
-                we have seen from the context
-
-        Returns:
-            Deferred: Results in the received PDUs.
-        """
-        logger.debug("backfill extrem=%s", extremities)
-
-        # If there are no extremeties then we've (probably) reached the start.
-        if not extremities:
-            return
-
-        transaction_data = yield self.transport_layer.backfill(
-            dest, context, extremities, limit)
-
-        logger.debug("backfill transaction_data=%s", repr(transaction_data))
-
-        transaction = Transaction(**transaction_data)
-
-        pdus = [
-            self.event_from_pdu_json(p, outlier=False)
-            for p in transaction.pdus
-        ]
-        for pdu in pdus:
-            yield self._handle_new_pdu(dest, pdu, backfilled=True)
-
-        defer.returnValue(pdus)
-
-    @defer.inlineCallbacks
-    @log_function
-    def get_pdu(self, destination, event_id, outlier=False):
-        """Requests the PDU with given origin and ID from the remote home
-        server.
-
-        This will persist the PDU locally upon receipt.
-
-        Args:
-            destination (str): Which home server to query
-            pdu_origin (str): The home server that originally sent the pdu.
-            event_id (str)
-            outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if
-                it's from an arbitary point in the context as opposed to part
-                of the current block of PDUs. Defaults to `False`
-
-        Returns:
-            Deferred: Results in the requested PDU.
-        """
-
-        transaction_data = yield self.transport_layer.get_event(
-            destination, event_id
-        )
-
-        transaction = Transaction(**transaction_data)
-
-        pdu_list = [
-            self.event_from_pdu_json(p, outlier=outlier)
-            for p in transaction.pdus
-        ]
-
-        pdu = None
-        if pdu_list:
-            pdu = pdu_list[0]
-            yield self._handle_new_pdu(destination, pdu)
-
-        defer.returnValue(pdu)
-
-    @defer.inlineCallbacks
-    @log_function
-    def get_state_for_room(self, destination, room_id, event_id):
-        """Requests all of the `current` state PDUs for a given room from
-        a remote home server.
-
-        Args:
-            destination (str): The remote homeserver to query for the state.
-            room_id (str): The id of the room we're interested in.
-            event_id (str): The id of the event we want the state at.
-
-        Returns:
-            Deferred: Results in a list of PDUs.
-        """
-
-        result = yield self.transport_layer.get_room_state(
-            destination, room_id, event_id=event_id,
-        )
-
-        pdus = [
-            self.event_from_pdu_json(p, outlier=True) for p in result["pdus"]
-        ]
-
-        auth_chain = [
-            self.event_from_pdu_json(p, outlier=True)
-            for p in result.get("auth_chain", [])
-        ]
-
-        defer.returnValue((pdus, auth_chain))
-
-    @defer.inlineCallbacks
-    @log_function
-    def get_event_auth(self, destination, room_id, event_id):
-        res = yield self.transport_layer.get_event_auth(
-            destination, room_id, event_id,
-        )
-
-        auth_chain = [
-            self.event_from_pdu_json(p, outlier=True)
-            for p in res["auth_chain"]
-        ]
-
-        auth_chain.sort(key=lambda e: e.depth)
-
-        defer.returnValue(auth_chain)
-
-    @defer.inlineCallbacks
-    @log_function
-    def on_backfill_request(self, origin, room_id, versions, limit):
-        pdus = yield self.handler.on_backfill_request(
-            origin, room_id, versions, limit
-        )
-
-        defer.returnValue((200, self._transaction_from_pdus(pdus).get_dict()))
-
-    @defer.inlineCallbacks
-    @log_function
-    def on_incoming_transaction(self, transaction_data):
-        transaction = Transaction(**transaction_data)
-
-        for p in transaction.pdus:
-            if "unsigned" in p:
-                unsigned = p["unsigned"]
-                if "age" in unsigned:
-                    p["age"] = unsigned["age"]
-            if "age" in p:
-                p["age_ts"] = int(self._clock.time_msec()) - int(p["age"])
-                del p["age"]
-
-        pdu_list = [
-            self.event_from_pdu_json(p) for p in transaction.pdus
-        ]
-
-        logger.debug("[%s] Got transaction", transaction.transaction_id)
-
-        response = yield self.transaction_actions.have_responded(transaction)
-
-        if response:
-            logger.debug("[%s] We've already responed to this request",
-                         transaction.transaction_id)
-            defer.returnValue(response)
-            return
-
-        logger.debug("[%s] Transaction is new", transaction.transaction_id)
-
-        with PreserveLoggingContext():
-            dl = []
-            for pdu in pdu_list:
-                dl.append(self._handle_new_pdu(transaction.origin, pdu))
-
-            if hasattr(transaction, "edus"):
-                for edu in [Edu(**x) for x in transaction.edus]:
-                    self.received_edu(
-                        transaction.origin,
-                        edu.edu_type,
-                        edu.content
-                    )
-
-            results = yield defer.DeferredList(dl)
-
-        ret = []
-        for r in results:
-            if r[0]:
-                ret.append({})
-            else:
-                logger.exception(r[1])
-                ret.append({"error": str(r[1])})
-
-        logger.debug("Returning: %s", str(ret))
-
-        yield self.transaction_actions.set_response(
-            transaction,
-            200, response
-        )
-        defer.returnValue((200, response))
-
-    def received_edu(self, origin, edu_type, content):
-        if edu_type in self.edu_handlers:
-            self.edu_handlers[edu_type](origin, content)
-        else:
-            logger.warn("Received EDU of type %s with no handler", edu_type)
-
-    @defer.inlineCallbacks
-    @log_function
-    def on_context_state_request(self, origin, room_id, event_id):
-        if event_id:
-            pdus = yield self.handler.get_state_for_pdu(
-                origin, room_id, event_id,
-            )
-            auth_chain = yield self.store.get_auth_chain(
-                [pdu.event_id for pdu in pdus]
-            )
-        else:
-            raise NotImplementedError("Specify an event")
-
-        defer.returnValue((200, {
-            "pdus": [pdu.get_pdu_json() for pdu in pdus],
-            "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
-        }))
-
-    @defer.inlineCallbacks
-    @log_function
-    def on_pdu_request(self, origin, event_id):
-        pdu = yield self._get_persisted_pdu(origin, event_id)
-
-        if pdu:
-            defer.returnValue(
-                (200, self._transaction_from_pdus([pdu]).get_dict())
-            )
-        else:
-            defer.returnValue((404, ""))
-
-    @defer.inlineCallbacks
-    @log_function
-    def on_pull_request(self, origin, versions):
-        raise NotImplementedError("Pull transactions not implemented")
-
-    @defer.inlineCallbacks
-    def on_query_request(self, query_type, args):
-        if query_type in self.query_handlers:
-            response = yield self.query_handlers[query_type](args)
-            defer.returnValue((200, response))
-        else:
-            defer.returnValue(
-                (404, "No handler for Query type '%s'" % (query_type,))
-            )
-
-    @defer.inlineCallbacks
-    def on_make_join_request(self, room_id, user_id):
-        pdu = yield self.handler.on_make_join_request(room_id, user_id)
-        time_now = self._clock.time_msec()
-        defer.returnValue({"event": pdu.get_pdu_json(time_now)})
-
-    @defer.inlineCallbacks
-    def on_invite_request(self, origin, content):
-        pdu = self.event_from_pdu_json(content)
-        ret_pdu = yield self.handler.on_invite_request(origin, pdu)
-        time_now = self._clock.time_msec()
-        defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)}))
-
-    @defer.inlineCallbacks
-    def on_send_join_request(self, origin, content):
-        logger.debug("on_send_join_request: content: %s", content)
-        pdu = self.event_from_pdu_json(content)
-        logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures)
-        res_pdus = yield self.handler.on_send_join_request(origin, pdu)
-        time_now = self._clock.time_msec()
-        defer.returnValue((200, {
-            "state": [p.get_pdu_json(time_now) for p in res_pdus["state"]],
-            "auth_chain": [
-                p.get_pdu_json(time_now) for p in res_pdus["auth_chain"]
-            ],
-        }))
-
-    @defer.inlineCallbacks
-    def on_event_auth(self, origin, room_id, event_id):
-        time_now = self._clock.time_msec()
-        auth_pdus = yield self.handler.on_event_auth(event_id)
-        defer.returnValue((200, {
-            "auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus],
-        }))
-
-    @defer.inlineCallbacks
-    def make_join(self, destination, room_id, user_id):
-        ret = yield self.transport_layer.make_join(
-            destination, room_id, user_id
-        )
-
-        pdu_dict = ret["event"]
-
-        logger.debug("Got response to make_join: %s", pdu_dict)
-
-        defer.returnValue(self.event_from_pdu_json(pdu_dict))
-
-    @defer.inlineCallbacks
-    def send_join(self, destination, pdu):
-        time_now = self._clock.time_msec()
-        _, content = yield self.transport_layer.send_join(
-            destination=destination,
-            room_id=pdu.room_id,
-            event_id=pdu.event_id,
-            content=pdu.get_pdu_json(time_now),
-        )
-
-        logger.debug("Got content: %s", content)
-
-        state = [
-            self.event_from_pdu_json(p, outlier=True)
-            for p in content.get("state", [])
-        ]
-
-        auth_chain = [
-            self.event_from_pdu_json(p, outlier=True)
-            for p in content.get("auth_chain", [])
-        ]
-
-        auth_chain.sort(key=lambda e: e.depth)
-
-        defer.returnValue({
-            "state": state,
-            "auth_chain": auth_chain,
-        })
-
-    @defer.inlineCallbacks
-    def send_invite(self, destination, room_id, event_id, pdu):
-        time_now = self._clock.time_msec()
-        code, content = yield self.transport_layer.send_invite(
-            destination=destination,
-            room_id=room_id,
-            event_id=event_id,
-            content=pdu.get_pdu_json(time_now),
-        )
-
-        pdu_dict = content["event"]
-
-        logger.debug("Got response to send_invite: %s", pdu_dict)
-
-        defer.returnValue(self.event_from_pdu_json(pdu_dict))
-
-    @log_function
-    def _get_persisted_pdu(self, origin, event_id, do_auth=True):
-        """ Get a PDU from the database with given origin and id.
-
-        Returns:
-            Deferred: Results in a `Pdu`.
-        """
-        return self.handler.get_persisted_pdu(
-            origin, event_id, do_auth=do_auth
-        )
-
-    def _transaction_from_pdus(self, pdu_list):
-        """Returns a new Transaction containing the given PDUs suitable for
-        transmission.
-        """
-        time_now = self._clock.time_msec()
-        pdus = [p.get_pdu_json(time_now) for p in pdu_list]
-        return Transaction(
-            origin=self.server_name,
-            pdus=pdus,
-            origin_server_ts=int(time_now),
-            destination=None,
-        )
-
-    @defer.inlineCallbacks
-    @log_function
-    def _handle_new_pdu(self, origin, pdu, backfilled=False):
-        # We reprocess pdus when we have seen them only as outliers
-        existing = yield self._get_persisted_pdu(
-            origin, pdu.event_id, do_auth=False
-        )
-
-        already_seen = (
-            existing and (
-                not existing.internal_metadata.is_outlier()
-                or pdu.internal_metadata.is_outlier()
-            )
-        )
-        if already_seen:
-            logger.debug("Already seen pdu %s", pdu.event_id)
-            defer.returnValue({})
-            return
-
-        state = None
-
-        auth_chain = []
-
-        # We need to make sure we have all the auth events.
-        # for e_id, _ in pdu.auth_events:
-        #     exists = yield self._get_persisted_pdu(
-        #         origin,
-        #         e_id,
-        #         do_auth=False
-        #     )
-        #
-        #     if not exists:
-        #         try:
-        #             logger.debug(
-        #                 "_handle_new_pdu fetch missing auth event %s from %s",
-        #                 e_id,
-        #                 origin,
-        #             )
-        #
-        #             yield self.get_pdu(
-        #                 origin,
-        #                 event_id=e_id,
-        #                 outlier=True,
-        #             )
-        #
-        #             logger.debug("Processed pdu %s", e_id)
-        #         except:
-        #             logger.warn(
-        #                 "Failed to get auth event %s from %s",
-        #                 e_id,
-        #                 origin
-        #             )
-
-        # Get missing pdus if necessary.
-        if not pdu.internal_metadata.is_outlier():
-            # We only backfill backwards to the min depth.
-            min_depth = yield self.handler.get_min_depth_for_context(
-                pdu.room_id
-            )
-
-            logger.debug(
-                "_handle_new_pdu min_depth for %s: %d",
-                pdu.room_id, min_depth
-            )
-
-            if min_depth and pdu.depth > min_depth:
-                for event_id, hashes in pdu.prev_events:
-                    exists = yield self._get_persisted_pdu(
-                        origin,
-                        event_id,
-                        do_auth=False
-                    )
-
-                    if not exists:
-                        logger.debug(
-                            "_handle_new_pdu requesting pdu %s",
-                            event_id
-                        )
-
-                        try:
-                            yield self.get_pdu(
-                                origin,
-                                event_id=event_id,
-                            )
-                            logger.debug("Processed pdu %s", event_id)
-                        except:
-                            # TODO(erikj): Do some more intelligent retries.
-                            logger.exception("Failed to get PDU")
-            else:
-                # We need to get the state at this event, since we have reached
-                # a backward extremity edge.
-                logger.debug(
-                    "_handle_new_pdu getting state for %s",
-                    pdu.room_id
-                )
-                state, auth_chain = yield self.get_state_for_room(
-                    origin, pdu.room_id, pdu.event_id,
-                )
-
-        if not backfilled:
-            ret = yield self.handler.on_receive_pdu(
-                origin,
-                pdu,
-                backfilled=backfilled,
-                state=state,
-                auth_chain=auth_chain,
-            )
-        else:
-            ret = None
+        self.transaction_actions = TransactionActions(self.store)
+        self._transaction_queue = TransactionQueue(hs, transport_layer)
 
-        # yield self.pdu_actions.mark_as_processed(pdu)
+        self._order = 0
 
-        defer.returnValue(ret)
+        self.hs = hs
 
     def __str__(self):
         return "<ReplicationLayer(%s)>" % self.server_name
-
-    def event_from_pdu_json(self, pdu_json, outlier=False):
-        event = FrozenEvent(
-            pdu_json
-        )
-
-        event.internal_metadata.outlier = outlier
-
-        return event
-
-
-class _TransactionQueue(object):
-    """This class makes sure we only have one transaction in flight at
-    a time for a given destination.
-
-    It batches pending PDUs into single transactions.
-    """
-
-    def __init__(self, hs, transaction_actions, transport_layer):
-        self.server_name = hs.hostname
-        self.transaction_actions = transaction_actions
-        self.transport_layer = transport_layer
-
-        self._clock = hs.get_clock()
-        self.store = hs.get_datastore()
-
-        # Is a mapping from destinations -> deferreds. Used to keep track
-        # of which destinations have transactions in flight and when they are
-        # done
-        self.pending_transactions = {}
-
-        # Is a mapping from destination -> list of
-        # tuple(pending pdus, deferred, order)
-        self.pending_pdus_by_dest = {}
-        # destination -> list of tuple(edu, deferred)
-        self.pending_edus_by_dest = {}
-
-        # destination -> list of tuple(failure, deferred)
-        self.pending_failures_by_dest = {}
-
-        # HACK to get unique tx id
-        self._next_txn_id = int(self._clock.time_msec())
-
-    @defer.inlineCallbacks
-    @log_function
-    def enqueue_pdu(self, pdu, destinations, order):
-        # We loop through all destinations to see whether we already have
-        # a transaction in progress. If we do, stick it in the pending_pdus
-        # table and we'll get back to it later.
-
-        destinations = set(destinations)
-        destinations.discard(self.server_name)
-        destinations.discard("localhost")
-
-        logger.debug("Sending to: %s", str(destinations))
-
-        if not destinations:
-            return
-
-        deferreds = []
-
-        for destination in destinations:
-            deferred = defer.Deferred()
-            self.pending_pdus_by_dest.setdefault(destination, []).append(
-                (pdu, deferred, order)
-            )
-
-            def eb(failure):
-                if not deferred.called:
-                    deferred.errback(failure)
-                else:
-                    logger.warn("Failed to send pdu", failure)
-
-            with PreserveLoggingContext():
-                self._attempt_new_transaction(destination).addErrback(eb)
-
-            deferreds.append(deferred)
-
-        yield defer.DeferredList(deferreds)
-
-    # NO inlineCallbacks
-    def enqueue_edu(self, edu):
-        destination = edu.destination
-
-        if destination == self.server_name:
-            return
-
-        deferred = defer.Deferred()
-        self.pending_edus_by_dest.setdefault(destination, []).append(
-            (edu, deferred)
-        )
-
-        def eb(failure):
-            if not deferred.called:
-                deferred.errback(failure)
-            else:
-                logger.warn("Failed to send edu", failure)
-
-        with PreserveLoggingContext():
-            self._attempt_new_transaction(destination).addErrback(eb)
-
-        return deferred
-
-    @defer.inlineCallbacks
-    def enqueue_failure(self, failure, destination):
-        deferred = defer.Deferred()
-
-        self.pending_failures_by_dest.setdefault(
-            destination, []
-        ).append(
-            (failure, deferred)
-        )
-
-        yield deferred
-
-    @defer.inlineCallbacks
-    @log_function
-    def _attempt_new_transaction(self, destination):
-
-        (retry_last_ts, retry_interval) = (0, 0)
-        retry_timings = yield self.store.get_destination_retry_timings(
-            destination
-        )
-        if retry_timings:
-            (retry_last_ts, retry_interval) = (
-                retry_timings.retry_last_ts, retry_timings.retry_interval
-            )
-            if retry_last_ts + retry_interval > int(self._clock.time_msec()):
-                logger.info(
-                    "TX [%s] not ready for retry yet - "
-                    "dropping transaction for now",
-                    destination,
-                )
-                return
-            else:
-                logger.info("TX [%s] is ready for retry", destination)
-
-        logger.info("TX [%s] _attempt_new_transaction", destination)
-
-        if destination in self.pending_transactions:
-            # XXX: pending_transactions can get stuck on by a never-ending
-            # request at which point pending_pdus_by_dest just keeps growing.
-            # we need application-layer timeouts of some flavour of these
-            # requests
-            return
-
-        # list of (pending_pdu, deferred, order)
-        pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
-        pending_edus = self.pending_edus_by_dest.pop(destination, [])
-        pending_failures = self.pending_failures_by_dest.pop(destination, [])
-
-        if pending_pdus:
-            logger.info("TX [%s] len(pending_pdus_by_dest[dest]) = %d",
-                        destination, len(pending_pdus))
-
-        if not pending_pdus and not pending_edus and not pending_failures:
-            return
-
-        logger.debug(
-            "TX [%s] Attempting new transaction"
-            " (pdus: %d, edus: %d, failures: %d)",
-            destination,
-            len(pending_pdus),
-            len(pending_edus),
-            len(pending_failures)
-        )
-
-        # Sort based on the order field
-        pending_pdus.sort(key=lambda t: t[2])
-
-        pdus = [x[0] for x in pending_pdus]
-        edus = [x[0] for x in pending_edus]
-        failures = [x[0].get_dict() for x in pending_failures]
-        deferreds = [
-            x[1]
-            for x in pending_pdus + pending_edus + pending_failures
-        ]
-
-        try:
-            self.pending_transactions[destination] = 1
-
-            logger.debug("TX [%s] Persisting transaction...", destination)
-
-            transaction = Transaction.create_new(
-                origin_server_ts=int(self._clock.time_msec()),
-                transaction_id=str(self._next_txn_id),
-                origin=self.server_name,
-                destination=destination,
-                pdus=pdus,
-                edus=edus,
-                pdu_failures=failures,
-            )
-
-            self._next_txn_id += 1
-
-            yield self.transaction_actions.prepare_to_send(transaction)
-
-            logger.debug("TX [%s] Persisted transaction", destination)
-            logger.info(
-                "TX [%s] Sending transaction [%s]",
-                destination,
-                transaction.transaction_id,
-            )
-
-            # Actually send the transaction
-
-            # FIXME (erikj): This is a bit of a hack to make the Pdu age
-            # keys work
-            def json_data_cb():
-                data = transaction.get_dict()
-                now = int(self._clock.time_msec())
-                if "pdus" in data:
-                    for p in data["pdus"]:
-                        if "age_ts" in p:
-                            unsigned = p.setdefault("unsigned", {})
-                            unsigned["age"] = now - int(p["age_ts"])
-                            del p["age_ts"]
-                return data
-
-            code, response = yield self.transport_layer.send_transaction(
-                transaction, json_data_cb
-            )
-
-            logger.info("TX [%s] got %d response", destination, code)
-
-            logger.debug("TX [%s] Sent transaction", destination)
-            logger.debug("TX [%s] Marking as delivered...", destination)
-
-            yield self.transaction_actions.delivered(
-                transaction, code, response
-            )
-
-            logger.debug("TX [%s] Marked as delivered", destination)
-            logger.debug("TX [%s] Yielding to callbacks...", destination)
-
-            for deferred in deferreds:
-                if code == 200:
-                    if retry_last_ts:
-                        # this host is alive! reset retry schedule
-                        yield self.store.set_destination_retry_timings(
-                            destination, 0, 0
-                        )
-                    deferred.callback(None)
-                else:
-                    self.set_retrying(destination, retry_interval)
-                    deferred.errback(RuntimeError("Got status %d" % code))
-
-                # Ensures we don't continue until all callbacks on that
-                # deferred have fired
-                try:
-                    yield deferred
-                except:
-                    pass
-
-            logger.debug("TX [%s] Yielded to callbacks", destination)
-
-        except Exception as e:
-            # We capture this here as there as nothing actually listens
-            # for this finishing functions deferred.
-            logger.warn(
-                "TX [%s] Problem in _attempt_transaction: %s",
-                destination,
-                e,
-            )
-
-            self.set_retrying(destination, retry_interval)
-
-            for deferred in deferreds:
-                if not deferred.called:
-                    deferred.errback(e)
-
-        finally:
-            # We want to be *very* sure we delete this after we stop processing
-            self.pending_transactions.pop(destination, None)
-
-            # Check to see if there is anything else to send.
-            self._attempt_new_transaction(destination)
-
-    @defer.inlineCallbacks
-    def set_retrying(self, destination, retry_interval):
-        # track that this destination is having problems and we should
-        # give it a chance to recover before trying it again
-
-        if retry_interval:
-            retry_interval *= 2
-            # plateau at hourly retries for now
-            if retry_interval >= 60 * 60 * 1000:
-                retry_interval = 60 * 60 * 1000
-        else:
-            retry_interval = 2000  # try again at first after 2 seconds
-
-        yield self.store.set_destination_retry_timings(
-            destination,
-            int(self._clock.time_msec()),
-            retry_interval
-        )
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
new file mode 100644
index 0000000000..741a4e7a1a
--- /dev/null
+++ b/synapse/federation/transaction_queue.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014, 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.internet import defer
+
+from .persistence import TransactionActions
+from .units import Transaction
+
+from synapse.api.errors import HttpResponseException
+from synapse.util.logutils import log_function
+from synapse.util.logcontext import PreserveLoggingContext
+from synapse.util.retryutils import (
+    get_retry_limiter, NotRetryingDestination,
+)
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+class TransactionQueue(object):
+    """This class makes sure we only have one transaction in flight at
+    a time for a given destination.
+
+    It batches pending PDUs into single transactions.
+    """
+
+    def __init__(self, hs, transport_layer):
+        self.server_name = hs.hostname
+
+        self.store = hs.get_datastore()
+        self.transaction_actions = TransactionActions(self.store)
+
+        self.transport_layer = transport_layer
+
+        self._clock = hs.get_clock()
+
+        # Is a mapping from destinations -> deferreds. Used to keep track
+        # of which destinations have transactions in flight and when they are
+        # done
+        self.pending_transactions = {}
+
+        # Is a mapping from destination -> list of
+        # tuple(pending pdus, deferred, order)
+        self.pending_pdus_by_dest = {}
+        # destination -> list of tuple(edu, deferred)
+        self.pending_edus_by_dest = {}
+
+        # destination -> list of tuple(failure, deferred)
+        self.pending_failures_by_dest = {}
+
+        # HACK to get unique tx id
+        self._next_txn_id = int(self._clock.time_msec())
+
+    def can_send_to(self, destination):
+        """Can we send messages to the given server?
+
+        We can't send messages to ourselves. If we are running on localhost
+        then we can only federation with other servers running on localhost.
+        Otherwise we only federate with servers on a public domain.
+
+        Args:
+            destination(str): The server we are possibly trying to send to.
+        Returns:
+            bool: True if we can send to the server.
+        """
+
+        if destination == self.server_name:
+            return False
+        if self.server_name.startswith("localhost"):
+            return destination.startswith("localhost")
+        else:
+            return not destination.startswith("localhost")
+
+    @defer.inlineCallbacks
+    @log_function
+    def enqueue_pdu(self, pdu, destinations, order):
+        # We loop through all destinations to see whether we already have
+        # a transaction in progress. If we do, stick it in the pending_pdus
+        # table and we'll get back to it later.
+
+        destinations = set(destinations)
+        destinations = set(
+            dest for dest in destinations if self.can_send_to(dest)
+        )
+
+        logger.debug("Sending to: %s", str(destinations))
+
+        if not destinations:
+            return
+
+        deferreds = []
+
+        for destination in destinations:
+            deferred = defer.Deferred()
+            self.pending_pdus_by_dest.setdefault(destination, []).append(
+                (pdu, deferred, order)
+            )
+
+            def chain(failure):
+                if not deferred.called:
+                    deferred.errback(failure)
+
+            def log_failure(failure):
+                logger.warn("Failed to send pdu", failure.value)
+
+            deferred.addErrback(log_failure)
+
+            with PreserveLoggingContext():
+                self._attempt_new_transaction(destination).addErrback(chain)
+
+            deferreds.append(deferred)
+
+        yield defer.DeferredList(deferreds, consumeErrors=True)
+
+    # NO inlineCallbacks
+    def enqueue_edu(self, edu):
+        destination = edu.destination
+
+        if not self.can_send_to(destination):
+            return
+
+        deferred = defer.Deferred()
+        self.pending_edus_by_dest.setdefault(destination, []).append(
+            (edu, deferred)
+        )
+
+        def chain(failure):
+            if not deferred.called:
+                deferred.errback(failure)
+
+        def log_failure(failure):
+            logger.warn("Failed to send pdu", failure.value)
+
+        deferred.addErrback(log_failure)
+
+        with PreserveLoggingContext():
+            self._attempt_new_transaction(destination).addErrback(chain)
+
+        return deferred
+
+    @defer.inlineCallbacks
+    def enqueue_failure(self, failure, destination):
+        if destination == self.server_name or destination == "localhost":
+            return
+
+        deferred = defer.Deferred()
+
+        if not self.can_send_to(destination):
+            return
+
+        self.pending_failures_by_dest.setdefault(
+            destination, []
+        ).append(
+            (failure, deferred)
+        )
+
+        def chain(f):
+            if not deferred.called:
+                deferred.errback(f)
+
+        def log_failure(f):
+            logger.warn("Failed to send pdu", f.value)
+
+        deferred.addErrback(log_failure)
+
+        with PreserveLoggingContext():
+            self._attempt_new_transaction(destination).addErrback(chain)
+
+        yield deferred
+
+    @defer.inlineCallbacks
+    @log_function
+    def _attempt_new_transaction(self, destination):
+        if destination in self.pending_transactions:
+            # XXX: pending_transactions can get stuck on by a never-ending
+            # request at which point pending_pdus_by_dest just keeps growing.
+            # we need application-layer timeouts of some flavour of these
+            # requests
+            logger.info(
+                "TX [%s] Transaction already in progress",
+                destination
+            )
+            return
+
+        logger.info("TX [%s] _attempt_new_transaction", destination)
+
+        # list of (pending_pdu, deferred, order)
+        pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
+        pending_edus = self.pending_edus_by_dest.pop(destination, [])
+        pending_failures = self.pending_failures_by_dest.pop(destination, [])
+
+        if pending_pdus:
+            logger.info("TX [%s] len(pending_pdus_by_dest[dest]) = %d",
+                        destination, len(pending_pdus))
+
+        if not pending_pdus and not pending_edus and not pending_failures:
+            logger.info("TX [%s] Nothing to send", destination)
+            return
+
+        # Sort based on the order field
+        pending_pdus.sort(key=lambda t: t[2])
+
+        pdus = [x[0] for x in pending_pdus]
+        edus = [x[0] for x in pending_edus]
+        failures = [x[0].get_dict() for x in pending_failures]
+        deferreds = [
+            x[1]
+            for x in pending_pdus + pending_edus + pending_failures
+        ]
+
+        try:
+            self.pending_transactions[destination] = 1
+
+            limiter = yield get_retry_limiter(
+                destination,
+                self._clock,
+                self.store,
+            )
+
+            logger.debug(
+                "TX [%s] Attempting new transaction"
+                " (pdus: %d, edus: %d, failures: %d)",
+                destination,
+                len(pending_pdus),
+                len(pending_edus),
+                len(pending_failures)
+            )
+
+            logger.debug("TX [%s] Persisting transaction...", destination)
+
+            transaction = Transaction.create_new(
+                origin_server_ts=int(self._clock.time_msec()),
+                transaction_id=str(self._next_txn_id),
+                origin=self.server_name,
+                destination=destination,
+                pdus=pdus,
+                edus=edus,
+                pdu_failures=failures,
+            )
+
+            self._next_txn_id += 1
+
+            yield self.transaction_actions.prepare_to_send(transaction)
+
+            logger.debug("TX [%s] Persisted transaction", destination)
+            logger.info(
+                "TX [%s] Sending transaction [%s]",
+                destination,
+                transaction.transaction_id,
+            )
+
+            with limiter:
+                # Actually send the transaction
+
+                # FIXME (erikj): This is a bit of a hack to make the Pdu age
+                # keys work
+                def json_data_cb():
+                    data = transaction.get_dict()
+                    now = int(self._clock.time_msec())
+                    if "pdus" in data:
+                        for p in data["pdus"]:
+                            if "age_ts" in p:
+                                unsigned = p.setdefault("unsigned", {})
+                                unsigned["age"] = now - int(p["age_ts"])
+                                del p["age_ts"]
+                    return data
+
+                try:
+                    response = yield self.transport_layer.send_transaction(
+                        transaction, json_data_cb
+                    )
+                    code = 200
+
+                    if response:
+                        for e_id, r in response.get("pdus", {}).items():
+                            if "error" in r:
+                                logger.warn(
+                                    "Transaction returned error for %s: %s",
+                                    e_id, r,
+                                )
+                except HttpResponseException as e:
+                    code = e.code
+                    response = e.response
+
+                logger.info("TX [%s] got %d response", destination, code)
+
+                logger.debug("TX [%s] Sent transaction", destination)
+                logger.debug("TX [%s] Marking as delivered...", destination)
+
+            yield self.transaction_actions.delivered(
+                transaction, code, response
+            )
+
+            logger.debug("TX [%s] Marked as delivered", destination)
+
+            logger.debug("TX [%s] Yielding to callbacks...", destination)
+
+            for deferred in deferreds:
+                if code == 200:
+                    deferred.callback(None)
+                else:
+                    deferred.errback(RuntimeError("Got status %d" % code))
+
+                # Ensures we don't continue until all callbacks on that
+                # deferred have fired
+                try:
+                    yield deferred
+                except:
+                    pass
+
+            logger.debug("TX [%s] Yielded to callbacks", destination)
+        except NotRetryingDestination:
+            logger.info(
+                "TX [%s] not ready for retry yet - "
+                "dropping transaction for now",
+                destination,
+            )
+        except RuntimeError as e:
+            # We capture this here as there as nothing actually listens
+            # for this finishing functions deferred.
+            logger.warn(
+                "TX [%s] Problem in _attempt_transaction: %s",
+                destination,
+                e,
+            )
+        except Exception as e:
+            # We capture this here as there as nothing actually listens
+            # for this finishing functions deferred.
+            logger.warn(
+                "TX [%s] Problem in _attempt_transaction: %s",
+                destination,
+                e,
+            )
+
+            for deferred in deferreds:
+                if not deferred.called:
+                    deferred.errback(e)
+
+        finally:
+            # We want to be *very* sure we delete this after we stop processing
+            self.pending_transactions.pop(destination, None)
+
+            # Check to see if there is anything else to send.
+            self._attempt_new_transaction(destination)
diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py
index 6800ac46c5..2a671b9aec 100644
--- a/synapse/federation/transport/__init__.py
+++ b/synapse/federation/transport/__init__.py
@@ -24,6 +24,8 @@ communicate over a different (albeit still reliable) protocol.
 from .server import TransportLayerServer
 from .client import TransportLayerClient
 
+from synapse.util.ratelimitutils import FederationRateLimiter
+
 
 class TransportLayer(TransportLayerServer, TransportLayerClient):
     """This is a basic implementation of the transport layer that translates
@@ -55,8 +57,18 @@ class TransportLayer(TransportLayerServer, TransportLayerClient):
                 send requests
         """
         self.keyring = homeserver.get_keyring()
+        self.clock = homeserver.get_clock()
         self.server_name = server_name
         self.server = server
         self.client = client
         self.request_handler = None
         self.received_handler = None
+
+        self.ratelimiter = FederationRateLimiter(
+            self.clock,
+            window_size=homeserver.config.federation_rc_window_size,
+            sleep_limit=homeserver.config.federation_rc_sleep_limit,
+            sleep_msec=homeserver.config.federation_rc_sleep_delay,
+            reject_limit=homeserver.config.federation_rc_reject_limit,
+            concurrent_requests=homeserver.config.federation_rc_concurrent,
+        )
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index e634a3a213..80d03012b7 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -19,7 +19,6 @@ from synapse.api.urls import FEDERATION_PREFIX as PREFIX
 from synapse.util.logutils import log_function
 
 import logging
-import json
 
 
 logger = logging.getLogger(__name__)
@@ -129,7 +128,7 @@ class TransportLayerClient(object):
         # generated by the json_data_callback.
         json_data = transaction.get_dict()
 
-        code, response = yield self.client.put_json(
+        response = yield self.client.put_json(
             transaction.destination,
             path=PREFIX + "/send/%s/" % transaction.transaction_id,
             data=json_data,
@@ -137,79 +136,105 @@ class TransportLayerClient(object):
         )
 
         logger.debug(
-            "send_data dest=%s, txid=%s, got response: %d",
-            transaction.destination, transaction.transaction_id, code
+            "send_data dest=%s, txid=%s, got response: 200",
+            transaction.destination, transaction.transaction_id,
         )
 
-        defer.returnValue((code, response))
+        defer.returnValue(response)
 
     @defer.inlineCallbacks
     @log_function
     def make_query(self, destination, query_type, args, retry_on_dns_fail):
         path = PREFIX + "/query/%s" % query_type
 
-        response = yield self.client.get_json(
+        content = yield self.client.get_json(
             destination=destination,
             path=path,
             args=args,
             retry_on_dns_fail=retry_on_dns_fail,
         )
 
-        defer.returnValue(response)
+        defer.returnValue(content)
 
     @defer.inlineCallbacks
     @log_function
     def make_join(self, destination, room_id, user_id, retry_on_dns_fail=True):
         path = PREFIX + "/make_join/%s/%s" % (room_id, user_id)
 
-        response = yield self.client.get_json(
+        content = yield self.client.get_json(
             destination=destination,
             path=path,
             retry_on_dns_fail=retry_on_dns_fail,
         )
 
-        defer.returnValue(response)
+        defer.returnValue(content)
 
     @defer.inlineCallbacks
     @log_function
     def send_join(self, destination, room_id, event_id, content):
         path = PREFIX + "/send_join/%s/%s" % (room_id, event_id)
 
-        code, content = yield self.client.put_json(
+        response = yield self.client.put_json(
             destination=destination,
             path=path,
             data=content,
         )
 
-        if not 200 <= code < 300:
-            raise RuntimeError("Got %d from send_join", code)
-
-        defer.returnValue(json.loads(content))
+        defer.returnValue(response)
 
     @defer.inlineCallbacks
     @log_function
     def send_invite(self, destination, room_id, event_id, content):
         path = PREFIX + "/invite/%s/%s" % (room_id, event_id)
 
-        code, content = yield self.client.put_json(
+        response = yield self.client.put_json(
             destination=destination,
             path=path,
             data=content,
         )
 
-        if not 200 <= code < 300:
-            raise RuntimeError("Got %d from send_invite", code)
-
-        defer.returnValue(json.loads(content))
+        defer.returnValue(response)
 
     @defer.inlineCallbacks
     @log_function
     def get_event_auth(self, destination, room_id, event_id):
         path = PREFIX + "/event_auth/%s/%s" % (room_id, event_id)
 
-        response = yield self.client.get_json(
+        content = yield self.client.get_json(
             destination=destination,
             path=path,
         )
 
-        defer.returnValue(response)
+        defer.returnValue(content)
+
+    @defer.inlineCallbacks
+    @log_function
+    def send_query_auth(self, destination, room_id, event_id, content):
+        path = PREFIX + "/query_auth/%s/%s" % (room_id, event_id)
+
+        content = yield self.client.post_json(
+            destination=destination,
+            path=path,
+            data=content,
+        )
+
+        defer.returnValue(content)
+
+    @defer.inlineCallbacks
+    @log_function
+    def get_missing_events(self, destination, room_id, earliest_events,
+                           latest_events, limit, min_depth):
+        path = PREFIX + "/get_missing_events/%s" % (room_id,)
+
+        content = yield self.client.post_json(
+            destination=destination,
+            path=path,
+            data={
+                "limit": int(limit),
+                "min_depth": int(min_depth),
+                "earliest_events": earliest_events,
+                "latest_events": latest_events,
+            }
+        )
+
+        defer.returnValue(content)
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index a380a6910b..ece6dbcf62 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -20,7 +20,7 @@ from synapse.api.errors import Codes, SynapseError
 from synapse.util.logutils import log_function
 
 import logging
-import json
+import simplejson as json
 import re
 
 
@@ -42,7 +42,7 @@ class TransportLayerServer(object):
         content = None
         origin = None
 
-        if request.method == "PUT":
+        if request.method in ["PUT", "POST"]:
             # TODO: Handle other method types? other content types?
             try:
                 content_bytes = request.content.read()
@@ -98,15 +98,23 @@ class TransportLayerServer(object):
         def new_handler(request, *args, **kwargs):
             try:
                 (origin, content) = yield self._authenticate_request(request)
-                response = yield handler(
-                    origin, content, request.args, *args, **kwargs
-                )
+                with self.ratelimiter.ratelimit(origin) as d:
+                    yield d
+                    response = yield handler(
+                        origin, content, request.args, *args, **kwargs
+                    )
             except:
                 logger.exception("_authenticate_request failed")
                 raise
             defer.returnValue(response)
         return new_handler
 
+    def rate_limit_origin(self, handler):
+        def new_handler(origin, *args, **kwargs):
+            response = yield handler(origin, *args, **kwargs)
+            defer.returnValue(response)
+        return new_handler()
+
     @log_function
     def register_received_handler(self, handler):
         """ Register a handler that will be fired when we receive data.
@@ -235,6 +243,28 @@ class TransportLayerServer(object):
             )
         )
 
+        self.server.register_path(
+            "POST",
+            re.compile("^" + PREFIX + "/query_auth/([^/]*)/([^/]*)$"),
+            self._with_authentication(
+                lambda origin, content, query, context, event_id:
+                self._on_query_auth_request(
+                    origin, content, event_id,
+                )
+            )
+        )
+
+        self.server.register_path(
+            "POST",
+            re.compile("^" + PREFIX + "/get_missing_events/([^/]*)/?$"),
+            self._with_authentication(
+                lambda origin, content, query, room_id:
+                self._get_missing_events(
+                    origin, content, room_id,
+                )
+            )
+        )
+
     @defer.inlineCallbacks
     @log_function
     def _on_send_request(self, origin, content, query, transaction_id):
@@ -325,3 +355,31 @@ class TransportLayerServer(object):
         )
 
         defer.returnValue((200, content))
+
+    @defer.inlineCallbacks
+    @log_function
+    def _on_query_auth_request(self, origin, content, event_id):
+        new_content = yield self.request_handler.on_query_auth_request(
+            origin, content, event_id
+        )
+
+        defer.returnValue((200, new_content))
+
+    @defer.inlineCallbacks
+    @log_function
+    def _get_missing_events(self, origin, content, room_id):
+        limit = int(content.get("limit", 10))
+        min_depth = int(content.get("min_depth", 0))
+        earliest_events = content.get("earliest_events", [])
+        latest_events = content.get("latest_events", [])
+
+        content = yield self.request_handler.on_get_missing_events(
+            origin,
+            room_id=room_id,
+            earliest_events=earliest_events,
+            latest_events=latest_events,
+            min_depth=min_depth,
+            limit=limit,
+        )
+
+        defer.returnValue((200, content))