summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2018-09-19 15:08:36 +0100
committerErik Johnston <erik@matrix.org>2018-09-19 15:08:36 +0100
commitce846bb620e9866531f4a49f183fdd3cbb3f1eeb (patch)
treeed38561118af50948598967e17ad6a7f1fee8a8c /synapse
parentNewsfile (diff)
parentMerge pull request #3910 from matrix-org/erikj/update_timeout (diff)
downloadsynapse-ce846bb620e9866531f4a49f183fdd3cbb3f1eeb.tar.xz
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/faster_typing
Diffstat (limited to 'synapse')
-rw-r--r--synapse/__init__.py12
-rw-r--r--synapse/api/filtering.py1
-rwxr-xr-xsynapse/app/homeserver.py18
-rw-r--r--synapse/appservice/api.py13
-rw-r--r--synapse/config/homeserver.py2
-rw-r--r--synapse/config/jwt_config.py (renamed from synapse/config/jwt.py)0
-rw-r--r--synapse/config/logger.py17
-rw-r--r--synapse/crypto/context_factory.py2
-rw-r--r--synapse/crypto/keyclient.py8
-rw-r--r--synapse/crypto/keyring.py33
-rw-r--r--synapse/events/__init__.py5
-rw-r--r--synapse/federation/federation_base.py158
-rw-r--r--synapse/federation/federation_client.py8
-rw-r--r--synapse/federation/federation_server.py30
-rw-r--r--synapse/federation/persistence.py8
-rw-r--r--synapse/federation/transaction_queue.py12
-rw-r--r--synapse/federation/transport/client.py5
-rw-r--r--synapse/federation/transport/server.py26
-rw-r--r--synapse/handlers/auth.py8
-rw-r--r--synapse/handlers/directory.py19
-rw-r--r--synapse/handlers/e2e_keys.py5
-rw-r--r--synapse/handlers/federation.py37
-rw-r--r--synapse/handlers/pagination.py9
-rw-r--r--synapse/handlers/register.py1
-rw-r--r--synapse/handlers/room_list.py2
-rw-r--r--synapse/handlers/room_member.py5
-rw-r--r--synapse/handlers/search.py14
-rw-r--r--synapse/handlers/sync.py168
-rw-r--r--synapse/http/__init__.py4
-rw-r--r--synapse/http/client.py96
-rw-r--r--synapse/http/endpoint.py13
-rw-r--r--synapse/http/matrixfederationclient.py590
-rw-r--r--synapse/http/request_metrics.py22
-rw-r--r--synapse/http/site.py19
-rw-r--r--synapse/metrics/__init__.py108
-rw-r--r--synapse/notifier.py21
-rw-r--r--synapse/push/httppusher.py7
-rw-r--r--synapse/push/mailer.py7
-rw-r--r--synapse/python_dependencies.py5
-rw-r--r--synapse/replication/slave/storage/devices.py23
-rw-r--r--synapse/replication/tcp/protocol.py24
-rw-r--r--synapse/replication/tcp/streams.py2
-rw-r--r--synapse/rest/client/v1/admin.py9
-rw-r--r--synapse/rest/client/v1/events.py12
-rw-r--r--synapse/rest/client/v1/initial_sync.py2
-rw-r--r--synapse/rest/client/v1/login.py44
-rw-r--r--synapse/rest/client/v1/push_rule.py24
-rw-r--r--synapse/rest/client/v1/pusher.py4
-rw-r--r--synapse/rest/client/v1/room.py14
-rw-r--r--synapse/rest/client/v1/voip.py6
-rw-r--r--synapse/rest/client/v2_alpha/sync.py53
-rw-r--r--synapse/rest/client/v2_alpha/thirdparty.py4
-rw-r--r--synapse/rest/key/v1/server_key_resource.py2
-rw-r--r--synapse/rest/key/v2/__init__.py4
-rw-r--r--synapse/rest/key/v2/remote_key_resource.py6
-rw-r--r--synapse/rest/media/v0/content_repository.py4
-rw-r--r--synapse/rest/media/v1/_base.py30
-rw-r--r--synapse/rest/media/v1/download_resource.py12
-rw-r--r--synapse/rest/media/v1/media_repository.py29
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py10
-rw-r--r--synapse/storage/directory.py1
-rw-r--r--synapse/storage/events.py48
-rw-r--r--synapse/storage/keys.py1
-rw-r--r--synapse/storage/monthly_active_users.py23
-rw-r--r--synapse/storage/roommember.py65
-rw-r--r--synapse/util/async_helpers.py64
-rw-r--r--synapse/util/manhole.py44
-rw-r--r--synapse/util/metrics.py22
68 files changed, 1415 insertions, 689 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py
index e62901b761..9dbe0b9f10 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -17,4 +17,14 @@
 """ This is a reference implementation of a Matrix home server.
 """
 
-__version__ = "0.33.3"
+try:
+    from twisted.internet import protocol
+    from twisted.internet.protocol import Factory
+    from twisted.names.dns import DNSDatagramProtocol
+    protocol.Factory.noisy = False
+    Factory.noisy = False
+    DNSDatagramProtocol.noisy = False
+except ImportError:
+    pass
+
+__version__ = "0.33.4"
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 186831e118..a31a9a17e0 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -251,6 +251,7 @@ class FilterCollection(object):
             "include_leave", False
         )
         self.event_fields = filter_json.get("event_fields", [])
+        self.event_format = filter_json.get("event_format", "client")
 
     def __repr__(self):
         return "<FilterCollection %s>" % (json.dumps(self._filter_json),)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 3eb5b663de..3241ded188 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -307,6 +307,10 @@ class SynapseHomeServer(HomeServer):
 # Gauges to expose monthly active user control metrics
 current_mau_gauge = Gauge("synapse_admin_mau:current", "Current MAU")
 max_mau_gauge = Gauge("synapse_admin_mau:max", "MAU Limit")
+registered_reserved_users_mau_gauge = Gauge(
+    "synapse_admin_mau:registered_reserved_users",
+    "Registered users with reserved threepids"
+)
 
 
 def setup(config_options):
@@ -453,6 +457,10 @@ def run(hs):
         stats["homeserver"] = hs.config.server_name
         stats["timestamp"] = now
         stats["uptime_seconds"] = uptime
+        version = sys.version_info
+        stats["python_version"] = "{}.{}.{}".format(
+            version.major, version.minor, version.micro
+        )
         stats["total_users"] = yield hs.get_datastore().count_all_users()
 
         total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users()
@@ -531,10 +539,14 @@ def run(hs):
 
     @defer.inlineCallbacks
     def generate_monthly_active_users():
-        count = 0
+        current_mau_count = 0
+        reserved_count = 0
+        store = hs.get_datastore()
         if hs.config.limit_usage_by_mau:
-            count = yield hs.get_datastore().get_monthly_active_count()
-        current_mau_gauge.set(float(count))
+            current_mau_count = yield store.get_monthly_active_count()
+            reserved_count = yield store.get_registered_reserved_users_count()
+        current_mau_gauge.set(float(current_mau_count))
+        registered_reserved_users_mau_gauge.set(float(reserved_count))
         max_mau_gauge.set(float(hs.config.max_mau_value))
 
     hs.get_datastore().initialise_reserved_users(
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 6980e5890e..9ccc5a80fc 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -13,7 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import urllib
+
+from six.moves import urllib
 
 from prometheus_client import Counter
 
@@ -98,7 +99,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     def query_user(self, service, user_id):
         if service.url is None:
             defer.returnValue(False)
-        uri = service.url + ("/users/%s" % urllib.quote(user_id))
+        uri = service.url + ("/users/%s" % urllib.parse.quote(user_id))
         response = None
         try:
             response = yield self.get_json(uri, {
@@ -119,7 +120,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     def query_alias(self, service, alias):
         if service.url is None:
             defer.returnValue(False)
-        uri = service.url + ("/rooms/%s" % urllib.quote(alias))
+        uri = service.url + ("/rooms/%s" % urllib.parse.quote(alias))
         response = None
         try:
             response = yield self.get_json(uri, {
@@ -153,7 +154,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             service.url,
             APP_SERVICE_PREFIX,
             kind,
-            urllib.quote(protocol)
+            urllib.parse.quote(protocol)
         )
         try:
             response = yield self.get_json(uri, fields)
@@ -188,7 +189,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             uri = "%s%s/thirdparty/protocol/%s" % (
                 service.url,
                 APP_SERVICE_PREFIX,
-                urllib.quote(protocol)
+                urllib.parse.quote(protocol)
             )
             try:
                 info = yield self.get_json(uri, {})
@@ -228,7 +229,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         txn_id = str(txn_id)
 
         uri = service.url + ("/transactions/%s" %
-                             urllib.quote(txn_id))
+                             urllib.parse.quote(txn_id))
         try:
             yield self.put_json(
                 uri=uri,
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 2fd9c48abf..b8d5690f2b 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -21,7 +21,7 @@ from .consent_config import ConsentConfig
 from .database import DatabaseConfig
 from .emailconfig import EmailConfig
 from .groups import GroupsConfig
-from .jwt import JWTConfig
+from .jwt_config import JWTConfig
 from .key import KeyConfig
 from .logger import LoggingConfig
 from .metrics import MetricsConfig
diff --git a/synapse/config/jwt.py b/synapse/config/jwt_config.py
index 51e7f7e003..51e7f7e003 100644
--- a/synapse/config/jwt.py
+++ b/synapse/config/jwt_config.py
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 3f187adfc8..e9a936118d 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -227,7 +227,22 @@ def setup_logging(config, use_worker_options=False):
     #
     # However this may not be too much of a problem if we are just writing to a file.
     observer = STDLibLogObserver()
+
+    def _log(event):
+
+        if "log_text" in event:
+            if event["log_text"].startswith("DNSDatagramProtocol starting on "):
+                return
+
+            if event["log_text"].startswith("(UDP Port "):
+                return
+
+            if event["log_text"].startswith("Timing out client"):
+                return
+
+        return observer(event)
+
     globalLogBeginner.beginLoggingTo(
-        [observer],
+        [_log],
         redirectStandardIO=not config.no_redirect_stdio,
     )
diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py
index 1a391adec1..02b76dfcfb 100644
--- a/synapse/crypto/context_factory.py
+++ b/synapse/crypto/context_factory.py
@@ -123,6 +123,6 @@ class ClientTLSOptionsFactory(object):
 
     def get_options(self, host):
         return ClientTLSOptions(
-            host.decode('utf-8'),
+            host,
             CertificateOptions(verify=False).getContext()
         )
diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py
index e94400b8e2..57d4665e84 100644
--- a/synapse/crypto/keyclient.py
+++ b/synapse/crypto/keyclient.py
@@ -50,7 +50,7 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1):
                 defer.returnValue((server_response, server_certificate))
         except SynapseKeyClientError as e:
             logger.warn("Error getting key for %r: %s", server_name, e)
-            if e.status.startswith("4"):
+            if e.status.startswith(b"4"):
                 # Don't retry for 4xx responses.
                 raise IOError("Cannot get key for %r" % server_name)
         except (ConnectError, DomainError) as e:
@@ -82,6 +82,12 @@ class SynapseKeyClientProtocol(HTTPClient):
         self._peer = self.transport.getPeer()
         logger.debug("Connected to %s", self._peer)
 
+        if not isinstance(self.path, bytes):
+            self.path = self.path.encode('ascii')
+
+        if not isinstance(self.host, bytes):
+            self.host = self.host.encode('ascii')
+
         self.sendCommand(b"GET", self.path)
         if self.host:
             self.sendHeader(b"Host", self.host)
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 30e2742102..d89f94c219 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -16,9 +16,10 @@
 
 import hashlib
 import logging
-import urllib
 from collections import namedtuple
 
+from six.moves import urllib
+
 from signedjson.key import (
     decode_verify_key_bytes,
     encode_verify_key_base64,
@@ -40,6 +41,7 @@ from synapse.api.errors import Codes, SynapseError
 from synapse.crypto.keyclient import fetch_server_key
 from synapse.util import logcontext, unwrapFirstError
 from synapse.util.logcontext import (
+    LoggingContext,
     PreserveLoggingContext,
     preserve_fn,
     run_in_background,
@@ -216,23 +218,34 @@ class Keyring(object):
             servers have completed. Follows the synapse rules of logcontext
             preservation.
         """
+        loop_count = 1
         while True:
             wait_on = [
-                self.key_downloads[server_name]
+                (server_name, self.key_downloads[server_name])
                 for server_name in server_names
                 if server_name in self.key_downloads
             ]
-            if wait_on:
-                with PreserveLoggingContext():
-                    yield defer.DeferredList(wait_on)
-            else:
+            if not wait_on:
                 break
+            logger.info(
+                "Waiting for existing lookups for %s to complete [loop %i]",
+                [w[0] for w in wait_on], loop_count,
+            )
+            with PreserveLoggingContext():
+                yield defer.DeferredList((w[1] for w in wait_on))
+
+            loop_count += 1
+
+        ctx = LoggingContext.current_context()
 
         def rm(r, server_name_):
-            self.key_downloads.pop(server_name_, None)
+            with PreserveLoggingContext(ctx):
+                logger.debug("Releasing key lookup lock on %s", server_name_)
+                self.key_downloads.pop(server_name_, None)
             return r
 
         for server_name, deferred in server_to_deferred.items():
+            logger.debug("Got key lookup lock on %s", server_name)
             self.key_downloads[server_name] = deferred
             deferred.addBoth(rm, server_name)
 
@@ -432,7 +445,7 @@ class Keyring(object):
         # an incoming request.
         query_response = yield self.client.post_json(
             destination=perspective_name,
-            path=b"/_matrix/key/v2/query",
+            path="/_matrix/key/v2/query",
             data={
                 u"server_keys": {
                     server_name: {
@@ -513,8 +526,8 @@ class Keyring(object):
 
             (response, tls_certificate) = yield fetch_server_key(
                 server_name, self.hs.tls_client_options_factory,
-                path=(b"/_matrix/key/v2/server/%s" % (
-                    urllib.quote(requested_key_id),
+                path=("/_matrix/key/v2/server/%s" % (
+                    urllib.parse.quote(requested_key_id),
                 )).encode("ascii"),
             )
 
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 51f9084b90..b782af6308 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import six
+
 from synapse.util.caches import intern_dict
 from synapse.util.frozenutils import freeze
 
@@ -147,6 +149,9 @@ class EventBase(object):
     def items(self):
         return list(self._event_dict.items())
 
+    def keys(self):
+        return six.iterkeys(self._event_dict)
+
 
 class FrozenEvent(EventBase):
     def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None):
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index c11798093d..b7ad729c63 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -13,17 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from collections import namedtuple
 
 import six
 
 from twisted.internet import defer
+from twisted.internet.defer import DeferredList
 
-from synapse.api.constants import MAX_DEPTH
+from synapse.api.constants import MAX_DEPTH, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
 from synapse.crypto.event_signing import check_event_content_hash
 from synapse.events import FrozenEvent
 from synapse.events.utils import prune_event
 from synapse.http.servlet import assert_params_in_dict
+from synapse.types import get_domain_from_id
 from synapse.util import logcontext, unwrapFirstError
 
 logger = logging.getLogger(__name__)
@@ -133,34 +136,45 @@ class FederationBase(object):
               * throws a SynapseError if the signature check failed.
             The deferreds run their callbacks in the sentinel logcontext.
         """
-
-        redacted_pdus = [
-            prune_event(pdu)
-            for pdu in pdus
-        ]
-
-        deferreds = self.keyring.verify_json_objects_for_server([
-            (p.origin, p.get_pdu_json())
-            for p in redacted_pdus
-        ])
+        deferreds = _check_sigs_on_pdus(self.keyring, pdus)
 
         ctx = logcontext.LoggingContext.current_context()
 
-        def callback(_, pdu, redacted):
+        def callback(_, pdu):
             with logcontext.PreserveLoggingContext(ctx):
                 if not check_event_content_hash(pdu):
-                    logger.warn(
-                        "Event content has been tampered, redacting %s: %s",
-                        pdu.event_id, pdu.get_pdu_json()
-                    )
-                    return redacted
+                    # let's try to distinguish between failures because the event was
+                    # redacted (which are somewhat expected) vs actual ball-tampering
+                    # incidents.
+                    #
+                    # This is just a heuristic, so we just assume that if the keys are
+                    # about the same between the redacted and received events, then the
+                    # received event was probably a redacted copy (but we then use our
+                    # *actual* redacted copy to be on the safe side.)
+                    redacted_event = prune_event(pdu)
+                    if (
+                        set(redacted_event.keys()) == set(pdu.keys()) and
+                        set(six.iterkeys(redacted_event.content))
+                            == set(six.iterkeys(pdu.content))
+                    ):
+                        logger.info(
+                            "Event %s seems to have been redacted; using our redacted "
+                            "copy",
+                            pdu.event_id,
+                        )
+                    else:
+                        logger.warning(
+                            "Event %s content has been tampered, redacting",
+                            pdu.event_id, pdu.get_pdu_json(),
+                        )
+                    return redacted_event
 
                 if self.spam_checker.check_event_for_spam(pdu):
                     logger.warn(
                         "Event contains spam, redacting %s: %s",
                         pdu.event_id, pdu.get_pdu_json()
                     )
-                    return redacted
+                    return prune_event(pdu)
 
                 return pdu
 
@@ -168,21 +182,121 @@ class FederationBase(object):
             failure.trap(SynapseError)
             with logcontext.PreserveLoggingContext(ctx):
                 logger.warn(
-                    "Signature check failed for %s",
-                    pdu.event_id,
+                    "Signature check failed for %s: %s",
+                    pdu.event_id, failure.getErrorMessage(),
                 )
             return failure
 
-        for deferred, pdu, redacted in zip(deferreds, pdus, redacted_pdus):
+        for deferred, pdu in zip(deferreds, pdus):
             deferred.addCallbacks(
                 callback, errback,
-                callbackArgs=[pdu, redacted],
+                callbackArgs=[pdu],
                 errbackArgs=[pdu],
             )
 
         return deferreds
 
 
+class PduToCheckSig(namedtuple("PduToCheckSig", [
+    "pdu", "redacted_pdu_json", "event_id_domain", "sender_domain", "deferreds",
+])):
+    pass
+
+
+def _check_sigs_on_pdus(keyring, pdus):
+    """Check that the given events are correctly signed
+
+    Args:
+        keyring (synapse.crypto.Keyring): keyring object to do the checks
+        pdus (Collection[EventBase]): the events to be checked
+
+    Returns:
+        List[Deferred]: a Deferred for each event in pdus, which will either succeed if
+           the signatures are valid, or fail (with a SynapseError) if not.
+    """
+
+    # (currently this is written assuming the v1 room structure; we'll probably want a
+    # separate function for checking v2 rooms)
+
+    # we want to check that the event is signed by:
+    #
+    # (a) the server which created the event_id
+    #
+    # (b) the sender's server.
+    #
+    #     - except in the case of invites created from a 3pid invite, which are exempt
+    #     from this check, because the sender has to match that of the original 3pid
+    #     invite, but the event may come from a different HS, for reasons that I don't
+    #     entirely grok (why do the senders have to match? and if they do, why doesn't the
+    #     joining server ask the inviting server to do the switcheroo with
+    #     exchange_third_party_invite?).
+    #
+    #     That's pretty awful, since redacting such an invite will render it invalid
+    #     (because it will then look like a regular invite without a valid signature),
+    #     and signatures are *supposed* to be valid whether or not an event has been
+    #     redacted. But this isn't the worst of the ways that 3pid invites are broken.
+    #
+    # let's start by getting the domain for each pdu, and flattening the event back
+    # to JSON.
+    pdus_to_check = [
+        PduToCheckSig(
+            pdu=p,
+            redacted_pdu_json=prune_event(p).get_pdu_json(),
+            event_id_domain=get_domain_from_id(p.event_id),
+            sender_domain=get_domain_from_id(p.sender),
+            deferreds=[],
+        )
+        for p in pdus
+    ]
+
+    # first make sure that the event is signed by the event_id's domain
+    deferreds = keyring.verify_json_objects_for_server([
+        (p.event_id_domain, p.redacted_pdu_json)
+        for p in pdus_to_check
+    ])
+
+    for p, d in zip(pdus_to_check, deferreds):
+        p.deferreds.append(d)
+
+    # now let's look for events where the sender's domain is different to the
+    # event id's domain (normally only the case for joins/leaves), and add additional
+    # checks.
+    pdus_to_check_sender = [
+        p for p in pdus_to_check
+        if p.sender_domain != p.event_id_domain and not _is_invite_via_3pid(p.pdu)
+    ]
+
+    more_deferreds = keyring.verify_json_objects_for_server([
+        (p.sender_domain, p.redacted_pdu_json)
+        for p in pdus_to_check_sender
+    ])
+
+    for p, d in zip(pdus_to_check_sender, more_deferreds):
+        p.deferreds.append(d)
+
+    # replace lists of deferreds with single Deferreds
+    return [_flatten_deferred_list(p.deferreds) for p in pdus_to_check]
+
+
+def _flatten_deferred_list(deferreds):
+    """Given a list of one or more deferreds, either return the single deferred, or
+    combine into a DeferredList.
+    """
+    if len(deferreds) > 1:
+        return DeferredList(deferreds, fireOnOneErrback=True, consumeErrors=True)
+    else:
+        assert len(deferreds) == 1
+        return deferreds[0]
+
+
+def _is_invite_via_3pid(event):
+    return (
+        event.type == EventTypes.Member
+        and event.membership == Membership.INVITE
+        and "third_party_invite" in event.content
+    )
+
+
 def event_from_pdu_json(pdu_json, outlier=False):
     """Construct a FrozenEvent from an event json received over federation
 
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index c9f3c2d352..fe67b2ff42 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -271,10 +271,10 @@ class FederationClient(FederationBase):
                     event_id, destination, e,
                 )
             except NotRetryingDestination as e:
-                logger.info(e.message)
+                logger.info(str(e))
                 continue
             except FederationDeniedError as e:
-                logger.info(e.message)
+                logger.info(str(e))
                 continue
             except Exception as e:
                 pdu_attempts[destination] = now
@@ -510,7 +510,7 @@ class FederationClient(FederationBase):
                 else:
                     logger.warn(
                         "Failed to %s via %s: %i %s",
-                        description, destination, e.code, e.message,
+                        description, destination, e.code, e.args[0],
                     )
             except Exception:
                 logger.warn(
@@ -875,7 +875,7 @@ class FederationClient(FederationBase):
             except Exception as e:
                 logger.exception(
                     "Failed to send_third_party_invite via %s: %s",
-                    destination, e.message
+                    destination, str(e)
                 )
 
         raise RuntimeError("Failed to send to any server.")
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3e0cd294a1..dbee404ea7 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -99,7 +99,7 @@ class FederationServer(FederationBase):
 
     @defer.inlineCallbacks
     @log_function
-    def on_incoming_transaction(self, transaction_data):
+    def on_incoming_transaction(self, origin, transaction_data):
         # keep this as early as possible to make the calculated origin ts as
         # accurate as possible.
         request_time = self._clock.time_msec()
@@ -108,34 +108,33 @@ class FederationServer(FederationBase):
 
         if not transaction.transaction_id:
             raise Exception("Transaction missing transaction_id")
-        if not transaction.origin:
-            raise Exception("Transaction missing origin")
 
         logger.debug("[%s] Got transaction", transaction.transaction_id)
 
         # use a linearizer to ensure that we don't process the same transaction
         # multiple times in parallel.
         with (yield self._transaction_linearizer.queue(
-                (transaction.origin, transaction.transaction_id),
+                (origin, transaction.transaction_id),
         )):
             result = yield self._handle_incoming_transaction(
-                transaction, request_time,
+                origin, transaction, request_time,
             )
 
         defer.returnValue(result)
 
     @defer.inlineCallbacks
-    def _handle_incoming_transaction(self, transaction, request_time):
+    def _handle_incoming_transaction(self, origin, transaction, request_time):
         """ Process an incoming transaction and return the HTTP response
 
         Args:
+            origin (unicode): the server making the request
             transaction (Transaction): incoming transaction
             request_time (int): timestamp that the HTTP request arrived at
 
         Returns:
             Deferred[(int, object)]: http response code and body
         """
-        response = yield self.transaction_actions.have_responded(transaction)
+        response = yield self.transaction_actions.have_responded(origin, transaction)
 
         if response:
             logger.debug(
@@ -149,7 +148,7 @@ class FederationServer(FederationBase):
 
         received_pdus_counter.inc(len(transaction.pdus))
 
-        origin_host, _ = parse_server_name(transaction.origin)
+        origin_host, _ = parse_server_name(origin)
 
         pdus_by_room = {}
 
@@ -190,7 +189,7 @@ class FederationServer(FederationBase):
                 event_id = pdu.event_id
                 try:
                     yield self._handle_received_pdu(
-                        transaction.origin, pdu
+                        origin, pdu
                     )
                     pdu_results[event_id] = {}
                 except FederationError as e:
@@ -212,7 +211,7 @@ class FederationServer(FederationBase):
         if hasattr(transaction, "edus"):
             for edu in (Edu(**x) for x in transaction.edus):
                 yield self.received_edu(
-                    transaction.origin,
+                    origin,
                     edu.edu_type,
                     edu.content
                 )
@@ -224,6 +223,7 @@ class FederationServer(FederationBase):
         logger.debug("Returning: %s", str(response))
 
         yield self.transaction_actions.set_response(
+            origin,
             transaction,
             200, response
         )
@@ -838,9 +838,9 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry):
             )
 
         return self._send_edu(
-                edu_type=edu_type,
-                origin=origin,
-                content=content,
+            edu_type=edu_type,
+            origin=origin,
+            content=content,
         )
 
     def on_query(self, query_type, args):
@@ -851,6 +851,6 @@ class ReplicationFederationHandlerRegistry(FederationHandlerRegistry):
             return handler(args)
 
         return self._get_query_client(
-                query_type=query_type,
-                args=args,
+            query_type=query_type,
+            args=args,
         )
diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py
index 9146215c21..74ffd13b4f 100644
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
@@ -36,7 +36,7 @@ class TransactionActions(object):
         self.store = datastore
 
     @log_function
-    def have_responded(self, transaction):
+    def have_responded(self, origin, transaction):
         """ Have we already responded to a transaction with the same id and
         origin?
 
@@ -50,11 +50,11 @@ class TransactionActions(object):
                                "transaction_id")
 
         return self.store.get_received_txn_response(
-            transaction.transaction_id, transaction.origin
+            transaction.transaction_id, origin
         )
 
     @log_function
-    def set_response(self, transaction, code, response):
+    def set_response(self, origin, transaction, code, response):
         """ Persist how we responded to a transaction.
 
         Returns:
@@ -66,7 +66,7 @@ class TransactionActions(object):
 
         return self.store.set_received_txn_response(
             transaction.transaction_id,
-            transaction.origin,
+            origin,
             code,
             response,
         )
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 94d7423d01..8cbf8c4f7f 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -463,7 +463,19 @@ class TransactionQueue(object):
                 # pending_transactions flag.
 
                 pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
+
+                # We can only include at most 50 PDUs per transactions
+                pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:]
+                if leftover_pdus:
+                    self.pending_pdus_by_dest[destination] = leftover_pdus
+
                 pending_edus = self.pending_edus_by_dest.pop(destination, [])
+
+                # We can only include at most 100 EDUs per transactions
+                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
+                if leftover_edus:
+                    self.pending_edus_by_dest[destination] = leftover_edus
+
                 pending_presence = self.pending_presence_by_dest.pop(destination, {})
 
                 pending_edus.extend(
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 1054441ca5..2ab973d6c8 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -15,7 +15,8 @@
 # limitations under the License.
 
 import logging
-import urllib
+
+from six.moves import urllib
 
 from twisted.internet import defer
 
@@ -951,4 +952,4 @@ def _create_path(prefix, path, *args):
     Returns:
         str
     """
-    return prefix + path % tuple(urllib.quote(arg, "") for arg in args)
+    return prefix + path % tuple(urllib.parse.quote(arg, "") for arg in args)
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 7a993fd1cf..2f874b4838 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -90,8 +90,8 @@ class Authenticator(object):
     @defer.inlineCallbacks
     def authenticate_request(self, request, content):
         json_request = {
-            "method": request.method,
-            "uri": request.uri,
+            "method": request.method.decode('ascii'),
+            "uri": request.uri.decode('ascii'),
             "destination": self.server_name,
             "signatures": {},
         }
@@ -252,7 +252,7 @@ class BaseFederationServlet(object):
                     by the callback method. None if the request has already been handled.
             """
             content = None
-            if request.method in ["PUT", "POST"]:
+            if request.method in [b"PUT", b"POST"]:
                 # TODO: Handle other method types? other content types?
                 content = parse_json_object_from_request(request)
 
@@ -353,7 +353,7 @@ class FederationSendServlet(BaseFederationServlet):
 
         try:
             code, response = yield self.handler.on_incoming_transaction(
-                transaction_data
+                origin, transaction_data,
             )
         except Exception:
             logger.exception("on_incoming_transaction failed")
@@ -386,7 +386,7 @@ class FederationStateServlet(BaseFederationServlet):
         return self.handler.on_context_state_request(
             origin,
             context,
-            query.get("event_id", [None])[0],
+            parse_string_from_args(query, "event_id", None),
         )
 
 
@@ -397,7 +397,7 @@ class FederationStateIdsServlet(BaseFederationServlet):
         return self.handler.on_state_ids_request(
             origin,
             room_id,
-            query.get("event_id", [None])[0],
+            parse_string_from_args(query, "event_id", None),
         )
 
 
@@ -405,14 +405,12 @@ class FederationBackfillServlet(BaseFederationServlet):
     PATH = "/backfill/(?P<context>[^/]*)/"
 
     def on_GET(self, origin, content, query, context):
-        versions = query["v"]
-        limits = query["limit"]
+        versions = [x.decode('ascii') for x in query[b"v"]]
+        limit = parse_integer_from_args(query, "limit", None)
 
-        if not limits:
+        if not limit:
             return defer.succeed((400, {"error": "Did not include limit param"}))
 
-        limit = int(limits[-1])
-
         return self.handler.on_backfill_request(origin, context, versions, limit)
 
 
@@ -423,7 +421,7 @@ class FederationQueryServlet(BaseFederationServlet):
     def on_GET(self, origin, content, query, query_type):
         return self.handler.on_query_request(
             query_type,
-            {k: v[0].decode("utf-8") for k, v in query.items()}
+            {k.decode('utf8'): v[0].decode("utf-8") for k, v in query.items()}
         )
 
 
@@ -630,14 +628,14 @@ class OpenIdUserInfo(BaseFederationServlet):
 
     @defer.inlineCallbacks
     def on_GET(self, origin, content, query):
-        token = query.get("access_token", [None])[0]
+        token = query.get(b"access_token", [None])[0]
         if token is None:
             defer.returnValue((401, {
                 "errcode": "M_MISSING_TOKEN", "error": "Access Token required"
             }))
             return
 
-        user_id = yield self.handler.on_openid_userinfo(token)
+        user_id = yield self.handler.on_openid_userinfo(token.decode('ascii'))
 
         if user_id is None:
             defer.returnValue((401, {
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 4a81bd2ba9..2a5eab124f 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -895,22 +895,24 @@ class AuthHandler(BaseHandler):
 
         Args:
             password (unicode): Password to hash.
-            stored_hash (unicode): Expected hash value.
+            stored_hash (bytes): Expected hash value.
 
         Returns:
             Deferred(bool): Whether self.hash(password) == stored_hash.
         """
-
         def _do_validate_hash():
             # Normalise the Unicode in the password
             pw = unicodedata.normalize("NFKC", password)
 
             return bcrypt.checkpw(
                 pw.encode('utf8') + self.hs.config.password_pepper.encode("utf8"),
-                stored_hash.encode('utf8')
+                stored_hash
             )
 
         if stored_hash:
+            if not isinstance(stored_hash, bytes):
+                stored_hash = stored_hash.encode('ascii')
+
             return make_deferred_yieldable(
                 threads.deferToThreadPool(
                     self.hs.get_reactor(),
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index ef866da1b6..18741c5fac 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -20,7 +20,14 @@ import string
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes
-from synapse.api.errors import AuthError, CodeMessageException, Codes, SynapseError
+from synapse.api.errors import (
+    AuthError,
+    CodeMessageException,
+    Codes,
+    NotFoundError,
+    StoreError,
+    SynapseError,
+)
 from synapse.types import RoomAlias, UserID, get_domain_from_id
 
 from ._base import BaseHandler
@@ -109,7 +116,13 @@ class DirectoryHandler(BaseHandler):
     def delete_association(self, requester, user_id, room_alias):
         # association deletion for human users
 
-        can_delete = yield self._user_can_delete_alias(room_alias, user_id)
+        try:
+            can_delete = yield self._user_can_delete_alias(room_alias, user_id)
+        except StoreError as e:
+            if e.code == 404:
+                raise NotFoundError("Unknown room alias")
+            raise
+
         if not can_delete:
             raise AuthError(
                 403, "You don't have permission to delete the alias.",
@@ -320,7 +333,7 @@ class DirectoryHandler(BaseHandler):
     def _user_can_delete_alias(self, alias, user_id):
         creator = yield self.store.get_room_alias_creator(alias.to_string())
 
-        if creator and creator == user_id:
+        if creator is not None and creator == user_id:
             defer.returnValue(True)
 
         is_admin = yield self.auth.is_server_admin(UserID.from_string(user_id))
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 5816bf8b4f..578e9250fb 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -330,7 +330,8 @@ class E2eKeysHandler(object):
                         (algorithm, key_id, ex_json, key)
                     )
             else:
-                new_keys.append((algorithm, key_id, encode_canonical_json(key)))
+                new_keys.append((
+                    algorithm, key_id, encode_canonical_json(key).decode('ascii')))
 
         yield self.store.add_e2e_one_time_keys(
             user_id, device_id, time_now, new_keys
@@ -358,7 +359,7 @@ def _exception_to_failure(e):
     # Note that some Exceptions (notably twisted's ResponseFailed etc) don't
     # give a string for e.message, which json then fails to serialize.
     return {
-        "status": 503, "message": str(e.message),
+        "status": 503, "message": str(e),
     }
 
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 3fa7a98445..f10b46414b 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -360,6 +360,35 @@ class FederationHandler(BaseHandler):
         # apparently.
         #
         # see https://github.com/matrix-org/synapse/pull/1744
+        #
+        # ----
+        #
+        # Update richvdh 2018/09/18: There are a number of problems with timing this
+        # request out agressively on the client side:
+        #
+        # - it plays badly with the server-side rate-limiter, which starts tarpitting you
+        #   if you send too many requests at once, so you end up with the server carefully
+        #   working through the backlog of your requests, which you have already timed
+        #   out.
+        #
+        # - for this request in particular, we now (as of
+        #   https://github.com/matrix-org/synapse/pull/3456) reject any PDUs where the
+        #   server can't produce a plausible-looking set of prev_events - so we becone
+        #   much more likely to reject the event.
+        #
+        # - contrary to what it says above, we do *not* fall back to fetching fresh state
+        #   for the room if get_missing_events times out. Rather, we give up processing
+        #   the PDU whose prevs we are missing, which then makes it much more likely that
+        #   we'll end up back here for the *next* PDU in the list, which exacerbates the
+        #   problem.
+        #
+        # - the agressive 10s timeout was introduced to deal with incoming federation
+        #   requests taking 8 hours to process. It's not entirely clear why that was going
+        #   on; certainly there were other issues causing traffic storms which are now
+        #   resolved, and I think in any case we may be more sensible about our locking
+        #   now. We're *certainly* more sensible about our logging.
+        #
+        # All that said: Let's try increasing the timout to 60s and see what happens.
 
         missing_events = yield self.federation_client.get_missing_events(
             origin,
@@ -368,7 +397,7 @@ class FederationHandler(BaseHandler):
             latest_events=[pdu],
             limit=10,
             min_depth=min_depth,
-            timeout=10000,
+            timeout=60000,
         )
 
         logger.info(
@@ -594,7 +623,7 @@ class FederationHandler(BaseHandler):
 
         required_auth = set(
             a_id
-            for event in events + state_events.values() + auth_events.values()
+            for event in events + list(state_events.values()) + list(auth_events.values())
             for a_id, _ in event.auth_events
         )
         auth_events.update({
@@ -802,7 +831,7 @@ class FederationHandler(BaseHandler):
                     )
                     continue
                 except NotRetryingDestination as e:
-                    logger.info(e.message)
+                    logger.info(str(e))
                     continue
                 except FederationDeniedError as e:
                     logger.info(e)
@@ -1358,7 +1387,7 @@ class FederationHandler(BaseHandler):
         )
 
         if state_groups:
-            _, state = state_groups.items().pop()
+            _, state = list(state_groups.items()).pop()
             results = state
 
             if event.is_state():
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 5170d093e3..a155b6e938 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -269,14 +269,7 @@ class PaginationHandler(object):
 
             if state_ids:
                 state = yield self.store.get_events(list(state_ids.values()))
-
-            if state:
-                state = yield filter_events_for_client(
-                    self.store,
-                    user_id,
-                    state.values(),
-                    is_peeking=(member_event_id is None),
-                )
+                state = state.values()
 
         time_now = self.clock.time_msec()
 
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 1e53f2c635..da914c46ff 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -534,4 +534,5 @@ class RegistrationHandler(BaseHandler):
             room_id=room_id,
             remote_room_hosts=remote_room_hosts,
             action="join",
+            ratelimit=False,
         )
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 37e41afd61..38e1737ec9 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -162,7 +162,7 @@ class RoomListHandler(BaseHandler):
         # Filter out rooms that we don't want to return
         rooms_to_scan = [
             r for r in sorted_rooms
-            if r not in newly_unpublished and rooms_to_num_joined[room_id] > 0
+            if r not in newly_unpublished and rooms_to_num_joined[r] > 0
         ]
 
         total_room_count = len(rooms_to_scan)
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index f643619047..07fd3e82fc 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -583,6 +583,11 @@ class RoomMemberHandler(object):
         room_id = mapping["room_id"]
         servers = mapping["servers"]
 
+        # put the server which owns the alias at the front of the server list.
+        if room_alias.domain in servers:
+            servers.remove(room_alias.domain)
+        servers.insert(0, room_alias.domain)
+
         defer.returnValue((RoomID.from_string(room_id), servers))
 
     @defer.inlineCallbacks
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index c464adbd0b..0c1d52fd11 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -54,7 +54,7 @@ class SearchHandler(BaseHandler):
         batch_token = None
         if batch:
             try:
-                b = decode_base64(batch)
+                b = decode_base64(batch).decode('ascii')
                 batch_group, batch_group_key, batch_token = b.split("\n")
 
                 assert batch_group is not None
@@ -258,18 +258,18 @@ class SearchHandler(BaseHandler):
                 # it returns more from the same group (if applicable) rather
                 # than reverting to searching all results again.
                 if batch_group and batch_group_key:
-                    global_next_batch = encode_base64("%s\n%s\n%s" % (
+                    global_next_batch = encode_base64(("%s\n%s\n%s" % (
                         batch_group, batch_group_key, pagination_token
-                    ))
+                    )).encode('ascii'))
                 else:
-                    global_next_batch = encode_base64("%s\n%s\n%s" % (
+                    global_next_batch = encode_base64(("%s\n%s\n%s" % (
                         "all", "", pagination_token
-                    ))
+                    )).encode('ascii'))
 
                 for room_id, group in room_groups.items():
-                    group["next_batch"] = encode_base64("%s\n%s\n%s" % (
+                    group["next_batch"] = encode_base64(("%s\n%s\n%s" % (
                         "room_id", room_id, pagination_token
-                    ))
+                    )).encode('ascii'))
 
             allowed_events.extend(room_events)
 
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index ef20c2296c..9bca4e7067 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -24,6 +24,7 @@ from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.push.clientformat import format_push_rules_for_user
+from synapse.storage.roommember import MemberSummary
 from synapse.types import RoomStreamToken
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -525,6 +526,8 @@ class SyncHandler(object):
              A deferred dict describing the room summary
         """
 
+        # FIXME: we could/should get this from room_stats when matthew/stats lands
+
         # FIXME: this promulgates https://github.com/matrix-org/synapse/issues/3305
         last_events, _ = yield self.store.get_recent_event_ids_for_room(
             room_id, end_token=now_token.room_key, limit=1,
@@ -537,44 +540,67 @@ class SyncHandler(object):
         last_event = last_events[-1]
         state_ids = yield self.store.get_state_ids_for_event(
             last_event.event_id, [
-                (EventTypes.Member, None),
                 (EventTypes.Name, ''),
                 (EventTypes.CanonicalAlias, ''),
             ]
         )
 
-        member_ids = {
-            state_key: event_id
-            for (t, state_key), event_id in state_ids.iteritems()
-            if t == EventTypes.Member
-        }
+        # this is heavily cached, thus: fast.
+        details = yield self.store.get_room_summary(room_id)
+
         name_id = state_ids.get((EventTypes.Name, ''))
         canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, ''))
 
         summary = {}
-
-        # FIXME: it feels very heavy to load up every single membership event
-        # just to calculate the counts.
-        member_events = yield self.store.get_events(member_ids.values())
-
-        joined_user_ids = []
-        invited_user_ids = []
-
-        for ev in member_events.values():
-            if ev.content.get("membership") == Membership.JOIN:
-                joined_user_ids.append(ev.state_key)
-            elif ev.content.get("membership") == Membership.INVITE:
-                invited_user_ids.append(ev.state_key)
+        empty_ms = MemberSummary([], 0)
 
         # TODO: only send these when they change.
-        summary["m.joined_member_count"] = len(joined_user_ids)
-        summary["m.invited_member_count"] = len(invited_user_ids)
+        summary["m.joined_member_count"] = (
+            details.get(Membership.JOIN, empty_ms).count
+        )
+        summary["m.invited_member_count"] = (
+            details.get(Membership.INVITE, empty_ms).count
+        )
 
-        if name_id or canonical_alias_id:
-            defer.returnValue(summary)
+        # if the room has a name or canonical_alias set, we can skip
+        # calculating heroes.  we assume that if the event has contents, it'll
+        # be a valid name or canonical_alias - i.e. we're checking that they
+        # haven't been "deleted" by blatting {} over the top.
+        if name_id:
+            name = yield self.store.get_event(name_id, allow_none=False)
+            if name and name.content:
+                defer.returnValue(summary)
+
+        if canonical_alias_id:
+            canonical_alias = yield self.store.get_event(
+                canonical_alias_id, allow_none=False,
+            )
+            if canonical_alias and canonical_alias.content:
+                defer.returnValue(summary)
 
-        # FIXME: order by stream ordering, not alphabetic
+        joined_user_ids = [
+            r[0] for r in details.get(Membership.JOIN, empty_ms).members
+        ]
+        invited_user_ids = [
+            r[0] for r in details.get(Membership.INVITE, empty_ms).members
+        ]
+        gone_user_ids = (
+            [r[0] for r in details.get(Membership.LEAVE, empty_ms).members] +
+            [r[0] for r in details.get(Membership.BAN, empty_ms).members]
+        )
 
+        # FIXME: only build up a member_ids list for our heroes
+        member_ids = {}
+        for membership in (
+            Membership.JOIN,
+            Membership.INVITE,
+            Membership.LEAVE,
+            Membership.BAN
+        ):
+            for user_id, event_id in details.get(membership, empty_ms).members:
+                member_ids[user_id] = event_id
+
+        # FIXME: order by stream ordering rather than as returned by SQL
         me = sync_config.user.to_string()
         if (joined_user_ids or invited_user_ids):
             summary['m.heroes'] = sorted(
@@ -586,7 +612,11 @@ class SyncHandler(object):
             )[0:5]
         else:
             summary['m.heroes'] = sorted(
-                [user_id for user_id in member_ids.keys() if user_id != me]
+                [
+                    user_id
+                    for user_id in gone_user_ids
+                    if user_id != me
+                ]
             )[0:5]
 
         if not sync_config.filter_collection.lazy_load_members():
@@ -719,6 +749,26 @@ class SyncHandler(object):
                     lazy_load_members=lazy_load_members,
                 )
             elif batch.limited:
+                state_at_timeline_start = yield self.store.get_state_ids_for_event(
+                    batch.events[0].event_id, types=types,
+                    filtered_types=filtered_types,
+                )
+
+                # for now, we disable LL for gappy syncs - see
+                # https://github.com/vector-im/riot-web/issues/7211#issuecomment-419976346
+                # N.B. this slows down incr syncs as we are now processing way
+                # more state in the server than if we were LLing.
+                #
+                # We still have to filter timeline_start to LL entries (above) in order
+                # for _calculate_state's LL logic to work, as we have to include LL
+                # members for timeline senders in case they weren't loaded in the initial
+                # sync.  We do this by (counterintuitively) by filtering timeline_start
+                # members to just be ones which were timeline senders, which then ensures
+                # all of the rest get included in the state block (if we need to know
+                # about them).
+                types = None
+                filtered_types = None
+
                 state_at_previous_sync = yield self.get_state_at(
                     room_id, stream_position=since_token, types=types,
                     filtered_types=filtered_types,
@@ -729,24 +779,21 @@ class SyncHandler(object):
                     filtered_types=filtered_types,
                 )
 
-                state_at_timeline_start = yield self.store.get_state_ids_for_event(
-                    batch.events[0].event_id, types=types,
-                    filtered_types=filtered_types,
-                )
-
                 state_ids = _calculate_state(
                     timeline_contains=timeline_state,
                     timeline_start=state_at_timeline_start,
                     previous=state_at_previous_sync,
                     current=current_state_ids,
+                    # we have to include LL members in case LL initial sync missed them
                     lazy_load_members=lazy_load_members,
                 )
             else:
                 state_ids = {}
                 if lazy_load_members:
                     if types:
-                        # We're returning an incremental sync, with no "gap" since
-                        # the previous sync, so normally there would be no state to return
+                        # We're returning an incremental sync, with no
+                        # "gap" since the previous sync, so normally there would be
+                        # no state to return.
                         # But we're lazy-loading, so the client might need some more
                         # member events to understand the events in this timeline.
                         # So we fish out all the member events corresponding to the
@@ -774,7 +821,7 @@ class SyncHandler(object):
                     logger.debug("filtering state from %r...", state_ids)
                     state_ids = {
                         t: event_id
-                        for t, event_id in state_ids.iteritems()
+                        for t, event_id in iteritems(state_ids)
                         if cache.get(t[1]) != event_id
                     }
                     logger.debug("...to %r", state_ids)
@@ -1575,6 +1622,19 @@ class SyncHandler(object):
             newly_joined_room=newly_joined,
         )
 
+        # When we join the room (or the client requests full_state), we should
+        # send down any existing tags. Usually the user won't have tags in a
+        # newly joined room, unless either a) they've joined before or b) the
+        # tag was added by synapse e.g. for server notice rooms.
+        if full_state:
+            user_id = sync_result_builder.sync_config.user.to_string()
+            tags = yield self.store.get_tags_for_room(user_id, room_id)
+
+            # If there aren't any tags, don't send the empty tags list down
+            # sync
+            if not tags:
+                tags = None
+
         account_data_events = []
         if tags is not None:
             account_data_events.append({
@@ -1603,10 +1663,24 @@ class SyncHandler(object):
         )
 
         summary = {}
+
+        # we include a summary in room responses when we're lazy loading
+        # members (as the client otherwise doesn't have enough info to form
+        # the name itself).
         if (
             sync_config.filter_collection.lazy_load_members() and
             (
+                # we recalulate the summary:
+                #   if there are membership changes in the timeline, or
+                #   if membership has changed during a gappy sync, or
+                #   if this is an initial sync.
                 any(ev.type == EventTypes.Member for ev in batch.events) or
+                (
+                    # XXX: this may include false positives in the form of LL
+                    # members which have snuck into state
+                    batch.limited and
+                    any(t == EventTypes.Member for (t, k) in state)
+                ) or
                 since_token is None
             )
         ):
@@ -1636,6 +1710,16 @@ class SyncHandler(object):
                     unread_notifications["highlight_count"] = notifs["highlight_count"]
 
                 sync_result_builder.joined.append(room_sync)
+
+            if batch.limited and since_token:
+                user_id = sync_result_builder.sync_config.user.to_string()
+                logger.info(
+                    "Incremental gappy sync of %s for user %s with %d state events" % (
+                        room_id,
+                        user_id,
+                        len(state),
+                    )
+                )
         elif room_builder.rtype == "archived":
             room_sync = ArchivedSyncResult(
                 room_id=room_id,
@@ -1729,17 +1813,17 @@ def _calculate_state(
     event_id_to_key = {
         e: key
         for key, e in itertools.chain(
-            timeline_contains.items(),
-            previous.items(),
-            timeline_start.items(),
-            current.items(),
+            iteritems(timeline_contains),
+            iteritems(previous),
+            iteritems(timeline_start),
+            iteritems(current),
         )
     }
 
-    c_ids = set(e for e in current.values())
-    ts_ids = set(e for e in timeline_start.values())
-    p_ids = set(e for e in previous.values())
-    tc_ids = set(e for e in timeline_contains.values())
+    c_ids = set(e for e in itervalues(current))
+    ts_ids = set(e for e in itervalues(timeline_start))
+    p_ids = set(e for e in itervalues(previous))
+    tc_ids = set(e for e in itervalues(timeline_contains))
 
     # If we are lazyloading room members, we explicitly add the membership events
     # for the senders in the timeline into the state block returned by /sync,
@@ -1753,7 +1837,7 @@ def _calculate_state(
 
     if lazy_load_members:
         p_ids.difference_update(
-            e for t, e in timeline_start.iteritems()
+            e for t, e in iteritems(timeline_start)
             if t[0] == EventTypes.Member
         )
 
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index 58ef8d3ce4..a3f9e4f67c 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -38,12 +38,12 @@ def cancelled_to_request_timed_out_error(value, timeout):
     return value
 
 
-ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
+ACCESS_TOKEN_RE = re.compile(r'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
 
 
 def redact_uri(uri):
     """Strips access tokens from the uri replaces with <redacted>"""
     return ACCESS_TOKEN_RE.sub(
-        br'\1<redacted>\3',
+        r'\1<redacted>\3',
         uri
     )
diff --git a/synapse/http/client.py b/synapse/http/client.py
index ab4fbf59b2..3d05f83b8c 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -13,24 +13,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import logging
-import urllib
 
-from six import StringIO
+from six import text_type
+from six.moves import urllib
 
+import treq
 from canonicaljson import encode_canonical_json, json
 from prometheus_client import Counter
 
 from OpenSSL import SSL
 from OpenSSL.SSL import VERIFY_NONE
-from twisted.internet import defer, protocol, reactor, ssl, task
+from twisted.internet import defer, protocol, reactor, ssl
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
 from twisted.web._newclient import ResponseDone
 from twisted.web.client import (
     Agent,
     BrowserLikeRedirectAgent,
     ContentDecoderAgent,
-    FileBodyProducer as TwistedFileBodyProducer,
     GzipDecoder,
     HTTPConnectionPool,
     PartialDownloadError,
@@ -42,7 +43,7 @@ from twisted.web.http_headers import Headers
 from synapse.api.errors import Codes, HttpResponseException, SynapseError
 from synapse.http import cancelled_to_request_timed_out_error, redact_uri
 from synapse.http.endpoint import SpiderEndpoint
-from synapse.util.async_helpers import add_timeout_to_deferred
+from synapse.util.async_helpers import timeout_deferred
 from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.logcontext import make_deferred_yieldable
 
@@ -83,8 +84,10 @@ class SimpleHttpClient(object):
         if hs.config.user_agent_suffix:
             self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,)
 
+        self.user_agent = self.user_agent.encode('ascii')
+
     @defer.inlineCallbacks
-    def request(self, method, uri, *args, **kwargs):
+    def request(self, method, uri, data=b'', headers=None):
         # A small wrapper around self.agent.request() so we can easily attach
         # counters to it
         outgoing_requests_counter.labels(method).inc()
@@ -93,10 +96,10 @@ class SimpleHttpClient(object):
         logger.info("Sending request %s %s", method, redact_uri(uri))
 
         try:
-            request_deferred = self.agent.request(
-                method, uri, *args, **kwargs
+            request_deferred = treq.request(
+                method, uri, agent=self.agent, data=data, headers=headers
             )
-            add_timeout_to_deferred(
+            request_deferred = timeout_deferred(
                 request_deferred, 60, self.hs.get_reactor(),
                 cancelled_to_request_timed_out_error,
             )
@@ -112,7 +115,7 @@ class SimpleHttpClient(object):
             incoming_responses_counter.labels(method, "ERR").inc()
             logger.info(
                 "Error sending request to  %s %s: %s %s",
-                method, redact_uri(uri), type(e).__name__, e.message
+                method, redact_uri(uri), type(e).__name__, e.args[0]
             )
             raise
 
@@ -137,7 +140,8 @@ class SimpleHttpClient(object):
         # TODO: Do we ever want to log message contents?
         logger.debug("post_urlencoded_get_json args: %s", args)
 
-        query_bytes = urllib.urlencode(encode_urlencode_args(args), True)
+        query_bytes = urllib.parse.urlencode(
+            encode_urlencode_args(args), True).encode("utf8")
 
         actual_headers = {
             b"Content-Type": [b"application/x-www-form-urlencoded"],
@@ -148,15 +152,14 @@ class SimpleHttpClient(object):
 
         response = yield self.request(
             "POST",
-            uri.encode("ascii"),
+            uri,
             headers=Headers(actual_headers),
-            bodyProducer=FileBodyProducer(StringIO(query_bytes))
+            data=query_bytes
         )
 
-        body = yield make_deferred_yieldable(readBody(response))
-
         if 200 <= response.code < 300:
-            defer.returnValue(json.loads(body))
+            body = yield make_deferred_yieldable(treq.json_content(response))
+            defer.returnValue(body)
         else:
             raise HttpResponseException(response.code, response.phrase, body)
 
@@ -191,9 +194,9 @@ class SimpleHttpClient(object):
 
         response = yield self.request(
             "POST",
-            uri.encode("ascii"),
+            uri,
             headers=Headers(actual_headers),
-            bodyProducer=FileBodyProducer(StringIO(json_str))
+            data=json_str
         )
 
         body = yield make_deferred_yieldable(readBody(response))
@@ -248,7 +251,7 @@ class SimpleHttpClient(object):
             ValueError: if the response was not JSON
         """
         if len(args):
-            query_bytes = urllib.urlencode(args, True)
+            query_bytes = urllib.parse.urlencode(args, True)
             uri = "%s?%s" % (uri, query_bytes)
 
         json_str = encode_canonical_json(json_body)
@@ -262,9 +265,9 @@ class SimpleHttpClient(object):
 
         response = yield self.request(
             "PUT",
-            uri.encode("ascii"),
+            uri,
             headers=Headers(actual_headers),
-            bodyProducer=FileBodyProducer(StringIO(json_str))
+            data=json_str
         )
 
         body = yield make_deferred_yieldable(readBody(response))
@@ -293,7 +296,7 @@ class SimpleHttpClient(object):
             HttpResponseException on a non-2xx HTTP response.
         """
         if len(args):
-            query_bytes = urllib.urlencode(args, True)
+            query_bytes = urllib.parse.urlencode(args, True)
             uri = "%s?%s" % (uri, query_bytes)
 
         actual_headers = {
@@ -304,7 +307,7 @@ class SimpleHttpClient(object):
 
         response = yield self.request(
             "GET",
-            uri.encode("ascii"),
+            uri,
             headers=Headers(actual_headers),
         )
 
@@ -339,13 +342,14 @@ class SimpleHttpClient(object):
 
         response = yield self.request(
             "GET",
-            url.encode("ascii"),
+            url,
             headers=Headers(actual_headers),
         )
 
         resp_headers = dict(response.headers.getAllRawHeaders())
 
-        if 'Content-Length' in resp_headers and resp_headers['Content-Length'] > max_size:
+        if (b'Content-Length' in resp_headers and
+                int(resp_headers[b'Content-Length']) > max_size):
             logger.warn("Requested URL is too large > %r bytes" % (self.max_size,))
             raise SynapseError(
                 502,
@@ -378,7 +382,12 @@ class SimpleHttpClient(object):
             )
 
         defer.returnValue(
-            (length, resp_headers, response.request.absoluteURI, response.code),
+            (
+                length,
+                resp_headers,
+                response.request.absoluteURI.decode('ascii'),
+                response.code,
+            ),
         )
 
 
@@ -434,12 +443,12 @@ class CaptchaServerHttpClient(SimpleHttpClient):
 
     @defer.inlineCallbacks
     def post_urlencoded_get_raw(self, url, args={}):
-        query_bytes = urllib.urlencode(encode_urlencode_args(args), True)
+        query_bytes = urllib.parse.urlencode(encode_urlencode_args(args), True)
 
         response = yield self.request(
             "POST",
-            url.encode("ascii"),
-            bodyProducer=FileBodyProducer(StringIO(query_bytes)),
+            url,
+            data=query_bytes,
             headers=Headers({
                 b"Content-Type": [b"application/x-www-form-urlencoded"],
                 b"User-Agent": [self.user_agent],
@@ -463,9 +472,9 @@ class SpiderEndpointFactory(object):
     def endpointForURI(self, uri):
         logger.info("Getting endpoint for %s", uri.toBytes())
 
-        if uri.scheme == "http":
+        if uri.scheme == b"http":
             endpoint_factory = HostnameEndpoint
-        elif uri.scheme == "https":
+        elif uri.scheme == b"https":
             tlsCreator = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
 
             def endpoint_factory(reactor, host, port, **kw):
@@ -510,7 +519,7 @@ def encode_urlencode_args(args):
 
 
 def encode_urlencode_arg(arg):
-    if isinstance(arg, unicode):
+    if isinstance(arg, text_type):
         return arg.encode('utf-8')
     elif isinstance(arg, list):
         return [encode_urlencode_arg(i) for i in arg]
@@ -542,26 +551,3 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory):
 
     def creatorForNetloc(self, hostname, port):
         return self
-
-
-class FileBodyProducer(TwistedFileBodyProducer):
-    """Workaround for https://twistedmatrix.com/trac/ticket/8473
-
-    We override the pauseProducing and resumeProducing methods in twisted's
-    FileBodyProducer so that they do not raise exceptions if the task has
-    already completed.
-    """
-
-    def pauseProducing(self):
-        try:
-            super(FileBodyProducer, self).pauseProducing()
-        except task.TaskDone:
-            # task has already completed
-            pass
-
-    def resumeProducing(self):
-        try:
-            super(FileBodyProducer, self).resumeProducing()
-        except task.NotPaused:
-            # task was not paused (probably because it had already completed)
-            pass
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index b0c9369519..91025037a3 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -108,7 +108,7 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory=
 
     Args:
         reactor: Twisted reactor.
-        destination (bytes): The name of the server to connect to.
+        destination (unicode): The name of the server to connect to.
         tls_client_options_factory
             (synapse.crypto.context_factory.ClientTLSOptionsFactory):
             Factory which generates TLS options for client connections.
@@ -126,10 +126,17 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory=
         transport_endpoint = HostnameEndpoint
         default_port = 8008
     else:
+        # the SNI string should be the same as the Host header, minus the port.
+        # as per https://github.com/matrix-org/synapse/issues/2525#issuecomment-336896777,
+        # the Host header and SNI should therefore be the server_name of the remote
+        # server.
+        tls_options = tls_client_options_factory.get_options(domain)
+
         def transport_endpoint(reactor, host, port, timeout):
             return wrapClientTLS(
-                tls_client_options_factory.get_options(host),
-                HostnameEndpoint(reactor, host, port, timeout=timeout))
+                tls_options,
+                HostnameEndpoint(reactor, host, port, timeout=timeout),
+            )
         default_port = 8448
 
     if port is None:
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index b34bb8e31a..14b12cd1c4 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -17,19 +17,22 @@ import cgi
 import logging
 import random
 import sys
-import urllib
+from io import BytesIO
 
-from six import string_types
-from six.moves.urllib import parse as urlparse
+from six import PY3, string_types
+from six.moves import urllib
 
-from canonicaljson import encode_canonical_json, json
+import attr
+import treq
+from canonicaljson import encode_canonical_json
 from prometheus_client import Counter
 from signedjson.sign import sign_json
 
-from twisted.internet import defer, protocol, reactor
+from twisted.internet import defer, protocol
 from twisted.internet.error import DNSLookupError
+from twisted.internet.task import _EPSILON, Cooperator
 from twisted.web._newclient import ResponseDone
-from twisted.web.client import Agent, HTTPConnectionPool, readBody
+from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool
 from twisted.web.http_headers import Headers
 
 import synapse.metrics
@@ -40,14 +43,12 @@ from synapse.api.errors import (
     HttpResponseException,
     SynapseError,
 )
-from synapse.http import cancelled_to_request_timed_out_error
 from synapse.http.endpoint import matrix_federation_endpoint
-from synapse.util import logcontext
-from synapse.util.async_helpers import add_timeout_to_deferred
+from synapse.util.async_helpers import timeout_deferred
 from synapse.util.logcontext import make_deferred_yieldable
+from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
-outbound_logger = logging.getLogger("synapse.http.outbound")
 
 outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests",
                                     "", ["method"])
@@ -58,20 +59,119 @@ incoming_responses_counter = Counter("synapse_http_matrixfederationclient_respon
 MAX_LONG_RETRIES = 10
 MAX_SHORT_RETRIES = 3
 
+if PY3:
+    MAXINT = sys.maxsize
+else:
+    MAXINT = sys.maxint
+
 
 class MatrixFederationEndpointFactory(object):
     def __init__(self, hs):
+        self.reactor = hs.get_reactor()
         self.tls_client_options_factory = hs.tls_client_options_factory
 
     def endpointForURI(self, uri):
-        destination = uri.netloc
+        destination = uri.netloc.decode('ascii')
 
         return matrix_federation_endpoint(
-            reactor, destination, timeout=10,
+            self.reactor, destination, timeout=10,
             tls_client_options_factory=self.tls_client_options_factory
         )
 
 
+_next_id = 1
+
+
+@attr.s
+class MatrixFederationRequest(object):
+    method = attr.ib()
+    """HTTP method
+    :type: str
+    """
+
+    path = attr.ib()
+    """HTTP path
+    :type: str
+    """
+
+    destination = attr.ib()
+    """The remote server to send the HTTP request to.
+    :type: str"""
+
+    json = attr.ib(default=None)
+    """JSON to send in the body.
+    :type: dict|None
+    """
+
+    json_callback = attr.ib(default=None)
+    """A callback to generate the JSON.
+    :type: func|None
+    """
+
+    query = attr.ib(default=None)
+    """Query arguments.
+    :type: dict|None
+    """
+
+    txn_id = attr.ib(default=None)
+    """Unique ID for this request (for logging)
+    :type: str|None
+    """
+
+    def __attrs_post_init__(self):
+        global _next_id
+        self.txn_id = "%s-O-%s" % (self.method, _next_id)
+        _next_id = (_next_id + 1) % (MAXINT - 1)
+
+    def get_json(self):
+        if self.json_callback:
+            return self.json_callback()
+        return self.json
+
+
+@defer.inlineCallbacks
+def _handle_json_response(reactor, timeout_sec, request, response):
+    """
+    Reads the JSON body of a response, with a timeout
+
+    Args:
+        reactor (IReactor): twisted reactor, for the timeout
+        timeout_sec (float): number of seconds to wait for response to complete
+        request (MatrixFederationRequest): the request that triggered the response
+        response (IResponse): response to the request
+
+    Returns:
+        dict: parsed JSON response
+    """
+    try:
+        check_content_type_is_json(response.headers)
+
+        d = treq.json_content(response)
+        d = timeout_deferred(
+            d,
+            timeout=timeout_sec,
+            reactor=reactor,
+        )
+
+        body = yield make_deferred_yieldable(d)
+    except Exception as e:
+        logger.warn(
+            "{%s} [%s] Error reading response: %s",
+            request.txn_id,
+            request.destination,
+            e,
+        )
+        raise
+    logger.info(
+        "{%s} [%s] Completed: %d %s",
+        request.txn_id,
+        request.destination,
+        response.code,
+        response.phrase.decode('ascii', errors='replace'),
+    )
+    defer.returnValue(body)
+
+
 class MatrixFederationHttpClient(object):
     """HTTP client used to talk to other homeservers over the federation
     protocol. Send client certificates and signs requests.
@@ -85,7 +185,9 @@ class MatrixFederationHttpClient(object):
         self.hs = hs
         self.signing_key = hs.config.signing_key[0]
         self.server_name = hs.hostname
+        reactor = hs.get_reactor()
         pool = HTTPConnectionPool(reactor)
+        pool.retryAutomatically = False
         pool.maxPersistentPerHost = 5
         pool.cachedConnectionTimeout = 2 * 60
         self.agent = Agent.usingEndpointFactory(
@@ -93,28 +195,36 @@ class MatrixFederationHttpClient(object):
         )
         self.clock = hs.get_clock()
         self._store = hs.get_datastore()
-        self.version_string = hs.version_string
-        self._next_id = 1
+        self.version_string = hs.version_string.encode('ascii')
+        self.default_timeout = 60
 
-    def _create_url(self, destination, path_bytes, param_bytes, query_bytes):
-        return urlparse.urlunparse(
-            ("matrix", destination, path_bytes, param_bytes, query_bytes, "")
-        )
+        def schedule(x):
+            reactor.callLater(_EPSILON, x)
+
+        self._cooperator = Cooperator(scheduler=schedule)
 
     @defer.inlineCallbacks
-    def _request(self, destination, method, path,
-                 body_callback, headers_dict={}, param_bytes=b"",
-                 query_bytes=b"", retry_on_dns_fail=True,
-                 timeout=None, long_retries=False,
-                 ignore_backoff=False,
-                 backoff_on_404=False):
-        """ Creates and sends a request to the given server
+    def _send_request(
+        self,
+        request,
+        retry_on_dns_fail=True,
+        timeout=None,
+        long_retries=False,
+        ignore_backoff=False,
+        backoff_on_404=False
+    ):
+        """
+        Sends a request to the given server.
+
         Args:
-            destination (str): The remote server to send the HTTP request to.
-            method (str): HTTP method
-            path (str): The HTTP path
+            request (MatrixFederationRequest): details of request to be sent
+
+            timeout (int|None): number of milliseconds to wait for the response headers
+                (including connecting to the server). 60s by default.
+
             ignore_backoff (bool): true to ignore the historical backoff data
                 and try the request anyway.
+
             backoff_on_404 (bool): Back off if we get a 404
 
         Returns:
@@ -132,38 +242,39 @@ class MatrixFederationHttpClient(object):
             (May also fail with plenty of other Exceptions for things like DNS
                 failures, connection failures, SSL failures.)
         """
+        if timeout:
+            _sec_timeout = timeout / 1000
+        else:
+            _sec_timeout = self.default_timeout
+
         if (
             self.hs.config.federation_domain_whitelist is not None and
-            destination not in self.hs.config.federation_domain_whitelist
+            request.destination not in self.hs.config.federation_domain_whitelist
         ):
-            raise FederationDeniedError(destination)
+            raise FederationDeniedError(request.destination)
 
         limiter = yield synapse.util.retryutils.get_retry_limiter(
-            destination,
+            request.destination,
             self.clock,
             self._store,
             backoff_on_404=backoff_on_404,
             ignore_backoff=ignore_backoff,
         )
 
-        destination = destination.encode("ascii")
-        path_bytes = path.encode("ascii")
-        with limiter:
-            headers_dict[b"User-Agent"] = [self.version_string]
-            headers_dict[b"Host"] = [destination]
-
-            url_bytes = self._create_url(
-                destination, path_bytes, param_bytes, query_bytes
-            )
-
-            txn_id = "%s-O-%s" % (method, self._next_id)
-            self._next_id = (self._next_id + 1) % (sys.maxint - 1)
+        method = request.method
+        destination = request.destination
+        path_bytes = request.path.encode("ascii")
+        if request.query:
+            query_bytes = encode_query_args(request.query)
+        else:
+            query_bytes = b""
 
-            outbound_logger.info(
-                "{%s} [%s] Sending request: %s %s",
-                txn_id, destination, method, url_bytes
-            )
+        headers_dict = {
+            "User-Agent": [self.version_string],
+            "Host": [request.destination],
+        }
 
+        with limiter:
             # XXX: Would be much nicer to retry only at the transaction-layer
             # (once we have reliable transactions in place)
             if long_retries:
@@ -171,88 +282,119 @@ class MatrixFederationHttpClient(object):
             else:
                 retries_left = MAX_SHORT_RETRIES
 
-            http_url_bytes = urlparse.urlunparse(
-                ("", "", path_bytes, param_bytes, query_bytes, "")
-            )
-
-            log_result = None
-            try:
-                while True:
-                    producer = None
-                    if body_callback:
-                        producer = body_callback(method, http_url_bytes, headers_dict)
-
-                    try:
-                        request_deferred = self.agent.request(
-                            method,
-                            url_bytes,
-                            Headers(headers_dict),
-                            producer
+            url = urllib.parse.urlunparse((
+                b"matrix", destination.encode("ascii"),
+                path_bytes, None, query_bytes, b"",
+            )).decode('ascii')
+
+            http_url = urllib.parse.urlunparse((
+                b"", b"",
+                path_bytes, None, query_bytes, b"",
+            )).decode('ascii')
+
+            while True:
+                try:
+                    json = request.get_json()
+                    if json:
+                        data = encode_canonical_json(json)
+                        headers_dict["Content-Type"] = ["application/json"]
+                        self.sign_request(
+                            destination, method, http_url, headers_dict, json
                         )
-                        add_timeout_to_deferred(
-                            request_deferred,
-                            timeout / 1000. if timeout else 60,
-                            self.hs.get_reactor(),
-                            cancelled_to_request_timed_out_error,
+                    else:
+                        data = None
+                        self.sign_request(destination, method, http_url, headers_dict)
+
+                    logger.info(
+                        "{%s} [%s] Sending request: %s %s",
+                        request.txn_id, destination, method, url
+                    )
+
+                    if data:
+                        producer = FileBodyProducer(
+                            BytesIO(data),
+                            cooperator=self._cooperator
                         )
+                    else:
+                        producer = None
+
+                    request_deferred = treq.request(
+                        method,
+                        url,
+                        headers=Headers(headers_dict),
+                        data=producer,
+                        agent=self.agent,
+                        reactor=self.hs.get_reactor(),
+                        unbuffered=True
+                    )
+
+                    request_deferred = timeout_deferred(
+                        request_deferred,
+                        timeout=_sec_timeout,
+                        reactor=self.hs.get_reactor(),
+                    )
+
+                    with Measure(self.clock, "outbound_request"):
                         response = yield make_deferred_yieldable(
                             request_deferred,
                         )
 
-                        log_result = "%d %s" % (response.code, response.phrase,)
-                        break
-                    except Exception as e:
-                        if not retry_on_dns_fail and isinstance(e, DNSLookupError):
-                            logger.warn(
-                                "DNS Lookup failed to %s with %s",
-                                destination,
-                                e
-                            )
-                            log_result = "DNS Lookup failed to %s with %s" % (
-                                destination, e
-                            )
-                            raise
-
-                        logger.warn(
-                            "{%s} Sending request failed to %s: %s %s: %s",
-                            txn_id,
+                    break
+                except Exception as e:
+                    logger.warn(
+                        "{%s} [%s] Request failed: %s %s: %s",
+                        request.txn_id,
+                        destination,
+                        method,
+                        url,
+                        _flatten_response_never_received(e),
+                    )
+
+                    if not retry_on_dns_fail and isinstance(e, DNSLookupError):
+                        raise
+
+                    if retries_left and not timeout:
+                        if long_retries:
+                            delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
+                            delay = min(delay, 60)
+                            delay *= random.uniform(0.8, 1.4)
+                        else:
+                            delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
+                            delay = min(delay, 2)
+                            delay *= random.uniform(0.8, 1.4)
+
+                        logger.debug(
+                            "{%s} [%s] Waiting %ss before re-sending...",
+                            request.txn_id,
                             destination,
-                            method,
-                            url_bytes,
-                            _flatten_response_never_received(e),
+                            delay,
                         )
 
-                        log_result = _flatten_response_never_received(e)
-
-                        if retries_left and not timeout:
-                            if long_retries:
-                                delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
-                                delay = min(delay, 60)
-                                delay *= random.uniform(0.8, 1.4)
-                            else:
-                                delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
-                                delay = min(delay, 2)
-                                delay *= random.uniform(0.8, 1.4)
-
-                            yield self.clock.sleep(delay)
-                            retries_left -= 1
-                        else:
-                            raise
-            finally:
-                outbound_logger.info(
-                    "{%s} [%s] Result: %s",
-                    txn_id,
-                    destination,
-                    log_result,
-                )
+                        yield self.clock.sleep(delay)
+                        retries_left -= 1
+                    else:
+                        raise
+
+            logger.info(
+                "{%s} [%s] Got response headers: %d %s",
+                request.txn_id,
+                destination,
+                response.code,
+                response.phrase.decode('ascii', errors='replace'),
+            )
 
             if 200 <= response.code < 300:
                 pass
             else:
                 # :'(
                 # Update transactions table?
-                with logcontext.PreserveLoggingContext():
-                    body = yield readBody(response)
+                d = treq.content(response)
+                d = timeout_deferred(
+                    d,
+                    timeout=_sec_timeout,
+                    reactor=self.hs.get_reactor(),
+                )
+                body = yield make_deferred_yieldable(d)
                 raise HttpResponseException(
                     response.code, response.phrase, body
                 )
@@ -297,11 +439,11 @@ class MatrixFederationHttpClient(object):
         auth_headers = []
 
         for key, sig in request["signatures"][self.server_name].items():
-            auth_headers.append(bytes(
+            auth_headers.append((
                 "X-Matrix origin=%s,key=\"%s\",sig=\"%s\"" % (
                     self.server_name, key, sig,
-                )
-            ))
+                )).encode('ascii')
+            )
 
         headers_dict[b"Authorization"] = auth_headers
 
@@ -346,38 +488,27 @@ class MatrixFederationHttpClient(object):
             is not on our federation whitelist
         """
 
-        if not json_data_callback:
-            def json_data_callback():
-                return data
+        request = MatrixFederationRequest(
+            method="PUT",
+            destination=destination,
+            path=path,
+            query=args,
+            json_callback=json_data_callback,
+            json=data,
+        )
 
-        def body_callback(method, url_bytes, headers_dict):
-            json_data = json_data_callback()
-            self.sign_request(
-                destination, method, url_bytes, headers_dict, json_data
-            )
-            producer = _JsonProducer(json_data)
-            return producer
-
-        response = yield self._request(
-            destination,
-            "PUT",
-            path,
-            body_callback=body_callback,
-            headers_dict={"Content-Type": ["application/json"]},
-            query_bytes=encode_query_args(args),
+        response = yield self._send_request(
+            request,
             long_retries=long_retries,
             timeout=timeout,
             ignore_backoff=ignore_backoff,
             backoff_on_404=backoff_on_404,
         )
 
-        if 200 <= response.code < 300:
-            # We need to update the transactions table to say it was sent?
-            check_content_type_is_json(response.headers)
-
-        with logcontext.PreserveLoggingContext():
-            body = yield readBody(response)
-        defer.returnValue(json.loads(body))
+        body = yield _handle_json_response(
+            self.hs.get_reactor(), self.default_timeout, request, response,
+        )
+        defer.returnValue(body)
 
     @defer.inlineCallbacks
     def post_json(self, destination, path, data={}, long_retries=False,
@@ -411,32 +542,30 @@ class MatrixFederationHttpClient(object):
             is not on our federation whitelist
         """
 
-        def body_callback(method, url_bytes, headers_dict):
-            self.sign_request(
-                destination, method, url_bytes, headers_dict, data
-            )
-            return _JsonProducer(data)
-
-        response = yield self._request(
-            destination,
-            "POST",
-            path,
-            query_bytes=encode_query_args(args),
-            body_callback=body_callback,
-            headers_dict={"Content-Type": ["application/json"]},
+        request = MatrixFederationRequest(
+            method="POST",
+            destination=destination,
+            path=path,
+            query=args,
+            json=data,
+        )
+
+        response = yield self._send_request(
+            request,
             long_retries=long_retries,
             timeout=timeout,
             ignore_backoff=ignore_backoff,
         )
 
-        if 200 <= response.code < 300:
-            # We need to update the transactions table to say it was sent?
-            check_content_type_is_json(response.headers)
-
-        with logcontext.PreserveLoggingContext():
-            body = yield readBody(response)
+        if timeout:
+            _sec_timeout = timeout / 1000
+        else:
+            _sec_timeout = self.default_timeout
 
-        defer.returnValue(json.loads(body))
+        body = yield _handle_json_response(
+            self.hs.get_reactor(), _sec_timeout, request, response,
+        )
+        defer.returnValue(body)
 
     @defer.inlineCallbacks
     def get_json(self, destination, path, args=None, retry_on_dns_fail=True,
@@ -471,29 +600,24 @@ class MatrixFederationHttpClient(object):
 
         logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
 
-        def body_callback(method, url_bytes, headers_dict):
-            self.sign_request(destination, method, url_bytes, headers_dict)
-            return None
+        request = MatrixFederationRequest(
+            method="GET",
+            destination=destination,
+            path=path,
+            query=args,
+        )
 
-        response = yield self._request(
-            destination,
-            "GET",
-            path,
-            query_bytes=encode_query_args(args),
-            body_callback=body_callback,
+        response = yield self._send_request(
+            request,
             retry_on_dns_fail=retry_on_dns_fail,
             timeout=timeout,
             ignore_backoff=ignore_backoff,
         )
 
-        if 200 <= response.code < 300:
-            # We need to update the transactions table to say it was sent?
-            check_content_type_is_json(response.headers)
-
-        with logcontext.PreserveLoggingContext():
-            body = yield readBody(response)
-
-        defer.returnValue(json.loads(body))
+        body = yield _handle_json_response(
+            self.hs.get_reactor(), self.default_timeout, request, response,
+        )
+        defer.returnValue(body)
 
     @defer.inlineCallbacks
     def delete_json(self, destination, path, long_retries=False,
@@ -523,26 +647,24 @@ class MatrixFederationHttpClient(object):
             Fails with ``FederationDeniedError`` if this destination
             is not on our federation whitelist
         """
+        request = MatrixFederationRequest(
+            method="DELETE",
+            destination=destination,
+            path=path,
+            query=args,
+        )
 
-        response = yield self._request(
-            destination,
-            "DELETE",
-            path,
-            query_bytes=encode_query_args(args),
-            headers_dict={"Content-Type": ["application/json"]},
+        response = yield self._send_request(
+            request,
             long_retries=long_retries,
             timeout=timeout,
             ignore_backoff=ignore_backoff,
         )
 
-        if 200 <= response.code < 300:
-            # We need to update the transactions table to say it was sent?
-            check_content_type_is_json(response.headers)
-
-        with logcontext.PreserveLoggingContext():
-            body = yield readBody(response)
-
-        defer.returnValue(json.loads(body))
+        body = yield _handle_json_response(
+            self.hs.get_reactor(), self.default_timeout, request, response,
+        )
+        defer.returnValue(body)
 
     @defer.inlineCallbacks
     def get_file(self, destination, path, output_stream, args={},
@@ -569,26 +691,15 @@ class MatrixFederationHttpClient(object):
             Fails with ``FederationDeniedError`` if this destination
             is not on our federation whitelist
         """
+        request = MatrixFederationRequest(
+            method="GET",
+            destination=destination,
+            path=path,
+            query=args,
+        )
 
-        encoded_args = {}
-        for k, vs in args.items():
-            if isinstance(vs, string_types):
-                vs = [vs]
-            encoded_args[k] = [v.encode("UTF-8") for v in vs]
-
-        query_bytes = urllib.urlencode(encoded_args, True)
-        logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail)
-
-        def body_callback(method, url_bytes, headers_dict):
-            self.sign_request(destination, method, url_bytes, headers_dict)
-            return None
-
-        response = yield self._request(
-            destination,
-            "GET",
-            path,
-            query_bytes=query_bytes,
-            body_callback=body_callback,
+        response = yield self._send_request(
+            request,
             retry_on_dns_fail=retry_on_dns_fail,
             ignore_backoff=ignore_backoff,
         )
@@ -596,14 +707,25 @@ class MatrixFederationHttpClient(object):
         headers = dict(response.headers.getAllRawHeaders())
 
         try:
-            with logcontext.PreserveLoggingContext():
-                length = yield _readBodyToFile(
-                    response, output_stream, max_size
-                )
-        except Exception:
-            logger.exception("Failed to download body")
+            d = _readBodyToFile(response, output_stream, max_size)
+            d.addTimeout(self.default_timeout, self.hs.get_reactor())
+            length = yield make_deferred_yieldable(d)
+        except Exception as e:
+            logger.warn(
+                "{%s} [%s] Error reading response: %s",
+                request.txn_id,
+                request.destination,
+                e,
+            )
             raise
-
+        logger.info(
+            "{%s} [%s] Completed: %d %s [%d bytes]",
+            request.txn_id,
+            request.destination,
+            response.code,
+            response.phrase.decode('ascii', errors='replace'),
+            length,
+        )
         defer.returnValue((length, headers))
 
 
@@ -639,30 +761,6 @@ def _readBodyToFile(response, stream, max_size):
     return d
 
 
-class _JsonProducer(object):
-    """ Used by the twisted http client to create the HTTP body from json
-    """
-    def __init__(self, jsn):
-        self.reset(jsn)
-
-    def reset(self, jsn):
-        self.body = encode_canonical_json(jsn)
-        self.length = len(self.body)
-
-    def startProducing(self, consumer):
-        consumer.write(self.body)
-        return defer.succeed(None)
-
-    def pauseProducing(self):
-        pass
-
-    def stopProducing(self):
-        pass
-
-    def resumeProducing(self):
-        pass
-
-
 def _flatten_response_never_received(e):
     if hasattr(e, "reasons"):
         reasons = ", ".join(
@@ -693,7 +791,7 @@ def check_content_type_is_json(headers):
             "No Content-Type header"
         )
 
-    c_type = c_type[0]  # only the first header
+    c_type = c_type[0].decode('ascii')  # only the first header
     val, options = cgi.parse_header(c_type)
     if val != "application/json":
         raise RuntimeError(
@@ -711,6 +809,6 @@ def encode_query_args(args):
             vs = [vs]
         encoded_args[k] = [v.encode("UTF-8") for v in vs]
 
-    query_bytes = urllib.urlencode(encoded_args, True)
+    query_bytes = urllib.parse.urlencode(encoded_args, True)
 
-    return query_bytes
+    return query_bytes.encode('utf8')
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 72c2654678..fedb4e6b18 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -162,7 +162,7 @@ class RequestMetrics(object):
         with _in_flight_requests_lock:
             _in_flight_requests.add(self)
 
-    def stop(self, time_sec, request):
+    def stop(self, time_sec, response_code, sent_bytes):
         with _in_flight_requests_lock:
             _in_flight_requests.discard(self)
 
@@ -179,35 +179,35 @@ class RequestMetrics(object):
                 )
                 return
 
-        response_code = str(request.code)
+        response_code = str(response_code)
 
-        outgoing_responses_counter.labels(request.method, response_code).inc()
+        outgoing_responses_counter.labels(self.method, response_code).inc()
 
-        response_count.labels(request.method, self.name, tag).inc()
+        response_count.labels(self.method, self.name, tag).inc()
 
-        response_timer.labels(request.method, self.name, tag, response_code).observe(
+        response_timer.labels(self.method, self.name, tag, response_code).observe(
             time_sec - self.start
         )
 
         resource_usage = context.get_resource_usage()
 
-        response_ru_utime.labels(request.method, self.name, tag).inc(
+        response_ru_utime.labels(self.method, self.name, tag).inc(
             resource_usage.ru_utime,
         )
-        response_ru_stime.labels(request.method, self.name, tag).inc(
+        response_ru_stime.labels(self.method, self.name, tag).inc(
             resource_usage.ru_stime,
         )
-        response_db_txn_count.labels(request.method, self.name, tag).inc(
+        response_db_txn_count.labels(self.method, self.name, tag).inc(
             resource_usage.db_txn_count
         )
-        response_db_txn_duration.labels(request.method, self.name, tag).inc(
+        response_db_txn_duration.labels(self.method, self.name, tag).inc(
             resource_usage.db_txn_duration_sec
         )
-        response_db_sched_duration.labels(request.method, self.name, tag).inc(
+        response_db_sched_duration.labels(self.method, self.name, tag).inc(
             resource_usage.db_sched_duration_sec
         )
 
-        response_size.labels(request.method, self.name, tag).inc(request.sentLength)
+        response_size.labels(self.method, self.name, tag).inc(sent_bytes)
 
         # We always call this at the end to ensure that we update the metrics
         # regardless of whether a call to /metrics while the request was in
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 88ed3714f9..9579e8cd0d 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -82,10 +82,13 @@ class SynapseRequest(Request):
         )
 
     def get_request_id(self):
-        return "%s-%i" % (self.method, self.request_seq)
+        return "%s-%i" % (self.method.decode('ascii'), self.request_seq)
 
     def get_redacted_uri(self):
-        return redact_uri(self.uri)
+        uri = self.uri
+        if isinstance(uri, bytes):
+            uri = self.uri.decode('ascii')
+        return redact_uri(uri)
 
     def get_user_agent(self):
         return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
@@ -116,7 +119,7 @@ class SynapseRequest(Request):
             # dispatching to the handler, so that the handler
             # can update the servlet name in the request
             # metrics
-            requests_counter.labels(self.method,
+            requests_counter.labels(self.method.decode('ascii'),
                                     self.request_metrics.name).inc()
 
     @contextlib.contextmanager
@@ -204,14 +207,14 @@ class SynapseRequest(Request):
         self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
-            self.start_time, name=servlet_name, method=self.method,
+            self.start_time, name=servlet_name, method=self.method.decode('ascii'),
         )
 
         self.site.access_logger.info(
             "%s - %s - Received request: %s %s",
             self.getClientIP(),
             self.site.site_tag,
-            self.method,
+            self.method.decode('ascii'),
             self.get_redacted_uri()
         )
 
@@ -277,15 +280,15 @@ class SynapseRequest(Request):
             int(usage.db_txn_count),
             self.sentLength,
             code,
-            self.method,
+            self.method.decode('ascii'),
             self.get_redacted_uri(),
-            self.clientproto,
+            self.clientproto.decode('ascii', errors='replace'),
             user_agent,
             usage.evt_db_fetch_count,
         )
 
         try:
-            self.request_metrics.stop(self.finish_time, self)
+            self.request_metrics.stop(self.finish_time, self.code, self.sentLength)
         except Exception as e:
             logger.warn("Failed to stop metrics: %r", e)
 
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 550f8443f7..59900aa5d1 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -18,8 +18,11 @@ import gc
 import logging
 import os
 import platform
+import threading
 import time
 
+import six
+
 import attr
 from prometheus_client import Counter, Gauge, Histogram
 from prometheus_client.core import REGISTRY, GaugeMetricFamily
@@ -68,7 +71,7 @@ class LaterGauge(object):
             return
 
         if isinstance(calls, dict):
-            for k, v in calls.items():
+            for k, v in six.iteritems(calls):
                 g.add_metric(k, v)
         else:
             g.add_metric([], calls)
@@ -87,6 +90,109 @@ class LaterGauge(object):
         all_gauges[self.name] = self
 
 
+class InFlightGauge(object):
+    """Tracks number of things (e.g. requests, Measure blocks, etc) in flight
+    at any given time.
+
+    Each InFlightGauge will create a metric called `<name>_total` that counts
+    the number of in flight blocks, as well as a metrics for each item in the
+    given `sub_metrics` as `<name>_<sub_metric>` which will get updated by the
+    callbacks.
+
+    Args:
+        name (str)
+        desc (str)
+        labels (list[str])
+        sub_metrics (list[str]): A list of sub metrics that the callbacks
+            will update.
+    """
+
+    def __init__(self, name, desc, labels, sub_metrics):
+        self.name = name
+        self.desc = desc
+        self.labels = labels
+        self.sub_metrics = sub_metrics
+
+        # Create a class which have the sub_metrics values as attributes, which
+        # default to 0 on initialization. Used to pass to registered callbacks.
+        self._metrics_class = attr.make_class(
+            "_MetricsEntry",
+            attrs={x: attr.ib(0) for x in sub_metrics},
+            slots=True,
+        )
+
+        # Counts number of in flight blocks for a given set of label values
+        self._registrations = {}
+
+        # Protects access to _registrations
+        self._lock = threading.Lock()
+
+        self._register_with_collector()
+
+    def register(self, key, callback):
+        """Registers that we've entered a new block with labels `key`.
+
+        `callback` gets called each time the metrics are collected. The same
+        value must also be given to `unregister`.
+
+        `callback` gets called with an object that has an attribute per
+        sub_metric, which should be updated with the necessary values. Note that
+        the metrics object is shared between all callbacks registered with the
+        same key.
+
+        Note that `callback` may be called on a separate thread.
+        """
+        with self._lock:
+            self._registrations.setdefault(key, set()).add(callback)
+
+    def unregister(self, key, callback):
+        """Registers that we've exited a block with labels `key`.
+        """
+
+        with self._lock:
+            self._registrations.setdefault(key, set()).discard(callback)
+
+    def collect(self):
+        """Called by prometheus client when it reads metrics.
+
+        Note: may be called by a separate thread.
+        """
+        in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels)
+
+        metrics_by_key = {}
+
+        # We copy so that we don't mutate the list while iterating
+        with self._lock:
+            keys = list(self._registrations)
+
+        for key in keys:
+            with self._lock:
+                callbacks = set(self._registrations[key])
+
+            in_flight.add_metric(key, len(callbacks))
+
+            metrics = self._metrics_class()
+            metrics_by_key[key] = metrics
+            for callback in callbacks:
+                callback(metrics)
+
+        yield in_flight
+
+        for name in self.sub_metrics:
+            gauge = GaugeMetricFamily("_".join([self.name, name]), "", labels=self.labels)
+            for key, metrics in six.iteritems(metrics_by_key):
+                gauge.add_metric(key, getattr(metrics, name))
+            yield gauge
+
+    def _register_with_collector(self):
+        if self.name in all_gauges.keys():
+            logger.warning("%s already registered, reregistering" % (self.name,))
+            REGISTRY.unregister(all_gauges.pop(self.name))
+
+        REGISTRY.register(self)
+        all_gauges[self.name] = self
+
+
 #
 # Detailed CPU metrics
 #
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 82f391481c..f1d92c1395 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -25,11 +25,7 @@ from synapse.api.errors import AuthError
 from synapse.handlers.presence import format_user_presence_state
 from synapse.metrics import LaterGauge
 from synapse.types import StreamToken
-from synapse.util.async_helpers import (
-    DeferredTimeoutError,
-    ObservableDeferred,
-    add_timeout_to_deferred,
-)
+from synapse.util.async_helpers import ObservableDeferred, timeout_deferred
 from synapse.util.logcontext import PreserveLoggingContext, run_in_background
 from synapse.util.logutils import log_function
 from synapse.util.metrics import Measure
@@ -337,7 +333,7 @@ class Notifier(object):
                     # Now we wait for the _NotifierUserStream to be told there
                     # is a new token.
                     listener = user_stream.new_listener(prev_token)
-                    add_timeout_to_deferred(
+                    listener.deferred = timeout_deferred(
                         listener.deferred,
                         (end_time - now) / 1000.,
                         self.hs.get_reactor(),
@@ -354,7 +350,7 @@ class Notifier(object):
                     # Update the prev_token to the current_token since nothing
                     # has happened between the old prev_token and the current_token
                     prev_token = current_token
-                except DeferredTimeoutError:
+                except defer.TimeoutError:
                     break
                 except defer.CancelledError:
                     break
@@ -559,15 +555,16 @@ class Notifier(object):
             if end_time <= now:
                 break
 
-            add_timeout_to_deferred(
-                listener.deferred.addTimeout,
-                (end_time - now) / 1000.,
-                self.hs.get_reactor(),
+            listener.deferred = timeout_deferred(
+                listener.deferred,
+                timeout=(end_time - now) / 1000.,
+                reactor=self.hs.get_reactor(),
             )
+
             try:
                 with PreserveLoggingContext():
                     yield listener.deferred
-            except DeferredTimeoutError:
+            except defer.TimeoutError:
                 break
             except defer.CancelledError:
                 break
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 81e18bcf7d..48abd5e4d6 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 import logging
 
+import six
+
 from prometheus_client import Counter
 
 from twisted.internet import defer
@@ -26,6 +28,9 @@ from synapse.util.metrics import Measure
 
 from . import push_rule_evaluator, push_tools
 
+if six.PY3:
+    long = int
+
 logger = logging.getLogger(__name__)
 
 http_push_processed_counter = Counter("synapse_http_httppusher_http_pushes_processed", "")
@@ -96,7 +101,7 @@ class HttpPusher(object):
 
     @defer.inlineCallbacks
     def on_new_notifications(self, min_stream_ordering, max_stream_ordering):
-        self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering)
+        self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0)
         yield self._process()
 
     @defer.inlineCallbacks
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index bfa6df7b68..b78ce10396 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -17,10 +17,11 @@ import email.mime.multipart
 import email.utils
 import logging
 import time
-import urllib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 
+from six.moves import urllib
+
 import bleach
 import jinja2
 
@@ -474,7 +475,7 @@ class Mailer(object):
         # XXX: make r0 once API is stable
         return "%s_matrix/client/unstable/pushers/remove?%s" % (
             self.hs.config.public_baseurl,
-            urllib.urlencode(params),
+            urllib.parse.urlencode(params),
         )
 
 
@@ -561,7 +562,7 @@ def _create_mxc_to_http_filter(config):
         return "%s_matrix/media/v1/thumbnail/%s?%s%s" % (
             config.public_baseurl,
             serverAndMediaId,
-            urllib.urlencode(params),
+            urllib.parse.urlencode(params),
             fragment or "",
         )
 
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 942d7c721f..0d8de600cf 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -40,9 +40,10 @@ REQUIREMENTS = {
     "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"],
     "service_identity>=1.0.0": ["service_identity>=1.0.0"],
     "Twisted>=17.1.0": ["twisted>=17.1.0"],
+    "treq>=15.1": ["treq>=15.1"],
 
-    # We use crypto.get_elliptic_curve which is only supported in >=0.15
-    "pyopenssl>=0.15": ["OpenSSL>=0.15"],
+    # Twisted has required pyopenssl 16.0 since about Twisted 16.6.
+    "pyopenssl>=16.0.0": ["OpenSSL>=16.0.0"],
 
     "pyyaml": ["yaml"],
     "pyasn1": ["pyasn1"],
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
index 8206a988f7..21b8c468fa 100644
--- a/synapse/replication/slave/storage/devices.py
+++ b/synapse/replication/slave/storage/devices.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import six
+
 from synapse.storage import DataStore
 from synapse.storage.end_to_end_keys import EndToEndKeyStore
 from synapse.util.caches.stream_change_cache import StreamChangeCache
@@ -21,6 +23,13 @@ from ._base import BaseSlavedStore
 from ._slaved_id_tracker import SlavedIdTracker
 
 
+def __func__(inp):
+    if six.PY3:
+        return inp
+    else:
+        return inp.__func__
+
+
 class SlavedDeviceStore(BaseSlavedStore):
     def __init__(self, db_conn, hs):
         super(SlavedDeviceStore, self).__init__(db_conn, hs)
@@ -38,14 +47,14 @@ class SlavedDeviceStore(BaseSlavedStore):
             "DeviceListFederationStreamChangeCache", device_list_max,
         )
 
-    get_device_stream_token = DataStore.get_device_stream_token.__func__
-    get_user_whose_devices_changed = DataStore.get_user_whose_devices_changed.__func__
-    get_devices_by_remote = DataStore.get_devices_by_remote.__func__
-    _get_devices_by_remote_txn = DataStore._get_devices_by_remote_txn.__func__
-    _get_e2e_device_keys_txn = DataStore._get_e2e_device_keys_txn.__func__
-    mark_as_sent_devices_by_remote = DataStore.mark_as_sent_devices_by_remote.__func__
+    get_device_stream_token = __func__(DataStore.get_device_stream_token)
+    get_user_whose_devices_changed = __func__(DataStore.get_user_whose_devices_changed)
+    get_devices_by_remote = __func__(DataStore.get_devices_by_remote)
+    _get_devices_by_remote_txn = __func__(DataStore._get_devices_by_remote_txn)
+    _get_e2e_device_keys_txn = __func__(DataStore._get_e2e_device_keys_txn)
+    mark_as_sent_devices_by_remote = __func__(DataStore.mark_as_sent_devices_by_remote)
     _mark_as_sent_devices_by_remote_txn = (
-        DataStore._mark_as_sent_devices_by_remote_txn.__func__
+        __func__(DataStore._mark_as_sent_devices_by_remote_txn)
     )
     count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"]
 
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 74e892c104..5dc7b3fffc 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -590,9 +590,9 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
 pending_commands = LaterGauge(
     "synapse_replication_tcp_protocol_pending_commands",
     "",
-    ["name", "conn_id"],
+    ["name"],
     lambda: {
-        (p.name, p.conn_id): len(p.pending_commands) for p in connected_connections
+        (p.name,): len(p.pending_commands) for p in connected_connections
     },
 )
 
@@ -607,9 +607,9 @@ def transport_buffer_size(protocol):
 transport_send_buffer = LaterGauge(
     "synapse_replication_tcp_protocol_transport_send_buffer",
     "",
-    ["name", "conn_id"],
+    ["name"],
     lambda: {
-        (p.name, p.conn_id): transport_buffer_size(p) for p in connected_connections
+        (p.name,): transport_buffer_size(p) for p in connected_connections
     },
 )
 
@@ -632,9 +632,9 @@ def transport_kernel_read_buffer_size(protocol, read=True):
 tcp_transport_kernel_send_buffer = LaterGauge(
     "synapse_replication_tcp_protocol_transport_kernel_send_buffer",
     "",
-    ["name", "conn_id"],
+    ["name"],
     lambda: {
-        (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False)
+        (p.name,): transport_kernel_read_buffer_size(p, False)
         for p in connected_connections
     },
 )
@@ -643,9 +643,9 @@ tcp_transport_kernel_send_buffer = LaterGauge(
 tcp_transport_kernel_read_buffer = LaterGauge(
     "synapse_replication_tcp_protocol_transport_kernel_read_buffer",
     "",
-    ["name", "conn_id"],
+    ["name"],
     lambda: {
-        (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True)
+        (p.name,): transport_kernel_read_buffer_size(p, True)
         for p in connected_connections
     },
 )
@@ -654,9 +654,9 @@ tcp_transport_kernel_read_buffer = LaterGauge(
 tcp_inbound_commands = LaterGauge(
     "synapse_replication_tcp_protocol_inbound_commands",
     "",
-    ["command", "name", "conn_id"],
+    ["command", "name"],
     lambda: {
-        (k[0], p.name, p.conn_id): count
+        (k[0], p.name,): count
         for p in connected_connections
         for k, count in iteritems(p.inbound_commands_counter)
     },
@@ -665,9 +665,9 @@ tcp_inbound_commands = LaterGauge(
 tcp_outbound_commands = LaterGauge(
     "synapse_replication_tcp_protocol_outbound_commands",
     "",
-    ["command", "name", "conn_id"],
+    ["command", "name"],
     lambda: {
-        (k[0], p.name, p.conn_id): count
+        (k[0], p.name,): count
         for p in connected_connections
         for k, count in iteritems(p.outbound_commands_counter)
     },
diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
index 55fe701c5c..c1e626be3f 100644
--- a/synapse/replication/tcp/streams.py
+++ b/synapse/replication/tcp/streams.py
@@ -196,7 +196,7 @@ class Stream(object):
             )
 
             if len(rows) >= MAX_EVENTS_BEHIND:
-                raise Exception("stream %s has fallen behined" % (self.NAME))
+                raise Exception("stream %s has fallen behind" % (self.NAME))
         else:
             rows = yield self.update_function(
                 from_token, current_token,
diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py
index ad536ab570..41534b8c2a 100644
--- a/synapse/rest/client/v1/admin.py
+++ b/synapse/rest/client/v1/admin.py
@@ -101,7 +101,7 @@ class UserRegisterServlet(ClientV1RestServlet):
 
         nonce = self.hs.get_secrets().token_hex(64)
         self.nonces[nonce] = int(self.reactor.seconds())
-        return (200, {"nonce": nonce.encode('ascii')})
+        return (200, {"nonce": nonce})
 
     @defer.inlineCallbacks
     def on_POST(self, request):
@@ -164,7 +164,7 @@ class UserRegisterServlet(ClientV1RestServlet):
             key=self.hs.config.registration_shared_secret.encode(),
             digestmod=hashlib.sha1,
         )
-        want_mac.update(nonce)
+        want_mac.update(nonce.encode('utf8'))
         want_mac.update(b"\x00")
         want_mac.update(username)
         want_mac.update(b"\x00")
@@ -173,7 +173,10 @@ class UserRegisterServlet(ClientV1RestServlet):
         want_mac.update(b"admin" if admin else b"notadmin")
         want_mac = want_mac.hexdigest()
 
-        if not hmac.compare_digest(want_mac, got_mac.encode('ascii')):
+        if not hmac.compare_digest(
+                want_mac.encode('ascii'),
+                got_mac.encode('ascii')
+        ):
             raise SynapseError(403, "HMAC incorrect")
 
         # Reuse the parts of RegisterRestServlet to reduce code duplication
diff --git a/synapse/rest/client/v1/events.py b/synapse/rest/client/v1/events.py
index 0f3a2e8b51..cd9b3bdbd1 100644
--- a/synapse/rest/client/v1/events.py
+++ b/synapse/rest/client/v1/events.py
@@ -45,20 +45,20 @@ class EventStreamRestServlet(ClientV1RestServlet):
         is_guest = requester.is_guest
         room_id = None
         if is_guest:
-            if "room_id" not in request.args:
+            if b"room_id" not in request.args:
                 raise SynapseError(400, "Guest users must specify room_id param")
-        if "room_id" in request.args:
-            room_id = request.args["room_id"][0]
+        if b"room_id" in request.args:
+            room_id = request.args[b"room_id"][0].decode('ascii')
 
         pagin_config = PaginationConfig.from_request(request)
         timeout = EventStreamRestServlet.DEFAULT_LONGPOLL_TIME_MS
-        if "timeout" in request.args:
+        if b"timeout" in request.args:
             try:
-                timeout = int(request.args["timeout"][0])
+                timeout = int(request.args[b"timeout"][0])
             except ValueError:
                 raise SynapseError(400, "timeout must be in milliseconds.")
 
-        as_client_event = "raw" not in request.args
+        as_client_event = b"raw" not in request.args
 
         chunk = yield self.event_stream_handler.get_stream(
             requester.user.to_string(),
diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py
index fd5f85b53e..3ead75cb77 100644
--- a/synapse/rest/client/v1/initial_sync.py
+++ b/synapse/rest/client/v1/initial_sync.py
@@ -32,7 +32,7 @@ class InitialSyncRestServlet(ClientV1RestServlet):
     @defer.inlineCallbacks
     def on_GET(self, request):
         requester = yield self.auth.get_user_by_req(request)
-        as_client_event = "raw" not in request.args
+        as_client_event = b"raw" not in request.args
         pagination_config = PaginationConfig.from_request(request)
         include_archived = parse_boolean(request, "archived", default=False)
         content = yield self.initial_sync_handler.snapshot_all_rooms(
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index cb85fa1436..0010699d31 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 
 import logging
-import urllib
 import xml.etree.ElementTree as ET
 
-from six.moves.urllib import parse as urlparse
+from six.moves import urllib
 
 from canonicaljson import json
 from saml2 import BINDING_HTTP_POST, config
@@ -134,7 +133,7 @@ class LoginRestServlet(ClientV1RestServlet):
                                        LoginRestServlet.SAML2_TYPE):
                 relay_state = ""
                 if "relay_state" in login_submission:
-                    relay_state = "&RelayState=" + urllib.quote(
+                    relay_state = "&RelayState=" + urllib.parse.quote(
                                   login_submission["relay_state"])
                 result = {
                     "uri": "%s%s" % (self.idp_redirect_url, relay_state)
@@ -366,7 +365,7 @@ class SAML2RestServlet(ClientV1RestServlet):
             (user_id, token) = yield handler.register_saml2(username)
             # Forward to the RelayState callback along with ava
             if 'RelayState' in request.args:
-                request.redirect(urllib.unquote(
+                request.redirect(urllib.parse.unquote(
                                  request.args['RelayState'][0]) +
                                  '?status=authenticated&access_token=' +
                                  token + '&user_id=' + user_id + '&ava=' +
@@ -377,7 +376,7 @@ class SAML2RestServlet(ClientV1RestServlet):
                                      "user_id": user_id, "token": token,
                                      "ava": saml2_auth.ava}))
         elif 'RelayState' in request.args:
-            request.redirect(urllib.unquote(
+            request.redirect(urllib.parse.unquote(
                              request.args['RelayState'][0]) +
                              '?status=not_authenticated')
             finish_request(request)
@@ -390,21 +389,22 @@ class CasRedirectServlet(ClientV1RestServlet):
 
     def __init__(self, hs):
         super(CasRedirectServlet, self).__init__(hs)
-        self.cas_server_url = hs.config.cas_server_url
-        self.cas_service_url = hs.config.cas_service_url
+        self.cas_server_url = hs.config.cas_server_url.encode('ascii')
+        self.cas_service_url = hs.config.cas_service_url.encode('ascii')
 
     def on_GET(self, request):
         args = request.args
-        if "redirectUrl" not in args:
+        if b"redirectUrl" not in args:
             return (400, "Redirect URL not specified for CAS auth")
-        client_redirect_url_param = urllib.urlencode({
-            "redirectUrl": args["redirectUrl"][0]
-        })
-        hs_redirect_url = self.cas_service_url + "/_matrix/client/api/v1/login/cas/ticket"
-        service_param = urllib.urlencode({
-            "service": "%s?%s" % (hs_redirect_url, client_redirect_url_param)
-        })
-        request.redirect("%s/login?%s" % (self.cas_server_url, service_param))
+        client_redirect_url_param = urllib.parse.urlencode({
+            b"redirectUrl": args[b"redirectUrl"][0]
+        }).encode('ascii')
+        hs_redirect_url = (self.cas_service_url +
+                           b"/_matrix/client/api/v1/login/cas/ticket")
+        service_param = urllib.parse.urlencode({
+            b"service": b"%s?%s" % (hs_redirect_url, client_redirect_url_param)
+        }).encode('ascii')
+        request.redirect(b"%s/login?%s" % (self.cas_server_url, service_param))
         finish_request(request)
 
 
@@ -422,11 +422,11 @@ class CasTicketServlet(ClientV1RestServlet):
 
     @defer.inlineCallbacks
     def on_GET(self, request):
-        client_redirect_url = request.args["redirectUrl"][0]
+        client_redirect_url = request.args[b"redirectUrl"][0]
         http_client = self.hs.get_simple_http_client()
         uri = self.cas_server_url + "/proxyValidate"
         args = {
-            "ticket": request.args["ticket"],
+            "ticket": request.args[b"ticket"][0].decode('ascii'),
             "service": self.cas_service_url
         }
         try:
@@ -471,11 +471,11 @@ class CasTicketServlet(ClientV1RestServlet):
         finish_request(request)
 
     def add_login_token_to_redirect_url(self, url, token):
-        url_parts = list(urlparse.urlparse(url))
-        query = dict(urlparse.parse_qsl(url_parts[4]))
+        url_parts = list(urllib.parse.urlparse(url))
+        query = dict(urllib.parse.parse_qsl(url_parts[4]))
         query.update({"loginToken": token})
-        url_parts[4] = urllib.urlencode(query)
-        return urlparse.urlunparse(url_parts)
+        url_parts[4] = urllib.parse.urlencode(query).encode('ascii')
+        return urllib.parse.urlunparse(url_parts)
 
     def parse_cas_response(self, cas_response_body):
         user = None
diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py
index 6e95d9bec2..9382b1f124 100644
--- a/synapse/rest/client/v1/push_rule.py
+++ b/synapse/rest/client/v1/push_rule.py
@@ -46,7 +46,7 @@ class PushRuleRestServlet(ClientV1RestServlet):
         try:
             priority_class = _priority_class_from_spec(spec)
         except InvalidRuleException as e:
-            raise SynapseError(400, e.message)
+            raise SynapseError(400, str(e))
 
         requester = yield self.auth.get_user_by_req(request)
 
@@ -73,7 +73,7 @@ class PushRuleRestServlet(ClientV1RestServlet):
                 content,
             )
         except InvalidRuleException as e:
-            raise SynapseError(400, e.message)
+            raise SynapseError(400, str(e))
 
         before = parse_string(request, "before")
         if before:
@@ -95,9 +95,9 @@ class PushRuleRestServlet(ClientV1RestServlet):
             )
             self.notify_user(user_id)
         except InconsistentRuleException as e:
-            raise SynapseError(400, e.message)
+            raise SynapseError(400, str(e))
         except RuleNotFoundException as e:
-            raise SynapseError(400, e.message)
+            raise SynapseError(400, str(e))
 
         defer.returnValue((200, {}))
 
@@ -142,10 +142,10 @@ class PushRuleRestServlet(ClientV1RestServlet):
                 PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR
             )
 
-        if path[0] == '':
+        if path[0] == b'':
             defer.returnValue((200, rules))
-        elif path[0] == 'global':
-            path = path[1:]
+        elif path[0] == b'global':
+            path = [x.decode('ascii') for x in path[1:]]
             result = _filter_ruleset_with_path(rules['global'], path)
             defer.returnValue((200, result))
         else:
@@ -192,10 +192,10 @@ class PushRuleRestServlet(ClientV1RestServlet):
 def _rule_spec_from_path(path):
     if len(path) < 2:
         raise UnrecognizedRequestError()
-    if path[0] != 'pushrules':
+    if path[0] != b'pushrules':
         raise UnrecognizedRequestError()
 
-    scope = path[1]
+    scope = path[1].decode('ascii')
     path = path[2:]
     if scope != 'global':
         raise UnrecognizedRequestError()
@@ -203,13 +203,13 @@ def _rule_spec_from_path(path):
     if len(path) == 0:
         raise UnrecognizedRequestError()
 
-    template = path[0]
+    template = path[0].decode('ascii')
     path = path[1:]
 
     if len(path) == 0 or len(path[0]) == 0:
         raise UnrecognizedRequestError()
 
-    rule_id = path[0]
+    rule_id = path[0].decode('ascii')
 
     spec = {
         'scope': scope,
@@ -220,7 +220,7 @@ def _rule_spec_from_path(path):
     path = path[1:]
 
     if len(path) > 0 and len(path[0]) > 0:
-        spec['attr'] = path[0]
+        spec['attr'] = path[0].decode('ascii')
 
     return spec
 
diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py
index 182a68b1e2..b84f0260f2 100644
--- a/synapse/rest/client/v1/pusher.py
+++ b/synapse/rest/client/v1/pusher.py
@@ -59,7 +59,7 @@ class PushersRestServlet(ClientV1RestServlet):
         ]
 
         for p in pushers:
-            for k, v in p.items():
+            for k, v in list(p.items()):
                 if k not in allowed_keys:
                     del p[k]
 
@@ -126,7 +126,7 @@ class PushersSetRestServlet(ClientV1RestServlet):
                 profile_tag=content.get('profile_tag', ""),
             )
         except PusherConfigException as pce:
-            raise SynapseError(400, "Config Error: " + pce.message,
+            raise SynapseError(400, "Config Error: " + str(pce),
                                errcode=Codes.MISSING_PARAM)
 
         self.notifier.on_new_replication_data()
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 976d98387d..663934efd0 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -207,7 +207,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet):
             "sender": requester.user.to_string(),
         }
 
-        if 'ts' in request.args and requester.app_service:
+        if b'ts' in request.args and requester.app_service:
             event_dict['origin_server_ts'] = parse_integer(request, "ts", 0)
 
         event = yield self.event_creation_hander.create_and_send_nonmember_event(
@@ -255,7 +255,9 @@ class JoinRoomAliasServlet(ClientV1RestServlet):
         if RoomID.is_valid(room_identifier):
             room_id = room_identifier
             try:
-                remote_room_hosts = request.args["server_name"]
+                remote_room_hosts = [
+                    x.decode('ascii') for x in request.args[b"server_name"]
+                ]
             except Exception:
                 remote_room_hosts = None
         elif RoomAlias.is_valid(room_identifier):
@@ -461,10 +463,10 @@ class RoomMessageListRestServlet(ClientV1RestServlet):
         pagination_config = PaginationConfig.from_request(
             request, default_limit=10,
         )
-        as_client_event = "raw" not in request.args
-        filter_bytes = parse_string(request, "filter")
+        as_client_event = b"raw" not in request.args
+        filter_bytes = parse_string(request, b"filter", encoding=None)
         if filter_bytes:
-            filter_json = urlparse.unquote(filter_bytes).decode("UTF-8")
+            filter_json = urlparse.unquote(filter_bytes.decode("UTF-8"))
             event_filter = Filter(json.loads(filter_json))
         else:
             event_filter = None
@@ -560,7 +562,7 @@ class RoomEventContextServlet(ClientV1RestServlet):
         # picking the API shape for symmetry with /messages
         filter_bytes = parse_string(request, "filter")
         if filter_bytes:
-            filter_json = urlparse.unquote(filter_bytes).decode("UTF-8")
+            filter_json = urlparse.unquote(filter_bytes)
             event_filter = Filter(json.loads(filter_json))
         else:
             event_filter = None
diff --git a/synapse/rest/client/v1/voip.py b/synapse/rest/client/v1/voip.py
index 62f4c3d93e..53da905eea 100644
--- a/synapse/rest/client/v1/voip.py
+++ b/synapse/rest/client/v1/voip.py
@@ -42,7 +42,11 @@ class VoipRestServlet(ClientV1RestServlet):
             expiry = (self.hs.get_clock().time_msec() + userLifetime) / 1000
             username = "%d:%s" % (expiry, requester.user.to_string())
 
-            mac = hmac.new(turnSecret, msg=username, digestmod=hashlib.sha1)
+            mac = hmac.new(
+                turnSecret.encode(),
+                msg=username.encode(),
+                digestmod=hashlib.sha1
+            )
             # We need to use standard padded base64 encoding here
             # encode_base64 because we need to add the standard padding to get the
             # same result as the TURN server.
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index 1275baa1ba..0251146722 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -25,6 +25,7 @@ from synapse.api.errors import SynapseError
 from synapse.api.filtering import DEFAULT_FILTER_COLLECTION, FilterCollection
 from synapse.events.utils import (
     format_event_for_client_v2_without_room_id,
+    format_event_raw,
     serialize_event,
 )
 from synapse.handlers.presence import format_user_presence_state
@@ -88,7 +89,7 @@ class SyncRestServlet(RestServlet):
 
     @defer.inlineCallbacks
     def on_GET(self, request):
-        if "from" in request.args:
+        if b"from" in request.args:
             # /events used to use 'from', but /sync uses 'since'.
             # Lets be helpful and whine if we see a 'from'.
             raise SynapseError(
@@ -175,17 +176,28 @@ class SyncRestServlet(RestServlet):
 
     @staticmethod
     def encode_response(time_now, sync_result, access_token_id, filter):
+        if filter.event_format == 'client':
+            event_formatter = format_event_for_client_v2_without_room_id
+        elif filter.event_format == 'federation':
+            event_formatter = format_event_raw
+        else:
+            raise Exception("Unknown event format %s" % (filter.event_format, ))
+
         joined = SyncRestServlet.encode_joined(
-            sync_result.joined, time_now, access_token_id, filter.event_fields
+            sync_result.joined, time_now, access_token_id,
+            filter.event_fields,
+            event_formatter,
         )
 
         invited = SyncRestServlet.encode_invited(
             sync_result.invited, time_now, access_token_id,
+            event_formatter,
         )
 
         archived = SyncRestServlet.encode_archived(
             sync_result.archived, time_now, access_token_id,
             filter.event_fields,
+            event_formatter,
         )
 
         return {
@@ -228,7 +240,7 @@ class SyncRestServlet(RestServlet):
         }
 
     @staticmethod
-    def encode_joined(rooms, time_now, token_id, event_fields):
+    def encode_joined(rooms, time_now, token_id, event_fields, event_formatter):
         """
         Encode the joined rooms in a sync result
 
@@ -240,7 +252,9 @@ class SyncRestServlet(RestServlet):
             token_id(int): ID of the user's auth token - used for namespacing
                 of transaction IDs
             event_fields(list<str>): List of event fields to include. If empty,
-            all fields will be returned.
+                all fields will be returned.
+            event_formatter (func[dict]): function to convert from federation format
+                to client format
         Returns:
             dict[str, dict[str, object]]: the joined rooms list, in our
                 response format
@@ -248,13 +262,14 @@ class SyncRestServlet(RestServlet):
         joined = {}
         for room in rooms:
             joined[room.room_id] = SyncRestServlet.encode_room(
-                room, time_now, token_id, only_fields=event_fields
+                room, time_now, token_id, joined=True, only_fields=event_fields,
+                event_formatter=event_formatter,
             )
 
         return joined
 
     @staticmethod
-    def encode_invited(rooms, time_now, token_id):
+    def encode_invited(rooms, time_now, token_id, event_formatter):
         """
         Encode the invited rooms in a sync result
 
@@ -264,7 +279,9 @@ class SyncRestServlet(RestServlet):
             time_now(int): current time - used as a baseline for age
                 calculations
             token_id(int): ID of the user's auth token - used for namespacing
-            of transaction IDs
+                of transaction IDs
+            event_formatter (func[dict]): function to convert from federation format
+                to client format
 
         Returns:
             dict[str, dict[str, object]]: the invited rooms list, in our
@@ -274,7 +291,7 @@ class SyncRestServlet(RestServlet):
         for room in rooms:
             invite = serialize_event(
                 room.invite, time_now, token_id=token_id,
-                event_format=format_event_for_client_v2_without_room_id,
+                event_format=event_formatter,
                 is_invite=True,
             )
             unsigned = dict(invite.get("unsigned", {}))
@@ -288,7 +305,7 @@ class SyncRestServlet(RestServlet):
         return invited
 
     @staticmethod
-    def encode_archived(rooms, time_now, token_id, event_fields):
+    def encode_archived(rooms, time_now, token_id, event_fields, event_formatter):
         """
         Encode the archived rooms in a sync result
 
@@ -300,7 +317,9 @@ class SyncRestServlet(RestServlet):
             token_id(int): ID of the user's auth token - used for namespacing
                 of transaction IDs
             event_fields(list<str>): List of event fields to include. If empty,
-            all fields will be returned.
+                all fields will be returned.
+            event_formatter (func[dict]): function to convert from federation format
+                to client format
         Returns:
             dict[str, dict[str, object]]: The invited rooms list, in our
                 response format
@@ -308,13 +327,18 @@ class SyncRestServlet(RestServlet):
         joined = {}
         for room in rooms:
             joined[room.room_id] = SyncRestServlet.encode_room(
-                room, time_now, token_id, joined=False, only_fields=event_fields
+                room, time_now, token_id, joined=False,
+                only_fields=event_fields,
+                event_formatter=event_formatter,
             )
 
         return joined
 
     @staticmethod
-    def encode_room(room, time_now, token_id, joined=True, only_fields=None):
+    def encode_room(
+            room, time_now, token_id, joined,
+            only_fields, event_formatter,
+    ):
         """
         Args:
             room (JoinedSyncResult|ArchivedSyncResult): sync result for a
@@ -326,14 +350,15 @@ class SyncRestServlet(RestServlet):
             joined (bool): True if the user is joined to this room - will mean
                 we handle ephemeral events
             only_fields(list<str>): Optional. The list of event fields to include.
+            event_formatter (func[dict]): function to convert from federation format
+                to client format
         Returns:
             dict[str, object]: the room, encoded in our response format
         """
         def serialize(event):
-            # TODO(mjark): Respect formatting requirements in the filter.
             return serialize_event(
                 event, time_now, token_id=token_id,
-                event_format=format_event_for_client_v2_without_room_id,
+                event_format=event_formatter,
                 only_event_fields=only_fields,
             )
 
diff --git a/synapse/rest/client/v2_alpha/thirdparty.py b/synapse/rest/client/v2_alpha/thirdparty.py
index d9d379182e..b9b5d07677 100644
--- a/synapse/rest/client/v2_alpha/thirdparty.py
+++ b/synapse/rest/client/v2_alpha/thirdparty.py
@@ -79,7 +79,7 @@ class ThirdPartyUserServlet(RestServlet):
         yield self.auth.get_user_by_req(request, allow_guest=True)
 
         fields = request.args
-        fields.pop("access_token", None)
+        fields.pop(b"access_token", None)
 
         results = yield self.appservice_handler.query_3pe(
             ThirdPartyEntityKind.USER, protocol, fields
@@ -102,7 +102,7 @@ class ThirdPartyLocationServlet(RestServlet):
         yield self.auth.get_user_by_req(request, allow_guest=True)
 
         fields = request.args
-        fields.pop("access_token", None)
+        fields.pop(b"access_token", None)
 
         results = yield self.appservice_handler.query_3pe(
             ThirdPartyEntityKind.LOCATION, protocol, fields
diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py
index b9ee6e1c13..38eb2ee23f 100644
--- a/synapse/rest/key/v1/server_key_resource.py
+++ b/synapse/rest/key/v1/server_key_resource.py
@@ -88,5 +88,5 @@ class LocalKey(Resource):
         )
 
     def getChild(self, name, request):
-        if name == '':
+        if name == b'':
             return self
diff --git a/synapse/rest/key/v2/__init__.py b/synapse/rest/key/v2/__init__.py
index 3491fd2118..cb5abcf826 100644
--- a/synapse/rest/key/v2/__init__.py
+++ b/synapse/rest/key/v2/__init__.py
@@ -22,5 +22,5 @@ from .remote_key_resource import RemoteKey
 class KeyApiV2Resource(Resource):
     def __init__(self, hs):
         Resource.__init__(self)
-        self.putChild("server", LocalKey(hs))
-        self.putChild("query", RemoteKey(hs))
+        self.putChild(b"server", LocalKey(hs))
+        self.putChild(b"query", RemoteKey(hs))
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 7d67e4b064..eb8782aa6e 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -103,7 +103,7 @@ class RemoteKey(Resource):
     def async_render_GET(self, request):
         if len(request.postpath) == 1:
             server, = request.postpath
-            query = {server: {}}
+            query = {server.decode('ascii'): {}}
         elif len(request.postpath) == 2:
             server, key_id = request.postpath
             minimum_valid_until_ts = parse_integer(
@@ -112,11 +112,12 @@ class RemoteKey(Resource):
             arguments = {}
             if minimum_valid_until_ts is not None:
                 arguments["minimum_valid_until_ts"] = minimum_valid_until_ts
-            query = {server: {key_id: arguments}}
+            query = {server.decode('ascii'): {key_id.decode('ascii'): arguments}}
         else:
             raise SynapseError(
                 404, "Not found %r" % request.postpath, Codes.NOT_FOUND
             )
+
         yield self.query_keys(request, query, query_remote_on_cache_miss=True)
 
     def render_POST(self, request):
@@ -135,6 +136,7 @@ class RemoteKey(Resource):
     @defer.inlineCallbacks
     def query_keys(self, request, query, query_remote_on_cache_miss=False):
         logger.info("Handling query for keys %r", query)
+
         store_queries = []
         for server_name, key_ids in query.items():
             if (
diff --git a/synapse/rest/media/v0/content_repository.py b/synapse/rest/media/v0/content_repository.py
index f255f2883f..5a426ff2f6 100644
--- a/synapse/rest/media/v0/content_repository.py
+++ b/synapse/rest/media/v0/content_repository.py
@@ -56,7 +56,7 @@ class ContentRepoResource(resource.Resource):
         # servers.
 
         # TODO: A little crude here, we could do this better.
-        filename = request.path.split('/')[-1]
+        filename = request.path.decode('ascii').split('/')[-1]
         # be paranoid
         filename = re.sub("[^0-9A-z.-_]", "", filename)
 
@@ -78,7 +78,7 @@ class ContentRepoResource(resource.Resource):
             # select private. don't bother setting Expires as all our matrix
             # clients are smart enough to be happy with Cache-Control (right?)
             request.setHeader(
-                "Cache-Control", "public,max-age=86400,s-maxage=86400"
+                b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
             )
 
             d = FileSender().beginFileTransfer(f, request)
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 65f4bd2910..76e479afa3 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -15,9 +15,8 @@
 
 import logging
 import os
-import urllib
 
-from six.moves.urllib import parse as urlparse
+from six.moves import urllib
 
 from twisted.internet import defer
 from twisted.protocols.basic import FileSender
@@ -35,10 +34,15 @@ def parse_media_id(request):
         # This allows users to append e.g. /test.png to the URL. Useful for
         # clients that parse the URL to see content type.
         server_name, media_id = request.postpath[:2]
+
+        if isinstance(server_name, bytes):
+            server_name = server_name.decode('utf-8')
+            media_id = media_id.decode('utf8')
+
         file_name = None
         if len(request.postpath) > 2:
             try:
-                file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8")
+                file_name = urllib.parse.unquote(request.postpath[-1].decode("utf-8"))
             except UnicodeDecodeError:
                 pass
         return server_name, media_id, file_name
@@ -93,22 +97,18 @@ def add_file_headers(request, media_type, file_size, upload_name):
         file_size (int): Size in bytes of the media, if known.
         upload_name (str): The name of the requested file, if any.
     """
+    def _quote(x):
+        return urllib.parse.quote(x.encode("utf-8"))
+
     request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
     if upload_name:
         if is_ascii(upload_name):
-            request.setHeader(
-                b"Content-Disposition",
-                b"inline; filename=%s" % (
-                    urllib.quote(upload_name.encode("utf-8")),
-                ),
-            )
+            disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
         else:
-            request.setHeader(
-                b"Content-Disposition",
-                b"inline; filename*=utf-8''%s" % (
-                    urllib.quote(upload_name.encode("utf-8")),
-                ),
-            )
+            disposition = (
+                "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
+
+        request.setHeader(b"Content-Disposition", disposition)
 
     # cache for at least a day.
     # XXX: we might want to turn this off for data we don't want to
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
index fbfa85f74f..ca90964d1d 100644
--- a/synapse/rest/media/v1/download_resource.py
+++ b/synapse/rest/media/v1/download_resource.py
@@ -47,12 +47,12 @@ class DownloadResource(Resource):
     def _async_render_GET(self, request):
         set_cors_headers(request)
         request.setHeader(
-            "Content-Security-Policy",
-            "default-src 'none';"
-            " script-src 'none';"
-            " plugin-types application/pdf;"
-            " style-src 'unsafe-inline';"
-            " object-src 'self';"
+            b"Content-Security-Policy",
+            b"default-src 'none';"
+            b" script-src 'none';"
+            b" plugin-types application/pdf;"
+            b" style-src 'unsafe-inline';"
+            b" object-src 'self';"
         )
         server_name, media_id, name = parse_media_id(request)
         if server_name == self.server_name:
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 241c972070..a828ff4438 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -20,7 +20,7 @@ import logging
 import os
 import shutil
 
-from six import iteritems
+from six import PY3, iteritems
 from six.moves.urllib import parse as urlparse
 
 import twisted.internet.error
@@ -397,13 +397,13 @@ class MediaRepository(object):
 
             yield finish()
 
-        media_type = headers["Content-Type"][0]
+        media_type = headers[b"Content-Type"][0].decode('ascii')
 
         time_now_ms = self.clock.time_msec()
 
-        content_disposition = headers.get("Content-Disposition", None)
+        content_disposition = headers.get(b"Content-Disposition", None)
         if content_disposition:
-            _, params = cgi.parse_header(content_disposition[0],)
+            _, params = cgi.parse_header(content_disposition[0].decode('ascii'),)
             upload_name = None
 
             # First check if there is a valid UTF-8 filename
@@ -419,9 +419,13 @@ class MediaRepository(object):
                     upload_name = upload_name_ascii
 
             if upload_name:
-                upload_name = urlparse.unquote(upload_name)
+                if PY3:
+                    upload_name = urlparse.unquote(upload_name)
+                else:
+                    upload_name = urlparse.unquote(upload_name.encode('ascii'))
                 try:
-                    upload_name = upload_name.decode("utf-8")
+                    if isinstance(upload_name, bytes):
+                        upload_name = upload_name.decode("utf-8")
                 except UnicodeDecodeError:
                     upload_name = None
         else:
@@ -755,14 +759,15 @@ class MediaRepositoryResource(Resource):
         Resource.__init__(self)
 
         media_repo = hs.get_media_repository()
-        self.putChild("upload", UploadResource(hs, media_repo))
-        self.putChild("download", DownloadResource(hs, media_repo))
-        self.putChild("thumbnail", ThumbnailResource(
+
+        self.putChild(b"upload", UploadResource(hs, media_repo))
+        self.putChild(b"download", DownloadResource(hs, media_repo))
+        self.putChild(b"thumbnail", ThumbnailResource(
             hs, media_repo, media_repo.media_storage,
         ))
-        self.putChild("identicon", IdenticonResource())
+        self.putChild(b"identicon", IdenticonResource())
         if hs.config.url_preview_enabled:
-            self.putChild("preview_url", PreviewUrlResource(
+            self.putChild(b"preview_url", PreviewUrlResource(
                 hs, media_repo, media_repo.media_storage,
             ))
-        self.putChild("config", MediaConfigResource(hs))
+        self.putChild(b"config", MediaConfigResource(hs))
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 778ef97337..cad2dec33a 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -261,7 +261,7 @@ class PreviewUrlResource(Resource):
 
         logger.debug("Calculated OG for %s as %s" % (url, og))
 
-        jsonog = json.dumps(og)
+        jsonog = json.dumps(og).encode('utf8')
 
         # store OG in history-aware DB cache
         yield self.store.store_url_cache(
@@ -301,20 +301,20 @@ class PreviewUrlResource(Resource):
                 logger.warn("Error downloading %s: %r", url, e)
                 raise SynapseError(
                     500, "Failed to download content: %s" % (
-                        traceback.format_exception_only(sys.exc_type, e),
+                        traceback.format_exception_only(sys.exc_info()[0], e),
                     ),
                     Codes.UNKNOWN,
                 )
             yield finish()
 
         try:
-            if "Content-Type" in headers:
-                media_type = headers["Content-Type"][0]
+            if b"Content-Type" in headers:
+                media_type = headers[b"Content-Type"][0].decode('ascii')
             else:
                 media_type = "application/octet-stream"
             time_now_ms = self.clock.time_msec()
 
-            content_disposition = headers.get("Content-Disposition", None)
+            content_disposition = headers.get(b"Content-Disposition", None)
             if content_disposition:
                 _, params = cgi.parse_header(content_disposition[0],)
                 download_name = None
diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py
index 808194236a..cfb687cb53 100644
--- a/synapse/storage/directory.py
+++ b/synapse/storage/directory.py
@@ -75,7 +75,6 @@ class DirectoryWorkerStore(SQLBaseStore):
             },
             retcol="creator",
             desc="get_room_alias_creator",
-            allow_none=True
         )
 
     @cached(max_entries=5000)
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 8bf87f38f7..e7487311ce 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -930,6 +930,10 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                 )
 
                 self._invalidate_cache_and_stream(
+                    txn, self.get_room_summary, (room_id,)
+                )
+
+                self._invalidate_cache_and_stream(
                     txn, self.get_current_state_ids, (room_id,)
                 )
 
@@ -1886,20 +1890,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
             ")"
         )
 
-        # create an index on should_delete because later we'll be looking for
-        # the should_delete / shouldn't_delete subsets
-        txn.execute(
-            "CREATE INDEX events_to_purge_should_delete"
-            " ON events_to_purge(should_delete)",
-        )
-
-        # We do joins against events_to_purge for e.g. calculating state
-        # groups to purge, etc., so lets make an index.
-        txn.execute(
-            "CREATE INDEX events_to_purge_id"
-            " ON events_to_purge(event_id)",
-        )
-
         # First ensure that we're not about to delete all the forward extremeties
         txn.execute(
             "SELECT e.event_id, e.depth FROM events as e "
@@ -1926,19 +1916,45 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         should_delete_params = ()
         if not delete_local_events:
             should_delete_expr += " AND event_id NOT LIKE ?"
-            should_delete_params += ("%:" + self.hs.hostname, )
+
+            # We include the parameter twice since we use the expression twice
+            should_delete_params += (
+                "%:" + self.hs.hostname,
+                "%:" + self.hs.hostname,
+            )
 
         should_delete_params += (room_id, token.topological)
 
+        # Note that we insert events that are outliers and aren't going to be
+        # deleted, as nothing will happen to them.
         txn.execute(
             "INSERT INTO events_to_purge"
             " SELECT event_id, %s"
             " FROM events AS e LEFT JOIN state_events USING (event_id)"
-            " WHERE e.room_id = ? AND topological_ordering < ?" % (
+            " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
+            % (
+                should_delete_expr,
                 should_delete_expr,
             ),
             should_delete_params,
         )
+
+        # We create the indices *after* insertion as that's a lot faster.
+
+        # create an index on should_delete because later we'll be looking for
+        # the should_delete / shouldn't_delete subsets
+        txn.execute(
+            "CREATE INDEX events_to_purge_should_delete"
+            " ON events_to_purge(should_delete)",
+        )
+
+        # We do joins against events_to_purge for e.g. calculating state
+        # groups to purge, etc., so lets make an index.
+        txn.execute(
+            "CREATE INDEX events_to_purge_id"
+            " ON events_to_purge(event_id)",
+        )
+
         txn.execute(
             "SELECT event_id, should_delete FROM events_to_purge"
         )
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index f547977600..a1331c1a61 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -134,6 +134,7 @@ class KeyStore(SQLBaseStore):
         """
         key_id = "%s:%s" % (verify_key.alg, verify_key.version)
 
+        # XXX fix this to not need a lock (#3819)
         def _txn(txn):
             self._simple_upsert_txn(
                 txn,
diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py
index c7899d7fd2..59580949f1 100644
--- a/synapse/storage/monthly_active_users.py
+++ b/synapse/storage/monthly_active_users.py
@@ -147,6 +147,23 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         return self.runInteraction("count_users", _count_users)
 
     @defer.inlineCallbacks
+    def get_registered_reserved_users_count(self):
+        """Of the reserved threepids defined in config, how many are associated
+        with registered users?
+
+        Returns:
+            Defered[int]: Number of real reserved users
+        """
+        count = 0
+        for tp in self.hs.config.mau_limits_reserved_threepids:
+            user_id = yield self.hs.get_datastore().get_user_id_by_threepid(
+                tp["medium"], tp["address"]
+            )
+            if user_id:
+                count = count + 1
+        defer.returnValue(count)
+
+    @defer.inlineCallbacks
     def upsert_monthly_active_user(self, user_id):
         """
             Updates or inserts monthly active user member
@@ -199,10 +216,14 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         Args:
             user_id(str): the user_id to query
         """
+
         if self.hs.config.limit_usage_by_mau:
+            # Trial users and guests should not be included as part of MAU group
+            is_guest = yield self.is_guest(user_id)
+            if is_guest:
+                return
             is_trial = yield self.is_trial_user(user_id)
             if is_trial:
-                # we don't track trial users in the MAU table.
                 return
 
             last_seen_timestamp = yield self.user_last_seen_monthly_active(user_id)
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 9b4e6d6aa8..0707f9a86a 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -51,6 +51,12 @@ ProfileInfo = namedtuple(
     "ProfileInfo", ("avatar_url", "display_name")
 )
 
+# "members" points to a truncated list of (user_id, event_id) tuples for users of
+# a given membership type, suitable for use in calculating heroes for a room.
+# "count" points to the total numberr of users of a given membership type.
+MemberSummary = namedtuple(
+    "MemberSummary", ("members", "count")
+)
 
 _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
 
@@ -82,6 +88,65 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             return [to_ascii(r[0]) for r in txn]
         return self.runInteraction("get_users_in_room", f)
 
+    @cached(max_entries=100000)
+    def get_room_summary(self, room_id):
+        """ Get the details of a room roughly suitable for use by the room
+        summary extension to /sync. Useful when lazy loading room members.
+        Args:
+            room_id (str): The room ID to query
+        Returns:
+            Deferred[dict[str, MemberSummary]:
+                dict of membership states, pointing to a MemberSummary named tuple.
+        """
+
+        def _get_room_summary_txn(txn):
+            # first get counts.
+            # We do this all in one transaction to keep the cache small.
+            # FIXME: get rid of this when we have room_stats
+            sql = """
+                SELECT count(*), m.membership FROM room_memberships as m
+                 INNER JOIN current_state_events as c
+                 ON m.event_id = c.event_id
+                 AND m.room_id = c.room_id
+                 AND m.user_id = c.state_key
+                 WHERE c.type = 'm.room.member' AND c.room_id = ?
+                 GROUP BY m.membership
+            """
+
+            txn.execute(sql, (room_id,))
+            res = {}
+            for count, membership in txn:
+                summary = res.setdefault(to_ascii(membership), MemberSummary([], count))
+
+            # we order by membership and then fairly arbitrarily by event_id so
+            # heroes are consistent
+            sql = """
+                SELECT m.user_id, m.membership, m.event_id
+                FROM room_memberships as m
+                 INNER JOIN current_state_events as c
+                 ON m.event_id = c.event_id
+                 AND m.room_id = c.room_id
+                 AND m.user_id = c.state_key
+                 WHERE c.type = 'm.room.member' AND c.room_id = ?
+                 ORDER BY
+                    CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+                    m.event_id ASC
+                 LIMIT ?
+            """
+
+            # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
+            txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
+            for user_id, membership, event_id in txn:
+                summary = res[to_ascii(membership)]
+                # we will always have a summary for this membership type at this
+                # point given the summary currently contains the counts.
+                members = summary.members
+                members.append((to_ascii(user_id), to_ascii(event_id)))
+
+            return res
+
+        return self.runInteraction("get_room_summary", _get_room_summary_txn)
+
     @cached()
     def get_invited_rooms_for_user(self, user_id):
         """ Get all the rooms the user is invited to
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 9b3f2f4b96..ec7b2c9672 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -374,29 +374,25 @@ class ReadWriteLock(object):
         defer.returnValue(_ctx_manager())
 
 
-class DeferredTimeoutError(Exception):
-    """
-    This error is raised by default when a L{Deferred} times out.
-    """
-
+def _cancelled_to_timed_out_error(value, timeout):
+    if isinstance(value, failure.Failure):
+        value.trap(CancelledError)
+        raise defer.TimeoutError(timeout, "Deferred")
+    return value
 
-def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None):
-    """
-    Add a timeout to a deferred by scheduling it to be cancelled after
-    timeout seconds.
 
-    This is essentially a backport of deferred.addTimeout, which was introduced
-    in twisted 16.5.
+def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None):
+    """The in built twisted `Deferred.addTimeout` fails to time out deferreds
+    that have a canceller that throws exceptions. This method creates a new
+    deferred that wraps and times out the given deferred, correctly handling
+    the case where the given deferred's canceller throws.
 
-    If the deferred gets timed out, it errbacks with a DeferredTimeoutError,
-    unless a cancelable function was passed to its initialization or unless
-    a different on_timeout_cancel callable is provided.
+    NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred
 
     Args:
-        deferred (defer.Deferred): deferred to be timed out
-        timeout (Number): seconds to time out after
-        reactor (twisted.internet.reactor): the Twisted reactor to use
-
+        deferred (Deferred)
+        timeout (float): Timeout in seconds
+        reactor (twisted.internet.reactor): The twisted reactor to use
         on_timeout_cancel (callable): A callable which is called immediately
             after the deferred times out, and not if this deferred is
             otherwise cancelled before the timeout.
@@ -406,13 +402,26 @@ def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None):
             the timeout.
 
             The default callable (if none is provided) will translate a
-            CancelledError Failure into a DeferredTimeoutError.
+            CancelledError Failure into a defer.TimeoutError.
+
+    Returns:
+        Deferred
     """
+
+    new_d = defer.Deferred()
+
     timed_out = [False]
 
     def time_it_out():
         timed_out[0] = True
-        deferred.cancel()
+
+        try:
+            deferred.cancel()
+        except:   # noqa: E722, if we throw any exception it'll break time outs
+            logger.exception("Canceller failed during timeout")
+
+        if not new_d.called:
+            new_d.errback(defer.TimeoutError(timeout, "Deferred"))
 
     delayed_call = reactor.callLater(timeout, time_it_out)
 
@@ -432,9 +441,14 @@ def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None):
 
     deferred.addBoth(cancel_timeout)
 
+    def success_cb(val):
+        if not new_d.called:
+            new_d.callback(val)
 
-def _cancelled_to_timed_out_error(value, timeout):
-    if isinstance(value, failure.Failure):
-        value.trap(CancelledError)
-        raise DeferredTimeoutError(timeout, "Deferred")
-    return value
+    def failure_cb(val):
+        if not new_d.called:
+            new_d.errback(val)
+
+    deferred.addCallbacks(success_cb, failure_cb)
+
+    return new_d
diff --git a/synapse/util/manhole.py b/synapse/util/manhole.py
index 14be3c7396..8d0f2a8918 100644
--- a/synapse/util/manhole.py
+++ b/synapse/util/manhole.py
@@ -19,22 +19,40 @@ from twisted.conch.ssh.keys import Key
 from twisted.cred import checkers, portal
 
 PUBLIC_KEY = (
-    "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAGEArzJx8OYOnJmzf4tfBEvLi8DVPrJ3/c9k2I/Az"
-    "64fxjHf9imyRJbixtQhlH9lfNjUIx+4LmrJH5QNRsFporcHDKOTwTTYLh5KmRpslkYHRivcJS"
-    "kbh/C+BR3utDS555mV"
+    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHhGATaW4KhE23+7nrH4jFx3yLq9OjaEs5"
+    "XALqeK+7385NlLja3DE/DO9mGhnd9+bAy39EKT3sTV6+WXQ4yD0TvEEyUEMtjWkSEm6U32+C"
+    "DaS3TW/vPBUMeJQwq+Ydcif1UlnpXrDDTamD0AU9VaEvHq+3HAkipqn0TGpKON6aqk4vauDx"
+    "oXSsV5TXBVrxP/y7HpMOpU4GUWsaaacBTKKNnUaQB4UflvydaPJUuwdaCUJGTMjbhWrjVfK+"
+    "jslseSPxU6XvrkZMyCr4znxvuDxjMk1RGIdO7v+rbBMLEgqtSMNqJbYeVCnj2CFgc3fcTcld"
+    "X2uOJDrJb/WRlHulthCh"
 )
 
 PRIVATE_KEY = """-----BEGIN RSA PRIVATE KEY-----
-MIIByAIBAAJhAK8ycfDmDpyZs3+LXwRLy4vA1T6yd/3PZNiPwM+uH8Yx3/YpskSW
-4sbUIZR/ZXzY1CMfuC5qyR+UDUbBaaK3Bwyjk8E02C4eSpkabJZGB0Yr3CUpG4fw
-vgUd7rQ0ueeZlQIBIwJgbh+1VZfr7WftK5lu7MHtqE1S1vPWZQYE3+VUn8yJADyb
-Z4fsZaCrzW9lkIqXkE3GIY+ojdhZhkO1gbG0118sIgphwSWKRxK0mvh6ERxKqIt1
-xJEJO74EykXZV4oNJ8sjAjEA3J9r2ZghVhGN6V8DnQrTk24Td0E8hU8AcP0FVP+8
-PQm/g/aXf2QQkQT+omdHVEJrAjEAy0pL0EBH6EVS98evDCBtQw22OZT52qXlAwZ2
-gyTriKFVoqjeEjt3SZKKqXHSApP/AjBLpF99zcJJZRq2abgYlf9lv1chkrWqDHUu
-DZttmYJeEfiFBBavVYIF1dOlZT0G8jMCMBc7sOSZodFnAiryP+Qg9otSBjJ3bQML
-pSTqy7c3a2AScC/YyOwkDaICHnnD3XyjMwIxALRzl0tQEKMXs6hH8ToUdlLROCrP
-EhQ0wahUTCk1gKA4uPD6TMTChavbh4K63OvbKg==
+MIIEpQIBAAKCAQEAx4RgE2luCoRNt/u56x+Ixcd8i6vTo2hLOVwC6nivu9/OTZS4
+2twxPwzvZhoZ3ffmwMt/RCk97E1evll0OMg9E7xBMlBDLY1pEhJulN9vgg2kt01v
+7zwVDHiUMKvmHXIn9VJZ6V6ww02pg9AFPVWhLx6vtxwJIqap9ExqSjjemqpOL2rg
+8aF0rFeU1wVa8T/8ux6TDqVOBlFrGmmnAUyijZ1GkAeFH5b8nWjyVLsHWglCRkzI
+24Vq41Xyvo7JbHkj8VOl765GTMgq+M58b7g8YzJNURiHTu7/q2wTCxIKrUjDaiW2
+HlQp49ghYHN33E3JXV9rjiQ6yW/1kZR7pbYQoQIDAQABAoIBAQC8KJ0q8Wzzwh5B
+esa1dQHZ8+4DEsL/Amae66VcVwD0X3cCN1W2IZ7X5W0Ij2kBqr8V51RYhcR+S+Ek
+BtzSiBUBvbKGrqcMGKaUgomDIMzai99hd0gvCCyZnEW1OQhFkNkaRNXCfqiZJ27M
+fqvSUiU2eOwh9fCvmxoA6Of8o3FbzcJ+1GMcobWRllDtLmj6lgVbDzuA+0jC5daB
+9Tj1pBzu3wn3ufxiS+gBnJ+7NcXH3E73lqCcPa2ufbZ1haxfiGCnRIhFXuQDgxFX
+vKdEfDgtvas6r1ahGbc+b/q8E8fZT7cABuIU4yfOORK+MhpyWbvoyyzuVGKj3PKt
+KSPJu5CZAoGBAOkoJfAVyYteqKcmGTanGqQnAY43CaYf6GdSPX/jg+JmKZg0zqMC
+jWZUtPb93i+jnOInbrnuHOiHAxI8wmhEPed28H2lC/LU8PzlqFkZXKFZ4vLOhhRB
+/HeHCFIDosPFlohWi3b+GAjD7sXgnIuGmnXWe2ea/TS3yersifDEoKKjAoGBANsQ
+gJX2cJv1c3jhdgcs8vAt5zIOKcCLTOr/QPmVf/kxjNgndswcKHwsxE/voTO9q+TF
+v/6yCSTxAdjuKz1oIYWgi/dZo82bBKWxNRpgrGviU3/zwxiHlyIXUhzQu78q3VS/
+7S1XVbc7qMV++XkYKHPVD+nVG/gGzFxumX7MLXfrAoGBAJit9cn2OnjNj9uFE1W6
+r7N254ndeLAUjPe73xH0RtTm2a4WRopwjW/JYIetTuYbWgyujc+robqTTuuOZjAp
+H/CG7o0Ym251CypQqaFO/l2aowclPp/dZhpPjp9GSjuxFBZLtiBB3DNBOwbRQzIK
+/vLTdRQvZkgzYkI4i0vjNt3JAoGBANP8HSKBLymMlShlrSx2b8TB9tc2Y2riohVJ
+2ttqs0M2kt/dGJWdrgOz4mikL+983Olt/0P9juHDoxEEMK2kpcPEv40lnmBpYU7h
+s8yJvnBLvJe2EJYdJ8AipyAhUX1FgpbvfxmASP8eaUxsegeXvBWTGWojAoS6N2o+
+0KSl+l3vAoGAFqm0gO9f/Q1Se60YQd4l2PZeMnJFv0slpgHHUwegmd6wJhOD7zJ1
+CkZcXwiv7Nog7AI9qKJEUXLjoqL+vJskBzSOqU3tcd670YQMi1aXSXJqYE202K7o
+EddTrx3TNpr1D5m/f+6mnXWrc8u9y1+GNx9yz889xMjIBTBI9KqaaOs=
 -----END RSA PRIVATE KEY-----"""
 
 
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 97f1267380..4b4ac5f6c7 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -20,6 +20,7 @@ from prometheus_client import Counter
 
 from twisted.internet import defer
 
+from synapse.metrics import InFlightGauge
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
@@ -45,6 +46,13 @@ block_db_txn_duration = Counter(
 block_db_sched_duration = Counter(
     "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"])
 
+# Tracks the number of blocks currently active
+in_flight = InFlightGauge(
+    "synapse_util_metrics_block_in_flight", "",
+    labels=["block_name"],
+    sub_metrics=["real_time_max", "real_time_sum"],
+)
+
 
 def measure_func(name):
     def wrapper(func):
@@ -82,10 +90,14 @@ class Measure(object):
 
         self.start_usage = self.start_context.get_resource_usage()
 
+        in_flight.register((self.name,), self._update_in_flight)
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         if isinstance(exc_type, Exception) or not self.start_context:
             return
 
+        in_flight.unregister((self.name,), self._update_in_flight)
+
         duration = self.clock.time() - self.start
 
         block_counter.labels(self.name).inc()
@@ -120,3 +132,13 @@ class Measure(object):
 
         if self.created_context:
             self.start_context.__exit__(exc_type, exc_val, exc_tb)
+
+    def _update_in_flight(self, metrics):
+        """Gets called when processing in flight metrics
+        """
+        duration = self.clock.time() - self.start
+
+        metrics.real_time_max = max(metrics.real_time_max, duration)
+        metrics.real_time_sum += duration
+
+        # TODO: Add other in flight metrics.