141 files changed, 5583 insertions, 2084 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 43c5821ade..5a28fe2b82 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -27,4 +27,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "0.33.6"
+__version__ = "0.33.9"
diff --git a/synapse/_scripts/__init__.py b/synapse/_scripts/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/_scripts/__init__.py
diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py
new file mode 100644
index 0000000000..70cecde486
--- /dev/null
+++ b/synapse/_scripts/register_new_matrix_user.py
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2018 New Vector
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import argparse
+import getpass
+import hashlib
+import hmac
+import logging
+import sys
+
+from six.moves import input
+
+import requests as _requests
+import yaml
+
+
+def request_registration(
+    user,
+    password,
+    server_location,
+    shared_secret,
+    admin=False,
+    requests=_requests,
+    _print=print,
+    exit=sys.exit,
+):
+
+    url = "%s/_matrix/client/r0/admin/register" % (server_location,)
+
+    # Get the nonce
+    r = requests.get(url, verify=False)
+
+    if r.status_code is not 200:
+        _print("ERROR! Received %d %s" % (r.status_code, r.reason))
+        if 400 <= r.status_code < 500:
+            try:
+                _print(r.json()["error"])
+            except Exception:
+                pass
+        return exit(1)
+
+    nonce = r.json()["nonce"]
+
+    mac = hmac.new(key=shared_secret.encode('utf8'), digestmod=hashlib.sha1)
+
+    mac.update(nonce.encode('utf8'))
+    mac.update(b"\x00")
+    mac.update(user.encode('utf8'))
+    mac.update(b"\x00")
+    mac.update(password.encode('utf8'))
+    mac.update(b"\x00")
+    mac.update(b"admin" if admin else b"notadmin")
+
+    mac = mac.hexdigest()
+
+    data = {
+        "nonce": nonce,
+        "username": user,
+        "password": password,
+        "mac": mac,
+        "admin": admin,
+    }
+
+    _print("Sending registration request...")
+    r = requests.post(url, json=data, verify=False)
+
+    if r.status_code is not 200:
+        _print("ERROR! Received %d %s" % (r.status_code, r.reason))
+        if 400 <= r.status_code < 500:
+            try:
+                _print(r.json()["error"])
+            except Exception:
+                pass
+        return exit(1)
+
+    _print("Success!")
+
+
+def register_new_user(user, password, server_location, shared_secret, admin):
+    if not user:
+        try:
+            default_user = getpass.getuser()
+        except Exception:
+            default_user = None
+
+        if default_user:
+            user = input("New user localpart [%s]: " % (default_user,))
+            if not user:
+                user = default_user
+        else:
+            user = input("New user localpart: ")
+
+    if not user:
+        print("Invalid user name")
+        sys.exit(1)
+
+    if not password:
+        password = getpass.getpass("Password: ")
+
+        if not password:
+            print("Password cannot be blank.")
+            sys.exit(1)
+
+        confirm_password = getpass.getpass("Confirm password: ")
+
+        if password != confirm_password:
+            print("Passwords do not match")
+            sys.exit(1)
+
+    if admin is None:
+        admin = input("Make admin [no]: ")
+        if admin in ("y", "yes", "true"):
+            admin = True
+        else:
+            admin = False
+
+    request_registration(user, password, server_location, shared_secret, bool(admin))
+
+
+def main():
+
+    logging.captureWarnings(True)
+
+    parser = argparse.ArgumentParser(
+        description="Used to register new users with a given home server when"
+        " registration has been disabled. The home server must be"
+        " configured with the 'registration_shared_secret' option"
+        " set."
+    )
+    parser.add_argument(
+        "-u",
+        "--user",
+        default=None,
+        help="Local part of the new user. Will prompt if omitted.",
+    )
+    parser.add_argument(
+        "-p",
+        "--password",
+        default=None,
+        help="New password for user. Will prompt if omitted.",
+    )
+    admin_group = parser.add_mutually_exclusive_group()
+    admin_group.add_argument(
+        "-a",
+        "--admin",
+        action="store_true",
+        help=(
+            "Register new user as an admin. "
+            "Will prompt if --no-admin is not set either."
+        ),
+    )
+    admin_group.add_argument(
+        "--no-admin",
+        action="store_true",
+        help=(
+            "Register new user as a regular user. "
+            "Will prompt if --admin is not set either."
+        ),
+    )
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "-c",
+        "--config",
+        type=argparse.FileType('r'),
+        help="Path to server config file. Used to read in shared secret.",
+    )
+
+    group.add_argument(
+        "-k", "--shared-secret", help="Shared secret as defined in server config file."
+    )
+
+    parser.add_argument(
+        "server_url",
+        default="https://localhost:8448",
+        nargs='?',
+        help="URL to use to talk to the home server. Defaults to "
+        " 'https://localhost:8448'.",
+    )
+
+    args = parser.parse_args()
+
+    if "config" in args and args.config:
+        config = yaml.safe_load(args.config)
+        secret = config.get("registration_shared_secret", None)
+        if not secret:
+            print("No 'registration_shared_secret' defined in config.")
+            sys.exit(1)
+    else:
+        secret = args.shared_secret
+
+    admin = None
+    if args.admin or args.no_admin:
+        admin = args.admin
+
+    register_new_user(args.user, args.password, args.server_url, secret, admin)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 34382e4e3c..5309899703 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -188,17 +188,33 @@ class Auth(object):
         """
         # Can optionally look elsewhere in the request (e.g. headers)
         try:
+            ip_addr = self.hs.get_ip_from_request(request)
+            user_agent = request.requestHeaders.getRawHeaders(
+                b"User-Agent",
+                default=[b""]
+            )[0].decode('ascii', 'surrogateescape')
+
+            access_token = self.get_access_token_from_request(
+                request, self.TOKEN_NOT_FOUND_HTTP_STATUS
+            )
+
             user_id, app_service = yield self._get_appservice_user_id(request)
             if user_id:
                 request.authenticated_entity = user_id
+
+                if ip_addr and self.hs.config.track_appservice_user_ips:
+                    yield self.store.insert_client_ip(
+                        user_id=user_id,
+                        access_token=access_token,
+                        ip=ip_addr,
+                        user_agent=user_agent,
+                        device_id="dummy-device",  # stubbed
+                    )
+
                 defer.returnValue(
                     synapse.types.create_requester(user_id, app_service=app_service)
                 )
 
-            access_token = self.get_access_token_from_request(
-                request, self.TOKEN_NOT_FOUND_HTTP_STATUS
-            )
-
             user_info = yield self.get_user_by_access_token(access_token, rights)
             user = user_info["user"]
             token_id = user_info["token_id"]
@@ -208,11 +224,6 @@ class Auth(object):
             # stubbed out.
             device_id = user_info.get("device_id")
 
-            ip_addr = self.hs.get_ip_from_request(request)
-            user_agent = request.requestHeaders.getRawHeaders(
-                b"User-Agent",
-                default=[b""]
-            )[0].decode('ascii', 'surrogateescape')
             if user and access_token and ip_addr:
                 yield self.store.insert_client_ip(
                     user_id=user.to_string(),
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index c2630c4c64..f20e0fcf0b 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -51,6 +51,7 @@ class LoginType(object):
     EMAIL_IDENTITY = u"m.login.email.identity"
     MSISDN = u"m.login.msisdn"
     RECAPTCHA = u"m.login.recaptcha"
+    TERMS = u"m.login.terms"
     DUMMY = u"m.login.dummy"
 
     # Only for C/S API v1
@@ -61,6 +62,7 @@ class LoginType(object):
 class EventTypes(object):
     Member = "m.room.member"
     Create = "m.room.create"
+    Tombstone = "m.room.tombstone"
     JoinRules = "m.room.join_rules"
     PowerLevels = "m.room.power_levels"
     Aliases = "m.room.aliases"
@@ -101,6 +103,7 @@ class ThirdPartyEntityKind(object):
 class RoomVersions(object):
     V1 = "1"
     VDH_TEST = "vdh-test-version"
+    STATE_V2_TEST = "state-v2-test"
 
 
 # the version we will give rooms which are created on this server
@@ -108,7 +111,11 @@ DEFAULT_ROOM_VERSION = RoomVersions.V1
 
 # vdh-test-version is a placeholder to get room versioning support working and tested
 # until we have a working v2.
-KNOWN_ROOM_VERSIONS = {RoomVersions.V1, RoomVersions.VDH_TEST}
+KNOWN_ROOM_VERSIONS = {
+    RoomVersions.V1,
+    RoomVersions.VDH_TEST,
+    RoomVersions.STATE_V2_TEST,
+}
 
 ServerNoticeMsgType = "m.server_notice"
 ServerNoticeLimitReached = "m.server_notice.usage_limit_reached"
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 2e7f98404d..48b903374d 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -59,6 +59,7 @@ class Codes(object):
     RESOURCE_LIMIT_EXCEEDED = "M_RESOURCE_LIMIT_EXCEEDED"
     UNSUPPORTED_ROOM_VERSION = "M_UNSUPPORTED_ROOM_VERSION"
     INCOMPATIBLE_ROOM_VERSION = "M_INCOMPATIBLE_ROOM_VERSION"
+    WRONG_ROOM_KEYS_VERSION = "M_WRONG_ROOM_KEYS_VERSION"
 
 
 class CodeMessageException(RuntimeError):
@@ -312,6 +313,20 @@ class LimitExceededError(SynapseError):
         )
 
 
+class RoomKeysVersionError(SynapseError):
+    """A client has tried to upload to a non-current version of the room_keys store
+    """
+    def __init__(self, current_version):
+        """
+        Args:
+            current_version (str): the current version of the store they should have used
+        """
+        super(RoomKeysVersionError, self).__init__(
+            403, "Wrong room_keys version", Codes.WRONG_ROOM_KEYS_VERSION
+        )
+        self.current_version = current_version
+
+
 class IncompatibleRoomVersionError(SynapseError):
     """A server is trying to join a room whose version it does not support."""
 
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index eed8c67e6a..677c0bdd4c 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -172,7 +172,10 @@ USER_FILTER_SCHEMA = {
                 # events a lot easier as we can then use a negative lookbehind
                 # assertion to split '\.' If we allowed \\ then it would
                 # incorrectly split '\\.' See synapse.events.utils.serialize_event
-                "pattern": "^((?!\\\).)*$"
+                #
+                # Note that because this is a regular expression, we have to escape
+                # each backslash in the pattern.
+                "pattern": r"^((?!\\\\).)*$"
             }
         }
     },
diff --git a/synapse/api/urls.py b/synapse/api/urls.py
index 6d9f1ca0ef..f78695b657 100644
--- a/synapse/api/urls.py
+++ b/synapse/api/urls.py
@@ -28,7 +28,6 @@ FEDERATION_PREFIX = "/_matrix/federation/v1"
 STATIC_PREFIX = "/_matrix/static"
 WEB_CLIENT_PREFIX = "/_matrix/client"
 CONTENT_REPO_PREFIX = "/_matrix/content"
-SERVER_KEY_PREFIX = "/_matrix/key/v1"
 SERVER_KEY_V2_PREFIX = "/_matrix/key/v2"
 MEDIA_PREFIX = "/_matrix/media/r0"
 LEGACY_MEDIA_PREFIX = "/_matrix/media/v1"
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 7c866e246a..18584226e9 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -17,6 +17,7 @@ import gc
 import logging
 import sys
 
+import psutil
 from daemonize import Daemonize
 
 from twisted.internet import error, reactor
@@ -24,12 +25,6 @@ from twisted.internet import error, reactor
 from synapse.util import PreserveLoggingContext
 from synapse.util.rlimit import change_resource_limit
 
-try:
-    import affinity
-except Exception:
-    affinity = None
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -89,15 +84,20 @@ def start_reactor(
         with PreserveLoggingContext():
             logger.info("Running")
             if cpu_affinity is not None:
-                if not affinity:
-                    quit_with_error(
-                        "Missing package 'affinity' required for cpu_affinity\n"
-                        "option\n\n"
-                        "Install by running:\n\n"
-                        "   pip install affinity\n\n"
-                    )
-                logger.info("Setting CPU affinity to %s" % cpu_affinity)
-                affinity.set_process_affinity_mask(0, cpu_affinity)
+                # Turn the bitmask into bits, reverse it so we go from 0 up
+                mask_to_bits = bin(cpu_affinity)[2:][::-1]
+
+                cpus = []
+                cpu_num = 0
+
+                for i in mask_to_bits:
+                    if i == "1":
+                        cpus.append(cpu_num)
+                    cpu_num += 1
+
+                p = psutil.Process()
+                p.cpu_affinity(cpus)
+
             change_resource_limit(soft_file_limit)
             if gc_thresholds:
                 gc.set_threshold(*gc_thresholds)
diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py
index 9060ab14f6..e4a68715aa 100644
--- a/synapse/app/event_creator.py
+++ b/synapse/app/event_creator.py
@@ -178,6 +178,9 @@ def start(config_options):
 
     setup_logging(config, use_worker_options=True)
 
+    # This should only be done on the user directory worker or the master
+    config.update_user_directory = False
+
     events.USE_FROZEN_DICTS = config.use_frozen_dicts
 
     database_engine = create_engine(config.database_config)
diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py
index fc4b25de1c..f5c61dec5b 100644
--- a/synapse/app/frontend_proxy.py
+++ b/synapse/app/frontend_proxy.py
@@ -68,7 +68,7 @@ class PresenceStatusStubServlet(ClientV1RestServlet):
             "Authorization": auth_headers,
         }
         result = yield self.http_client.get_json(
-            self.main_uri + request.uri,
+            self.main_uri + request.uri.decode('ascii'),
             headers=headers,
         )
         defer.returnValue((200, result))
@@ -125,7 +125,7 @@ class KeyUploadServlet(RestServlet):
                 "Authorization": auth_headers,
             }
             result = yield self.http_client.post_json_get_json(
-                self.main_uri + request.uri,
+                self.main_uri + request.uri.decode('ascii'),
                 body,
                 headers=headers,
             )
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index e3f0d99a3f..3e4dea2f19 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -20,6 +20,7 @@ import sys
 
 from six import iteritems
 
+import psutil
 from prometheus_client import Gauge
 
 from twisted.application import service
@@ -36,7 +37,6 @@ from synapse.api.urls import (
     FEDERATION_PREFIX,
     LEGACY_MEDIA_PREFIX,
     MEDIA_PREFIX,
-    SERVER_KEY_PREFIX,
     SERVER_KEY_V2_PREFIX,
     STATIC_PREFIX,
     WEB_CLIENT_PREFIX,
@@ -58,7 +58,6 @@ from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, check_requirem
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
 from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
 from synapse.rest import ClientRestResource
-from synapse.rest.key.v1.server_key_resource import LocalKey
 from synapse.rest.key.v2 import KeyApiV2Resource
 from synapse.rest.media.v0.content_repository import ContentRepoResource
 from synapse.server import HomeServer
@@ -235,10 +234,7 @@ class SynapseHomeServer(HomeServer):
                 )
 
         if name in ["keys", "federation"]:
-            resources.update({
-                SERVER_KEY_PREFIX: LocalKey(self),
-                SERVER_KEY_V2_PREFIX: KeyApiV2Resource(self),
-            })
+            resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
 
         if name == "webclient":
             resources[WEB_CLIENT_PREFIX] = build_resource_for_web_client(self)
@@ -502,7 +498,6 @@ def run(hs):
 
     def performance_stats_init():
         try:
-            import psutil
             process = psutil.Process()
             # Ensure we can fetch both, and make the initial request for cpu_percent
             # so the next request will use this as the initial point.
@@ -510,12 +505,9 @@ def run(hs):
             process.cpu_percent(interval=None)
             logger.info("report_stats can use psutil")
             stats_process.append(process)
-        except (ImportError, AttributeError):
-            logger.warn(
-                "report_stats enabled but psutil is not installed or incorrect version."
-                " Disabling reporting of memory/cpu stats."
-                " Ensuring psutil is available will help matrix.org track performance"
-                " changes across releases."
+        except (AttributeError):
+            logger.warning(
+                "Unable to read memory/cpu stats. Disabling reporting."
             )
 
     def generate_user_daily_visit_stats():
@@ -530,29 +522,35 @@ def run(hs):
     clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)
 
     # monthly active user limiting functionality
-    clock.looping_call(
-        hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60
-    )
-    hs.get_datastore().reap_monthly_active_users()
+    def reap_monthly_active_users():
+        return run_as_background_process(
+            "reap_monthly_active_users",
+            hs.get_datastore().reap_monthly_active_users,
+        )
+    clock.looping_call(reap_monthly_active_users, 1000 * 60 * 60)
+    reap_monthly_active_users()
 
     @defer.inlineCallbacks
     def generate_monthly_active_users():
         current_mau_count = 0
         reserved_count = 0
         store = hs.get_datastore()
-        if hs.config.limit_usage_by_mau:
+        if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
             current_mau_count = yield store.get_monthly_active_count()
             reserved_count = yield store.get_registered_reserved_users_count()
         current_mau_gauge.set(float(current_mau_count))
         registered_reserved_users_mau_gauge.set(float(reserved_count))
         max_mau_gauge.set(float(hs.config.max_mau_value))
 
-    hs.get_datastore().initialise_reserved_users(
-        hs.config.mau_limits_reserved_threepids
-    )
-    generate_monthly_active_users()
+    def start_generate_monthly_active_users():
+        return run_as_background_process(
+            "generate_monthly_active_users",
+            generate_monthly_active_users,
+        )
+
+    start_generate_monthly_active_users()
     if hs.config.limit_usage_by_mau:
-        clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
+        clock.looping_call(start_generate_monthly_active_users, 5 * 60 * 1000)
     # End of monthly active user settings
 
     if hs.config.report_stats:
@@ -568,7 +566,7 @@ def run(hs):
         clock.call_later(5 * 60, start_phone_stats_home)
 
     if hs.config.daemonize and hs.config.print_pidfile:
-        print (hs.config.pid_file)
+        print(hs.config.pid_file)
 
     _base.start_reactor(
         "synapse-homeserver",
diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py
index 630dcda478..83b0863f00 100644
--- a/synapse/app/pusher.py
+++ b/synapse/app/pusher.py
@@ -28,6 +28,7 @@ from synapse.config.logger import setup_logging
 from synapse.http.site import SynapseSite
 from synapse.metrics import RegistryProxy
 from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.replication.slave.storage._base import __func__
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.events import SlavedEventStore
 from synapse.replication.slave.storage.pushers import SlavedPusherStore
@@ -49,31 +50,31 @@ class PusherSlaveStore(
     SlavedAccountDataStore
 ):
     update_pusher_last_stream_ordering_and_success = (
-        DataStore.update_pusher_last_stream_ordering_and_success.__func__
+        __func__(DataStore.update_pusher_last_stream_ordering_and_success)
     )
 
     update_pusher_failing_since = (
-        DataStore.update_pusher_failing_since.__func__
+        __func__(DataStore.update_pusher_failing_since)
     )
 
     update_pusher_last_stream_ordering = (
-        DataStore.update_pusher_last_stream_ordering.__func__
+        __func__(DataStore.update_pusher_last_stream_ordering)
     )
 
     get_throttle_params_by_room = (
-        DataStore.get_throttle_params_by_room.__func__
+        __func__(DataStore.get_throttle_params_by_room)
     )
 
     set_throttle_params = (
-        DataStore.set_throttle_params.__func__
+        __func__(DataStore.set_throttle_params)
     )
 
     get_time_of_last_push_action_before = (
-        DataStore.get_time_of_last_push_action_before.__func__
+        __func__(DataStore.get_time_of_last_push_action_before)
     )
 
     get_profile_displayname = (
-        DataStore.get_profile_displayname.__func__
+        __func__(DataStore.get_profile_displayname)
     )
 
 
@@ -160,11 +161,11 @@ class PusherReplicationHandler(ReplicationClientHandler):
                     else:
                         yield self.start_pusher(row.user_id, row.app_id, row.pushkey)
             elif stream_name == "events":
-                self.pusher_pool.on_new_notifications(
+                yield self.pusher_pool.on_new_notifications(
                     token, token,
                 )
             elif stream_name == "receipts":
-                self.pusher_pool.on_new_receipts(
+                yield self.pusher_pool.on_new_receipts(
                     token, token, set(row.room_id for row in rows)
                 )
         except Exception:
@@ -182,7 +183,7 @@ class PusherReplicationHandler(ReplicationClientHandler):
     def start_pusher(self, user_id, app_id, pushkey):
         key = "%s:%s" % (app_id, pushkey)
         logger.info("Starting pusher %r / %r", user_id, key)
-        return self.pusher_pool._refresh_pusher(app_id, pushkey, user_id)
+        return self.pusher_pool.start_pusher_by_id(app_id, pushkey, user_id)
 
 
 def start(config_options):
diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py
index 9a7fc6ee9d..0354e82bf8 100644
--- a/synapse/app/synchrotron.py
+++ b/synapse/app/synchrotron.py
@@ -33,7 +33,7 @@ from synapse.http.server import JsonResource
 from synapse.http.site import SynapseSite
 from synapse.metrics import RegistryProxy
 from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
-from synapse.replication.slave.storage._base import BaseSlavedStore
+from synapse.replication.slave.storage._base import BaseSlavedStore, __func__
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.client_ips import SlavedClientIpStore
@@ -147,7 +147,7 @@ class SynchrotronPresence(object):
         and haven't come back yet. If there are poke the master about them.
         """
         now = self.clock.time_msec()
-        for user_id, last_sync_ms in self.users_going_offline.items():
+        for user_id, last_sync_ms in list(self.users_going_offline.items()):
             if now - last_sync_ms > 10 * 1000:
                 self.users_going_offline.pop(user_id, None)
                 self.send_user_sync(user_id, False, last_sync_ms)
@@ -156,9 +156,9 @@ class SynchrotronPresence(object):
         # TODO Hows this supposed to work?
         pass
 
-    get_states = PresenceHandler.get_states.__func__
-    get_state = PresenceHandler.get_state.__func__
-    current_state_for_users = PresenceHandler.current_state_for_users.__func__
+    get_states = __func__(PresenceHandler.get_states)
+    get_state = __func__(PresenceHandler.get_state)
+    current_state_for_users = __func__(PresenceHandler.current_state_for_users)
 
     def user_syncing(self, user_id, affect_presence):
         if affect_presence:
@@ -208,7 +208,7 @@ class SynchrotronPresence(object):
         ) for row in rows]
 
         for state in states:
-            self.user_to_current_state[row.user_id] = state
+            self.user_to_current_state[state.user_id] = state
 
         stream_id = token
         yield self.notify_from_replication(states, stream_id)
@@ -226,7 +226,15 @@ class SynchrotronPresence(object):
 class SynchrotronTyping(object):
     def __init__(self, hs):
         self._latest_room_serial = 0
+        self._reset()
+
+    def _reset(self):
+        """
+        Reset the typing handler's data caches.
+        """
+        # map room IDs to serial numbers
         self._room_serials = {}
+        # map room IDs to sets of users currently typing
         self._room_typing = {}
 
     def stream_positions(self):
@@ -236,6 +244,12 @@ class SynchrotronTyping(object):
         return {"typing": self._latest_room_serial}
 
     def process_replication_rows(self, token, rows):
+        if self._latest_room_serial > token:
+            # The master has gone backwards. To prevent inconsistent data, just
+            # clear everything.
+            self._reset()
+
+        # Set the latest serial token to whatever the server gave us.
         self._latest_room_serial = token
 
         for row in rows:
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 2430814796..685f15c061 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -53,8 +53,8 @@ import logging
 from twisted.internet import defer
 
 from synapse.appservice import ApplicationServiceState
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util.logcontext import run_in_background
-from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
 
@@ -104,27 +104,34 @@ class _ServiceQueuer(object):
         self.clock = clock
 
     def enqueue(self, service, event):
-        # if this service isn't being sent something
         self.queued_events.setdefault(service.id, []).append(event)
-        run_in_background(self._send_request, service)
 
-    @defer.inlineCallbacks
-    def _send_request(self, service):
+        # start a sender for this appservice if we don't already have one
+
         if service.id in self.requests_in_flight:
             return
 
+        run_as_background_process(
+            "as-sender-%s" % (service.id, ),
+            self._send_request, service,
+        )
+
+    @defer.inlineCallbacks
+    def _send_request(self, service):
+        # sanity-check: we shouldn't get here if this service already has a sender
+        # running.
+        assert(service.id not in self.requests_in_flight)
+
         self.requests_in_flight.add(service.id)
         try:
             while True:
                 events = self.queued_events.pop(service.id, [])
                 if not events:
                     return
-
-                with Measure(self.clock, "servicequeuer.send"):
-                    try:
-                        yield self.txn_ctrl.send(service, events)
-                    except Exception:
-                        logger.exception("AS request failed")
+                try:
+                    yield self.txn_ctrl.send(service, events)
+                except Exception:
+                    logger.exception("AS request failed")
         finally:
             self.requests_in_flight.discard(service.id)
 
@@ -223,7 +230,12 @@ class _Recoverer(object):
         self.backoff_counter = 1
 
     def recover(self):
-        self.clock.call_later((2 ** self.backoff_counter), self.retry)
+        def _retry():
+            run_as_background_process(
+                "as-recoverer-%s" % (self.service.id,),
+                self.retry,
+            )
+        self.clock.call_later((2 ** self.backoff_counter), _retry)
 
     def _backoff(self):
         # cap the backoff to be around 8.5min => (2^9) = 512 secs
diff --git a/synapse/config/__main__.py b/synapse/config/__main__.py
index 8fccf573ee..79fe9c3dac 100644
--- a/synapse/config/__main__.py
+++ b/synapse/config/__main__.py
@@ -28,7 +28,7 @@ if __name__ == "__main__":
             sys.stderr.write("\n" + str(e) + "\n")
             sys.exit(1)
 
-        print (getattr(config, key))
+        print(getattr(config, key))
         sys.exit(0)
     else:
         sys.stderr.write("Unknown command %r\n" % (action,))
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 3d2e90dd5b..14dae65ea0 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -106,10 +106,7 @@ class Config(object):
     @classmethod
     def check_file(cls, file_path, config_name):
         if file_path is None:
-            raise ConfigError(
-                "Missing config for %s."
-                % (config_name,)
-            )
+            raise ConfigError("Missing config for %s." % (config_name,))
         try:
             os.stat(file_path)
         except OSError as e:
@@ -128,9 +125,7 @@ class Config(object):
             if e.errno != errno.EEXIST:
                 raise
         if not os.path.isdir(dir_path):
-            raise ConfigError(
-                "%s is not a directory" % (dir_path,)
-            )
+            raise ConfigError("%s is not a directory" % (dir_path,))
         return dir_path
 
     @classmethod
@@ -156,21 +151,20 @@ class Config(object):
         return results
 
     def generate_config(
-            self,
-            config_dir_path,
-            server_name,
-            is_generating_file,
-            report_stats=None,
+        self, config_dir_path, server_name, is_generating_file, report_stats=None
     ):
         default_config = "# vim:ft=yaml\n"
 
-        default_config += "\n\n".join(dedent(conf) for conf in self.invoke_all(
-            "default_config",
-            config_dir_path=config_dir_path,
-            server_name=server_name,
-            is_generating_file=is_generating_file,
-            report_stats=report_stats,
-        ))
+        default_config += "\n\n".join(
+            dedent(conf)
+            for conf in self.invoke_all(
+                "default_config",
+                config_dir_path=config_dir_path,
+                server_name=server_name,
+                is_generating_file=is_generating_file,
+                report_stats=report_stats,
+            )
+        )
 
         config = yaml.load(default_config)
 
@@ -178,23 +172,22 @@ class Config(object):
 
     @classmethod
     def load_config(cls, description, argv):
-        config_parser = argparse.ArgumentParser(
-            description=description,
-        )
+        config_parser = argparse.ArgumentParser(description=description)
         config_parser.add_argument(
-            "-c", "--config-path",
+            "-c",
+            "--config-path",
             action="append",
             metavar="CONFIG_FILE",
             help="Specify config file. Can be given multiple times and"
-                 " may specify directories containing *.yaml files."
+            " may specify directories containing *.yaml files.",
         )
 
         config_parser.add_argument(
             "--keys-directory",
             metavar="DIRECTORY",
             help="Where files such as certs and signing keys are stored when"
-                 " their location is given explicitly in the config."
-                 " Defaults to the directory containing the last config file",
+            " their location is given explicitly in the config."
+            " Defaults to the directory containing the last config file",
         )
 
         config_args = config_parser.parse_args(argv)
@@ -203,9 +196,7 @@ class Config(object):
 
         obj = cls()
         obj.read_config_files(
-            config_files,
-            keys_directory=config_args.keys_directory,
-            generate_keys=False,
+            config_files, keys_directory=config_args.keys_directory, generate_keys=False
         )
         return obj
 
@@ -213,38 +204,38 @@ class Config(object):
     def load_or_generate_config(cls, description, argv):
         config_parser = argparse.ArgumentParser(add_help=False)
         config_parser.add_argument(
-            "-c", "--config-path",
+            "-c",
+            "--config-path",
             action="append",
             metavar="CONFIG_FILE",
             help="Specify config file. Can be given multiple times and"
-                 " may specify directories containing *.yaml files."
+            " may specify directories containing *.yaml files.",
         )
         config_parser.add_argument(
             "--generate-config",
             action="store_true",
-            help="Generate a config file for the server name"
+            help="Generate a config file for the server name",
         )
         config_parser.add_argument(
             "--report-stats",
             action="store",
             help="Whether the generated config reports anonymized usage statistics",
-            choices=["yes", "no"]
+            choices=["yes", "no"],
         )
         config_parser.add_argument(
             "--generate-keys",
             action="store_true",
-            help="Generate any missing key files then exit"
+            help="Generate any missing key files then exit",
         )
         config_parser.add_argument(
             "--keys-directory",
             metavar="DIRECTORY",
             help="Used with 'generate-*' options to specify where files such as"
-                 " certs and signing keys should be stored in, unless explicitly"
-                 " specified in the config."
+            " certs and signing keys should be stored in, unless explicitly"
+            " specified in the config.",
         )
         config_parser.add_argument(
-            "-H", "--server-name",
-            help="The server name to generate a config file for"
+            "-H", "--server-name", help="The server name to generate a config file for"
         )
         config_args, remaining_args = config_parser.parse_known_args(argv)
 
@@ -257,8 +248,8 @@ class Config(object):
         if config_args.generate_config:
             if config_args.report_stats is None:
                 config_parser.error(
-                    "Please specify either --report-stats=yes or --report-stats=no\n\n" +
-                    MISSING_REPORT_STATS_SPIEL
+                    "Please specify either --report-stats=yes or --report-stats=no\n\n"
+                    + MISSING_REPORT_STATS_SPIEL
                 )
             if not config_files:
                 config_parser.error(
@@ -287,26 +278,32 @@ class Config(object):
                         config_dir_path=config_dir_path,
                         server_name=server_name,
                         report_stats=(config_args.report_stats == "yes"),
-                        is_generating_file=True
+                        is_generating_file=True,
                     )
                     obj.invoke_all("generate_files", config)
                     config_file.write(config_str)
-                print((
-                    "A config file has been generated in %r for server name"
-                    " %r with corresponding SSL keys and self-signed"
-                    " certificates. Please review this file and customise it"
-                    " to your needs."
-                ) % (config_path, server_name))
+                print(
+                    (
+                        "A config file has been generated in %r for server name"
+                        " %r with corresponding SSL keys and self-signed"
+                        " certificates. Please review this file and customise it"
+                        " to your needs."
+                    )
+                    % (config_path, server_name)
+                )
                 print(
                     "If this server name is incorrect, you will need to"
                     " regenerate the SSL certificates"
                 )
                 return
             else:
-                print((
-                    "Config file %r already exists. Generating any missing key"
-                    " files."
-                ) % (config_path,))
+                print(
+                    (
+                        "Config file %r already exists. Generating any missing key"
+                        " files."
+                    )
+                    % (config_path,)
+                )
                 generate_keys = True
 
         parser = argparse.ArgumentParser(
@@ -338,8 +335,7 @@ class Config(object):
 
         return obj
 
-    def read_config_files(self, config_files, keys_directory=None,
-                          generate_keys=False):
+    def read_config_files(self, config_files, keys_directory=None, generate_keys=False):
         if not keys_directory:
             keys_directory = os.path.dirname(config_files[-1])
 
@@ -364,8 +360,9 @@ class Config(object):
 
         if "report_stats" not in config:
             raise ConfigError(
-                MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS + "\n" +
-                MISSING_REPORT_STATS_SPIEL
+                MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS
+                + "\n"
+                + MISSING_REPORT_STATS_SPIEL
             )
 
         if generate_keys:
@@ -399,16 +396,16 @@ def find_config_files(search_paths):
                 for entry in os.listdir(config_path):
                     entry_path = os.path.join(config_path, entry)
                     if not os.path.isfile(entry_path):
-                        print (
-                            "Found subdirectory in config directory: %r. IGNORING."
-                        ) % (entry_path, )
+                        err = "Found subdirectory in config directory: %r. IGNORING."
+                        print(err % (entry_path,))
                         continue
 
                     if not entry.endswith(".yaml"):
-                        print (
-                            "Found file in config directory that does not"
-                            " end in '.yaml': %r. IGNORING."
-                        ) % (entry_path, )
+                        err = (
+                            "Found file in config directory that does not end in "
+                            "'.yaml': %r. IGNORING."
+                        )
+                        print(err % (entry_path,))
                         continue
 
                     files.append(entry_path)
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 3b161d708a..c21cb3dd87 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -33,11 +33,16 @@ class AppServiceConfig(Config):
     def read_config(self, config):
         self.app_service_config_files = config.get("app_service_config_files", [])
         self.notify_appservices = config.get("notify_appservices", True)
+        self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
 
     def default_config(cls, **kwargs):
         return """\
         # A list of application service config file to use
         app_service_config_files: []
+
+        # Whether or not to track application service IP addresses. Implicitly
+        # enables MAU tracking for application service users.
+        track_appservice_user_ips: False
         """
 
 
diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py
index e22c731aad..f193a090ae 100644
--- a/synapse/config/consent_config.py
+++ b/synapse/config/consent_config.py
@@ -42,6 +42,14 @@ DEFAULT_CONFIG = """\
 # until the user consents to the privacy policy. The value of the setting is
 # used as the text of the error.
 #
+# 'require_at_registration', if enabled, will add a step to the registration
+# process, similar to how captcha works. Users will be required to accept the
+# policy before their account is created.
+#
+# 'policy_name' is the display name of the policy users will see when registering
+# for an account. Has no effect unless `require_at_registration` is enabled.
+# Defaults to "Privacy Policy".
+#
 # user_consent:
 #   template_dir: res/templates/privacy
 #   version: 1.0
@@ -54,6 +62,8 @@ DEFAULT_CONFIG = """\
 #   block_events_error: >-
 #     To continue using this homeserver you must review and agree to the
 #     terms and conditions at %(consent_uri)s
+#   require_at_registration: False
+#   policy_name: Privacy Policy
 #
 """
 
@@ -67,6 +77,8 @@ class ConsentConfig(Config):
         self.user_consent_server_notice_content = None
         self.user_consent_server_notice_to_guests = False
         self.block_events_without_consent_error = None
+        self.user_consent_at_registration = False
+        self.user_consent_policy_name = "Privacy Policy"
 
     def read_config(self, config):
         consent_config = config.get("user_consent")
@@ -83,6 +95,12 @@ class ConsentConfig(Config):
         self.user_consent_server_notice_to_guests = bool(consent_config.get(
             "send_server_notice_to_guests", False,
         ))
+        self.user_consent_at_registration = bool(consent_config.get(
+            "require_at_registration", False,
+        ))
+        self.user_consent_policy_name = consent_config.get(
+            "policy_name", "Privacy Policy",
+        )
 
     def default_config(self, **kwargs):
         return DEFAULT_CONFIG
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index fe156b6930..93d70cff14 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -13,10 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
+
 # This file can't be called email.py because if it is, we cannot:
 import email.utils
+import logging
+import os
+
+import pkg_resources
 
-from ._base import Config
+from ._base import Config, ConfigError
+
+logger = logging.getLogger(__name__)
 
 
 class EmailConfig(Config):
@@ -38,7 +46,6 @@ class EmailConfig(Config):
                 "smtp_host",
                 "smtp_port",
                 "notif_from",
-                "template_dir",
                 "notif_template_html",
                 "notif_template_text",
             ]
@@ -62,9 +69,26 @@ class EmailConfig(Config):
             self.email_smtp_host = email_config["smtp_host"]
             self.email_smtp_port = email_config["smtp_port"]
             self.email_notif_from = email_config["notif_from"]
-            self.email_template_dir = email_config["template_dir"]
             self.email_notif_template_html = email_config["notif_template_html"]
             self.email_notif_template_text = email_config["notif_template_text"]
+
+            template_dir = email_config.get("template_dir")
+            # we need an absolute path, because we change directory after starting (and
+            # we don't yet know what auxilliary templates like mail.css we will need).
+            # (Note that loading as package_resources with jinja.PackageLoader doesn't
+            # work for the same reason.)
+            if not template_dir:
+                template_dir = pkg_resources.resource_filename(
+                    'synapse', 'res/templates'
+                )
+            template_dir = os.path.abspath(template_dir)
+
+            for f in self.email_notif_template_text, self.email_notif_template_html:
+                p = os.path.join(template_dir, f)
+                if not os.path.isfile(p):
+                    raise ConfigError("Unable to find email template file %s" % (p, ))
+            self.email_template_dir = template_dir
+
             self.email_notif_for_new_users = email_config.get(
                 "notif_for_new_users", True
             )
@@ -113,7 +137,9 @@ class EmailConfig(Config):
         #   require_transport_security: False
         #   notif_from: "Your Friendly %(app)s Home Server <noreply@example.com>"
         #   app_name: Matrix
-        #   template_dir: res/templates
+        #   # if template_dir is unset, uses the example templates that are part of
+        #   # the Synapse distribution.
+        #   #template_dir: res/templates
         #   notif_template_html: notif_mail.html
         #   notif_template_text: notif_mail.txt
         #   notif_for_new_users: True
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index b8d5690f2b..10dd40159f 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -31,6 +31,7 @@ from .push import PushConfig
 from .ratelimiting import RatelimitConfig
 from .registration import RegistrationConfig
 from .repository import ContentRepositoryConfig
+from .room_directory import RoomDirectoryConfig
 from .saml2 import SAML2Config
 from .server import ServerConfig
 from .server_notices_config import ServerNoticesConfig
@@ -49,7 +50,7 @@ class HomeServerConfig(TlsConfig, ServerConfig, DatabaseConfig, LoggingConfig,
                        WorkerConfig, PasswordAuthProviderConfig, PushConfig,
                        SpamCheckerConfig, GroupsConfig, UserDirectoryConfig,
                        ConsentConfig,
-                       ServerNoticesConfig,
+                       ServerNoticesConfig, RoomDirectoryConfig,
                        ):
     pass
 
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index e9a936118d..7081868963 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -50,6 +50,7 @@ handlers:
         maxBytes: 104857600
         backupCount: 10
         filters: [context]
+        encoding: utf8
     console:
         class: logging.StreamHandler
         formatter: precise
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index 0fb964eb67..717bbfec61 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -15,10 +15,10 @@
 
 from distutils.util import strtobool
 
+from synapse.config._base import Config, ConfigError
+from synapse.types import RoomAlias
 from synapse.util.stringutils import random_string_with_symbols
 
-from ._base import Config
-
 
 class RegistrationConfig(Config):
 
@@ -44,6 +44,10 @@ class RegistrationConfig(Config):
         )
 
         self.auto_join_rooms = config.get("auto_join_rooms", [])
+        for room_alias in self.auto_join_rooms:
+            if not RoomAlias.is_valid(room_alias):
+                raise ConfigError('Invalid auto_join_rooms entry %s' % (room_alias,))
+        self.autocreate_auto_join_rooms = config.get("autocreate_auto_join_rooms", True)
 
     def default_config(self, **kwargs):
         registration_shared_secret = random_string_with_symbols(50)
@@ -89,15 +93,24 @@ class RegistrationConfig(Config):
 
         # The list of identity servers trusted to verify third party
         # identifiers by this server.
+        #
+        # Also defines the ID server which will be called when an account is
+        # deactivated (one will be picked arbitrarily).
         trusted_third_party_id_servers:
             - matrix.org
             - vector.im
-            - riot.im
 
         # Users who register on this homeserver will automatically be joined
         # to these rooms
         #auto_join_rooms:
         #    - "#example:example.com"
+
+        # Where auto_join_rooms are specified, setting this flag ensures that the
+        # the rooms exist by creating them when the first user on the
+        # homeserver registers.
+        # Setting to false means that if the rooms are not manually created,
+        # users cannot be auto-joined since they do not exist.
+        autocreate_auto_join_rooms: true
         """ % locals()
 
     def add_arguments(self, parser):
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index fc909c1fac..06c62ab62c 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -178,7 +178,7 @@ class ContentRepositoryConfig(Config):
     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")
         uploads_path = self.default_path("uploads")
-        return """
+        return r"""
         # Directory where uploaded images and attachments are stored.
         media_store_path: "%(media_store)s"
 
diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py
new file mode 100644
index 0000000000..9da13ab11b
--- /dev/null
+++ b/synapse/config/room_directory.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.util import glob_to_regex
+
+from ._base import Config, ConfigError
+
+
+class RoomDirectoryConfig(Config):
+    def read_config(self, config):
+        alias_creation_rules = config["alias_creation_rules"]
+
+        self._alias_creation_rules = [
+            _AliasRule(rule)
+            for rule in alias_creation_rules
+        ]
+
+    def default_config(self, config_dir_path, server_name, **kwargs):
+        return """
+        # The `alias_creation` option controls who's allowed to create aliases
+        # on this server.
+        #
+        # The format of this option is a list of rules that contain globs that
+        # match against user_id and the new alias (fully qualified with server
+        # name). The action in the first rule that matches is taken, which can
+        # currently either be "allow" or "deny".
+        #
+        # If no rules match the request is denied.
+        alias_creation_rules:
+            - user_id: "*"
+              alias: "*"
+              action: allow
+        """
+
+    def is_alias_creation_allowed(self, user_id, alias):
+        """Checks if the given user is allowed to create the given alias
+
+        Args:
+            user_id (str)
+            alias (str)
+
+        Returns:
+            boolean: True if user is allowed to crate the alias
+        """
+        for rule in self._alias_creation_rules:
+            if rule.matches(user_id, alias):
+                return rule.action == "allow"
+
+        return False
+
+
+class _AliasRule(object):
+    def __init__(self, rule):
+        action = rule["action"]
+        user_id = rule["user_id"]
+        alias = rule["alias"]
+
+        if action in ("allow", "deny"):
+            self.action = action
+        else:
+            raise ConfigError(
+                "alias_creation_rules rules can only have action of 'allow'"
+                " or 'deny'"
+            )
+
+        try:
+            self._user_id_regex = glob_to_regex(user_id)
+            self._alias_regex = glob_to_regex(alias)
+        except Exception as e:
+            raise ConfigError("Failed to parse glob into regex: %s", e)
+
+    def matches(self, user_id, alias):
+        """Tests if this rule matches the given user_id and alias.
+
+        Args:
+            user_id (str)
+            alias (str)
+
+        Returns:
+            boolean
+        """
+
+        # Note: The regexes are anchored at both ends
+        if not self._user_id_regex.match(user_id):
+            return False
+
+        if not self._alias_regex.match(alias):
+            return False
+
+        return True
diff --git a/synapse/config/server.py b/synapse/config/server.py
index c1c7c0105e..4a5b902f8e 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -62,6 +62,11 @@ class ServerConfig(Config):
         # master, potentially causing inconsistency.
         self.enable_media_repo = config.get("enable_media_repo", True)
 
+        # whether to enable search. If disabled, new entries will not be inserted
+        # into the search tables and they will not be indexed. Users will receive
+        # errors when attempting to search for messages.
+        self.enable_search = config.get("enable_search", True)
+
         self.filter_timeline_limit = config.get("filter_timeline_limit", -1)
 
         # Whether we should block invites sent to users on this server
@@ -77,6 +82,7 @@ class ServerConfig(Config):
             self.max_mau_value = config.get(
                 "max_mau_value", 0,
             )
+        self.mau_stats_only = config.get("mau_stats_only", False)
 
         self.mau_limits_reserved_threepids = config.get(
             "mau_limit_reserved_threepids", []
@@ -372,13 +378,23 @@ class ServerConfig(Config):
           # max_mau_value: 50
           # mau_trial_days: 2
           #
+          # If enabled, the metrics for the number of monthly active users will
+          # be populated, however no one will be limited. If limit_usage_by_mau
+          # is true, this is implied to be true.
+          # mau_stats_only: False
+          #
           # Sometimes the server admin will want to ensure certain accounts are
           # never blocked by mau checking. These accounts are specified here.
           #
           # mau_limit_reserved_threepids:
           # - medium: 'email'
           #   address: 'reserved_user@example.com'
-
+          #
+          # Room searching
+          #
+          # If disabled, new messages will not be indexed for searching and users
+          # will receive errors when searching for messages. Defaults to enabled.
+          # enable_search: true
         """ % locals()
 
     def read_arguments(self, args):
diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py
index 57d4665e84..d40e4b8591 100644
--- a/synapse/crypto/keyclient.py
+++ b/synapse/crypto/keyclient.py
@@ -15,6 +15,8 @@
 
 import logging
 
+from six.moves import urllib
+
 from canonicaljson import json
 
 from twisted.internet import defer, reactor
@@ -28,15 +30,15 @@ from synapse.util import logcontext
 
 logger = logging.getLogger(__name__)
 
-KEY_API_V1 = b"/_matrix/key/v1/"
+KEY_API_V2 = "/_matrix/key/v2/server/%s"
 
 
 @defer.inlineCallbacks
-def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1):
+def fetch_server_key(server_name, tls_client_options_factory, key_id):
     """Fetch the keys for a remote server."""
 
     factory = SynapseKeyClientFactory()
-    factory.path = path
+    factory.path = KEY_API_V2 % (urllib.parse.quote(key_id), )
     factory.host = server_name
     endpoint = matrix_federation_endpoint(
         reactor, server_name, tls_client_options_factory, timeout=30
@@ -55,7 +57,7 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1):
                 raise IOError("Cannot get key for %r" % server_name)
         except (ConnectError, DomainError) as e:
             logger.warn("Error getting key for %r: %s", server_name, e)
-        except Exception as e:
+        except Exception:
             logger.exception("Error getting key for %r", server_name)
     raise IOError("Cannot get key for %r" % server_name)
 
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index d89f94c219..515ebbc148 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2017 New Vector Ltd.
+# Copyright 2017, 2018 New Vector Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,8 +18,6 @@ import hashlib
 import logging
 from collections import namedtuple
 
-from six.moves import urllib
-
 from signedjson.key import (
     decode_verify_key_bytes,
     encode_verify_key_base64,
@@ -395,32 +393,13 @@ class Keyring(object):
 
     @defer.inlineCallbacks
     def get_keys_from_server(self, server_name_and_key_ids):
-        @defer.inlineCallbacks
-        def get_key(server_name, key_ids):
-            keys = None
-            try:
-                keys = yield self.get_server_verify_key_v2_direct(
-                    server_name, key_ids
-                )
-            except Exception as e:
-                logger.info(
-                    "Unable to get key %r for %r directly: %s %s",
-                    key_ids, server_name,
-                    type(e).__name__, str(e),
-                )
-
-            if not keys:
-                keys = yield self.get_server_verify_key_v1_direct(
-                    server_name, key_ids
-                )
-
-                keys = {server_name: keys}
-
-            defer.returnValue(keys)
-
         results = yield logcontext.make_deferred_yieldable(defer.gatherResults(
             [
-                run_in_background(get_key, server_name, key_ids)
+                run_in_background(
+                    self.get_server_verify_key_v2_direct,
+                    server_name,
+                    key_ids,
+                )
                 for server_name, key_ids in server_name_and_key_ids
             ],
             consumeErrors=True,
@@ -525,10 +504,7 @@ class Keyring(object):
                 continue
 
             (response, tls_certificate) = yield fetch_server_key(
-                server_name, self.hs.tls_client_options_factory,
-                path=("/_matrix/key/v2/server/%s" % (
-                    urllib.parse.quote(requested_key_id),
-                )).encode("ascii"),
+                server_name, self.hs.tls_client_options_factory, requested_key_id
             )
 
             if (u"signatures" not in response
@@ -657,78 +633,6 @@ class Keyring(object):
 
         defer.returnValue(results)
 
-    @defer.inlineCallbacks
-    def get_server_verify_key_v1_direct(self, server_name, key_ids):
-        """Finds a verification key for the server with one of the key ids.
-        Args:
-            server_name (str): The name of the server to fetch a key for.
-            keys_ids (list of str): The key_ids to check for.
-        """
-
-        # Try to fetch the key from the remote server.
-
-        (response, tls_certificate) = yield fetch_server_key(
-            server_name, self.hs.tls_client_options_factory
-        )
-
-        # Check the response.
-
-        x509_certificate_bytes = crypto.dump_certificate(
-            crypto.FILETYPE_ASN1, tls_certificate
-        )
-
-        if ("signatures" not in response
-                or server_name not in response["signatures"]):
-            raise KeyLookupError("Key response not signed by remote server")
-
-        if "tls_certificate" not in response:
-            raise KeyLookupError("Key response missing TLS certificate")
-
-        tls_certificate_b64 = response["tls_certificate"]
-
-        if encode_base64(x509_certificate_bytes) != tls_certificate_b64:
-            raise KeyLookupError("TLS certificate doesn't match")
-
-        # Cache the result in the datastore.
-
-        time_now_ms = self.clock.time_msec()
-
-        verify_keys = {}
-        for key_id, key_base64 in response["verify_keys"].items():
-            if is_signing_algorithm_supported(key_id):
-                key_bytes = decode_base64(key_base64)
-                verify_key = decode_verify_key_bytes(key_id, key_bytes)
-                verify_key.time_added = time_now_ms
-                verify_keys[key_id] = verify_key
-
-        for key_id in response["signatures"][server_name]:
-            if key_id not in response["verify_keys"]:
-                raise KeyLookupError(
-                    "Key response must include verification keys for all"
-                    " signatures"
-                )
-            if key_id in verify_keys:
-                verify_signed_json(
-                    response,
-                    server_name,
-                    verify_keys[key_id]
-                )
-
-        yield self.store.store_server_certificate(
-            server_name,
-            server_name,
-            time_now_ms,
-            tls_certificate,
-        )
-
-        yield self.store_keys(
-            server_name=server_name,
-            from_server=server_name,
-            verify_keys=verify_keys,
-        )
-
-        defer.returnValue(verify_keys)
-
     def store_keys(self, server_name, from_server, verify_keys):
         """Store a collection of verify keys for a given server
         Args:
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index af3eee95b9..c81d8e6729 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -200,11 +200,11 @@ def _is_membership_change_allowed(event, auth_events):
     membership = event.content["membership"]
 
     # Check if this is the room creator joining:
-    if len(event.prev_events) == 1 and Membership.JOIN == membership:
+    if len(event.prev_event_ids()) == 1 and Membership.JOIN == membership:
         # Get room creation event:
         key = (EventTypes.Create, "", )
         create = auth_events.get(key)
-        if create and event.prev_events[0][0] == create.event_id:
+        if create and event.prev_event_ids()[0] == create.event_id:
             if create.content["creator"] == event.state_key:
                 return
 
@@ -690,7 +690,7 @@ def auth_types_for_event(event):
     auth_types = []
 
     auth_types.append((EventTypes.PowerLevels, "", ))
-    auth_types.append((EventTypes.Member, event.user_id, ))
+    auth_types.append((EventTypes.Member, event.sender, ))
     auth_types.append((EventTypes.Create, "", ))
 
     if event.type == EventTypes.Member:
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 12f1eb0a3e..84c75495d5 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -159,6 +159,24 @@ class EventBase(object):
     def keys(self):
         return six.iterkeys(self._event_dict)
 
+    def prev_event_ids(self):
+        """Returns the list of prev event IDs. The order matches the order
+        specified in the event, though there is no meaning to it.
+
+        Returns:
+            list[str]: The list of event IDs of this event's prev_events
+        """
+        return [e for e, _ in self.prev_events]
+
+    def auth_event_ids(self):
+        """Returns the list of auth event IDs. The order matches the order
+        specified in the event, though there is no meaning to it.
+
+        Returns:
+            list[str]: The list of event IDs of this event's auth_events
+        """
+        return [e for e, _ in self.auth_events]
+
 
 class FrozenEvent(EventBase):
     def __init__(self, event_dict, internal_metadata_dict={}, rejected_reason=None):
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 819e8f7331..98722ae543 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import re
 
 import six
 from six import iteritems
@@ -44,6 +43,7 @@ from synapse.replication.http.federation import (
     ReplicationGetQueryRestServlet,
 )
 from synapse.types import get_domain_from_id
+from synapse.util import glob_to_regex
 from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.logcontext import nested_logging_context
@@ -162,8 +162,30 @@ class FederationServer(FederationBase):
                 p["age_ts"] = request_time - int(p["age"])
                 del p["age"]
 
+            # We try and pull out an event ID so that if later checks fail we
+            # can log something sensible. We don't mandate an event ID here in
+            # case future event formats get rid of the key.
+            possible_event_id = p.get("event_id", "<Unknown>")
+
+            # Now we get the room ID so that we can check that we know the
+            # version of the room.
+            room_id = p.get("room_id")
+            if not room_id:
+                logger.info(
+                    "Ignoring PDU as does not have a room_id. Event ID: %s",
+                    possible_event_id,
+                )
+                continue
+
+            try:
+                # In future we will actually use the room version to parse the
+                # PDU into an event.
+                yield self.store.get_room_version(room_id)
+            except NotFoundError:
+                logger.info("Ignoring PDU for unknown room_id: %s", room_id)
+                continue
+
             event = event_from_pdu_json(p)
-            room_id = event.room_id
             pdus_by_room.setdefault(room_id, []).append(event)
 
         pdu_results = {}
@@ -324,11 +346,6 @@ class FederationServer(FederationBase):
             defer.returnValue((404, ""))
 
     @defer.inlineCallbacks
-    @log_function
-    def on_pull_request(self, origin, versions):
-        raise NotImplementedError("Pull transactions not implemented")
-
-    @defer.inlineCallbacks
     def on_query_request(self, query_type, args):
         received_queries_counter.labels(query_type).inc()
         resp = yield self.registry.on_query(query_type, args)
@@ -507,19 +524,19 @@ class FederationServer(FederationBase):
     @defer.inlineCallbacks
     @log_function
     def on_get_missing_events(self, origin, room_id, earliest_events,
-                              latest_events, limit, min_depth):
+                              latest_events, limit):
         with (yield self._server_linearizer.queue((origin, room_id))):
             origin_host, _ = parse_server_name(origin)
             yield self.check_server_matches_acl(origin_host, room_id)
 
             logger.info(
                 "on_get_missing_events: earliest_events: %r, latest_events: %r,"
-                " limit: %d, min_depth: %d",
-                earliest_events, latest_events, limit, min_depth
+                " limit: %d",
+                earliest_events, latest_events, limit,
             )
 
             missing_events = yield self.handler.on_get_missing_events(
-                origin, room_id, earliest_events, latest_events, limit, min_depth
+                origin, room_id, earliest_events, latest_events, limit,
             )
 
             if len(missing_events) < 5:
@@ -729,22 +746,10 @@ def _acl_entry_matches(server_name, acl_entry):
     if not isinstance(acl_entry, six.string_types):
         logger.warn("Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry))
         return False
-    regex = _glob_to_regex(acl_entry)
+    regex = glob_to_regex(acl_entry)
     return regex.match(server_name)
 
 
-def _glob_to_regex(glob):
-    res = ''
-    for c in glob:
-        if c == '*':
-            res = res + '.*'
-        elif c == '?':
-            res = res + '.'
-        else:
-            res = res + re.escape(c)
-    return re.compile(res + "\\Z", re.IGNORECASE)
-
-
 class FederationHandlerRegistry(object):
     """Allows classes to register themselves as handlers for a given EDU or
     query type for incoming federation traffic.
@@ -800,7 +805,7 @@ class FederationHandlerRegistry(object):
             yield handler(origin, content)
         except SynapseError as e:
             logger.info("Failed to handle edu %r: %r", edu_type, e)
-        except Exception as e:
+        except Exception:
             logger.exception("Failed to handle edu %r", edu_type)
 
     def on_query(self, query_type, args):
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 98b5950800..099ace28c1 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -183,9 +183,7 @@ class TransactionQueue(object):
                         # banned then it won't receive the event because it won't
                         # be in the room after the ban.
                         destinations = yield self.state.get_current_hosts_in_room(
-                            event.room_id, latest_event_ids=[
-                                prev_id for prev_id, _ in event.prev_events
-                            ],
+                            event.room_id, latest_event_ids=event.prev_event_ids(),
                         )
                     except Exception:
                         logger.exception(
@@ -633,14 +631,6 @@ class TransactionQueue(object):
                 transaction, json_data_cb
             )
             code = 200
-
-            if response:
-                for e_id, r in response.get("pdus", {}).items():
-                    if "error" in r:
-                        logger.warn(
-                            "Transaction returned error for %s: %s",
-                            e_id, r,
-                        )
         except HttpResponseException as e:
             code = e.code
             response = e.response
@@ -657,19 +647,24 @@ class TransactionQueue(object):
             destination, txn_id, code
         )
 
-        logger.debug("TX [%s] Sent transaction", destination)
-        logger.debug("TX [%s] Marking as delivered...", destination)
-
         yield self.transaction_actions.delivered(
             transaction, code, response
         )
 
-        logger.debug("TX [%s] Marked as delivered", destination)
+        logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id)
 
-        if code != 200:
+        if code == 200:
+            for e_id, r in response.get("pdus", {}).items():
+                if "error" in r:
+                    logger.warn(
+                        "TX [%s] {%s} Remote returned error for %s: %s",
+                        destination, txn_id, e_id, r,
+                    )
+        else:
             for p in pdus:
-                logger.info(
-                    "Failed to send event %s to %s", p.event_id, destination
+                logger.warn(
+                    "TX [%s] {%s} Failed to send event %s",
+                    destination, txn_id, p.event_id,
                 )
             success = False
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 2ab973d6c8..edba5a9808 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -143,9 +143,17 @@ class TransportLayerClient(object):
             transaction (Transaction)
 
         Returns:
-            Deferred: Results of the deferred is a tuple in the form of
-            (response_code, response_body) where the response_body is a
-            python dict decoded from json
+            Deferred: Succeeds when we get a 2xx HTTP response. The result
+            will be the decoded JSON body.
+
+            Fails with ``HTTPRequestException`` if we get an HTTP response
+            code >= 300.
+
+            Fails with ``NotRetryingDestination`` if we are not yet ready
+            to retry this server.
+
+            Fails with ``FederationDeniedError`` if this destination
+            is not on our federation whitelist
         """
         logger.debug(
             "send_data dest=%s, txid=%s",
@@ -170,11 +178,6 @@ class TransportLayerClient(object):
             backoff_on_404=True,  # If we get a 404 the other side has gone
         )
 
-        logger.debug(
-            "send_data dest=%s, txid=%s, got response: 200",
-            transaction.destination, transaction.transaction_id,
-        )
-
         defer.returnValue(response)
 
     @defer.inlineCallbacks
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 2f874b4838..3553f418f1 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -362,14 +362,6 @@ class FederationSendServlet(BaseFederationServlet):
         defer.returnValue((code, response))
 
 
-class FederationPullServlet(BaseFederationServlet):
-    PATH = "/pull/"
-
-    # This is for when someone asks us for everything since version X
-    def on_GET(self, origin, content, query):
-        return self.handler.on_pull_request(query["origin"][0], query["v"])
-
-
 class FederationEventServlet(BaseFederationServlet):
     PATH = "/event/(?P<event_id>[^/]*)/"
 
@@ -560,7 +552,6 @@ class FederationGetMissingEventsServlet(BaseFederationServlet):
     @defer.inlineCallbacks
     def on_POST(self, origin, content, query, room_id):
         limit = int(content.get("limit", 10))
-        min_depth = int(content.get("min_depth", 0))
         earliest_events = content.get("earliest_events", [])
         latest_events = content.get("latest_events", [])
 
@@ -569,7 +560,6 @@ class FederationGetMissingEventsServlet(BaseFederationServlet):
             room_id=room_id,
             earliest_events=earliest_events,
             latest_events=latest_events,
-            min_depth=min_depth,
             limit=limit,
         )
 
@@ -1263,7 +1253,6 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet):
 
 FEDERATION_SERVLET_CLASSES = (
     FederationSendServlet,
-    FederationPullServlet,
     FederationEventServlet,
     FederationStateServlet,
     FederationStateIdsServlet,
diff --git a/synapse/federation/units.py b/synapse/federation/units.py
index c5ab14314e..025a79c022 100644
--- a/synapse/federation/units.py
+++ b/synapse/federation/units.py
@@ -117,9 +117,6 @@ class Transaction(JsonEncodedObject):
                 "Require 'transaction_id' to construct a Transaction"
             )
 
-        for p in pdus:
-            p.transaction_id = kwargs["transaction_id"]
-
         kwargs["pdus"] = [p.get_pdu_json() for p in pdus]
 
         return Transaction(**kwargs)
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index f0f89af7dc..17eedf4dbf 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -28,6 +28,7 @@ from synapse.metrics import (
     event_processing_loop_room_count,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.util import log_failure
 from synapse.util.logcontext import make_deferred_yieldable, run_in_background
 from synapse.util.metrics import Measure
 
@@ -36,17 +37,6 @@ logger = logging.getLogger(__name__)
 events_processed_counter = Counter("synapse_handlers_appservice_events_processed", "")
 
 
-def log_failure(failure):
-    logger.error(
-        "Application Services Failure",
-        exc_info=(
-            failure.type,
-            failure.value,
-            failure.getTracebackObject()
-        )
-    )
-
-
 class ApplicationServicesHandler(object):
 
     def __init__(self, hs):
@@ -112,7 +102,10 @@ class ApplicationServicesHandler(object):
 
                         if not self.started_scheduler:
                             def start_scheduler():
-                                return self.scheduler.start().addErrback(log_failure)
+                                return self.scheduler.start().addErrback(
+                                    log_failure, "Application Services Failure",
+                                )
+
                             run_as_background_process("as_scheduler", start_scheduler)
                             self.started_scheduler = True
 
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 2a5eab124f..c6e89db4bc 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -22,7 +22,7 @@ import bcrypt
 import pymacaroons
 from canonicaljson import json
 
-from twisted.internet import defer, threads
+from twisted.internet import defer
 from twisted.web.client import PartialDownloadError
 
 import synapse.util.stringutils as stringutils
@@ -37,8 +37,8 @@ from synapse.api.errors import (
 )
 from synapse.module_api import ModuleApi
 from synapse.types import UserID
+from synapse.util import logcontext
 from synapse.util.caches.expiringcache import ExpiringCache
-from synapse.util.logcontext import make_deferred_yieldable
 
 from ._base import BaseHandler
 
@@ -59,6 +59,7 @@ class AuthHandler(BaseHandler):
             LoginType.EMAIL_IDENTITY: self._check_email_identity,
             LoginType.MSISDN: self._check_msisdn,
             LoginType.DUMMY: self._check_dummy_auth,
+            LoginType.TERMS: self._check_terms_auth,
         }
         self.bcrypt_rounds = hs.config.bcrypt_rounds
 
@@ -431,6 +432,9 @@ class AuthHandler(BaseHandler):
     def _check_dummy_auth(self, authdict, _):
         return defer.succeed(True)
 
+    def _check_terms_auth(self, authdict, _):
+        return defer.succeed(True)
+
     @defer.inlineCallbacks
     def _check_threepid(self, medium, authdict):
         if 'threepid_creds' not in authdict:
@@ -462,6 +466,22 @@ class AuthHandler(BaseHandler):
     def _get_params_recaptcha(self):
         return {"public_key": self.hs.config.recaptcha_public_key}
 
+    def _get_params_terms(self):
+        return {
+            "policies": {
+                "privacy_policy": {
+                    "version": self.hs.config.user_consent_version,
+                    "en": {
+                        "name": self.hs.config.user_consent_policy_name,
+                        "url": "%s_matrix/consent?v=%s" % (
+                            self.hs.config.public_baseurl,
+                            self.hs.config.user_consent_version,
+                        ),
+                    },
+                },
+            },
+        }
+
     def _auth_dict_for_flows(self, flows, session):
         public_flows = []
         for f in flows:
@@ -469,6 +489,7 @@ class AuthHandler(BaseHandler):
 
         get_params = {
             LoginType.RECAPTCHA: self._get_params_recaptcha,
+            LoginType.TERMS: self._get_params_terms,
         }
 
         params = {}
@@ -884,11 +905,7 @@ class AuthHandler(BaseHandler):
                 bcrypt.gensalt(self.bcrypt_rounds),
             ).decode('ascii')
 
-        return make_deferred_yieldable(
-            threads.deferToThreadPool(
-                self.hs.get_reactor(), self.hs.get_reactor().getThreadPool(), _do_hash
-            ),
-        )
+        return logcontext.defer_to_thread(self.hs.get_reactor(), _do_hash)
 
     def validate_hash(self, password, stored_hash):
         """Validates that self.hash(password) == stored_hash.
@@ -913,13 +930,7 @@ class AuthHandler(BaseHandler):
             if not isinstance(stored_hash, bytes):
                 stored_hash = stored_hash.encode('ascii')
 
-            return make_deferred_yieldable(
-                threads.deferToThreadPool(
-                    self.hs.get_reactor(),
-                    self.hs.get_reactor().getThreadPool(),
-                    _do_validate_hash,
-                ),
-            )
+            return logcontext.defer_to_thread(self.hs.get_reactor(), _do_validate_hash)
         else:
             return defer.succeed(False)
 
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index b078df4a76..75fe50c42c 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -17,8 +17,8 @@ import logging
 from twisted.internet import defer
 
 from synapse.api.errors import SynapseError
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.types import UserID, create_requester
-from synapse.util.logcontext import run_in_background
 
 from ._base import BaseHandler
 
@@ -121,7 +121,7 @@ class DeactivateAccountHandler(BaseHandler):
             None
         """
         if not self._user_parter_running:
-            run_in_background(self._user_parter_loop)
+            run_as_background_process("user_parter_loop", self._user_parter_loop)
 
     @defer.inlineCallbacks
     def _user_parter_loop(self):
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 18741c5fac..0699731c13 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -43,6 +43,7 @@ class DirectoryHandler(BaseHandler):
         self.state = hs.get_state_handler()
         self.appservice_handler = hs.get_application_service_handler()
         self.event_creation_handler = hs.get_event_creation_handler()
+        self.config = hs.config
 
         self.federation = hs.get_federation_client()
         hs.get_federation_registry().register_query_handler(
@@ -80,41 +81,88 @@ class DirectoryHandler(BaseHandler):
         )
 
     @defer.inlineCallbacks
-    def create_association(self, user_id, room_alias, room_id, servers=None):
-        # association creation for human users
-        # TODO(erikj): Do user auth.
+    def create_association(self, requester, room_alias, room_id, servers=None,
+                           send_event=True):
+        """Attempt to create a new alias
 
-        if not self.spam_checker.user_may_create_room_alias(user_id, room_alias):
-            raise SynapseError(
-                403, "This user is not permitted to create this alias",
-            )
+        Args:
+            requester (Requester)
+            room_alias (RoomAlias)
+            room_id (str)
+            servers (list[str]|None): List of servers that others servers
+                should try and join via
+            send_event (bool): Whether to send an updated m.room.aliases event
 
-        can_create = yield self.can_modify_alias(
-            room_alias,
-            user_id=user_id
-        )
-        if not can_create:
-            raise SynapseError(
-                400, "This alias is reserved by an application service.",
-                errcode=Codes.EXCLUSIVE
+        Returns:
+            Deferred
+        """
+
+        user_id = requester.user.to_string()
+
+        service = requester.app_service
+        if service:
+            if not service.is_interested_in_alias(room_alias.to_string()):
+                raise SynapseError(
+                    400, "This application service has not reserved"
+                    " this kind of alias.", errcode=Codes.EXCLUSIVE
+                )
+        else:
+            if not self.spam_checker.user_may_create_room_alias(user_id, room_alias):
+                raise AuthError(
+                    403, "This user is not permitted to create this alias",
+                )
+
+            if not self.config.is_alias_creation_allowed(user_id, room_alias.to_string()):
+                # Lets just return a generic message, as there may be all sorts of
+                # reasons why we said no. TODO: Allow configurable error messages
+                # per alias creation rule?
+                raise SynapseError(
+                    403, "Not allowed to create alias",
+                )
+
+            can_create = yield self.can_modify_alias(
+                room_alias,
+                user_id=user_id
             )
+            if not can_create:
+                raise AuthError(
+                    400, "This alias is reserved by an application service.",
+                    errcode=Codes.EXCLUSIVE
+                )
+
         yield self._create_association(room_alias, room_id, servers, creator=user_id)
+        if send_event:
+            yield self.send_room_alias_update_event(
+                requester,
+                room_id
+            )
 
     @defer.inlineCallbacks
-    def create_appservice_association(self, service, room_alias, room_id,
-                                      servers=None):
-        if not service.is_interested_in_alias(room_alias.to_string()):
-            raise SynapseError(
-                400, "This application service has not reserved"
-                " this kind of alias.", errcode=Codes.EXCLUSIVE
-            )
+    def delete_association(self, requester, room_alias, send_event=True):
+        """Remove an alias from the directory
 
-        # association creation for app services
-        yield self._create_association(room_alias, room_id, servers)
+        (this is only meant for human users; AS users should call
+        delete_appservice_association)
 
-    @defer.inlineCallbacks
-    def delete_association(self, requester, user_id, room_alias):
-        # association deletion for human users
+        Args:
+            requester (Requester):
+            room_alias (RoomAlias):
+            send_event (bool): Whether to send an updated m.room.aliases event.
+                Note that, if we delete the canonical alias, we will always attempt
+                to send an m.room.canonical_alias event
+
+        Returns:
+            Deferred[unicode]: room id that the alias used to point to
+
+        Raises:
+            NotFoundError: if the alias doesn't exist
+
+            AuthError: if the user doesn't have perms to delete the alias (ie, the user
+                is neither the creator of the alias, nor a server admin.
+
+            SynapseError: if the alias belongs to an AS
+        """
+        user_id = requester.user.to_string()
 
         try:
             can_delete = yield self._user_can_delete_alias(room_alias, user_id)
@@ -141,11 +189,11 @@ class DirectoryHandler(BaseHandler):
         room_id = yield self._delete_association(room_alias)
 
         try:
-            yield self.send_room_alias_update_event(
-                requester,
-                requester.user.to_string(),
-                room_id
-            )
+            if send_event:
+                yield self.send_room_alias_update_event(
+                    requester,
+                    room_id
+                )
 
             yield self._update_canonical_alias(
                 requester,
@@ -261,7 +309,7 @@ class DirectoryHandler(BaseHandler):
             )
 
     @defer.inlineCallbacks
-    def send_room_alias_update_event(self, requester, user_id, room_id):
+    def send_room_alias_update_event(self, requester, room_id):
         aliases = yield self.store.get_aliases_for_room(room_id)
 
         yield self.event_creation_handler.create_and_send_nonmember_event(
@@ -270,7 +318,7 @@ class DirectoryHandler(BaseHandler):
                 "type": EventTypes.Aliases,
                 "state_key": self.hs.hostname,
                 "room_id": room_id,
-                "sender": user_id,
+                "sender": requester.user.to_string(),
                 "content": {"aliases": aliases},
             },
             ratelimit=False
diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py
new file mode 100644
index 0000000000..42b040375f
--- /dev/null
+++ b/synapse/handlers/e2e_room_keys.py
@@ -0,0 +1,297 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017, 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from six import iteritems
+
+from twisted.internet import defer
+
+from synapse.api.errors import NotFoundError, RoomKeysVersionError, StoreError
+from synapse.util.async_helpers import Linearizer
+
+logger = logging.getLogger(__name__)
+
+
+class E2eRoomKeysHandler(object):
+    """
+    Implements an optional realtime backup mechanism for encrypted E2E megolm room keys.
+    This gives a way for users to store and recover their megolm keys if they lose all
+    their clients. It should also extend easily to future room key mechanisms.
+    The actual payload of the encrypted keys is completely opaque to the handler.
+    """
+
+    def __init__(self, hs):
+        self.store = hs.get_datastore()
+
+        # Used to lock whenever a client is uploading key data.  This prevents collisions
+        # between clients trying to upload the details of a new session, given all
+        # clients belonging to a user will receive and try to upload a new session at
+        # roughly the same time.  Also used to lock out uploads when the key is being
+        # changed.
+        self._upload_linearizer = Linearizer("upload_room_keys_lock")
+
+    @defer.inlineCallbacks
+    def get_room_keys(self, user_id, version, room_id=None, session_id=None):
+        """Bulk get the E2E room keys for a given backup, optionally filtered to a given
+        room, or a given session.
+        See EndToEndRoomKeyStore.get_e2e_room_keys for full details.
+
+        Args:
+            user_id(str): the user whose keys we're getting
+            version(str): the version ID of the backup we're getting keys from
+            room_id(string): room ID to get keys for, for None to get keys for all rooms
+            session_id(string): session ID to get keys for, for None to get keys for all
+                sessions
+        Raises:
+            NotFoundError: if the backup version does not exist
+        Returns:
+            A deferred list of dicts giving the session_data and message metadata for
+            these room keys.
+        """
+
+        # we deliberately take the lock to get keys so that changing the version
+        # works atomically
+        with (yield self._upload_linearizer.queue(user_id)):
+            # make sure the backup version exists
+            try:
+                yield self.store.get_e2e_room_keys_version_info(user_id, version)
+            except StoreError as e:
+                if e.code == 404:
+                    raise NotFoundError("Unknown backup version")
+                else:
+                    raise
+
+            results = yield self.store.get_e2e_room_keys(
+                user_id, version, room_id, session_id
+            )
+
+            defer.returnValue(results)
+
+    @defer.inlineCallbacks
+    def delete_room_keys(self, user_id, version, room_id=None, session_id=None):
+        """Bulk delete the E2E room keys for a given backup, optionally filtered to a given
+        room or a given session.
+        See EndToEndRoomKeyStore.delete_e2e_room_keys for full details.
+
+        Args:
+            user_id(str): the user whose backup we're deleting
+            version(str): the version ID of the backup we're deleting
+            room_id(string): room ID to delete keys for, for None to delete keys for all
+                rooms
+            session_id(string): session ID to delete keys for, for None to delete keys
+                for all sessions
+        Returns:
+            A deferred of the deletion transaction
+        """
+
+        # lock for consistency with uploading
+        with (yield self._upload_linearizer.queue(user_id)):
+            yield self.store.delete_e2e_room_keys(user_id, version, room_id, session_id)
+
+    @defer.inlineCallbacks
+    def upload_room_keys(self, user_id, version, room_keys):
+        """Bulk upload a list of room keys into a given backup version, asserting
+        that the given version is the current backup version.  room_keys are merged
+        into the current backup as described in RoomKeysServlet.on_PUT().
+
+        Args:
+            user_id(str): the user whose backup we're setting
+            version(str): the version ID of the backup we're updating
+            room_keys(dict): a nested dict describing the room_keys we're setting:
+
+        {
+            "rooms": {
+                "!abc:matrix.org": {
+                    "sessions": {
+                        "c0ff33": {
+                            "first_message_index": 1,
+                            "forwarded_count": 1,
+                            "is_verified": false,
+                            "session_data": "SSBBTSBBIEZJU0gK"
+                        }
+                    }
+                }
+            }
+        }
+
+        Raises:
+            NotFoundError: if there are no versions defined
+            RoomKeysVersionError: if the uploaded version is not the current version
+        """
+
+        # TODO: Validate the JSON to make sure it has the right keys.
+
+        # XXX: perhaps we should use a finer grained lock here?
+        with (yield self._upload_linearizer.queue(user_id)):
+
+            # Check that the version we're trying to upload is the current version
+            try:
+                version_info = yield self.store.get_e2e_room_keys_version_info(user_id)
+            except StoreError as e:
+                if e.code == 404:
+                    raise NotFoundError("Version '%s' not found" % (version,))
+                else:
+                    raise
+
+            if version_info['version'] != version:
+                # Check that the version we're trying to upload actually exists
+                try:
+                    version_info = yield self.store.get_e2e_room_keys_version_info(
+                        user_id, version,
+                    )
+                    # if we get this far, the version must exist
+                    raise RoomKeysVersionError(current_version=version_info['version'])
+                except StoreError as e:
+                    if e.code == 404:
+                        raise NotFoundError("Version '%s' not found" % (version,))
+                    else:
+                        raise
+
+            # go through the room_keys.
+            # XXX: this should/could be done concurrently, given we're in a lock.
+            for room_id, room in iteritems(room_keys['rooms']):
+                for session_id, session in iteritems(room['sessions']):
+                    yield self._upload_room_key(
+                        user_id, version, room_id, session_id, session
+                    )
+
+    @defer.inlineCallbacks
+    def _upload_room_key(self, user_id, version, room_id, session_id, room_key):
+        """Upload a given room_key for a given room and session into a given
+        version of the backup.  Merges the key with any which might already exist.
+
+        Args:
+            user_id(str): the user whose backup we're setting
+            version(str): the version ID of the backup we're updating
+            room_id(str): the ID of the room whose keys we're setting
+            session_id(str): the session whose room_key we're setting
+            room_key(dict): the room_key being set
+        """
+
+        # get the room_key for this particular row
+        current_room_key = None
+        try:
+            current_room_key = yield self.store.get_e2e_room_key(
+                user_id, version, room_id, session_id
+            )
+        except StoreError as e:
+            if e.code == 404:
+                pass
+            else:
+                raise
+
+        if self._should_replace_room_key(current_room_key, room_key):
+            yield self.store.set_e2e_room_key(
+                user_id, version, room_id, session_id, room_key
+            )
+
+    @staticmethod
+    def _should_replace_room_key(current_room_key, room_key):
+        """
+        Determine whether to replace a given current_room_key (if any)
+        with a newly uploaded room_key backup
+
+        Args:
+            current_room_key (dict): Optional, the current room_key dict if any
+            room_key (dict): The new room_key dict which may or may not be fit to
+                replace the current_room_key
+
+        Returns:
+            True if current_room_key should be replaced by room_key in the backup
+        """
+
+        if current_room_key:
+            # spelt out with if/elifs rather than nested boolean expressions
+            # purely for legibility.
+
+            if room_key['is_verified'] and not current_room_key['is_verified']:
+                return True
+            elif (
+                room_key['first_message_index'] <
+                current_room_key['first_message_index']
+            ):
+                return True
+            elif room_key['forwarded_count'] < current_room_key['forwarded_count']:
+                return True
+            else:
+                return False
+        return True
+
+    @defer.inlineCallbacks
+    def create_version(self, user_id, version_info):
+        """Create a new backup version.  This automatically becomes the new
+        backup version for the user's keys; previous backups will no longer be
+        writeable to.
+
+        Args:
+            user_id(str): the user whose backup version we're creating
+            version_info(dict): metadata about the new version being created
+
+        {
+            "algorithm": "m.megolm_backup.v1",
+            "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K"
+        }
+
+        Returns:
+            A deferred of a string that gives the new version number.
+        """
+
+        # TODO: Validate the JSON to make sure it has the right keys.
+
+        # lock everyone out until we've switched version
+        with (yield self._upload_linearizer.queue(user_id)):
+            new_version = yield self.store.create_e2e_room_keys_version(
+                user_id, version_info
+            )
+            defer.returnValue(new_version)
+
+    @defer.inlineCallbacks
+    def get_version_info(self, user_id, version=None):
+        """Get the info about a given version of the user's backup
+
+        Args:
+            user_id(str): the user whose current backup version we're querying
+            version(str): Optional; if None gives the most recent version
+                otherwise a historical one.
+        Raises:
+            StoreError: code 404 if the requested backup version doesn't exist
+        Returns:
+            A deferred of a info dict that gives the info about the new version.
+
+        {
+            "version": "1234",
+            "algorithm": "m.megolm_backup.v1",
+            "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K"
+        }
+        """
+
+        with (yield self._upload_linearizer.queue(user_id)):
+            res = yield self.store.get_e2e_room_keys_version_info(user_id, version)
+            defer.returnValue(res)
+
+    @defer.inlineCallbacks
+    def delete_version(self, user_id, version=None):
+        """Deletes a given version of the user's e2e_room_keys backup
+
+        Args:
+            user_id(str): the user whose current backup version we're deleting
+            version(str): the version id of the backup being deleted
+        Raises:
+            StoreError: code 404 if this backup version doesn't exist
+        """
+
+        with (yield self._upload_linearizer.queue(user_id)):
+            yield self.store.delete_e2e_room_keys_version(user_id, version)
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 45d955e6f5..a3bb864bb2 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -53,7 +53,7 @@ from synapse.replication.http.federation import (
     ReplicationFederationSendEventsRestServlet,
 )
 from synapse.replication.http.membership import ReplicationUserJoinedLeftRoomRestServlet
-from synapse.state import resolve_events_with_factory
+from synapse.state import StateResolutionStore, resolve_events_with_store
 from synapse.types import UserID, get_domain_from_id
 from synapse.util import logcontext, unwrapFirstError
 from synapse.util.async_helpers import Linearizer
@@ -202,27 +202,22 @@ class FederationHandler(BaseHandler):
             self.room_queues[room_id].append((pdu, origin))
             return
 
-        # If we're no longer in the room just ditch the event entirely. This
-        # is probably an old server that has come back and thinks we're still
-        # in the room (or we've been rejoined to the room by a state reset).
+        # If we're not in the room just ditch the event entirely. This is
+        # probably an old server that has come back and thinks we're still in
+        # the room (or we've been rejoined to the room by a state reset).
         #
-        # If we were never in the room then maybe our database got vaped and
-        # we should check if we *are* in fact in the room. If we are then we
-        # can magically rejoin the room.
+        # Note that if we were never in the room then we would have already
+        # dropped the event, since we wouldn't know the room version.
         is_in_room = yield self.auth.check_host_in_room(
             room_id,
             self.server_name
         )
         if not is_in_room:
-            was_in_room = yield self.store.was_host_joined(
-                pdu.room_id, self.server_name,
+            logger.info(
+                "[%s %s] Ignoring PDU from %s as we're not in the room",
+                room_id, event_id, origin,
             )
-            if was_in_room:
-                logger.info(
-                    "[%s %s] Ignoring PDU from %s as we've left the room",
-                    room_id, event_id, origin,
-                )
-                defer.returnValue(None)
+            defer.returnValue(None)
 
         state = None
         auth_chain = []
@@ -239,7 +234,7 @@ class FederationHandler(BaseHandler):
                 room_id, event_id, min_depth,
             )
 
-            prevs = {e_id for e_id, _ in pdu.prev_events}
+            prevs = set(pdu.prev_event_ids())
             seen = yield self.store.have_seen_events(prevs)
 
             if min_depth and pdu.depth < min_depth:
@@ -309,8 +304,8 @@ class FederationHandler(BaseHandler):
 
                 if sent_to_us_directly:
                     logger.warn(
-                        "[%s %s] Failed to fetch %d prev events: rejecting",
-                        room_id, event_id, len(prevs - seen),
+                        "[%s %s] Rejecting: failed to fetch %d prev events: %s",
+                        room_id, event_id, len(prevs - seen), shortstr(prevs - seen)
                     )
                     raise FederationError(
                         "ERROR",
@@ -384,24 +379,24 @@ class FederationHandler(BaseHandler):
                             for x in remote_state:
                                 event_map[x.event_id] = x
 
-                    # Resolve any conflicting state
-                    @defer.inlineCallbacks
-                    def fetch(ev_ids):
-                        fetched = yield self.store.get_events(
-                            ev_ids, get_prev_content=False, check_redacted=False,
-                        )
-                        # add any events we fetch here to the `event_map` so that we
-                        # can use them to build the state event list below.
-                        event_map.update(fetched)
-                        defer.returnValue(fetched)
-
                     room_version = yield self.store.get_room_version(room_id)
-                    state_map = yield resolve_events_with_factory(
-                        room_version, state_maps, event_map, fetch,
+                    state_map = yield resolve_events_with_store(
+                        room_version, state_maps, event_map,
+                        state_res_store=StateResolutionStore(self.store),
                     )
 
-                    # we need to give _process_received_pdu the actual state events
+                    # We need to give _process_received_pdu the actual state events
                     # rather than event ids, so generate that now.
+
+                    # First though we need to fetch all the events that are in
+                    # state_map, so we can build up the state below.
+                    evs = yield self.store.get_events(
+                        list(state_map.values()),
+                        get_prev_content=False,
+                        check_redacted=False,
+                    )
+                    event_map.update(evs)
+
                     state = [
                         event_map[e] for e in six.itervalues(state_map)
                     ]
@@ -452,8 +447,8 @@ class FederationHandler(BaseHandler):
         latest |= seen
 
         logger.info(
-            "[%s %s]: Requesting %d prev_events: %s",
-            room_id, event_id, len(prevs - seen), shortstr(prevs - seen)
+            "[%s %s]: Requesting missing events between %s and %s",
+            room_id, event_id, shortstr(latest), event_id,
         )
 
         # XXX: we set timeout to 10s to help workaround
@@ -557,86 +552,54 @@ class FederationHandler(BaseHandler):
             room_id, event_id, event,
         )
 
-        # FIXME (erikj): Awful hack to make the case where we are not currently
-        # in the room work
-        # If state and auth_chain are None, then we don't need to do this check
-        # as we already know we have enough state in the DB to handle this
-        # event.
-        if state and auth_chain and not event.internal_metadata.is_outlier():
-            is_in_room = yield self.auth.check_host_in_room(
-                room_id,
-                self.server_name
-            )
-        else:
-            is_in_room = True
-
-        if not is_in_room:
-            logger.info(
-                "[%s %s] Got event for room we're not in",
-                room_id, event_id,
-            )
-
-            try:
-                yield self._persist_auth_tree(
-                    origin, auth_chain, state, event
-                )
-            except AuthError as e:
-                raise FederationError(
-                    "ERROR",
-                    e.code,
-                    e.msg,
-                    affected=event_id,
-                )
-
-        else:
-            event_ids = set()
-            if state:
-                event_ids |= {e.event_id for e in state}
-            if auth_chain:
-                event_ids |= {e.event_id for e in auth_chain}
+        event_ids = set()
+        if state:
+            event_ids |= {e.event_id for e in state}
+        if auth_chain:
+            event_ids |= {e.event_id for e in auth_chain}
 
-            seen_ids = yield self.store.have_seen_events(event_ids)
+        seen_ids = yield self.store.have_seen_events(event_ids)
 
-            if state and auth_chain is not None:
-                # If we have any state or auth_chain given to us by the replication
-                # layer, then we should handle them (if we haven't before.)
+        if state and auth_chain is not None:
+            # If we have any state or auth_chain given to us by the replication
+            # layer, then we should handle them (if we haven't before.)
 
-                event_infos = []
+            event_infos = []
 
-                for e in itertools.chain(auth_chain, state):
-                    if e.event_id in seen_ids:
-                        continue
-                    e.internal_metadata.outlier = True
-                    auth_ids = [e_id for e_id, _ in e.auth_events]
-                    auth = {
-                        (e.type, e.state_key): e for e in auth_chain
-                        if e.event_id in auth_ids or e.type == EventTypes.Create
-                    }
-                    event_infos.append({
-                        "event": e,
-                        "auth_events": auth,
-                    })
-                    seen_ids.add(e.event_id)
+            for e in itertools.chain(auth_chain, state):
+                if e.event_id in seen_ids:
+                    continue
+                e.internal_metadata.outlier = True
+                auth_ids = e.auth_event_ids()
+                auth = {
+                    (e.type, e.state_key): e for e in auth_chain
+                    if e.event_id in auth_ids or e.type == EventTypes.Create
+                }
+                event_infos.append({
+                    "event": e,
+                    "auth_events": auth,
+                })
+                seen_ids.add(e.event_id)
 
-                logger.info(
-                    "[%s %s] persisting newly-received auth/state events %s",
-                    room_id, event_id, [e["event"].event_id for e in event_infos]
-                )
-                yield self._handle_new_events(origin, event_infos)
+            logger.info(
+                "[%s %s] persisting newly-received auth/state events %s",
+                room_id, event_id, [e["event"].event_id for e in event_infos]
+            )
+            yield self._handle_new_events(origin, event_infos)
 
-            try:
-                context = yield self._handle_new_event(
-                    origin,
-                    event,
-                    state=state,
-                )
-            except AuthError as e:
-                raise FederationError(
-                    "ERROR",
-                    e.code,
-                    e.msg,
-                    affected=event.event_id,
-                )
+        try:
+            context = yield self._handle_new_event(
+                origin,
+                event,
+                state=state,
+            )
+        except AuthError as e:
+            raise FederationError(
+                "ERROR",
+                e.code,
+                e.msg,
+                affected=event.event_id,
+            )
 
         room = yield self.store.get_room(room_id)
 
@@ -726,7 +689,7 @@ class FederationHandler(BaseHandler):
         edges = [
             ev.event_id
             for ev in events
-            if set(e_id for e_id, _ in ev.prev_events) - event_ids
+            if set(ev.prev_event_ids()) - event_ids
         ]
 
         logger.info(
@@ -753,7 +716,7 @@ class FederationHandler(BaseHandler):
         required_auth = set(
             a_id
             for event in events + list(state_events.values()) + list(auth_events.values())
-            for a_id, _ in event.auth_events
+            for a_id in event.auth_event_ids()
         )
         auth_events.update({
             e_id: event_map[e_id] for e_id in required_auth if e_id in event_map
@@ -769,7 +732,7 @@ class FederationHandler(BaseHandler):
             auth_events.update(ret_events)
 
             required_auth.update(
-                a_id for event in ret_events.values() for a_id, _ in event.auth_events
+                a_id for event in ret_events.values() for a_id in event.auth_event_ids()
             )
             missing_auth = required_auth - set(auth_events)
 
@@ -796,7 +759,7 @@ class FederationHandler(BaseHandler):
                 required_auth.update(
                     a_id
                     for event in results if event
-                    for a_id, _ in event.auth_events
+                    for a_id in event.auth_event_ids()
                 )
                 missing_auth = required_auth - set(auth_events)
 
@@ -816,7 +779,7 @@ class FederationHandler(BaseHandler):
                 "auth_events": {
                     (auth_events[a_id].type, auth_events[a_id].state_key):
                     auth_events[a_id]
-                    for a_id, _ in a.auth_events
+                    for a_id in a.auth_event_ids()
                     if a_id in auth_events
                 }
             })
@@ -828,7 +791,7 @@ class FederationHandler(BaseHandler):
                 "auth_events": {
                     (auth_events[a_id].type, auth_events[a_id].state_key):
                     auth_events[a_id]
-                    for a_id, _ in event_map[e_id].auth_events
+                    for a_id in event_map[e_id].auth_event_ids()
                     if a_id in auth_events
                 }
             })
@@ -1041,17 +1004,17 @@ class FederationHandler(BaseHandler):
         Raises:
             SynapseError if the event does not pass muster
         """
-        if len(ev.prev_events) > 20:
+        if len(ev.prev_event_ids()) > 20:
             logger.warn("Rejecting event %s which has %i prev_events",
-                        ev.event_id, len(ev.prev_events))
+                        ev.event_id, len(ev.prev_event_ids()))
             raise SynapseError(
                 http_client.BAD_REQUEST,
                 "Too many prev_events",
             )
 
-        if len(ev.auth_events) > 10:
+        if len(ev.auth_event_ids()) > 10:
             logger.warn("Rejecting event %s which has %i auth_events",
-                        ev.event_id, len(ev.auth_events))
+                        ev.event_id, len(ev.auth_event_ids()))
             raise SynapseError(
                 http_client.BAD_REQUEST,
                 "Too many auth_events",
@@ -1076,7 +1039,7 @@ class FederationHandler(BaseHandler):
     def on_event_auth(self, event_id):
         event = yield self.store.get_event(event_id)
         auth = yield self.store.get_auth_chain(
-            [auth_id for auth_id, _ in event.auth_events],
+            [auth_id for auth_id in event.auth_event_ids()],
             include_given=True
         )
         defer.returnValue([e for e in auth])
@@ -1698,7 +1661,7 @@ class FederationHandler(BaseHandler):
 
         missing_auth_events = set()
         for e in itertools.chain(auth_events, state, [event]):
-            for e_id, _ in e.auth_events:
+            for e_id in e.auth_event_ids():
                 if e_id not in event_map:
                     missing_auth_events.add(e_id)
 
@@ -1717,7 +1680,7 @@ class FederationHandler(BaseHandler):
         for e in itertools.chain(auth_events, state, [event]):
             auth_for_e = {
                 (event_map[e_id].type, event_map[e_id].state_key): event_map[e_id]
-                for e_id, _ in e.auth_events
+                for e_id in e.auth_event_ids()
                 if e_id in event_map
             }
             if create_event:
@@ -1785,10 +1748,10 @@ class FederationHandler(BaseHandler):
 
         # This is a hack to fix some old rooms where the initial join event
         # didn't reference the create event in its auth events.
-        if event.type == EventTypes.Member and not event.auth_events:
-            if len(event.prev_events) == 1 and event.depth < 5:
+        if event.type == EventTypes.Member and not event.auth_event_ids():
+            if len(event.prev_event_ids()) == 1 and event.depth < 5:
                 c = yield self.store.get_event(
-                    event.prev_events[0][0],
+                    event.prev_event_ids()[0],
                     allow_none=True,
                 )
                 if c and c.type == EventTypes.Create:
@@ -1835,7 +1798,7 @@ class FederationHandler(BaseHandler):
 
         # Now get the current auth_chain for the event.
         local_auth_chain = yield self.store.get_auth_chain(
-            [auth_id for auth_id, _ in event.auth_events],
+            [auth_id for auth_id in event.auth_event_ids()],
             include_given=True
         )
 
@@ -1852,7 +1815,7 @@ class FederationHandler(BaseHandler):
 
     @defer.inlineCallbacks
     def on_get_missing_events(self, origin, room_id, earliest_events,
-                              latest_events, limit, min_depth):
+                              latest_events, limit):
         in_room = yield self.auth.check_host_in_room(
             room_id,
             origin
@@ -1861,14 +1824,12 @@ class FederationHandler(BaseHandler):
             raise AuthError(403, "Host not in room.")
 
         limit = min(limit, 20)
-        min_depth = max(min_depth, 0)
 
         missing_events = yield self.store.get_missing_events(
             room_id=room_id,
             earliest_events=earliest_events,
             latest_events=latest_events,
             limit=limit,
-            min_depth=min_depth,
         )
 
         missing_events = yield filter_events_for_server(
@@ -1893,7 +1854,7 @@ class FederationHandler(BaseHandler):
         """
         # Check if we have all the auth events.
         current_state = set(e.event_id for e in auth_events.values())
-        event_auth_events = set(e_id for e_id, _ in event.auth_events)
+        event_auth_events = set(event.auth_event_ids())
 
         if event.is_state():
             event_key = (event.type, event.state_key)
@@ -1937,7 +1898,7 @@ class FederationHandler(BaseHandler):
                         continue
 
                     try:
-                        auth_ids = [e_id for e_id, _ in e.auth_events]
+                        auth_ids = e.auth_event_ids()
                         auth = {
                             (e.type, e.state_key): e for e in remote_auth_chain
                             if e.event_id in auth_ids or e.type == EventTypes.Create
@@ -1958,7 +1919,7 @@ class FederationHandler(BaseHandler):
                         pass
 
                 have_events = yield self.store.get_seen_events_with_rejections(
-                    [e_id for e_id, _ in event.auth_events]
+                    event.auth_event_ids()
                 )
                 seen_events = set(have_events.keys())
             except Exception:
@@ -2060,7 +2021,7 @@ class FederationHandler(BaseHandler):
                             continue
 
                         try:
-                            auth_ids = [e_id for e_id, _ in ev.auth_events]
+                            auth_ids = ev.auth_event_ids()
                             auth = {
                                 (e.type, e.state_key): e
                                 for e in result["auth_chain"]
@@ -2252,7 +2213,7 @@ class FederationHandler(BaseHandler):
         missing_remote_ids = [e.event_id for e in missing_remotes]
         base_remote_rejected = list(missing_remotes)
         for e in missing_remotes:
-            for e_id, _ in e.auth_events:
+            for e_id in e.auth_event_ids():
                 if e_id in missing_remote_ids:
                     try:
                         base_remote_rejected.remove(e)
@@ -2522,7 +2483,7 @@ class FederationHandler(BaseHandler):
 
             if not backfilled:  # Never notify for backfilled events
                 for event, _ in event_and_contexts:
-                    self._notify_persisted_event(event, max_stream_id)
+                    yield self._notify_persisted_event(event, max_stream_id)
 
     def _notify_persisted_event(self, event, max_stream_id):
         """Checks to see if notifier/pushers should be notified about the
@@ -2555,7 +2516,7 @@ class FederationHandler(BaseHandler):
             extra_users=extra_users
         )
 
-        self.pusher_pool.on_new_notifications(
+        return self.pusher_pool.on_new_notifications(
             event_stream_id, max_stream_id,
         )
 
diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index 53e5e2648b..173315af6c 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -20,7 +20,7 @@ from six import iteritems
 
 from twisted.internet import defer
 
-from synapse.api.errors import SynapseError
+from synapse.api.errors import HttpResponseException, SynapseError
 from synapse.types import get_domain_from_id
 
 logger = logging.getLogger(__name__)
@@ -37,9 +37,23 @@ def _create_rerouter(func_name):
             )
         else:
             destination = get_domain_from_id(group_id)
-            return getattr(self.transport_client, func_name)(
+            d = getattr(self.transport_client, func_name)(
                 destination, group_id, *args, **kwargs
             )
+
+            # Capture errors returned by the remote homeserver and
+            # re-throw specific errors as SynapseErrors. This is so
+            # when the remote end responds with things like 403 Not
+            # In Group, we can communicate that to the client instead
+            # of a 500.
+            def h(failure):
+                failure.trap(HttpResponseException)
+                e = failure.value
+                if e.code == 403:
+                    raise e.to_synapse_error()
+                return failure
+            d.addErrback(h)
+            return d
     return f
 
 
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index e009395207..563bb3cea3 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -156,7 +156,7 @@ class InitialSyncHandler(BaseHandler):
                     room_end_token = "s%d" % (event.stream_ordering,)
                     deferred_room_state = run_in_background(
                         self.store.get_state_for_events,
-                        [event.event_id], None,
+                        [event.event_id],
                     )
                     deferred_room_state.addCallback(
                         lambda states: states[event.event_id]
@@ -301,7 +301,7 @@ class InitialSyncHandler(BaseHandler):
     def _room_initial_sync_parted(self, user_id, room_id, pagin_config,
                                   membership, member_event_id, is_peeking):
         room_state = yield self.store.get_state_for_events(
-            [member_event_id], None
+            [member_event_id],
         )
 
         room_state = room_state[member_event_id]
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4954b23a0d..a7cd779b02 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -35,6 +35,7 @@ from synapse.crypto.event_signing import add_hashes_and_signatures
 from synapse.events.utils import serialize_event
 from synapse.events.validator import EventValidator
 from synapse.replication.http.send_event import ReplicationSendEventRestServlet
+from synapse.storage.state import StateFilter
 from synapse.types import RoomAlias, UserID
 from synapse.util.async_helpers import Linearizer
 from synapse.util.frozenutils import frozendict_json_encoder
@@ -80,7 +81,7 @@ class MessageHandler(object):
         elif membership == Membership.LEAVE:
             key = (event_type, state_key)
             room_state = yield self.store.get_state_for_events(
-                [membership_event_id], [key]
+                [membership_event_id], StateFilter.from_types([key])
             )
             data = room_state[membership_event_id].get(key)
 
@@ -88,7 +89,7 @@ class MessageHandler(object):
 
     @defer.inlineCallbacks
     def get_state_events(
-        self, user_id, room_id, types=None, filtered_types=None,
+        self, user_id, room_id, state_filter=StateFilter.all(),
         at_token=None, is_guest=False,
     ):
         """Retrieve all state events for a given room. If the user is
@@ -100,13 +101,8 @@ class MessageHandler(object):
         Args:
             user_id(str): The user requesting state events.
             room_id(str): The room ID to get all state events from.
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
             at_token(StreamToken|None): the stream token of the at which we are requesting
                 the stats. If the user is not allowed to view the state as of that
                 stream token, we raise a 403 SynapseError. If None, returns the current
@@ -139,7 +135,7 @@ class MessageHandler(object):
             event = last_events[0]
             if visible_events:
                 room_state = yield self.store.get_state_for_events(
-                    [event.event_id], types, filtered_types=filtered_types,
+                    [event.event_id], state_filter=state_filter,
                 )
                 room_state = room_state[event.event_id]
             else:
@@ -158,12 +154,12 @@ class MessageHandler(object):
 
             if membership == Membership.JOIN:
                 state_ids = yield self.store.get_filtered_current_state_ids(
-                    room_id, types, filtered_types=filtered_types,
+                    room_id, state_filter=state_filter,
                 )
                 room_state = yield self.store.get_events(state_ids.values())
             elif membership == Membership.LEAVE:
                 room_state = yield self.store.get_state_for_events(
-                    [membership_event_id], types, filtered_types=filtered_types,
+                    [membership_event_id], state_filter=state_filter,
                 )
                 room_state = room_state[membership_event_id]
 
@@ -431,6 +427,9 @@ class EventCreationHandler(object):
 
         if event.is_state():
             prev_state = yield self.deduplicate_state_event(event, context)
+            logger.info(
+                "Not bothering to persist duplicate state event %s", event.event_id,
+            )
             if prev_state is not None:
                 defer.returnValue(prev_state)
 
@@ -779,7 +778,7 @@ class EventCreationHandler(object):
             event, context=context
         )
 
-        self.pusher_pool.on_new_notifications(
+        yield self.pusher_pool.on_new_notifications(
             event_stream_id, max_stream_id,
         )
 
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index a155b6e938..43f81bd607 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -21,6 +21,7 @@ from twisted.python.failure import Failure
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.events.utils import serialize_event
+from synapse.storage.state import StateFilter
 from synapse.types import RoomStreamToken
 from synapse.util.async_helpers import ReadWriteLock
 from synapse.util.logcontext import run_in_background
@@ -255,16 +256,14 @@ class PaginationHandler(object):
         if event_filter and event_filter.lazy_load_members():
             # TODO: remove redundant members
 
-            types = [
-                (EventTypes.Member, state_key)
-                for state_key in set(
-                    event.sender  # FIXME: we also care about invite targets etc.
-                    for event in events
-                )
-            ]
+            # FIXME: we also care about invite targets etc.
+            state_filter = StateFilter.from_types(
+                (EventTypes.Member, event.sender)
+                for event in events
+            )
 
             state_ids = yield self.store.get_state_ids_for_event(
-                events[0].event_id, types=types,
+                events[0].event_id, state_filter=state_filter,
             )
 
             if state_ids:
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index a6f3181f09..4c2690ba26 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -119,7 +119,7 @@ class ReceiptsHandler(BaseHandler):
             "receipt_key", max_batch_id, rooms=affected_room_ids
         )
         # Note that the min here shouldn't be relied upon to be accurate.
-        self.hs.get_pusherpool().on_new_receipts(
+        yield self.hs.get_pusherpool().on_new_receipts(
             min_batch_id, max_batch_id, affected_room_ids,
         )
 
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index da914c46ff..015909bb26 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -217,19 +217,65 @@ class RegistrationHandler(BaseHandler):
                     user_id = None
                     token = None
                     attempts += 1
+        if not self.hs.config.user_consent_at_registration:
+            yield self._auto_join_rooms(user_id)
 
+        defer.returnValue((user_id, token))
+
+    @defer.inlineCallbacks
+    def _auto_join_rooms(self, user_id):
+        """Automatically joins users to auto join rooms - creating the room in the first place
+        if the user is the first to be created.
+
+        Args:
+            user_id(str): The user to join
+        """
         # auto-join the user to any rooms we're supposed to dump them into
         fake_requester = create_requester(user_id)
+
+        # try to create the room if we're the first user on the server
+        should_auto_create_rooms = False
+        if self.hs.config.autocreate_auto_join_rooms:
+            count = yield self.store.count_all_users()
+            should_auto_create_rooms = count == 1
         for r in self.hs.config.auto_join_rooms:
             try:
-                yield self._join_user_to_room(fake_requester, r)
+                if should_auto_create_rooms:
+                    room_alias = RoomAlias.from_string(r)
+                    if self.hs.hostname != room_alias.domain:
+                        logger.warning(
+                            'Cannot create room alias %s, '
+                            'it does not match server domain',
+                            r,
+                        )
+                    else:
+                        # create room expects the localpart of the room alias
+                        room_alias_localpart = room_alias.localpart
+
+                        # getting the RoomCreationHandler during init gives a dependency
+                        # loop
+                        yield self.hs.get_room_creation_handler().create_room(
+                            fake_requester,
+                            config={
+                                "preset": "public_chat",
+                                "room_alias_name": room_alias_localpart
+                            },
+                            ratelimit=False,
+                        )
+                else:
+                    yield self._join_user_to_room(fake_requester, r)
             except Exception as e:
                 logger.error("Failed to join new user to %r: %r", r, e)
 
-        # We used to generate default identicons here, but nowadays
-        # we want clients to generate their own as part of their branding
-        # rather than there being consistent matrix-wide ones, so we don't.
-        defer.returnValue((user_id, token))
+    @defer.inlineCallbacks
+    def post_consent_actions(self, user_id):
+        """A series of registration actions that can only be carried out once consent
+        has been granted
+
+        Args:
+            user_id (str): The user to join
+        """
+        yield self._auto_join_rooms(user_id)
 
     @defer.inlineCallbacks
     def appservice_register(self, user_localpart, as_token):
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index c3f820b975..3928faa6e7 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -21,7 +21,7 @@ import math
 import string
 from collections import OrderedDict
 
-from six import string_types
+from six import iteritems, string_types
 
 from twisted.internet import defer
 
@@ -32,9 +32,11 @@ from synapse.api.constants import (
     JoinRules,
     RoomCreationPreset,
 )
-from synapse.api.errors import AuthError, Codes, StoreError, SynapseError
+from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError
+from synapse.storage.state import StateFilter
 from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID
 from synapse.util import stringutils
+from synapse.util.async_helpers import Linearizer
 from synapse.visibility import filter_events_for_client
 
 from ._base import BaseHandler
@@ -72,6 +74,334 @@ class RoomCreationHandler(BaseHandler):
 
         self.spam_checker = hs.get_spam_checker()
         self.event_creation_handler = hs.get_event_creation_handler()
+        self.room_member_handler = hs.get_room_member_handler()
+
+        # linearizer to stop two upgrades happening at once
+        self._upgrade_linearizer = Linearizer("room_upgrade_linearizer")
+
+    @defer.inlineCallbacks
+    def upgrade_room(self, requester, old_room_id, new_version):
+        """Replace a room with a new room with a different version
+
+        Args:
+            requester (synapse.types.Requester): the user requesting the upgrade
+            old_room_id (unicode): the id of the room to be replaced
+            new_version (unicode): the new room version to use
+
+        Returns:
+            Deferred[unicode]: the new room id
+        """
+        yield self.ratelimit(requester)
+
+        user_id = requester.user.to_string()
+
+        with (yield self._upgrade_linearizer.queue(old_room_id)):
+            # start by allocating a new room id
+            r = yield self.store.get_room(old_room_id)
+            if r is None:
+                raise NotFoundError("Unknown room id %s" % (old_room_id,))
+            new_room_id = yield self._generate_room_id(
+                creator_id=user_id, is_public=r["is_public"],
+            )
+
+            logger.info("Creating new room %s to replace %s", new_room_id, old_room_id)
+
+            # we create and auth the tombstone event before properly creating the new
+            # room, to check our user has perms in the old room.
+            tombstone_event, tombstone_context = (
+                yield self.event_creation_handler.create_event(
+                    requester, {
+                        "type": EventTypes.Tombstone,
+                        "state_key": "",
+                        "room_id": old_room_id,
+                        "sender": user_id,
+                        "content": {
+                            "body": "This room has been replaced",
+                            "replacement_room": new_room_id,
+                        }
+                    },
+                    token_id=requester.access_token_id,
+                )
+            )
+            yield self.auth.check_from_context(tombstone_event, tombstone_context)
+
+            yield self.clone_exiting_room(
+                requester,
+                old_room_id=old_room_id,
+                new_room_id=new_room_id,
+                new_room_version=new_version,
+                tombstone_event_id=tombstone_event.event_id,
+            )
+
+            # now send the tombstone
+            yield self.event_creation_handler.send_nonmember_event(
+                requester, tombstone_event, tombstone_context,
+            )
+
+            old_room_state = yield tombstone_context.get_current_state_ids(self.store)
+
+            # update any aliases
+            yield self._move_aliases_to_new_room(
+                requester, old_room_id, new_room_id, old_room_state,
+            )
+
+            # and finally, shut down the PLs in the old room, and update them in the new
+            # room.
+            yield self._update_upgraded_room_pls(
+                requester, old_room_id, new_room_id, old_room_state,
+            )
+
+            defer.returnValue(new_room_id)
+
+    @defer.inlineCallbacks
+    def _update_upgraded_room_pls(
+            self, requester, old_room_id, new_room_id, old_room_state,
+    ):
+        """Send updated power levels in both rooms after an upgrade
+
+        Args:
+            requester (synapse.types.Requester): the user requesting the upgrade
+            old_room_id (unicode): the id of the room to be replaced
+            new_room_id (unicode): the id of the replacement room
+            old_room_state (dict[tuple[str, str], str]): the state map for the old room
+
+        Returns:
+            Deferred
+        """
+        old_room_pl_event_id = old_room_state.get((EventTypes.PowerLevels, ""))
+
+        if old_room_pl_event_id is None:
+            logger.warning(
+                "Not supported: upgrading a room with no PL event. Not setting PLs "
+                "in old room.",
+            )
+            return
+
+        old_room_pl_state = yield self.store.get_event(old_room_pl_event_id)
+
+        # we try to stop regular users from speaking by setting the PL required
+        # to send regular events and invites to 'Moderator' level. That's normally
+        # 50, but if the default PL in a room is 50 or more, then we set the
+        # required PL above that.
+
+        pl_content = dict(old_room_pl_state.content)
+        users_default = int(pl_content.get("users_default", 0))
+        restricted_level = max(users_default + 1, 50)
+
+        updated = False
+        for v in ("invite", "events_default"):
+            current = int(pl_content.get(v, 0))
+            if current < restricted_level:
+                logger.info(
+                    "Setting level for %s in %s to %i (was %i)",
+                    v, old_room_id, restricted_level, current,
+                )
+                pl_content[v] = restricted_level
+                updated = True
+            else:
+                logger.info(
+                    "Not setting level for %s (already %i)",
+                    v, current,
+                )
+
+        if updated:
+            try:
+                yield self.event_creation_handler.create_and_send_nonmember_event(
+                    requester, {
+                        "type": EventTypes.PowerLevels,
+                        "state_key": '',
+                        "room_id": old_room_id,
+                        "sender": requester.user.to_string(),
+                        "content": pl_content,
+                    }, ratelimit=False,
+                )
+            except AuthError as e:
+                logger.warning("Unable to update PLs in old room: %s", e)
+
+        logger.info("Setting correct PLs in new room")
+        yield self.event_creation_handler.create_and_send_nonmember_event(
+            requester, {
+                "type": EventTypes.PowerLevels,
+                "state_key": '',
+                "room_id": new_room_id,
+                "sender": requester.user.to_string(),
+                "content": old_room_pl_state.content,
+            }, ratelimit=False,
+        )
+
+    @defer.inlineCallbacks
+    def clone_exiting_room(
+            self, requester, old_room_id, new_room_id, new_room_version,
+            tombstone_event_id,
+    ):
+        """Populate a new room based on an old room
+
+        Args:
+            requester (synapse.types.Requester): the user requesting the upgrade
+            old_room_id (unicode): the id of the room to be replaced
+            new_room_id (unicode): the id to give the new room (should already have been
+                created with _gemerate_room_id())
+            new_room_version (unicode): the new room version to use
+            tombstone_event_id (unicode|str): the ID of the tombstone event in the old
+                room.
+        Returns:
+            Deferred[None]
+        """
+        user_id = requester.user.to_string()
+
+        if not self.spam_checker.user_may_create_room(user_id):
+            raise SynapseError(403, "You are not permitted to create rooms")
+
+        creation_content = {
+            "room_version": new_room_version,
+            "predecessor": {
+                "room_id": old_room_id,
+                "event_id": tombstone_event_id,
+            }
+        }
+
+        initial_state = dict()
+
+        types_to_copy = (
+            (EventTypes.JoinRules, ""),
+            (EventTypes.Name, ""),
+            (EventTypes.Topic, ""),
+            (EventTypes.RoomHistoryVisibility, ""),
+            (EventTypes.GuestAccess, ""),
+            (EventTypes.RoomAvatar, ""),
+        )
+
+        old_room_state_ids = yield self.store.get_filtered_current_state_ids(
+            old_room_id, StateFilter.from_types(types_to_copy),
+        )
+        # map from event_id to BaseEvent
+        old_room_state_events = yield self.store.get_events(old_room_state_ids.values())
+
+        for k, old_event_id in iteritems(old_room_state_ids):
+            old_event = old_room_state_events.get(old_event_id)
+            if old_event:
+                initial_state[k] = old_event.content
+
+        yield self._send_events_for_new_room(
+            requester,
+            new_room_id,
+
+            # we expect to override all the presets with initial_state, so this is
+            # somewhat arbitrary.
+            preset_config=RoomCreationPreset.PRIVATE_CHAT,
+
+            invite_list=[],
+            initial_state=initial_state,
+            creation_content=creation_content,
+        )
+
+        # XXX invites/joins
+        # XXX 3pid invites
+
+    @defer.inlineCallbacks
+    def _move_aliases_to_new_room(
+            self, requester, old_room_id, new_room_id, old_room_state,
+    ):
+        directory_handler = self.hs.get_handlers().directory_handler
+
+        aliases = yield self.store.get_aliases_for_room(old_room_id)
+
+        # check to see if we have a canonical alias.
+        canonical_alias = None
+        canonical_alias_event_id = old_room_state.get((EventTypes.CanonicalAlias, ""))
+        if canonical_alias_event_id:
+            canonical_alias_event = yield self.store.get_event(canonical_alias_event_id)
+            if canonical_alias_event:
+                canonical_alias = canonical_alias_event.content.get("alias", "")
+
+        # first we try to remove the aliases from the old room (we suppress sending
+        # the room_aliases event until the end).
+        #
+        # Note that we'll only be able to remove aliases that (a) aren't owned by an AS,
+        # and (b) unless the user is a server admin, which the user created.
+        #
+        # This is probably correct - given we don't allow such aliases to be deleted
+        # normally, it would be odd to allow it in the case of doing a room upgrade -
+        # but it makes the upgrade less effective, and you have to wonder why a room
+        # admin can't remove aliases that point to that room anyway.
+        # (cf https://github.com/matrix-org/synapse/issues/2360)
+        #
+        removed_aliases = []
+        for alias_str in aliases:
+            alias = RoomAlias.from_string(alias_str)
+            try:
+                yield directory_handler.delete_association(
+                    requester, alias, send_event=False,
+                )
+                removed_aliases.append(alias_str)
+            except SynapseError as e:
+                logger.warning(
+                    "Unable to remove alias %s from old room: %s",
+                    alias, e,
+                )
+
+        # if we didn't find any aliases, or couldn't remove anyway, we can skip the rest
+        # of this.
+        if not removed_aliases:
+            return
+
+        try:
+            # this can fail if, for some reason, our user doesn't have perms to send
+            # m.room.aliases events in the old room (note that we've already checked that
+            # they have perms to send a tombstone event, so that's not terribly likely).
+            #
+            # If that happens, it's regrettable, but we should carry on: it's the same
+            # as when you remove an alias from the directory normally - it just means that
+            # the aliases event gets out of sync with the directory
+            # (cf https://github.com/vector-im/riot-web/issues/2369)
+            yield directory_handler.send_room_alias_update_event(
+                requester, old_room_id,
+            )
+        except AuthError as e:
+            logger.warning(
+                "Failed to send updated alias event on old room: %s", e,
+            )
+
+        # we can now add any aliases we successfully removed to the new room.
+        for alias in removed_aliases:
+            try:
+                yield directory_handler.create_association(
+                    requester, RoomAlias.from_string(alias),
+                    new_room_id, servers=(self.hs.hostname, ),
+                    send_event=False,
+                )
+                logger.info("Moved alias %s to new room", alias)
+            except SynapseError as e:
+                # I'm not really expecting this to happen, but it could if the spam
+                # checking module decides it shouldn't, or similar.
+                logger.error(
+                    "Error adding alias %s to new room: %s",
+                    alias, e,
+                )
+
+        try:
+            if canonical_alias and (canonical_alias in removed_aliases):
+                yield self.event_creation_handler.create_and_send_nonmember_event(
+                    requester,
+                    {
+                        "type": EventTypes.CanonicalAlias,
+                        "state_key": "",
+                        "room_id": new_room_id,
+                        "sender": requester.user.to_string(),
+                        "content": {"alias": canonical_alias, },
+                    },
+                    ratelimit=False
+                )
+
+            yield directory_handler.send_room_alias_update_event(
+                requester, new_room_id,
+            )
+        except SynapseError as e:
+            # again I'm not really expecting this to fail, but if it does, I'd rather
+            # we returned the new room to the client at this point.
+            logger.error(
+                "Unable to send updated alias events in new room: %s", e,
+            )
 
     @defer.inlineCallbacks
     def create_room(self, requester, config, ratelimit=True,
@@ -164,36 +494,16 @@ class RoomCreationHandler(BaseHandler):
         visibility = config.get("visibility", None)
         is_public = visibility == "public"
 
-        # autogen room IDs and try to create it. We may clash, so just
-        # try a few times till one goes through, giving up eventually.
-        attempts = 0
-        room_id = None
-        while attempts < 5:
-            try:
-                random_string = stringutils.random_string(18)
-                gen_room_id = RoomID(
-                    random_string,
-                    self.hs.hostname,
-                )
-                yield self.store.store_room(
-                    room_id=gen_room_id.to_string(),
-                    room_creator_user_id=user_id,
-                    is_public=is_public
-                )
-                room_id = gen_room_id.to_string()
-                break
-            except StoreError:
-                attempts += 1
-        if not room_id:
-            raise StoreError(500, "Couldn't generate a room ID.")
+        room_id = yield self._generate_room_id(creator_id=user_id, is_public=is_public)
 
         if room_alias:
             directory_handler = self.hs.get_handlers().directory_handler
             yield directory_handler.create_association(
-                user_id=user_id,
+                requester=requester,
                 room_id=room_id,
                 room_alias=room_alias,
                 servers=[self.hs.hostname],
+                send_event=False,
             )
 
         preset_config = config.get(
@@ -214,18 +524,15 @@ class RoomCreationHandler(BaseHandler):
         # override any attempt to set room versions via the creation_content
         creation_content["room_version"] = room_version
 
-        room_member_handler = self.hs.get_room_member_handler()
-
         yield self._send_events_for_new_room(
             requester,
             room_id,
-            room_member_handler,
             preset_config=preset_config,
             invite_list=invite_list,
             initial_state=initial_state,
             creation_content=creation_content,
             room_alias=room_alias,
-            power_level_content_override=config.get("power_level_content_override", {}),
+            power_level_content_override=config.get("power_level_content_override"),
             creator_join_profile=creator_join_profile,
         )
 
@@ -261,7 +568,7 @@ class RoomCreationHandler(BaseHandler):
             if is_direct:
                 content["is_direct"] = is_direct
 
-            yield room_member_handler.update_membership(
+            yield self.room_member_handler.update_membership(
                 requester,
                 UserID.from_string(invitee),
                 room_id,
@@ -289,7 +596,7 @@ class RoomCreationHandler(BaseHandler):
         if room_alias:
             result["room_alias"] = room_alias.to_string()
             yield directory_handler.send_room_alias_update_event(
-                requester, user_id, room_id
+                requester, room_id
             )
 
         defer.returnValue(result)
@@ -299,14 +606,13 @@ class RoomCreationHandler(BaseHandler):
             self,
             creator,  # A Requester object.
             room_id,
-            room_member_handler,
             preset_config,
             invite_list,
             initial_state,
             creation_content,
-            room_alias,
-            power_level_content_override,
-            creator_join_profile,
+            room_alias=None,
+            power_level_content_override=None,
+            creator_join_profile=None,
     ):
         def create(etype, content, **kwargs):
             e = {
@@ -322,6 +628,7 @@ class RoomCreationHandler(BaseHandler):
         @defer.inlineCallbacks
         def send(etype, content, **kwargs):
             event = create(etype, content, **kwargs)
+            logger.info("Sending %s in new room", etype)
             yield self.event_creation_handler.create_and_send_nonmember_event(
                 creator,
                 event,
@@ -344,7 +651,8 @@ class RoomCreationHandler(BaseHandler):
             content=creation_content,
         )
 
-        yield room_member_handler.update_membership(
+        logger.info("Sending %s in new room", EventTypes.Member)
+        yield self.room_member_handler.update_membership(
             creator,
             creator.user,
             room_id,
@@ -386,7 +694,8 @@ class RoomCreationHandler(BaseHandler):
                 for invitee in invite_list:
                     power_level_content["users"][invitee] = 100
 
-            power_level_content.update(power_level_content_override)
+            if power_level_content_override:
+                power_level_content.update(power_level_content_override)
 
             yield send(
                 etype=EventTypes.PowerLevels,
@@ -425,6 +734,30 @@ class RoomCreationHandler(BaseHandler):
                 content=content,
             )
 
+    @defer.inlineCallbacks
+    def _generate_room_id(self, creator_id, is_public):
+        # autogen room IDs and try to create it. We may clash, so just
+        # try a few times till one goes through, giving up eventually.
+        attempts = 0
+        while attempts < 5:
+            try:
+                random_string = stringutils.random_string(18)
+                gen_room_id = RoomID(
+                    random_string,
+                    self.hs.hostname,
+                ).to_string()
+                if isinstance(gen_room_id, bytes):
+                    gen_room_id = gen_room_id.decode('utf-8')
+                yield self.store.store_room(
+                    room_id=gen_room_id,
+                    room_creator_user_id=creator_id,
+                    is_public=is_public,
+                )
+                defer.returnValue(gen_room_id)
+            except StoreError:
+                attempts += 1
+        raise StoreError(500, "Couldn't generate a room ID.")
+
 
 class RoomContextHandler(object):
     def __init__(self, hs):
@@ -488,23 +821,24 @@ class RoomContextHandler(object):
         else:
             last_event_id = event_id
 
-        types = None
-        filtered_types = None
         if event_filter and event_filter.lazy_load_members():
-            members = set(ev.sender for ev in itertools.chain(
-                results["events_before"],
-                (results["event"],),
-                results["events_after"],
-            ))
-            filtered_types = [EventTypes.Member]
-            types = [(EventTypes.Member, member) for member in members]
+            state_filter = StateFilter.from_lazy_load_member_list(
+                ev.sender
+                for ev in itertools.chain(
+                    results["events_before"],
+                    (results["event"],),
+                    results["events_after"],
+                )
+            )
+        else:
+            state_filter = StateFilter.all()
 
         # XXX: why do we return the state as of the last event rather than the
         # first? Shouldn't we be consistent with /sync?
         # https://github.com/matrix-org/matrix-doc/issues/687
 
         state = yield self.store.get_state_for_events(
-            [last_event_id], types, filtered_types=filtered_types,
+            [last_event_id], state_filter=state_filter,
         )
         results["state"] = list(state[last_event_id].values())
 
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 38e1737ec9..dc88620885 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -16,7 +16,7 @@
 import logging
 from collections import namedtuple
 
-from six import iteritems
+from six import PY3, iteritems
 from six.moves import range
 
 import msgpack
@@ -444,9 +444,16 @@ class RoomListNextBatch(namedtuple("RoomListNextBatch", (
 
     @classmethod
     def from_token(cls, token):
+        if PY3:
+            # The argument raw=False is only available on new versions of
+            # msgpack, and only really needed on Python 3. Gate it behind
+            # a PY3 check to avoid causing issues on Debian-packaged versions.
+            decoded = msgpack.loads(decode_base64(token), raw=False)
+        else:
+            decoded = msgpack.loads(decode_base64(token))
         return RoomListNextBatch(**{
             cls.REVERSE_KEY_DICT[key]: val
-            for key, val in msgpack.loads(decode_base64(token)).items()
+            for key, val in decoded.items()
         })
 
     def to_token(self):
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 0c1d52fd11..ec936bbb4e 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -24,6 +24,7 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
 from synapse.events.utils import serialize_event
+from synapse.storage.state import StateFilter
 from synapse.visibility import filter_events_for_client
 
 from ._base import BaseHandler
@@ -49,6 +50,9 @@ class SearchHandler(BaseHandler):
             dict to be returned to the client with results of search
         """
 
+        if not self.hs.config.enable_search:
+            raise SynapseError(400, "Search is disabled on this homeserver")
+
         batch_group = None
         batch_group_key = None
         batch_token = None
@@ -324,9 +328,12 @@ class SearchHandler(BaseHandler):
                     else:
                         last_event_id = event.event_id
 
+                    state_filter = StateFilter.from_types(
+                        [(EventTypes.Member, sender) for sender in senders]
+                    )
+
                     state = yield self.store.get_state_for_event(
-                        last_event_id,
-                        types=[(EventTypes.Member, sender) for sender in senders]
+                        last_event_id, state_filter
                     )
 
                     res["profile_info"] = {
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 67b8ca28c7..09739f2862 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -20,11 +20,14 @@ import logging
 
 from six import iteritems, itervalues
 
+from prometheus_client import Counter
+
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.roommember import MemberSummary
+from synapse.storage.state import StateFilter
 from synapse.types import RoomStreamToken
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -36,6 +39,19 @@ from synapse.visibility import filter_events_for_client
 
 logger = logging.getLogger(__name__)
 
+
+# Counts the number of times we returned a non-empty sync. `type` is one of
+# "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is
+# "true" or "false" depending on if the request asked for lazy loaded members or
+# not.
+non_empty_sync_counter = Counter(
+    "synapse_handlers_sync_nonempty_total",
+    "Count of non empty sync responses. type is initial_sync/full_state_sync"
+    "/incremental_sync. lazy_loaded indicates if lazy loaded members were "
+    "enabled for that request.",
+    ["type", "lazy_loaded"],
+)
+
 # Store the cache that tracks which lazy-loaded members have been sent to a given
 # client for no more than 30 minutes.
 LAZY_LOADED_MEMBERS_CACHE_MAX_AGE = 30 * 60 * 1000
@@ -227,14 +243,16 @@ class SyncHandler(object):
     @defer.inlineCallbacks
     def _wait_for_sync_for_user(self, sync_config, since_token, timeout,
                                 full_state):
+        if since_token is None:
+            sync_type = "initial_sync"
+        elif full_state:
+            sync_type = "full_state_sync"
+        else:
+            sync_type = "incremental_sync"
+
         context = LoggingContext.current_context()
         if context:
-            if since_token is None:
-                context.tag = "initial_sync"
-            elif full_state:
-                context.tag = "full_state_sync"
-            else:
-                context.tag = "incremental_sync"
+            context.tag = sync_type
 
         if timeout == 0 or since_token is None or full_state:
             # we are going to return immediately, so don't bother calling
@@ -242,7 +260,6 @@ class SyncHandler(object):
             result = yield self.current_sync_for_user(
                 sync_config, since_token, full_state=full_state,
             )
-            defer.returnValue(result)
         else:
             def current_sync_callback(before_token, after_token):
                 return self.current_sync_for_user(sync_config, since_token)
@@ -251,7 +268,15 @@ class SyncHandler(object):
                 sync_config.user.to_string(), timeout, current_sync_callback,
                 from_token=since_token,
             )
-            defer.returnValue(result)
+
+        if result:
+            if sync_config.filter_collection.lazy_load_members():
+                lazy_loaded = "true"
+            else:
+                lazy_loaded = "false"
+            non_empty_sync_counter.labels(sync_type, lazy_loaded).inc()
+
+        defer.returnValue(result)
 
     def current_sync_for_user(self, sync_config, since_token=None,
                               full_state=False):
@@ -445,25 +470,20 @@ class SyncHandler(object):
         ))
 
     @defer.inlineCallbacks
-    def get_state_after_event(self, event, types=None, filtered_types=None):
+    def get_state_after_event(self, event, state_filter=StateFilter.all()):
         """
         Get the room state after the given event
 
         Args:
             event(synapse.events.EventBase): event of interest
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             A Deferred map from ((type, state_key)->Event)
         """
         state_ids = yield self.store.get_state_ids_for_event(
-            event.event_id, types, filtered_types=filtered_types,
+            event.event_id, state_filter=state_filter,
         )
         if event.is_state():
             state_ids = state_ids.copy()
@@ -471,18 +491,14 @@ class SyncHandler(object):
         defer.returnValue(state_ids)
 
     @defer.inlineCallbacks
-    def get_state_at(self, room_id, stream_position, types=None, filtered_types=None):
+    def get_state_at(self, room_id, stream_position, state_filter=StateFilter.all()):
         """ Get the room state at a particular stream position
 
         Args:
             room_id(str): room for which to get state
             stream_position(StreamToken): point at which to get state
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             A Deferred map from ((type, state_key)->Event)
@@ -498,7 +514,7 @@ class SyncHandler(object):
         if last_events:
             last_event = last_events[-1]
             state = yield self.get_state_after_event(
-                last_event, types, filtered_types=filtered_types,
+                last_event, state_filter=state_filter,
             )
 
         else:
@@ -539,10 +555,11 @@ class SyncHandler(object):
 
         last_event = last_events[-1]
         state_ids = yield self.store.get_state_ids_for_event(
-            last_event.event_id, [
+            last_event.event_id,
+            state_filter=StateFilter.from_types([
                 (EventTypes.Name, ''),
                 (EventTypes.CanonicalAlias, ''),
-            ]
+            ]),
         )
 
         # this is heavily cached, thus: fast.
@@ -693,8 +710,7 @@ class SyncHandler(object):
 
         with Measure(self.clock, "compute_state_delta"):
 
-            types = None
-            filtered_types = None
+            members_to_fetch = None
 
             lazy_load_members = sync_config.filter_collection.lazy_load_members()
             include_redundant_members = (
@@ -705,16 +721,21 @@ class SyncHandler(object):
                 # We only request state for the members needed to display the
                 # timeline:
 
-                types = [
-                    (EventTypes.Member, state_key)
-                    for state_key in set(
-                        event.sender  # FIXME: we also care about invite targets etc.
-                        for event in batch.events
-                    )
-                ]
+                members_to_fetch = set(
+                    event.sender  # FIXME: we also care about invite targets etc.
+                    for event in batch.events
+                )
 
-                # only apply the filtering to room members
-                filtered_types = [EventTypes.Member]
+                if full_state:
+                    # always make sure we LL ourselves so we know we're in the room
+                    # (if we are) to fix https://github.com/vector-im/riot-web/issues/7209
+                    # We only need apply this on full state syncs given we disabled
+                    # LL for incr syncs in #3840.
+                    members_to_fetch.add(sync_config.user.to_string())
+
+                state_filter = StateFilter.from_lazy_load_member_list(members_to_fetch)
+            else:
+                state_filter = StateFilter.all()
 
             timeline_state = {
                 (event.type, event.state_key): event.event_id
@@ -722,28 +743,19 @@ class SyncHandler(object):
             }
 
             if full_state:
-                if lazy_load_members:
-                    # always make sure we LL ourselves so we know we're in the room
-                    # (if we are) to fix https://github.com/vector-im/riot-web/issues/7209
-                    # We only need apply this on full state syncs given we disabled
-                    # LL for incr syncs in #3840.
-                    types.append((EventTypes.Member, sync_config.user.to_string()))
-
                 if batch:
                     current_state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[-1].event_id, types=types,
-                        filtered_types=filtered_types,
+                        batch.events[-1].event_id, state_filter=state_filter,
                     )
 
                     state_ids = yield self.store.get_state_ids_for_event(
-                        batch.events[0].event_id, types=types,
-                        filtered_types=filtered_types,
+                        batch.events[0].event_id, state_filter=state_filter,
                     )
 
                 else:
                     current_state_ids = yield self.get_state_at(
-                        room_id, stream_position=now_token, types=types,
-                        filtered_types=filtered_types,
+                        room_id, stream_position=now_token,
+                        state_filter=state_filter,
                     )
 
                     state_ids = current_state_ids
@@ -757,8 +769,7 @@ class SyncHandler(object):
                 )
             elif batch.limited:
                 state_at_timeline_start = yield self.store.get_state_ids_for_event(
-                    batch.events[0].event_id, types=types,
-                    filtered_types=filtered_types,
+                    batch.events[0].event_id, state_filter=state_filter,
                 )
 
                 # for now, we disable LL for gappy syncs - see
@@ -773,17 +784,15 @@ class SyncHandler(object):
                 # members to just be ones which were timeline senders, which then ensures
                 # all of the rest get included in the state block (if we need to know
                 # about them).
-                types = None
-                filtered_types = None
+                state_filter = StateFilter.all()
 
                 state_at_previous_sync = yield self.get_state_at(
-                    room_id, stream_position=since_token, types=types,
-                    filtered_types=filtered_types,
+                    room_id, stream_position=since_token,
+                    state_filter=state_filter,
                 )
 
                 current_state_ids = yield self.store.get_state_ids_for_event(
-                    batch.events[-1].event_id, types=types,
-                    filtered_types=filtered_types,
+                    batch.events[-1].event_id, state_filter=state_filter,
                 )
 
                 state_ids = _calculate_state(
@@ -797,7 +806,7 @@ class SyncHandler(object):
             else:
                 state_ids = {}
                 if lazy_load_members:
-                    if types and batch.events:
+                    if members_to_fetch and batch.events:
                         # We're returning an incremental sync, with no
                         # "gap" since the previous sync, so normally there would be
                         # no state to return.
@@ -807,8 +816,12 @@ class SyncHandler(object):
                         # timeline here, and then dedupe any redundant ones below.
 
                         state_ids = yield self.store.get_state_ids_for_event(
-                            batch.events[0].event_id, types=types,
-                            filtered_types=None,  # we only want members!
+                            batch.events[0].event_id,
+                            # we only want members!
+                            state_filter=StateFilter.from_types(
+                                (EventTypes.Member, member)
+                                for member in members_to_fetch
+                            ),
                         )
 
             if lazy_load_members and not include_redundant_members:
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index c610933dd4..a61bbf9392 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -63,11 +63,8 @@ class TypingHandler(object):
         self._member_typing_until = {}  # clock time we expect to stop
         self._member_last_federation_poke = {}
 
-        # map room IDs to serial numbers
-        self._room_serials = {}
         self._latest_room_serial = 0
-        # map room IDs to sets of users currently typing
-        self._room_typing = {}
+        self._reset()
 
         # caches which room_ids changed at which serials
         self._typing_stream_change_cache = StreamChangeCache(
@@ -79,6 +76,15 @@ class TypingHandler(object):
             5000,
         )
 
+    def _reset(self):
+        """
+        Reset the typing handler's data caches.
+        """
+        # map room IDs to serial numbers
+        self._room_serials = {}
+        # map room IDs to sets of users currently typing
+        self._room_typing = {}
+
     def _handle_timeouts(self):
         logger.info("Checking for typing timeouts")
 
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index d8413d6aa7..f11b430126 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -20,6 +20,7 @@ from six import iteritems
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, JoinRules, Membership
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.roommember import ProfileInfo
 from synapse.types import get_localpart_from_id
 from synapse.util.metrics import Measure
@@ -98,7 +99,6 @@ class UserDirectoryHandler(object):
         """
         return self.store.search_user_dir(user_id, search_term, limit)
 
-    @defer.inlineCallbacks
     def notify_new_event(self):
         """Called when there may be more deltas to process
         """
@@ -108,11 +108,15 @@ class UserDirectoryHandler(object):
         if self._is_processing:
             return
 
+        @defer.inlineCallbacks
+        def process():
+            try:
+                yield self._unsafe_process()
+            finally:
+                self._is_processing = False
+
         self._is_processing = True
-        try:
-            yield self._unsafe_process()
-        finally:
-            self._is_processing = False
+        run_as_background_process("user_directory.notify_new_event", process)
 
     @defer.inlineCallbacks
     def handle_local_profile_change(self, user_id, profile):
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 14b12cd1c4..24b6110c20 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -195,7 +195,7 @@ class MatrixFederationHttpClient(object):
         )
         self.clock = hs.get_clock()
         self._store = hs.get_datastore()
-        self.version_string = hs.version_string.encode('ascii')
+        self.version_string_bytes = hs.version_string.encode('ascii')
         self.default_timeout = 60
 
         def schedule(x):
@@ -230,7 +230,7 @@ class MatrixFederationHttpClient(object):
         Returns:
             Deferred: resolves with the http response object on success.
 
-            Fails with ``HTTPRequestException``: if we get an HTTP response
+            Fails with ``HttpResponseException``: if we get an HTTP response
                 code >= 300.
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
@@ -261,8 +261,8 @@ class MatrixFederationHttpClient(object):
             ignore_backoff=ignore_backoff,
         )
 
-        method = request.method
-        destination = request.destination
+        method_bytes = request.method.encode("ascii")
+        destination_bytes = request.destination.encode("ascii")
         path_bytes = request.path.encode("ascii")
         if request.query:
             query_bytes = encode_query_args(request.query)
@@ -270,8 +270,8 @@ class MatrixFederationHttpClient(object):
             query_bytes = b""
 
         headers_dict = {
-            "User-Agent": [self.version_string],
-            "Host": [request.destination],
+            b"User-Agent": [self.version_string_bytes],
+            b"Host": [destination_bytes],
         }
 
         with limiter:
@@ -282,50 +282,51 @@ class MatrixFederationHttpClient(object):
             else:
                 retries_left = MAX_SHORT_RETRIES
 
-            url = urllib.parse.urlunparse((
-                b"matrix", destination.encode("ascii"),
+            url_bytes = urllib.parse.urlunparse((
+                b"matrix", destination_bytes,
                 path_bytes, None, query_bytes, b"",
-            )).decode('ascii')
+            ))
+            url_str = url_bytes.decode('ascii')
 
-            http_url = urllib.parse.urlunparse((
+            url_to_sign_bytes = urllib.parse.urlunparse((
                 b"", b"",
                 path_bytes, None, query_bytes, b"",
-            )).decode('ascii')
+            ))
 
             while True:
                 try:
                     json = request.get_json()
                     if json:
-                        data = encode_canonical_json(json)
-                        headers_dict["Content-Type"] = ["application/json"]
+                        headers_dict[b"Content-Type"] = [b"application/json"]
                         self.sign_request(
-                            destination, method, http_url, headers_dict, json
+                            destination_bytes, method_bytes, url_to_sign_bytes,
+                            headers_dict, json,
                         )
-                    else:
-                        data = None
-                        self.sign_request(destination, method, http_url, headers_dict)
-
-                    logger.info(
-                        "{%s} [%s] Sending request: %s %s",
-                        request.txn_id, destination, method, url
-                    )
-
-                    if data:
+                        data = encode_canonical_json(json)
                         producer = FileBodyProducer(
                             BytesIO(data),
-                            cooperator=self._cooperator
+                            cooperator=self._cooperator,
                         )
                     else:
                         producer = None
+                        self.sign_request(
+                            destination_bytes, method_bytes, url_to_sign_bytes,
+                            headers_dict,
+                        )
 
-                    request_deferred = treq.request(
-                        method,
-                        url,
+                    logger.info(
+                        "{%s} [%s] Sending request: %s %s",
+                        request.txn_id, request.destination, request.method,
+                        url_str,
+                    )
+
+                    # we don't want all the fancy cookie and redirect handling that
+                    # treq.request gives: just use the raw Agent.
+                    request_deferred = self.agent.request(
+                        method_bytes,
+                        url_bytes,
                         headers=Headers(headers_dict),
-                        data=producer,
-                        agent=self.agent,
-                        reactor=self.hs.get_reactor(),
-                        unbuffered=True
+                        bodyProducer=producer,
                     )
 
                     request_deferred = timeout_deferred(
@@ -344,9 +345,9 @@ class MatrixFederationHttpClient(object):
                     logger.warn(
                         "{%s} [%s] Request failed: %s %s: %s",
                         request.txn_id,
-                        destination,
-                        method,
-                        url,
+                        request.destination,
+                        request.method,
+                        url_str,
                         _flatten_response_never_received(e),
                     )
 
@@ -366,7 +367,7 @@ class MatrixFederationHttpClient(object):
                         logger.debug(
                             "{%s} [%s] Waiting %ss before re-sending...",
                             request.txn_id,
-                            destination,
+                            request.destination,
                             delay,
                         )
 
@@ -378,7 +379,7 @@ class MatrixFederationHttpClient(object):
             logger.info(
                 "{%s} [%s] Got response headers: %d %s",
                 request.txn_id,
-                destination,
+                request.destination,
                 response.code,
                 response.phrase.decode('ascii', errors='replace'),
             )
@@ -411,8 +412,9 @@ class MatrixFederationHttpClient(object):
                 destination_is must be non-None.
             method (bytes): The HTTP method of the request
             url_bytes (bytes): The URI path of the request
-            headers_dict (dict): Dictionary of request headers to append to
-            content (bytes): The body of the request
+            headers_dict (dict[bytes, list[bytes]]): Dictionary of request headers to
+                append to
+            content (object): The body of the request
             destination_is (bytes): As 'destination', but if the destination is an
                 identity server
 
@@ -478,7 +480,7 @@ class MatrixFederationHttpClient(object):
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
 
-            Fails with ``HTTPRequestException`` if we get an HTTP response
+            Fails with ``HttpResponseException`` if we get an HTTP response
             code >= 300.
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
@@ -532,7 +534,7 @@ class MatrixFederationHttpClient(object):
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
 
-            Fails with ``HTTPRequestException`` if we get an HTTP response
+            Fails with ``HttpResponseException`` if we get an HTTP response
             code >= 300.
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
@@ -587,7 +589,7 @@ class MatrixFederationHttpClient(object):
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
 
-            Fails with ``HTTPRequestException`` if we get an HTTP response
+            Fails with ``HttpResponseException`` if we get an HTTP response
             code >= 300.
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
@@ -638,7 +640,7 @@ class MatrixFederationHttpClient(object):
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
 
-            Fails with ``HTTPRequestException`` if we get an HTTP response
+            Fails with ``HttpResponseException`` if we get an HTTP response
             code >= 300.
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
@@ -682,7 +684,7 @@ class MatrixFederationHttpClient(object):
             Deferred: resolves with an (int,dict) tuple of the file length and
             a dict of the response headers.
 
-            Fails with ``HTTPRequestException`` if we get an HTTP response code
+            Fails with ``HttpResponseException`` if we get an HTTP response code
             >= 300
 
             Fails with ``NotRetryingDestination`` if we are not yet ready
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index fedb4e6b18..62045a918b 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -39,7 +39,8 @@ outgoing_responses_counter = Counter(
 )
 
 response_timer = Histogram(
-    "synapse_http_server_response_time_seconds", "sec",
+    "synapse_http_server_response_time_seconds",
+    "sec",
     ["method", "servlet", "tag", "code"],
 )
 
@@ -79,15 +80,11 @@ response_size = Counter(
 # than when the response was written.
 
 in_flight_requests_ru_utime = Counter(
-    "synapse_http_server_in_flight_requests_ru_utime_seconds",
-    "",
-    ["method", "servlet"],
+    "synapse_http_server_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"]
 )
 
 in_flight_requests_ru_stime = Counter(
-    "synapse_http_server_in_flight_requests_ru_stime_seconds",
-    "",
-    ["method", "servlet"],
+    "synapse_http_server_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"]
 )
 
 in_flight_requests_db_txn_count = Counter(
@@ -134,7 +131,7 @@ def _get_in_flight_counts():
     # type
     counts = {}
     for rm in reqs:
-        key = (rm.method, rm.name,)
+        key = (rm.method, rm.name)
         counts[key] = counts.get(key, 0) + 1
 
     return counts
@@ -175,7 +172,8 @@ class RequestMetrics(object):
             if context != self.start_context:
                 logger.warn(
                     "Context have unexpectedly changed %r, %r",
-                    context, self.start_context
+                    context,
+                    self.start_context,
                 )
                 return
 
@@ -192,10 +190,10 @@ class RequestMetrics(object):
         resource_usage = context.get_resource_usage()
 
         response_ru_utime.labels(self.method, self.name, tag).inc(
-            resource_usage.ru_utime,
+            resource_usage.ru_utime
         )
         response_ru_stime.labels(self.method, self.name, tag).inc(
-            resource_usage.ru_stime,
+            resource_usage.ru_stime
         )
         response_db_txn_count.labels(self.method, self.name, tag).inc(
             resource_usage.db_txn_count
@@ -222,8 +220,15 @@ class RequestMetrics(object):
         diff = new_stats - self._request_stats
         self._request_stats = new_stats
 
-        in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
-        in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
+        # max() is used since rapid use of ru_stime/ru_utime can end up with the
+        # count going backwards due to NTP, time smearing, fine-grained
+        # correction, or floating points. Who knows, really?
+        in_flight_requests_ru_utime.labels(self.method, self.name).inc(
+            max(diff.ru_utime, 0)
+        )
+        in_flight_requests_ru_stime.labels(self.method, self.name).inc(
+            max(diff.ru_stime, 0)
+        )
 
         in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
             diff.db_txn_count
diff --git a/synapse/http/server.py b/synapse/http/server.py
index b4b25cab19..6a427d96a6 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -468,13 +468,13 @@ def set_cors_headers(request):
     Args:
         request (twisted.web.http.Request): The http request to add CORs to.
     """
-    request.setHeader("Access-Control-Allow-Origin", "*")
+    request.setHeader(b"Access-Control-Allow-Origin", b"*")
     request.setHeader(
-        "Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS"
+        b"Access-Control-Allow-Methods", b"GET, POST, PUT, DELETE, OPTIONS"
     )
     request.setHeader(
-        "Access-Control-Allow-Headers",
-        "Origin, X-Requested-With, Content-Type, Accept, Authorization"
+        b"Access-Control-Allow-Headers",
+        b"Origin, X-Requested-With, Content-Type, Accept, Authorization"
     )
 
 
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index a1e4b88e6d..528125e737 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -121,16 +121,15 @@ def parse_string(request, name, default=None, required=False,
 
     Args:
         request: the twisted HTTP request.
-        name (bytes/unicode): the name of the query parameter.
-        default (bytes/unicode|None): value to use if the parameter is absent,
+        name (bytes|unicode): the name of the query parameter.
+        default (bytes|unicode|None): value to use if the parameter is absent,
             defaults to None. Must be bytes if encoding is None.
         required (bool): whether to raise a 400 SynapseError if the
             parameter is absent, defaults to False.
-        allowed_values (list[bytes/unicode]): List of allowed values for the
+        allowed_values (list[bytes|unicode]): List of allowed values for the
             string, or None if any value is allowed, defaults to None. Must be
             the same type as name, if given.
-        encoding: The encoding to decode the name to, and decode the string
-            content with.
+        encoding (str|None): The encoding to decode the string content with.
 
     Returns:
         bytes/unicode|None: A string value or the default. Unicode if encoding
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 340b16ce25..de02b1017e 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -186,9 +186,9 @@ class Notifier(object):
         def count_listeners():
             all_user_streams = set()
 
-            for x in self.room_to_user_streams.values():
+            for x in list(self.room_to_user_streams.values()):
                 all_user_streams |= x
-            for x in self.user_to_user_stream.values():
+            for x in list(self.user_to_user_stream.values()):
                 all_user_streams.add(x)
 
             return sum(stream.count_listeners() for stream in all_user_streams)
@@ -196,7 +196,7 @@ class Notifier(object):
 
         LaterGauge(
             "synapse_notifier_rooms", "", [],
-            lambda: count(bool, self.room_to_user_streams.values()),
+            lambda: count(bool, list(self.room_to_user_streams.values())),
         )
         LaterGauge(
             "synapse_notifier_users", "", [],
diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py
index d746371420..50e1007d84 100644
--- a/synapse/push/emailpusher.py
+++ b/synapse/push/emailpusher.py
@@ -18,8 +18,7 @@ import logging
 from twisted.internet import defer
 from twisted.internet.error import AlreadyCalled, AlreadyCancelled
 
-from synapse.util.logcontext import LoggingContext
-from synapse.util.metrics import Measure
+from synapse.metrics.background_process_metrics import run_as_background_process
 
 logger = logging.getLogger(__name__)
 
@@ -71,18 +70,11 @@ class EmailPusher(object):
         # See httppusher
         self.max_stream_ordering = None
 
-        self.processing = False
+        self._is_processing = False
 
-    @defer.inlineCallbacks
     def on_started(self):
         if self.mailer is not None:
-            try:
-                self.throttle_params = yield self.store.get_throttle_params_by_room(
-                    self.pusher_id
-                )
-                yield self._process()
-            except Exception:
-                logger.exception("Error starting email pusher")
+            self._start_processing()
 
     def on_stop(self):
         if self.timed_call:
@@ -92,43 +84,55 @@ class EmailPusher(object):
                 pass
             self.timed_call = None
 
-    @defer.inlineCallbacks
     def on_new_notifications(self, min_stream_ordering, max_stream_ordering):
-        self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering)
-        yield self._process()
+        if self.max_stream_ordering:
+            self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering)
+        else:
+            self.max_stream_ordering = max_stream_ordering
+        self._start_processing()
 
     def on_new_receipts(self, min_stream_id, max_stream_id):
         # We could wake up and cancel the timer but there tend to be quite a
         # lot of read receipts so it's probably less work to just let the
         # timer fire
-        return defer.succeed(None)
+        pass
 
-    @defer.inlineCallbacks
     def on_timer(self):
         self.timed_call = None
-        yield self._process()
+        self._start_processing()
+
+    def _start_processing(self):
+        if self._is_processing:
+            return
+
+        run_as_background_process("emailpush.process", self._process)
 
     @defer.inlineCallbacks
     def _process(self):
-        if self.processing:
-            return
+        # we should never get here if we are already processing
+        assert not self._is_processing
+
+        try:
+            self._is_processing = True
+
+            if self.throttle_params is None:
+                # this is our first loop: load up the throttle params
+                self.throttle_params = yield self.store.get_throttle_params_by_room(
+                    self.pusher_id
+                )
 
-        with LoggingContext("emailpush._process"):
-            with Measure(self.clock, "emailpush._process"):
+            # if the max ordering changes while we're running _unsafe_process,
+            # call it again, and so on until we've caught up.
+            while True:
+                starting_max_ordering = self.max_stream_ordering
                 try:
-                    self.processing = True
-                    # if the max ordering changes while we're running _unsafe_process,
-                    # call it again, and so on until we've caught up.
-                    while True:
-                        starting_max_ordering = self.max_stream_ordering
-                        try:
-                            yield self._unsafe_process()
-                        except Exception:
-                            logger.exception("Exception processing notifs")
-                        if self.max_stream_ordering == starting_max_ordering:
-                            break
-                finally:
-                    self.processing = False
+                    yield self._unsafe_process()
+                except Exception:
+                    logger.exception("Exception processing notifs")
+                if self.max_stream_ordering == starting_max_ordering:
+                    break
+        finally:
+            self._is_processing = False
 
     @defer.inlineCallbacks
     def _unsafe_process(self):
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 48abd5e4d6..87fa7f006a 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -22,9 +22,8 @@ from prometheus_client import Counter
 from twisted.internet import defer
 from twisted.internet.error import AlreadyCalled, AlreadyCancelled
 
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.push import PusherConfigException
-from synapse.util.logcontext import LoggingContext
-from synapse.util.metrics import Measure
 
 from . import push_rule_evaluator, push_tools
 
@@ -61,7 +60,7 @@ class HttpPusher(object):
         self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC
         self.failing_since = pusherdict['failing_since']
         self.timed_call = None
-        self.processing = False
+        self._is_processing = False
 
         # This is the highest stream ordering we know it's safe to process.
         # When new events arrive, we'll be given a window of new events: we
@@ -92,34 +91,27 @@ class HttpPusher(object):
         self.data_minus_url.update(self.data)
         del self.data_minus_url['url']
 
-    @defer.inlineCallbacks
     def on_started(self):
-        try:
-            yield self._process()
-        except Exception:
-            logger.exception("Error starting http pusher")
+        self._start_processing()
 
-    @defer.inlineCallbacks
     def on_new_notifications(self, min_stream_ordering, max_stream_ordering):
         self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0)
-        yield self._process()
+        self._start_processing()
 
-    @defer.inlineCallbacks
     def on_new_receipts(self, min_stream_id, max_stream_id):
         # Note that the min here shouldn't be relied upon to be accurate.
 
         # We could check the receipts are actually m.read receipts here,
         # but currently that's the only type of receipt anyway...
-        with LoggingContext("push.on_new_receipts"):
-            with Measure(self.clock, "push.on_new_receipts"):
-                badge = yield push_tools.get_badge_count(
-                    self.hs.get_datastore(), self.user_id
-                )
-            yield self._send_badge(badge)
+        run_as_background_process("http_pusher.on_new_receipts", self._update_badge)
 
     @defer.inlineCallbacks
+    def _update_badge(self):
+        badge = yield push_tools.get_badge_count(self.hs.get_datastore(), self.user_id)
+        yield self._send_badge(badge)
+
     def on_timer(self):
-        yield self._process()
+        self._start_processing()
 
     def on_stop(self):
         if self.timed_call:
@@ -129,27 +121,31 @@ class HttpPusher(object):
                 pass
             self.timed_call = None
 
+    def _start_processing(self):
+        if self._is_processing:
+            return
+
+        run_as_background_process("httppush.process", self._process)
+
     @defer.inlineCallbacks
     def _process(self):
-        if self.processing:
-            return
+        # we should never get here if we are already processing
+        assert not self._is_processing
 
-        with LoggingContext("push._process"):
-            with Measure(self.clock, "push._process"):
+        try:
+            self._is_processing = True
+            # if the max ordering changes while we're running _unsafe_process,
+            # call it again, and so on until we've caught up.
+            while True:
+                starting_max_ordering = self.max_stream_ordering
                 try:
-                    self.processing = True
-                    # if the max ordering changes while we're running _unsafe_process,
-                    # call it again, and so on until we've caught up.
-                    while True:
-                        starting_max_ordering = self.max_stream_ordering
-                        try:
-                            yield self._unsafe_process()
-                        except Exception:
-                            logger.exception("Exception processing notifs")
-                        if self.max_stream_ordering == starting_max_ordering:
-                            break
-                finally:
-                    self.processing = False
+                    yield self._unsafe_process()
+                except Exception:
+                    logger.exception("Exception processing notifs")
+                if self.max_stream_ordering == starting_max_ordering:
+                    break
+        finally:
+            self._is_processing = False
 
     @defer.inlineCallbacks
     def _unsafe_process(self):
@@ -315,10 +311,10 @@ class HttpPusher(object):
                 ]
             }
         }
-        if event.type == 'm.room.member':
+        if event.type == 'm.room.member' and event.is_state():
             d['notification']['membership'] = event.content['membership']
             d['notification']['user_is_target'] = event.state_key == self.user_id
-        if self.hs.config.push_include_content and 'content' in event:
+        if self.hs.config.push_include_content and event.content:
             d['notification']['content'] = event.content
 
         # We no longer send aliases separately, instead, we send the human
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 1a5a10d974..1eb5be0957 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -26,7 +26,6 @@ import bleach
 import jinja2
 
 from twisted.internet import defer
-from twisted.mail.smtp import sendmail
 
 from synapse.api.constants import EventTypes
 from synapse.api.errors import StoreError
@@ -37,6 +36,7 @@ from synapse.push.presentable_names import (
 )
 from synapse.types import UserID
 from synapse.util.async_helpers import concurrently_execute
+from synapse.util.logcontext import make_deferred_yieldable
 from synapse.visibility import filter_events_for_client
 
 logger = logging.getLogger(__name__)
@@ -85,6 +85,7 @@ class Mailer(object):
         self.notif_template_html = notif_template_html
         self.notif_template_text = notif_template_text
 
+        self.sendmail = self.hs.get_sendmail()
         self.store = self.hs.get_datastore()
         self.macaroon_gen = self.hs.get_macaroon_generator()
         self.state_handler = self.hs.get_state_handler()
@@ -191,17 +192,17 @@ class Mailer(object):
         multipart_msg.attach(html_part)
 
         logger.info("Sending email push notification to %s" % email_address)
-        # logger.debug(html_text)
 
-        yield sendmail(
+        yield make_deferred_yieldable(self.sendmail(
             self.hs.config.email_smtp_host,
-            raw_from, raw_to, multipart_msg.as_string(),
+            raw_from, raw_to, multipart_msg.as_string().encode('utf8'),
+            reactor=self.hs.get_reactor(),
             port=self.hs.config.email_smtp_port,
             requireAuthentication=self.hs.config.email_smtp_user is not None,
             username=self.hs.config.email_smtp_user,
             password=self.hs.config.email_smtp_pass,
             requireTransportSecurity=self.hs.config.require_transport_security
-        )
+        ))
 
     @defer.inlineCallbacks
     def get_room_vars(self, room_id, user_id, notifs, notif_events, room_state_ids):
@@ -333,7 +334,7 @@ class Mailer(object):
                           notif_events, user_id, reason):
         if len(notifs_by_room) == 1:
             # Only one room has new stuff
-            room_id = notifs_by_room.keys()[0]
+            room_id = list(notifs_by_room.keys())[0]
 
             # If the room has some kind of name, use it, but we don't
             # want the generated-from-names one here otherwise we'll
@@ -526,8 +527,7 @@ def load_jinja2_templates(config):
     Returns:
         (notif_template_html, notif_template_text)
     """
-    logger.info("loading jinja2")
-
+    logger.info("loading email templates from '%s'", config.email_template_dir)
     loader = jinja2.FileSystemLoader(config.email_template_dir)
     env = jinja2.Environment(loader=loader)
     env.filters["format_ts"] = format_ts_filter
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 2bd321d530..cf6c8b875e 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -124,7 +124,7 @@ class PushRuleEvaluatorForEvent(object):
 
         # XXX: optimisation: cache our pattern regexps
         if condition['key'] == 'content.body':
-            body = self._event["content"].get("body", None)
+            body = self._event.content.get("body", None)
             if not body:
                 return False
 
@@ -140,7 +140,7 @@ class PushRuleEvaluatorForEvent(object):
         if not display_name:
             return False
 
-        body = self._event["content"].get("body", None)
+        body = self._event.content.get("body", None)
         if not body:
             return False
 
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 9f7d5ef217..5a4e73ccd6 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -20,24 +20,39 @@ from twisted.internet import defer
 
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.push.pusher import PusherFactory
-from synapse.util.logcontext import make_deferred_yieldable, run_in_background
 
 logger = logging.getLogger(__name__)
 
 
 class PusherPool:
+    """
+    The pusher pool. This is responsible for dispatching notifications of new events to
+    the http and email pushers.
+
+    It provides three methods which are designed to be called by the rest of the
+    application: `start`, `on_new_notifications`, and `on_new_receipts`: each of these
+    delegates to each of the relevant pushers.
+
+    Note that it is expected that each pusher will have its own 'processing' loop which
+    will send out the notifications in the background, rather than blocking until the
+    notifications are sent; accordingly Pusher.on_started, Pusher.on_new_notifications and
+    Pusher.on_new_receipts are not expected to return deferreds.
+    """
     def __init__(self, _hs):
         self.hs = _hs
         self.pusher_factory = PusherFactory(_hs)
-        self.start_pushers = _hs.config.start_pushers
+        self._should_start_pushers = _hs.config.start_pushers
         self.store = self.hs.get_datastore()
         self.clock = self.hs.get_clock()
         self.pushers = {}
 
-    @defer.inlineCallbacks
     def start(self):
-        pushers = yield self.store.get_all_pushers()
-        self._start_pushers(pushers)
+        """Starts the pushers off in a background process.
+        """
+        if not self._should_start_pushers:
+            logger.info("Not starting pushers because they are disabled in the config")
+            return
+        run_as_background_process("start_pushers", self._start_pushers)
 
     @defer.inlineCallbacks
     def add_pusher(self, user_id, access_token, kind, app_id,
@@ -86,7 +101,7 @@ class PusherPool:
             last_stream_ordering=last_stream_ordering,
             profile_tag=profile_tag,
         )
-        yield self._refresh_pusher(app_id, pushkey, user_id)
+        yield self.start_pusher_by_id(app_id, pushkey, user_id)
 
     @defer.inlineCallbacks
     def remove_pushers_by_app_id_and_pushkey_not_user(self, app_id, pushkey,
@@ -123,45 +138,23 @@ class PusherPool:
                     p['app_id'], p['pushkey'], p['user_name'],
                 )
 
-    def on_new_notifications(self, min_stream_id, max_stream_id):
-        run_as_background_process(
-            "on_new_notifications",
-            self._on_new_notifications, min_stream_id, max_stream_id,
-        )
-
     @defer.inlineCallbacks
-    def _on_new_notifications(self, min_stream_id, max_stream_id):
+    def on_new_notifications(self, min_stream_id, max_stream_id):
         try:
             users_affected = yield self.store.get_push_action_users_in_range(
                 min_stream_id, max_stream_id
             )
 
-            deferreds = []
-
             for u in users_affected:
                 if u in self.pushers:
                     for p in self.pushers[u].values():
-                        deferreds.append(
-                            run_in_background(
-                                p.on_new_notifications,
-                                min_stream_id, max_stream_id,
-                            )
-                        )
-
-            yield make_deferred_yieldable(
-                defer.gatherResults(deferreds, consumeErrors=True),
-            )
+                        p.on_new_notifications(min_stream_id, max_stream_id)
+
         except Exception:
             logger.exception("Exception in pusher on_new_notifications")
 
-    def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids):
-        run_as_background_process(
-            "on_new_receipts",
-            self._on_new_receipts, min_stream_id, max_stream_id, affected_room_ids,
-        )
-
     @defer.inlineCallbacks
-    def _on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids):
+    def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids):
         try:
             # Need to subtract 1 from the minimum because the lower bound here
             # is not inclusive
@@ -171,26 +164,20 @@ class PusherPool:
             # This returns a tuple, user_id is at index 3
             users_affected = set([r[3] for r in updated_receipts])
 
-            deferreds = []
-
             for u in users_affected:
                 if u in self.pushers:
                     for p in self.pushers[u].values():
-                        deferreds.append(
-                            run_in_background(
-                                p.on_new_receipts,
-                                min_stream_id, max_stream_id,
-                            )
-                        )
-
-            yield make_deferred_yieldable(
-                defer.gatherResults(deferreds, consumeErrors=True),
-            )
+                        p.on_new_receipts(min_stream_id, max_stream_id)
+
         except Exception:
             logger.exception("Exception in pusher on_new_receipts")
 
     @defer.inlineCallbacks
-    def _refresh_pusher(self, app_id, pushkey, user_id):
+    def start_pusher_by_id(self, app_id, pushkey, user_id):
+        """Look up the details for the given pusher, and start it"""
+        if not self._should_start_pushers:
+            return
+
         resultlist = yield self.store.get_pushers_by_app_id_and_pushkey(
             app_id, pushkey
         )
@@ -201,33 +188,49 @@ class PusherPool:
                 p = r
 
         if p:
+            self._start_pusher(p)
 
-            self._start_pushers([p])
+    @defer.inlineCallbacks
+    def _start_pushers(self):
+        """Start all the pushers
 
-    def _start_pushers(self, pushers):
-        if not self.start_pushers:
-            logger.info("Not starting pushers because they are disabled in the config")
-            return
+        Returns:
+            Deferred
+        """
+        pushers = yield self.store.get_all_pushers()
         logger.info("Starting %d pushers", len(pushers))
         for pusherdict in pushers:
-            try:
-                p = self.pusher_factory.create_pusher(pusherdict)
-            except Exception:
-                logger.exception("Couldn't start a pusher: caught Exception")
-                continue
-            if p:
-                appid_pushkey = "%s:%s" % (
-                    pusherdict['app_id'],
-                    pusherdict['pushkey'],
-                )
-                byuser = self.pushers.setdefault(pusherdict['user_name'], {})
+            self._start_pusher(pusherdict)
+        logger.info("Started pushers")
 
-                if appid_pushkey in byuser:
-                    byuser[appid_pushkey].on_stop()
-                byuser[appid_pushkey] = p
-                run_in_background(p.on_started)
+    def _start_pusher(self, pusherdict):
+        """Start the given pusher
 
-        logger.info("Started pushers")
+        Args:
+            pusherdict (dict):
+
+        Returns:
+            None
+        """
+        try:
+            p = self.pusher_factory.create_pusher(pusherdict)
+        except Exception:
+            logger.exception("Couldn't start a pusher: caught Exception")
+            return
+
+        if not p:
+            return
+
+        appid_pushkey = "%s:%s" % (
+            pusherdict['app_id'],
+            pusherdict['pushkey'],
+        )
+        byuser = self.pushers.setdefault(pusherdict['user_name'], {})
+
+        if appid_pushkey in byuser:
+            byuser[appid_pushkey].on_stop()
+        byuser[appid_pushkey] = p
+        p.on_started()
 
     @defer.inlineCallbacks
     def remove_pusher(self, app_id, pushkey, user_id):
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index d4d983b00a..ca62ee7637 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -51,11 +51,11 @@ REQUIREMENTS = {
     "daemonize>=2.3.1": ["daemonize"],
     "bcrypt>=3.1.0": ["bcrypt>=3.1.0"],
     "pillow>=3.1.2": ["PIL"],
-    "pydenticon>=0.2": ["pydenticon"],
     "sortedcontainers>=1.4.4": ["sortedcontainers"],
+    "psutil>=2.0.0": ["psutil>=2.0.0"],
     "pysaml2>=3.0.0": ["saml2"],
     "pymacaroons-pynacl>=0.9.3": ["pymacaroons"],
-    "msgpack-python>=0.3.0": ["msgpack"],
+    "msgpack-python>=0.4.2": ["msgpack"],
     "phonenumbers>=8.2.0": ["phonenumbers"],
     "six>=1.10": ["six"],
 
@@ -79,12 +79,6 @@ CONDITIONAL_REQUIREMENTS = {
     "matrix-synapse-ldap3": {
         "matrix-synapse-ldap3>=0.1": ["ldap_auth_provider"],
     },
-    "psutil": {
-        "psutil>=2.0.0": ["psutil>=2.0.0"],
-    },
-    "affinity": {
-        "affinity": ["affinity"],
-    },
     "postgres": {
         "psycopg2>=2.6": ["psycopg2"]
     }
diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py
index 3f7be74e02..2d81d49e9a 100644
--- a/synapse/replication/slave/storage/_base.py
+++ b/synapse/replication/slave/storage/_base.py
@@ -15,6 +15,8 @@
 
 import logging
 
+import six
+
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.engines import PostgresEngine
 
@@ -23,6 +25,13 @@ from ._slaved_id_tracker import SlavedIdTracker
 logger = logging.getLogger(__name__)
 
 
+def __func__(inp):
+    if six.PY3:
+        return inp
+    else:
+        return inp.__func__
+
+
 class BaseSlavedStore(SQLBaseStore):
     def __init__(self, db_conn, hs):
         super(BaseSlavedStore, self).__init__(db_conn, hs)
diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py
index 87eaa53004..4f19fd35aa 100644
--- a/synapse/replication/slave/storage/deviceinbox.py
+++ b/synapse/replication/slave/storage/deviceinbox.py
@@ -17,7 +17,7 @@ from synapse.storage import DataStore
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import BaseSlavedStore
+from ._base import BaseSlavedStore, __func__
 from ._slaved_id_tracker import SlavedIdTracker
 
 
@@ -43,11 +43,11 @@ class SlavedDeviceInboxStore(BaseSlavedStore):
             expiry_ms=30 * 60 * 1000,
         )
 
-    get_to_device_stream_token = DataStore.get_to_device_stream_token.__func__
-    get_new_messages_for_device = DataStore.get_new_messages_for_device.__func__
-    get_new_device_msgs_for_remote = DataStore.get_new_device_msgs_for_remote.__func__
-    delete_messages_for_device = DataStore.delete_messages_for_device.__func__
-    delete_device_msgs_for_remote = DataStore.delete_device_msgs_for_remote.__func__
+    get_to_device_stream_token = __func__(DataStore.get_to_device_stream_token)
+    get_new_messages_for_device = __func__(DataStore.get_new_messages_for_device)
+    get_new_device_msgs_for_remote = __func__(DataStore.get_new_device_msgs_for_remote)
+    delete_messages_for_device = __func__(DataStore.delete_messages_for_device)
+    delete_device_msgs_for_remote = __func__(DataStore.delete_device_msgs_for_remote)
 
     def stream_positions(self):
         result = super(SlavedDeviceInboxStore, self).stream_positions()
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
index 21b8c468fa..ec2fd561cc 100644
--- a/synapse/replication/slave/storage/devices.py
+++ b/synapse/replication/slave/storage/devices.py
@@ -13,23 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import six
-
 from synapse.storage import DataStore
 from synapse.storage.end_to_end_keys import EndToEndKeyStore
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import BaseSlavedStore
+from ._base import BaseSlavedStore, __func__
 from ._slaved_id_tracker import SlavedIdTracker
 
 
-def __func__(inp):
-    if six.PY3:
-        return inp
-    else:
-        return inp.__func__
-
-
 class SlavedDeviceStore(BaseSlavedStore):
     def __init__(self, db_conn, hs):
         super(SlavedDeviceStore, self).__init__(db_conn, hs)
diff --git a/synapse/replication/slave/storage/groups.py b/synapse/replication/slave/storage/groups.py
index 5777f07c8d..e933b170bb 100644
--- a/synapse/replication/slave/storage/groups.py
+++ b/synapse/replication/slave/storage/groups.py
@@ -16,7 +16,7 @@
 from synapse.storage import DataStore
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import BaseSlavedStore
+from ._base import BaseSlavedStore, __func__
 from ._slaved_id_tracker import SlavedIdTracker
 
 
@@ -33,9 +33,9 @@ class SlavedGroupServerStore(BaseSlavedStore):
             "_group_updates_stream_cache", self._group_updates_id_gen.get_current_token(),
         )
 
-    get_groups_changes_for_user = DataStore.get_groups_changes_for_user.__func__
-    get_group_stream_token = DataStore.get_group_stream_token.__func__
-    get_all_groups_for_user = DataStore.get_all_groups_for_user.__func__
+    get_groups_changes_for_user = __func__(DataStore.get_groups_changes_for_user)
+    get_group_stream_token = __func__(DataStore.get_group_stream_token)
+    get_all_groups_for_user = __func__(DataStore.get_all_groups_for_user)
 
     def stream_positions(self):
         result = super(SlavedGroupServerStore, self).stream_positions()
diff --git a/synapse/replication/slave/storage/keys.py b/synapse/replication/slave/storage/keys.py
index 05ed168463..8032f53fec 100644
--- a/synapse/replication/slave/storage/keys.py
+++ b/synapse/replication/slave/storage/keys.py
@@ -16,7 +16,7 @@
 from synapse.storage import DataStore
 from synapse.storage.keys import KeyStore
 
-from ._base import BaseSlavedStore
+from ._base import BaseSlavedStore, __func__
 
 
 class SlavedKeyStore(BaseSlavedStore):
@@ -24,11 +24,11 @@ class SlavedKeyStore(BaseSlavedStore):
         "_get_server_verify_key"
     ]
 
-    get_server_verify_keys = DataStore.get_server_verify_keys.__func__
-    store_server_verify_key = DataStore.store_server_verify_key.__func__
+    get_server_verify_keys = __func__(DataStore.get_server_verify_keys)
+    store_server_verify_key = __func__(DataStore.store_server_verify_key)
 
-    get_server_certificate = DataStore.get_server_certificate.__func__
-    store_server_certificate = DataStore.store_server_certificate.__func__
+    get_server_certificate = __func__(DataStore.get_server_certificate)
+    store_server_certificate = __func__(DataStore.store_server_certificate)
 
-    get_server_keys_json = DataStore.get_server_keys_json.__func__
-    store_server_keys_json = DataStore.store_server_keys_json.__func__
+    get_server_keys_json = __func__(DataStore.get_server_keys_json)
+    store_server_keys_json = __func__(DataStore.store_server_keys_json)
diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py
index 80b744082a..92447b00d4 100644
--- a/synapse/replication/slave/storage/presence.py
+++ b/synapse/replication/slave/storage/presence.py
@@ -17,7 +17,7 @@ from synapse.storage import DataStore
 from synapse.storage.presence import PresenceStore
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import BaseSlavedStore
+from ._base import BaseSlavedStore, __func__
 from ._slaved_id_tracker import SlavedIdTracker
 
 
@@ -34,8 +34,8 @@ class SlavedPresenceStore(BaseSlavedStore):
             "PresenceStreamChangeCache", self._presence_id_gen.get_current_token()
         )
 
-    _get_active_presence = DataStore._get_active_presence.__func__
-    take_presence_startup_info = DataStore.take_presence_startup_info.__func__
+    _get_active_presence = __func__(DataStore._get_active_presence)
+    take_presence_startup_info = __func__(DataStore.take_presence_startup_info)
     _get_presence_for_user = PresenceStore.__dict__["_get_presence_for_user"]
     get_presence_for_users = PresenceStore.__dict__["get_presence_for_users"]
 
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index cbe9645817..586dddb40b 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -106,7 +106,7 @@ class ReplicationClientHandler(object):
 
         Can be overriden in subclasses to handle more.
         """
-        logger.info("Received rdata %s -> %s", stream_name, token)
+        logger.debug("Received rdata %s -> %s", stream_name, token)
         return self.store.process_replication_rows(stream_name, token, rows)
 
     def on_position(self, stream_name, token):
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 5dc7b3fffc..0b3fe6cbf5 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -656,7 +656,7 @@ tcp_inbound_commands = LaterGauge(
     "",
     ["command", "name"],
     lambda: {
-        (k[0], p.name,): count
+        (k, p.name,): count
         for p in connected_connections
         for k, count in iteritems(p.inbound_commands_counter)
     },
@@ -667,7 +667,7 @@ tcp_outbound_commands = LaterGauge(
     "",
     ["command", "name"],
     lambda: {
-        (k[0], p.name,): count
+        (k, p.name,): count
         for p in connected_connections
         for k, count in iteritems(p.outbound_commands_counter)
     },
diff --git a/synapse/res/templates/mail-Vector.css b/synapse/res/templates/mail-Vector.css
new file mode 100644
index 0000000000..6a3e36eda1
--- /dev/null
+++ b/synapse/res/templates/mail-Vector.css
@@ -0,0 +1,7 @@
+.header {
+    border-bottom: 4px solid #e4f7ed ! important;
+}
+
+.notif_link a, .footer a {
+    color: #76CFA6 ! important;
+}
diff --git a/synapse/res/templates/mail.css b/synapse/res/templates/mail.css
new file mode 100644
index 0000000000..5ab3e1b06d
--- /dev/null
+++ b/synapse/res/templates/mail.css
@@ -0,0 +1,156 @@
+body {
+    margin: 0px;
+}
+
+pre, code {
+    word-break: break-word;
+    white-space: pre-wrap;
+}
+
+#page {
+    font-family: 'Open Sans', Helvetica, Arial, Sans-Serif;
+    font-color: #454545;
+    font-size: 12pt;
+    width: 100%;
+    padding: 20px;
+}
+
+#inner {
+    width: 640px;
+}
+
+.header {
+    width: 100%;
+    height: 87px;
+    color: #454545;
+    border-bottom: 4px solid #e5e5e5;
+}
+
+.logo {
+    text-align: right;
+    margin-left: 20px;
+}
+
+.salutation {
+    padding-top: 10px;
+    font-weight: bold;
+}
+
+.summarytext {
+}
+
+.room {
+    width: 100%;
+    color: #454545;
+    border-bottom: 1px solid #e5e5e5;
+}
+
+.room_header td {
+    padding-top: 38px;
+    padding-bottom: 10px;
+    border-bottom: 1px solid #e5e5e5;
+}
+
+.room_name {
+    vertical-align: middle;
+    font-size: 18px;
+    font-weight: bold;
+}
+
+.room_header h2 {
+    margin-top: 0px;
+    margin-left: 75px;
+    font-size: 20px;
+}
+
+.room_avatar {
+    width: 56px;
+    line-height: 0px;
+    text-align: center;
+    vertical-align: middle;
+}
+
+.room_avatar img {
+    width: 48px;
+    height: 48px;
+    object-fit: cover;
+    border-radius: 24px;
+}
+
+.notif {
+    border-bottom: 1px solid #e5e5e5;
+    margin-top: 16px;
+    padding-bottom: 16px;
+}
+
+.historical_message .sender_avatar {
+    opacity: 0.3;
+}
+
+/* spell out opacity and historical_message class names for Outlook aka Word */
+.historical_message .sender_name {
+    color: #e3e3e3;
+}
+
+.historical_message .message_time {
+    color: #e3e3e3;
+}
+
+.historical_message .message_body {
+    color: #c7c7c7;
+}
+
+.historical_message td,
+.message td {
+    padding-top: 10px;
+}
+
+.sender_avatar {
+    width: 56px;
+    text-align: center;
+    vertical-align: top;
+}
+
+.sender_avatar img {
+    margin-top: -2px;
+    width: 32px;
+    height: 32px;
+    border-radius: 16px;
+}
+
+.sender_name  {
+    display: inline;
+    font-size: 13px;
+    color: #a2a2a2;
+}
+
+.message_time  {
+    text-align: right;
+    width: 100px;
+    font-size: 11px;
+    color: #a2a2a2;
+}
+
+.message_body {
+}
+
+.notif_link td {
+    padding-top: 10px;
+    padding-bottom: 10px;
+    font-weight: bold;
+}
+
+.notif_link a, .footer a {
+    color: #454545;
+    text-decoration: none;
+}
+
+.debug {
+    font-size: 10px;
+    color: #888;
+}
+
+.footer {
+    margin-top: 20px;
+    text-align: center;
+}
\ No newline at end of file
diff --git a/synapse/res/templates/notif.html b/synapse/res/templates/notif.html
new file mode 100644
index 0000000000..88b921ca9c
--- /dev/null
+++ b/synapse/res/templates/notif.html
@@ -0,0 +1,45 @@
+{% for message in notif.messages %}
+    <tr class="{{ "historical_message" if message.is_historical else "message" }}">
+        <td class="sender_avatar">
+            {% if loop.index0 == 0 or notif.messages[loop.index0 - 1].sender_name != notif.messages[loop.index0].sender_name %}
+                {% if message.sender_avatar_url %}
+                    <img alt="" class="sender_avatar" src="{{ message.sender_avatar_url|mxc_to_http(32,32) }}"  />
+                {% else %}
+                    {% if message.sender_hash % 3 == 0 %}
+                        <img class="sender_avatar" src="https://vector.im/beta/img/76cfa6.png"  />
+                    {% elif message.sender_hash % 3 == 1 %}
+                        <img class="sender_avatar" src="https://vector.im/beta/img/50e2c2.png"  />
+                    {% else %}
+                        <img class="sender_avatar" src="https://vector.im/beta/img/f4c371.png"  />
+                    {% endif %}
+                {% endif %}
+            {% endif %}
+        </td>
+        <td class="message_contents">
+            {% if loop.index0 == 0 or notif.messages[loop.index0 - 1].sender_name != notif.messages[loop.index0].sender_name %}
+                <div class="sender_name">{% if message.msgtype == "m.emote" %}*{% endif %} {{ message.sender_name }}</div>
+            {% endif %}
+            <div class="message_body">
+                {% if message.msgtype == "m.text" %}
+                    {{ message.body_text_html }}
+                {% elif message.msgtype == "m.emote" %}
+                    {{ message.body_text_html }}
+                {% elif message.msgtype == "m.notice" %}
+                    {{ message.body_text_html }}
+                {% elif message.msgtype == "m.image" %}
+                    <img src="{{ message.image_url|mxc_to_http(640, 480, scale) }}" />
+                {% elif message.msgtype == "m.file" %}
+                    <span class="filename">{{ message.body_text_plain }}</span>
+                {% endif %}
+            </div>
+        </td>
+        <td class="message_time">{{ message.ts|format_ts("%H:%M") }}</td>
+    </tr>
+{% endfor %}
+<tr class="notif_link">
+    <td></td>
+    <td>
+        <a href="{{ notif.link }}">View {{ room.title }}</a>
+    </td>
+    <td></td>
+</tr>
diff --git a/synapse/res/templates/notif.txt b/synapse/res/templates/notif.txt
new file mode 100644
index 0000000000..a37bee9833
--- /dev/null
+++ b/synapse/res/templates/notif.txt
@@ -0,0 +1,16 @@
+{% for message in notif.messages %}
+{% if message.msgtype == "m.emote" %}* {% endif %}{{ message.sender_name }} ({{ message.ts|format_ts("%H:%M") }})
+{% if message.msgtype == "m.text" %}
+{{ message.body_text_plain }}
+{% elif message.msgtype == "m.emote" %}
+{{ message.body_text_plain }}
+{% elif message.msgtype == "m.notice" %}
+{{ message.body_text_plain }}
+{% elif message.msgtype == "m.image" %}
+{{ message.body_text_plain }}
+{% elif message.msgtype == "m.file" %}
+{{ message.body_text_plain }}
+{% endif %}
+{% endfor %}
+
+View {{ room.title }} at {{ notif.link }}
diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html
new file mode 100644
index 0000000000..fcdb3109fe
--- /dev/null
+++ b/synapse/res/templates/notif_mail.html
@@ -0,0 +1,55 @@
+<!doctype html>
+<html lang="en">
+    <head>
+        <style type="text/css">
+            {% include 'mail.css' without context %}
+            {% include "mail-%s.css" % app_name ignore missing without context %}
+        </style>
+    </head>
+    <body>
+        <table id="page">
+            <tr>
+                <td> </td>
+                <td id="inner">
+                    <table class="header">
+                        <tr>
+                            <td>
+                                <div class="salutation">Hi {{ user_display_name }},</div>
+                                <div class="summarytext">{{ summary_text }}</div>
+                            </td>
+                            <td class="logo">
+                                {% if app_name == "Riot" %}
+                                    <img src="http://matrix.org/img/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+                                {% elif app_name == "Vector" %}
+                                    <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+                                {% else %}
+                                    <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+                                {% endif %}
+                            </td>
+                        </tr>
+                    </table>
+                    {% for room in rooms %}
+                        {% include 'room.html' with context %}
+                    {% endfor %}
+                    <div class="footer">
+                        <a href="{{ unsubscribe_link }}">Unsubscribe</a>
+                        <br/>
+                        <br/>
+                        <div class="debug">
+                            Sending email at {{ reason.now|format_ts("%c") }} due to activity in room {{ reason.room_name }} because
+                            an event was received at {{ reason.received_at|format_ts("%c") }}
+                            which is more than {{ "%.1f"|format(reason.delay_before_mail_ms / (60*1000)) }} ({{ reason.delay_before_mail_ms }}) mins ago,
+                            {% if reason.last_sent_ts %}
+                                and the last time we sent a mail for this room was {{ reason.last_sent_ts|format_ts("%c") }},
+                                which is more than {{ "%.1f"|format(reason.throttle_ms / (60*1000)) }} (current throttle_ms) mins ago.
+                            {% else %}
+                                and we don't have a last time we sent a mail for this room.
+                            {% endif %}
+                        </div>
+                    </div>
+                </td>
+                <td> </td>
+            </tr>
+        </table>
+    </body>
+</html>
diff --git a/synapse/res/templates/notif_mail.txt b/synapse/res/templates/notif_mail.txt
new file mode 100644
index 0000000000..24843042a5
--- /dev/null
+++ b/synapse/res/templates/notif_mail.txt
@@ -0,0 +1,10 @@
+Hi {{ user_display_name }},
+
+{{ summary_text }}
+
+{% for room in rooms %}
+{% include 'room.txt' with context %}
+{% endfor %}
+
+You can disable these notifications at {{ unsubscribe_link }}
+
diff --git a/synapse/res/templates/room.html b/synapse/res/templates/room.html
new file mode 100644
index 0000000000..723c222d25
--- /dev/null
+++ b/synapse/res/templates/room.html
@@ -0,0 +1,33 @@
+<table class="room">
+    <tr class="room_header">
+        <td class="room_avatar">
+            {% if room.avatar_url %}
+                <img alt="" src="{{ room.avatar_url|mxc_to_http(48,48) }}" />
+            {% else %}
+                {% if room.hash % 3 == 0 %}
+                    <img alt="" src="https://vector.im/beta/img/76cfa6.png"  />
+                {% elif room.hash % 3 == 1 %}
+                    <img alt="" src="https://vector.im/beta/img/50e2c2.png"  />
+                {% else %}
+                    <img alt="" src="https://vector.im/beta/img/f4c371.png"  />
+                {% endif %}
+            {% endif %}
+        </td>
+        <td class="room_name" colspan="2">
+            {{ room.title }}
+        </td>
+    </tr>
+    {% if room.invite %}
+        <tr>
+            <td></td>
+            <td>
+                <a href="{{ room.link }}">Join the conversation.</a>
+            </td>
+            <td></td>
+        </tr>
+    {% else %}
+        {% for notif in room.notifs %}
+            {% include 'notif.html' with context %}
+        {% endfor %}
+    {% endif %}
+</table>
diff --git a/synapse/res/templates/room.txt b/synapse/res/templates/room.txt
new file mode 100644
index 0000000000..84648c710e
--- /dev/null
+++ b/synapse/res/templates/room.txt
@@ -0,0 +1,9 @@
+{{ room.title }}
+
+{% if room.invite %}
+    You've been invited, join at {{ room.link }}
+{% else %}
+    {% for notif in room.notifs %}
+        {% include 'notif.txt' with context %}
+    {% endfor %}
+{% endif %}
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 3418f06fd6..5f35c2d1be 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -46,6 +46,8 @@ from synapse.rest.client.v2_alpha import (
     receipts,
     register,
     report_event,
+    room_keys,
+    room_upgrade_rest_servlet,
     sendtodevice,
     sync,
     tags,
@@ -102,6 +104,7 @@ class ClientRestResource(JsonResource):
         auth.register_servlets(hs, client_resource)
         receipts.register_servlets(hs, client_resource)
         read_marker.register_servlets(hs, client_resource)
+        room_keys.register_servlets(hs, client_resource)
         keys.register_servlets(hs, client_resource)
         tokenrefresh.register_servlets(hs, client_resource)
         tags.register_servlets(hs, client_resource)
@@ -114,3 +117,4 @@ class ClientRestResource(JsonResource):
         sendtodevice.register_servlets(hs, client_resource)
         user_directory.register_servlets(hs, client_resource)
         groups.register_servlets(hs, client_resource)
+        room_upgrade_rest_servlet.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/v1/directory.py b/synapse/rest/client/v1/directory.py
index 97733f3026..0220acf644 100644
--- a/synapse/rest/client/v1/directory.py
+++ b/synapse/rest/client/v1/directory.py
@@ -74,38 +74,11 @@ class ClientDirectoryServer(ClientV1RestServlet):
         if room is None:
             raise SynapseError(400, "Room does not exist")
 
-        dir_handler = self.handlers.directory_handler
+        requester = yield self.auth.get_user_by_req(request)
 
-        try:
-            # try to auth as a user
-            requester = yield self.auth.get_user_by_req(request)
-            try:
-                user_id = requester.user.to_string()
-                yield dir_handler.create_association(
-                    user_id, room_alias, room_id, servers
-                )
-                yield dir_handler.send_room_alias_update_event(
-                    requester,
-                    user_id,
-                    room_id
-                )
-            except SynapseError as e:
-                raise e
-            except Exception:
-                logger.exception("Failed to create association")
-                raise
-        except AuthError:
-            # try to auth as an application service
-            service = yield self.auth.get_appservice_by_req(request)
-            yield dir_handler.create_appservice_association(
-                service, room_alias, room_id, servers
-            )
-            logger.info(
-                "Application service at %s created alias %s pointing to %s",
-                service.url,
-                room_alias.to_string(),
-                room_id
-            )
+        yield self.handlers.directory_handler.create_association(
+            requester, room_alias, room_id, servers
+        )
 
         defer.returnValue((200, {}))
 
@@ -135,7 +108,7 @@ class ClientDirectoryServer(ClientV1RestServlet):
         room_alias = RoomAlias.from_string(room_alias)
 
         yield dir_handler.delete_association(
-            requester, user.to_string(), room_alias
+            requester, room_alias
         )
 
         logger.info(
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 0010699d31..f6b4a85e40 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -27,7 +27,7 @@ from twisted.web.client import PartialDownloadError
 
 from synapse.api.errors import Codes, LoginError, SynapseError
 from synapse.http.server import finish_request
-from synapse.http.servlet import parse_json_object_from_request
+from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.types import UserID
 from synapse.util.msisdn import phone_number_to_msisdn
 
@@ -83,6 +83,7 @@ class LoginRestServlet(ClientV1RestServlet):
     PATTERNS = client_path_patterns("/login$")
     SAML2_TYPE = "m.login.saml2"
     CAS_TYPE = "m.login.cas"
+    SSO_TYPE = "m.login.sso"
     TOKEN_TYPE = "m.login.token"
     JWT_TYPE = "m.login.jwt"
 
@@ -105,6 +106,10 @@ class LoginRestServlet(ClientV1RestServlet):
         if self.saml2_enabled:
             flows.append({"type": LoginRestServlet.SAML2_TYPE})
         if self.cas_enabled:
+            flows.append({"type": LoginRestServlet.SSO_TYPE})
+
+            # we advertise CAS for backwards compat, though MSC1721 renamed it
+            # to SSO.
             flows.append({"type": LoginRestServlet.CAS_TYPE})
 
             # While its valid for us to advertise this login type generally,
@@ -384,11 +389,11 @@ class SAML2RestServlet(ClientV1RestServlet):
         defer.returnValue((200, {"status": "not_authenticated"}))
 
 
-class CasRedirectServlet(ClientV1RestServlet):
-    PATTERNS = client_path_patterns("/login/cas/redirect", releases=())
+class CasRedirectServlet(RestServlet):
+    PATTERNS = client_path_patterns("/login/(cas|sso)/redirect")
 
     def __init__(self, hs):
-        super(CasRedirectServlet, self).__init__(hs)
+        super(CasRedirectServlet, self).__init__()
         self.cas_server_url = hs.config.cas_server_url.encode('ascii')
         self.cas_service_url = hs.config.cas_service_url.encode('ascii')
 
diff --git a/synapse/rest/client/v1/push_rule.py b/synapse/rest/client/v1/push_rule.py
index 9382b1f124..c654f9b5f0 100644
--- a/synapse/rest/client/v1/push_rule.py
+++ b/synapse/rest/client/v1/push_rule.py
@@ -42,7 +42,7 @@ class PushRuleRestServlet(ClientV1RestServlet):
 
     @defer.inlineCallbacks
     def on_PUT(self, request):
-        spec = _rule_spec_from_path(request.postpath)
+        spec = _rule_spec_from_path([x.decode('utf8') for x in request.postpath])
         try:
             priority_class = _priority_class_from_spec(spec)
         except InvalidRuleException as e:
@@ -103,7 +103,7 @@ class PushRuleRestServlet(ClientV1RestServlet):
 
     @defer.inlineCallbacks
     def on_DELETE(self, request):
-        spec = _rule_spec_from_path(request.postpath)
+        spec = _rule_spec_from_path([x.decode('utf8') for x in request.postpath])
 
         requester = yield self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
@@ -134,7 +134,7 @@ class PushRuleRestServlet(ClientV1RestServlet):
 
         rules = format_push_rules_for_user(requester.user, rules)
 
-        path = request.postpath[1:]
+        path = [x.decode('utf8') for x in request.postpath][1:]
 
         if path == []:
             # we're a reference impl: pedantry is our job.
@@ -142,11 +142,10 @@ class PushRuleRestServlet(ClientV1RestServlet):
                 PushRuleRestServlet.SLIGHTLY_PEDANTIC_TRAILING_SLASH_ERROR
             )
 
-        if path[0] == b'':
+        if path[0] == '':
             defer.returnValue((200, rules))
-        elif path[0] == b'global':
-            path = [x.decode('ascii') for x in path[1:]]
-            result = _filter_ruleset_with_path(rules['global'], path)
+        elif path[0] == 'global':
+            result = _filter_ruleset_with_path(rules['global'], path[1:])
             defer.returnValue((200, result))
         else:
             raise UnrecognizedRequestError()
@@ -190,12 +189,24 @@ class PushRuleRestServlet(ClientV1RestServlet):
 
 
 def _rule_spec_from_path(path):
+    """Turn a sequence of path components into a rule spec
+
+    Args:
+        path (sequence[unicode]): the URL path components.
+
+    Returns:
+        dict: rule spec dict, containing scope/template/rule_id entries,
+            and possibly attr.
+
+    Raises:
+        UnrecognizedRequestError if the path components cannot be parsed.
+    """
     if len(path) < 2:
         raise UnrecognizedRequestError()
-    if path[0] != b'pushrules':
+    if path[0] != 'pushrules':
         raise UnrecognizedRequestError()
 
-    scope = path[1].decode('ascii')
+    scope = path[1]
     path = path[2:]
     if scope != 'global':
         raise UnrecognizedRequestError()
@@ -203,13 +214,13 @@ def _rule_spec_from_path(path):
     if len(path) == 0:
         raise UnrecognizedRequestError()
 
-    template = path[0].decode('ascii')
+    template = path[0]
     path = path[1:]
 
     if len(path) == 0 or len(path[0]) == 0:
         raise UnrecognizedRequestError()
 
-    rule_id = path[0].decode('ascii')
+    rule_id = path[0]
 
     spec = {
         'scope': scope,
@@ -220,7 +231,7 @@ def _rule_spec_from_path(path):
     path = path[1:]
 
     if len(path) > 0 and len(path[0]) > 0:
-        spec['attr'] = path[0].decode('ascii')
+        spec['attr'] = path[0]
 
     return spec
 
diff --git a/synapse/rest/client/v1/pusher.py b/synapse/rest/client/v1/pusher.py
index b84f0260f2..4c07ae7f45 100644
--- a/synapse/rest/client/v1/pusher.py
+++ b/synapse/rest/client/v1/pusher.py
@@ -142,7 +142,7 @@ class PushersRemoveRestServlet(RestServlet):
     To allow pusher to be delete by clicking a link (ie. GET request)
     """
     PATTERNS = client_path_patterns("/pushers/remove$")
-    SUCCESS_HTML = "<html><body>You have been unsubscribed</body><html>"
+    SUCCESS_HTML = b"<html><body>You have been unsubscribed</body><html>"
 
     def __init__(self, hs):
         super(PushersRemoveRestServlet, self).__init__()
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 663934efd0..fcfe7857f6 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -33,6 +33,7 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import RoomAlias, RoomID, StreamToken, ThirdPartyInstanceID, UserID
 
@@ -409,7 +410,7 @@ class RoomMemberListRestServlet(ClientV1RestServlet):
             room_id=room_id,
             user_id=requester.user.to_string(),
             at_token=at_token,
-            types=[(EventTypes.Member, None)],
+            state_filter=StateFilter.from_types([(EventTypes.Member, None)]),
         )
 
         chunk = []
diff --git a/synapse/rest/client/v2_alpha/auth.py b/synapse/rest/client/v2_alpha/auth.py
index bd8b5f4afa..fa73bdf3a1 100644
--- a/synapse/rest/client/v2_alpha/auth.py
+++ b/synapse/rest/client/v2_alpha/auth.py
@@ -21,7 +21,7 @@ from synapse.api.constants import LoginType
 from synapse.api.errors import SynapseError
 from synapse.api.urls import CLIENT_V2_ALPHA_PREFIX
 from synapse.http.server import finish_request
-from synapse.http.servlet import RestServlet
+from synapse.http.servlet import RestServlet, parse_string
 
 from ._base import client_v2_patterns
 
@@ -68,6 +68,29 @@ function captchaDone() {
 </html>
 """
 
+TERMS_TEMPLATE = """
+<html>
+<head>
+<title>Authentication</title>
+<meta name='viewport' content='width=device-width, initial-scale=1,
+    user-scalable=no, minimum-scale=1.0, maximum-scale=1.0'>
+<link rel="stylesheet" href="/_matrix/static/client/register/style.css">
+</head>
+<body>
+<form id="registrationForm" method="post" action="%(myurl)s">
+    <div>
+        <p>
+            Please click the button below if you agree to the
+            <a href="%(terms_url)s">privacy policy of this homeserver.</a>
+        </p>
+        <input type="hidden" name="session" value="%(session)s" />
+        <input type="submit" value="Agree" />
+    </div>
+</form>
+</body>
+</html>
+"""
+
 SUCCESS_TEMPLATE = """
 <html>
 <head>
@@ -99,7 +122,7 @@ class AuthRestServlet(RestServlet):
     cannot be handled in the normal flow (with requests to the same endpoint).
     Current use is for web fallback auth.
     """
-    PATTERNS = client_v2_patterns("/auth/(?P<stagetype>[\w\.]*)/fallback/web")
+    PATTERNS = client_v2_patterns(r"/auth/(?P<stagetype>[\w\.]*)/fallback/web")
 
     def __init__(self, hs):
         super(AuthRestServlet, self).__init__()
@@ -108,16 +131,12 @@ class AuthRestServlet(RestServlet):
         self.auth_handler = hs.get_auth_handler()
         self.registration_handler = hs.get_handlers().registration_handler
 
-    @defer.inlineCallbacks
     def on_GET(self, request, stagetype):
-        yield
-        if stagetype == LoginType.RECAPTCHA:
-            if ('session' not in request.args or
-                    len(request.args['session']) == 0):
-                raise SynapseError(400, "No session supplied")
-
-            session = request.args["session"][0]
+        session = parse_string(request, "session")
+        if not session:
+            raise SynapseError(400, "No session supplied")
 
+        if stagetype == LoginType.RECAPTCHA:
             html = RECAPTCHA_TEMPLATE % {
                 'session': session,
                 'myurl': "%s/auth/%s/fallback/web" % (
@@ -132,25 +151,44 @@ class AuthRestServlet(RestServlet):
 
             request.write(html_bytes)
             finish_request(request)
-            defer.returnValue(None)
+            return None
+        elif stagetype == LoginType.TERMS:
+            html = TERMS_TEMPLATE % {
+                'session': session,
+                'terms_url': "%s_matrix/consent?v=%s" % (
+                    self.hs.config.public_baseurl,
+                    self.hs.config.user_consent_version,
+                ),
+                'myurl': "%s/auth/%s/fallback/web" % (
+                    CLIENT_V2_ALPHA_PREFIX, LoginType.TERMS
+                ),
+            }
+            html_bytes = html.encode("utf8")
+            request.setResponseCode(200)
+            request.setHeader(b"Content-Type", b"text/html; charset=utf-8")
+            request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),))
+
+            request.write(html_bytes)
+            finish_request(request)
+            return None
         else:
             raise SynapseError(404, "Unknown auth stage type")
 
     @defer.inlineCallbacks
     def on_POST(self, request, stagetype):
-        yield
-        if stagetype == "m.login.recaptcha":
-            if ('g-recaptcha-response' not in request.args or
-                    len(request.args['g-recaptcha-response'])) == 0:
-                raise SynapseError(400, "No captcha response supplied")
-            if ('session' not in request.args or
-                    len(request.args['session'])) == 0:
-                raise SynapseError(400, "No session supplied")
 
-            session = request.args['session'][0]
+        session = parse_string(request, "session")
+        if not session:
+            raise SynapseError(400, "No session supplied")
+
+        if stagetype == LoginType.RECAPTCHA:
+            response = parse_string(request, "g-recaptcha-response")
+
+            if not response:
+                raise SynapseError(400, "No captcha response supplied")
 
             authdict = {
-                'response': request.args['g-recaptcha-response'][0],
+                'response': response,
                 'session': session,
             }
 
@@ -179,6 +217,41 @@ class AuthRestServlet(RestServlet):
             finish_request(request)
 
             defer.returnValue(None)
+        elif stagetype == LoginType.TERMS:
+            if ('session' not in request.args or
+                    len(request.args['session'])) == 0:
+                raise SynapseError(400, "No session supplied")
+
+            session = request.args['session'][0]
+            authdict = {'session': session}
+
+            success = yield self.auth_handler.add_oob_auth(
+                LoginType.TERMS,
+                authdict,
+                self.hs.get_ip_from_request(request)
+            )
+
+            if success:
+                html = SUCCESS_TEMPLATE
+            else:
+                html = TERMS_TEMPLATE % {
+                    'session': session,
+                    'terms_url': "%s_matrix/consent?v=%s" % (
+                        self.hs.config.public_baseurl,
+                        self.hs.config.user_consent_version,
+                    ),
+                    'myurl': "%s/auth/%s/fallback/web" % (
+                        CLIENT_V2_ALPHA_PREFIX, LoginType.TERMS
+                    ),
+                }
+            html_bytes = html.encode("utf8")
+            request.setResponseCode(200)
+            request.setHeader(b"Content-Type", b"text/html; charset=utf-8")
+            request.setHeader(b"Content-Length", b"%d" % (len(html_bytes),))
+
+            request.write(html_bytes)
+            finish_request(request)
+            defer.returnValue(None)
         else:
             raise SynapseError(404, "Unknown auth stage type")
 
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 192f52e462..aec0c6b075 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -359,6 +359,13 @@ class RegisterRestServlet(RestServlet):
                     [LoginType.MSISDN, LoginType.EMAIL_IDENTITY]
                 ])
 
+        # Append m.login.terms to all flows if we're requiring consent
+        if self.hs.config.user_consent_at_registration:
+            new_flows = []
+            for flow in flows:
+                flow.append(LoginType.TERMS)
+            flows.extend(new_flows)
+
         auth_result, params, session_id = yield self.auth_handler.check_auth(
             flows, body, self.hs.get_ip_from_request(request)
         )
@@ -445,6 +452,13 @@ class RegisterRestServlet(RestServlet):
                 params.get("bind_msisdn")
             )
 
+        if auth_result and LoginType.TERMS in auth_result:
+            logger.info("%s has consented to the privacy policy" % registered_user_id)
+            yield self.store.user_set_consent_version(
+                registered_user_id, self.hs.config.user_consent_version,
+            )
+            yield self.registration_handler.post_consent_actions(registered_user_id)
+
         defer.returnValue((200, return_dict))
 
     def on_OPTIONS(self, _):
diff --git a/synapse/rest/client/v2_alpha/room_keys.py b/synapse/rest/client/v2_alpha/room_keys.py
new file mode 100644
index 0000000000..ab3f1bd21a
--- /dev/null
+++ b/synapse/rest/client/v2_alpha/room_keys.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017, 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.errors import Codes, NotFoundError, SynapseError
+from synapse.http.servlet import (
+    RestServlet,
+    parse_json_object_from_request,
+    parse_string,
+)
+
+from ._base import client_v2_patterns
+
+logger = logging.getLogger(__name__)
+
+
+class RoomKeysServlet(RestServlet):
+    PATTERNS = client_v2_patterns(
+        "/room_keys/keys(/(?P<room_id>[^/]+))?(/(?P<session_id>[^/]+))?$"
+    )
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super(RoomKeysServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler()
+
+    @defer.inlineCallbacks
+    def on_PUT(self, request, room_id, session_id):
+        """
+        Uploads one or more encrypted E2E room keys for backup purposes.
+        room_id: the ID of the room the keys are for (optional)
+        session_id: the ID for the E2E room keys for the room (optional)
+        version: the version of the user's backup which this data is for.
+        the version must already have been created via the /room_keys/version API.
+
+        Each session has:
+         * first_message_index: a numeric index indicating the oldest message
+           encrypted by this session.
+         * forwarded_count: how many times the uploading client claims this key
+           has been shared (forwarded)
+         * is_verified: whether the client that uploaded the keys claims they
+           were sent by a device which they've verified
+         * session_data: base64-encrypted data describing the session.
+
+        Returns 200 OK on success with body {}
+        Returns 403 Forbidden if the version in question is not the most recently
+        created version (i.e. if this is an old client trying to write to a stale backup)
+        Returns 404 Not Found if the version in question doesn't exist
+
+        The API is designed to be otherwise agnostic to the room_key encryption
+        algorithm being used.  Sessions are merged with existing ones in the
+        backup using the heuristics:
+         * is_verified sessions always win over unverified sessions
+         * older first_message_index always win over newer sessions
+         * lower forwarded_count always wins over higher forwarded_count
+
+        We trust the clients not to lie and corrupt their own backups.
+        It also means that if your access_token is stolen, the attacker could
+        delete your backup.
+
+        POST /room_keys/keys/!abc:matrix.org/c0ff33?version=1 HTTP/1.1
+        Content-Type: application/json
+
+        {
+            "first_message_index": 1,
+            "forwarded_count": 1,
+            "is_verified": false,
+            "session_data": "SSBBTSBBIEZJU0gK"
+        }
+
+        Or...
+
+        POST /room_keys/keys/!abc:matrix.org?version=1 HTTP/1.1
+        Content-Type: application/json
+
+        {
+            "sessions": {
+                "c0ff33": {
+                    "first_message_index": 1,
+                    "forwarded_count": 1,
+                    "is_verified": false,
+                    "session_data": "SSBBTSBBIEZJU0gK"
+                }
+            }
+        }
+
+        Or...
+
+        POST /room_keys/keys?version=1 HTTP/1.1
+        Content-Type: application/json
+
+        {
+            "rooms": {
+                "!abc:matrix.org": {
+                    "sessions": {
+                        "c0ff33": {
+                            "first_message_index": 1,
+                            "forwarded_count": 1,
+                            "is_verified": false,
+                            "session_data": "SSBBTSBBIEZJU0gK"
+                        }
+                    }
+                }
+            }
+        }
+        """
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+        body = parse_json_object_from_request(request)
+        version = parse_string(request, "version")
+
+        if session_id:
+            body = {
+                "sessions": {
+                    session_id: body
+                }
+            }
+
+        if room_id:
+            body = {
+                "rooms": {
+                    room_id: body
+                }
+            }
+
+        yield self.e2e_room_keys_handler.upload_room_keys(
+            user_id, version, body
+        )
+        defer.returnValue((200, {}))
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, room_id, session_id):
+        """
+        Retrieves one or more encrypted E2E room keys for backup purposes.
+        Symmetric with the PUT version of the API.
+
+        room_id: the ID of the room to retrieve the keys for (optional)
+        session_id: the ID for the E2E room keys to retrieve the keys for (optional)
+        version: the version of the user's backup which this data is for.
+        the version must already have been created via the /change_secret API.
+
+        Returns as follows:
+
+        GET /room_keys/keys/!abc:matrix.org/c0ff33?version=1 HTTP/1.1
+        {
+            "first_message_index": 1,
+            "forwarded_count": 1,
+            "is_verified": false,
+            "session_data": "SSBBTSBBIEZJU0gK"
+        }
+
+        Or...
+
+        GET /room_keys/keys/!abc:matrix.org?version=1 HTTP/1.1
+        {
+            "sessions": {
+                "c0ff33": {
+                    "first_message_index": 1,
+                    "forwarded_count": 1,
+                    "is_verified": false,
+                    "session_data": "SSBBTSBBIEZJU0gK"
+                }
+            }
+        }
+
+        Or...
+
+        GET /room_keys/keys?version=1 HTTP/1.1
+        {
+            "rooms": {
+                "!abc:matrix.org": {
+                    "sessions": {
+                        "c0ff33": {
+                            "first_message_index": 1,
+                            "forwarded_count": 1,
+                            "is_verified": false,
+                            "session_data": "SSBBTSBBIEZJU0gK"
+                        }
+                    }
+                }
+            }
+        }
+        """
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+        version = parse_string(request, "version")
+
+        room_keys = yield self.e2e_room_keys_handler.get_room_keys(
+            user_id, version, room_id, session_id
+        )
+
+        # Convert room_keys to the right format to return.
+        if session_id:
+            # If the client requests a specific session, but that session was
+            # not backed up, then return an M_NOT_FOUND.
+            if room_keys['rooms'] == {}:
+                raise NotFoundError("No room_keys found")
+            else:
+                room_keys = room_keys['rooms'][room_id]['sessions'][session_id]
+        elif room_id:
+            # If the client requests all sessions from a room, but no sessions
+            # are found, then return an empty result rather than an error, so
+            # that clients don't have to handle an error condition, and an
+            # empty result is valid.  (Similarly if the client requests all
+            # sessions from the backup, but in that case, room_keys is already
+            # in the right format, so we don't need to do anything about it.)
+            if room_keys['rooms'] == {}:
+                room_keys = {'sessions': {}}
+            else:
+                room_keys = room_keys['rooms'][room_id]
+
+        defer.returnValue((200, room_keys))
+
+    @defer.inlineCallbacks
+    def on_DELETE(self, request, room_id, session_id):
+        """
+        Deletes one or more encrypted E2E room keys for a user for backup purposes.
+
+        DELETE /room_keys/keys/!abc:matrix.org/c0ff33?version=1
+        HTTP/1.1 200 OK
+        {}
+
+        room_id: the ID of the room whose keys to delete (optional)
+        session_id: the ID for the E2E session to delete (optional)
+        version: the version of the user's backup which this data is for.
+        the version must already have been created via the /change_secret API.
+        """
+
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+        version = parse_string(request, "version")
+
+        yield self.e2e_room_keys_handler.delete_room_keys(
+            user_id, version, room_id, session_id
+        )
+        defer.returnValue((200, {}))
+
+
+class RoomKeysNewVersionServlet(RestServlet):
+    PATTERNS = client_v2_patterns(
+        "/room_keys/version$"
+    )
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super(RoomKeysNewVersionServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler()
+
+    @defer.inlineCallbacks
+    def on_POST(self, request):
+        """
+        Create a new backup version for this user's room_keys with the given
+        info.  The version is allocated by the server and returned to the user
+        in the response.  This API is intended to be used whenever the user
+        changes the encryption key for their backups, ensuring that backups
+        encrypted with different keys don't collide.
+
+        It takes out an exclusive lock on this user's room_key backups, to ensure
+        clients only upload to the current backup.
+
+        The algorithm passed in the version info is a reverse-DNS namespaced
+        identifier to describe the format of the encrypted backupped keys.
+
+        The auth_data is { user_id: "user_id", nonce: <random string> }
+        encrypted using the algorithm and current encryption key described above.
+
+        POST /room_keys/version
+        Content-Type: application/json
+        {
+            "algorithm": "m.megolm_backup.v1",
+            "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K"
+        }
+
+        HTTP/1.1 200 OK
+        Content-Type: application/json
+        {
+            "version": 12345
+        }
+        """
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+        info = parse_json_object_from_request(request)
+
+        new_version = yield self.e2e_room_keys_handler.create_version(
+            user_id, info
+        )
+        defer.returnValue((200, {"version": new_version}))
+
+    # we deliberately don't have a PUT /version, as these things really should
+    # be immutable to avoid people footgunning
+
+
+class RoomKeysVersionServlet(RestServlet):
+    PATTERNS = client_v2_patterns(
+        "/room_keys/version(/(?P<version>[^/]+))?$"
+    )
+
+    def __init__(self, hs):
+        """
+        Args:
+            hs (synapse.server.HomeServer): server
+        """
+        super(RoomKeysVersionServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler()
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, version):
+        """
+        Retrieve the version information about a given version of the user's
+        room_keys backup.  If the version part is missing, returns info about the
+        most current backup version (if any)
+
+        It takes out an exclusive lock on this user's room_key backups, to ensure
+        clients only upload to the current backup.
+
+        Returns 404 if the given version does not exist.
+
+        GET /room_keys/version/12345 HTTP/1.1
+        {
+            "version": "12345",
+            "algorithm": "m.megolm_backup.v1",
+            "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K"
+        }
+        """
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+
+        try:
+            info = yield self.e2e_room_keys_handler.get_version_info(
+                user_id, version
+            )
+        except SynapseError as e:
+            if e.code == 404:
+                raise SynapseError(404, "No backup found", Codes.NOT_FOUND)
+        defer.returnValue((200, info))
+
+    @defer.inlineCallbacks
+    def on_DELETE(self, request, version):
+        """
+        Delete the information about a given version of the user's
+        room_keys backup.  If the version part is missing, deletes the most
+        current backup version (if any). Doesn't delete the actual room data.
+
+        DELETE /room_keys/version/12345 HTTP/1.1
+        HTTP/1.1 200 OK
+        {}
+        """
+        if version is None:
+            raise SynapseError(400, "No version specified to delete", Codes.NOT_FOUND)
+
+        requester = yield self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+
+        yield self.e2e_room_keys_handler.delete_version(
+            user_id, version
+        )
+        defer.returnValue((200, {}))
+
+
+def register_servlets(hs, http_server):
+    RoomKeysServlet(hs).register(http_server)
+    RoomKeysVersionServlet(hs).register(http_server)
+    RoomKeysNewVersionServlet(hs).register(http_server)
diff --git a/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py b/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py
new file mode 100644
index 0000000000..e6356101fd
--- /dev/null
+++ b/synapse/rest/client/v2_alpha/room_upgrade_rest_servlet.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import KNOWN_ROOM_VERSIONS
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.servlet import (
+    RestServlet,
+    assert_params_in_dict,
+    parse_json_object_from_request,
+)
+
+from ._base import client_v2_patterns
+
+logger = logging.getLogger(__name__)
+
+
+class RoomUpgradeRestServlet(RestServlet):
+    """Handler for room uprade requests.
+
+    Handles requests of the form:
+
+        POST /_matrix/client/r0/rooms/$roomid/upgrade HTTP/1.1
+        Content-Type: application/json
+
+        {
+            "new_version": "2",
+        }
+
+    Creates a new room and shuts down the old one. Returns the ID of the new room.
+
+    Args:
+        hs (synapse.server.HomeServer):
+    """
+    PATTERNS = client_v2_patterns(
+        # /rooms/$roomid/upgrade
+        "/rooms/(?P<room_id>[^/]*)/upgrade$",
+        v2_alpha=False,
+    )
+
+    def __init__(self, hs):
+        super(RoomUpgradeRestServlet, self).__init__()
+        self._hs = hs
+        self._room_creation_handler = hs.get_room_creation_handler()
+        self._auth = hs.get_auth()
+
+    @defer.inlineCallbacks
+    def on_POST(self, request, room_id):
+        requester = yield self._auth.get_user_by_req(request)
+
+        content = parse_json_object_from_request(request)
+        assert_params_in_dict(content, ("new_version", ))
+        new_version = content["new_version"]
+
+        if new_version not in KNOWN_ROOM_VERSIONS:
+            raise SynapseError(
+                400,
+                "Your homeserver does not support this room version",
+                Codes.UNSUPPORTED_ROOM_VERSION,
+            )
+
+        new_room_id = yield self._room_creation_handler.upgrade_room(
+            requester, room_id, new_version
+        )
+
+        ret = {
+            "replacement_room": new_room_id,
+        }
+
+        defer.returnValue((200, ret))
+
+
+def register_servlets(hs, http_server):
+    RoomUpgradeRestServlet(hs).register(http_server)
diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py
index 7362e1858d..80611cfe84 100644
--- a/synapse/rest/consent/consent_resource.py
+++ b/synapse/rest/consent/consent_resource.py
@@ -89,6 +89,7 @@ class ConsentResource(Resource):
 
         self.hs = hs
         self.store = hs.get_datastore()
+        self.registration_handler = hs.get_handlers().registration_handler
 
         # this is required by the request_handler wrapper
         self.clock = hs.get_clock()
@@ -137,27 +138,36 @@ class ConsentResource(Resource):
             request (twisted.web.http.Request):
         """
 
-        version = parse_string(request, "v",
-                               default=self._default_consent_version)
-        username = parse_string(request, "u", required=True)
-        userhmac = parse_string(request, "h", required=True, encoding=None)
+        version = parse_string(request, "v", default=self._default_consent_version)
+        username = parse_string(request, "u", required=False, default="")
+        userhmac = None
+        has_consented = False
+        public_version = username == ""
+        if not public_version:
+            userhmac_bytes = parse_string(request, "h", required=True, encoding=None)
 
-        self._check_hash(username, userhmac)
+            self._check_hash(username, userhmac_bytes)
 
-        if username.startswith('@'):
-            qualified_user_id = username
-        else:
-            qualified_user_id = UserID(username, self.hs.hostname).to_string()
+            if username.startswith('@'):
+                qualified_user_id = username
+            else:
+                qualified_user_id = UserID(username, self.hs.hostname).to_string()
 
-        u = yield self.store.get_user_by_id(qualified_user_id)
-        if u is None:
-            raise NotFoundError("Unknown user")
+            u = yield self.store.get_user_by_id(qualified_user_id)
+            if u is None:
+                raise NotFoundError("Unknown user")
+
+            has_consented = u["consent_version"] == version
+            userhmac = userhmac_bytes.decode("ascii")
 
         try:
             self._render_template(
                 request, "%s.html" % (version,),
-                user=username, userhmac=userhmac, version=version,
-                has_consented=(u["consent_version"] == version),
+                user=username,
+                userhmac=userhmac,
+                version=version,
+                has_consented=has_consented,
+                public_version=public_version,
             )
         except TemplateNotFound:
             raise NotFoundError("Unknown policy version")
@@ -190,6 +200,7 @@ class ConsentResource(Resource):
             if e.code != 404:
                 raise
             raise NotFoundError("Unknown user")
+        yield self.registration_handler.post_consent_actions(qualified_user_id)
 
         try:
             self._render_template(request, "success.html")
@@ -223,7 +234,7 @@ class ConsentResource(Resource):
             key=self._hmac_secret,
             msg=userid.encode('utf-8'),
             digestmod=sha256,
-        ).hexdigest()
+        ).hexdigest().encode('ascii')
 
         if not compare_digest(want_mac, userhmac):
             raise SynapseError(http_client.FORBIDDEN, "HMAC incorrect")
diff --git a/synapse/rest/key/v1/__init__.py b/synapse/rest/key/v1/__init__.py
deleted file mode 100644
index fe0ac3f8e9..0000000000
--- a/synapse/rest/key/v1/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/synapse/rest/key/v1/server_key_resource.py b/synapse/rest/key/v1/server_key_resource.py
deleted file mode 100644
index 38eb2ee23f..0000000000
--- a/synapse/rest/key/v1/server_key_resource.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2014-2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import logging
-
-from canonicaljson import encode_canonical_json
-from signedjson.sign import sign_json
-from unpaddedbase64 import encode_base64
-
-from OpenSSL import crypto
-from twisted.web.resource import Resource
-
-from synapse.http.server import respond_with_json_bytes
-
-logger = logging.getLogger(__name__)
-
-
-class LocalKey(Resource):
-    """HTTP resource containing encoding the TLS X.509 certificate and NACL
-    signature verification keys for this server::
-
-        GET /key HTTP/1.1
-
-        HTTP/1.1 200 OK
-        Content-Type: application/json
-        {
-            "server_name": "this.server.example.com"
-            "verify_keys": {
-                "algorithm:version": # base64 encoded NACL verification key.
-            },
-            "tls_certificate": # base64 ASN.1 DER encoded X.509 tls cert.
-            "signatures": {
-                "this.server.example.com": {
-                   "algorithm:version": # NACL signature for this server.
-                }
-            }
-        }
-    """
-
-    def __init__(self, hs):
-        self.response_body = encode_canonical_json(
-            self.response_json_object(hs.config)
-        )
-        Resource.__init__(self)
-
-    @staticmethod
-    def response_json_object(server_config):
-        verify_keys = {}
-        for key in server_config.signing_key:
-            verify_key_bytes = key.verify_key.encode()
-            key_id = "%s:%s" % (key.alg, key.version)
-            verify_keys[key_id] = encode_base64(verify_key_bytes)
-
-        x509_certificate_bytes = crypto.dump_certificate(
-            crypto.FILETYPE_ASN1,
-            server_config.tls_certificate
-        )
-        json_object = {
-            u"server_name": server_config.server_name,
-            u"verify_keys": verify_keys,
-            u"tls_certificate": encode_base64(x509_certificate_bytes)
-        }
-        for key in server_config.signing_key:
-            json_object = sign_json(
-                json_object,
-                server_config.server_name,
-                key,
-            )
-
-        return json_object
-
-    def render_GET(self, request):
-        return respond_with_json_bytes(
-            request, 200, self.response_body,
-        )
-
-    def getChild(self, name, request):
-        if name == b'':
-            return self
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 76e479afa3..efe42a429d 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -16,6 +16,7 @@
 import logging
 import os
 
+from six import PY3
 from six.moves import urllib
 
 from twisted.internet import defer
@@ -48,26 +49,21 @@ def parse_media_id(request):
         return server_name, media_id, file_name
     except Exception:
         raise SynapseError(
-            404,
-            "Invalid media id token %r" % (request.postpath,),
-            Codes.UNKNOWN,
+            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
         )
 
 
 def respond_404(request):
     respond_with_json(
-        request, 404,
-        cs_error(
-            "Not found %r" % (request.postpath,),
-            code=Codes.NOT_FOUND,
-        ),
-        send_cors=True
+        request,
+        404,
+        cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
+        send_cors=True,
     )
 
 
 @defer.inlineCallbacks
-def respond_with_file(request, media_type, file_path,
-                      file_size=None, upload_name=None):
+def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None):
     logger.debug("Responding with %r", file_path)
 
     if os.path.isfile(file_path):
@@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name):
         file_size (int): Size in bytes of the media, if known.
         upload_name (str): The name of the requested file, if any.
     """
+
     def _quote(x):
         return urllib.parse.quote(x.encode("utf-8"))
 
     request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
     if upload_name:
         if is_ascii(upload_name):
-            disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
+            disposition = "inline; filename=%s" % (_quote(upload_name),)
         else:
-            disposition = (
-                "inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
+            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
 
-        request.setHeader(b"Content-Disposition", disposition)
+        request.setHeader(b"Content-Disposition", disposition.encode('ascii'))
 
     # cache for at least a day.
     # XXX: we might want to turn this off for data we don't want to
     # recommend caching as it's sensitive or private - or at least
     # select private. don't bother setting Expires as all our
     # clients are smart enough to be happy with Cache-Control
-    request.setHeader(
-        b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
-    )
-
-    request.setHeader(
-        b"Content-Length", b"%d" % (file_size,)
-    )
+    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
+    request.setHeader(b"Content-Length", b"%d" % (file_size,))
 
 
 @defer.inlineCallbacks
@@ -153,6 +144,7 @@ class Responder(object):
     Responder is a context manager which *must* be used, so that any resources
     held can be cleaned up.
     """
+
     def write_to_consumer(self, consumer):
         """Stream response into consumer
 
@@ -186,9 +178,18 @@ class FileInfo(object):
         thumbnail_method (str)
         thumbnail_type (str): Content type of thumbnail, e.g. image/png
     """
-    def __init__(self, server_name, file_id, url_cache=False,
-                 thumbnail=False, thumbnail_width=None, thumbnail_height=None,
-                 thumbnail_method=None, thumbnail_type=None):
+
+    def __init__(
+        self,
+        server_name,
+        file_id,
+        url_cache=False,
+        thumbnail=False,
+        thumbnail_width=None,
+        thumbnail_height=None,
+        thumbnail_method=None,
+        thumbnail_type=None,
+    ):
         self.server_name = server_name
         self.file_id = file_id
         self.url_cache = url_cache
@@ -197,3 +198,74 @@ class FileInfo(object):
         self.thumbnail_height = thumbnail_height
         self.thumbnail_method = thumbnail_method
         self.thumbnail_type = thumbnail_type
+
+
+def get_filename_from_headers(headers):
+    """
+    Get the filename of the downloaded file by inspecting the
+    Content-Disposition HTTP header.
+
+    Args:
+        headers (twisted.web.http_headers.Headers): The HTTP
+            request headers.
+
+    Returns:
+        A Unicode string of the filename, or None.
+    """
+    content_disposition = headers.get(b"Content-Disposition", [b''])
+
+    # No header, bail out.
+    if not content_disposition[0]:
+        return
+
+    # dict of unicode: bytes, corresponding to the key value sections of the
+    # Content-Disposition header.
+    params = {}
+    parts = content_disposition[0].split(b";")
+    for i in parts:
+        # Split into key-value pairs, if able
+        # We don't care about things like `inline`, so throw it out
+        if b"=" not in i:
+            continue
+
+        key, value = i.strip().split(b"=")
+        params[key.decode('ascii')] = value
+
+    upload_name = None
+
+    # First check if there is a valid UTF-8 filename
+    upload_name_utf8 = params.get("filename*", None)
+    if upload_name_utf8:
+        if upload_name_utf8.lower().startswith(b"utf-8''"):
+            upload_name_utf8 = upload_name_utf8[7:]
+            # We have a filename*= section. This MUST be ASCII, and any UTF-8
+            # bytes are %-quoted.
+            if PY3:
+                try:
+                    # Once it is decoded, we can then unquote the %-encoded
+                    # parts strictly into a unicode string.
+                    upload_name = urllib.parse.unquote(
+                        upload_name_utf8.decode('ascii'), errors="strict"
+                    )
+                except UnicodeDecodeError:
+                    # Incorrect UTF-8.
+                    pass
+            else:
+                # On Python 2, we first unquote the %-encoded parts and then
+                # decode it strictly using UTF-8.
+                try:
+                    upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
+                except UnicodeDecodeError:
+                    pass
+
+    # If there isn't check for an ascii name.
+    if not upload_name:
+        upload_name_ascii = params.get("filename", None)
+        if upload_name_ascii and is_ascii(upload_name_ascii):
+            # Make sure there's no %-quoted bytes. If there is, reject it as
+            # non-valid ASCII.
+            if b"%" not in upload_name_ascii:
+                upload_name = upload_name_ascii.decode('ascii')
+
+    # This may be None here, indicating we did not find a matching name.
+    return upload_name
diff --git a/synapse/rest/media/v1/identicon_resource.py b/synapse/rest/media/v1/identicon_resource.py
deleted file mode 100644
index bdbd8d50dd..0000000000
--- a/synapse/rest/media/v1/identicon_resource.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from pydenticon import Generator
-
-from twisted.web.resource import Resource
-
-from synapse.http.servlet import parse_integer
-
-FOREGROUND = [
-    "rgb(45,79,255)",
-    "rgb(254,180,44)",
-    "rgb(226,121,234)",
-    "rgb(30,179,253)",
-    "rgb(232,77,65)",
-    "rgb(49,203,115)",
-    "rgb(141,69,170)"
-]
-
-BACKGROUND = "rgb(224,224,224)"
-SIZE = 5
-
-
-class IdenticonResource(Resource):
-    isLeaf = True
-
-    def __init__(self):
-        Resource.__init__(self)
-        self.generator = Generator(
-            SIZE, SIZE, foreground=FOREGROUND, background=BACKGROUND,
-        )
-
-    def generate_identicon(self, name, width, height):
-        v_padding = width % SIZE
-        h_padding = height % SIZE
-        top_padding = v_padding // 2
-        left_padding = h_padding // 2
-        bottom_padding = v_padding - top_padding
-        right_padding = h_padding - left_padding
-        width -= v_padding
-        height -= h_padding
-        padding = (top_padding, bottom_padding, left_padding, right_padding)
-        identicon = self.generator.generate(
-            name, width, height, padding=padding
-        )
-        return identicon
-
-    def render_GET(self, request):
-        name = "/".join(request.postpath)
-        width = parse_integer(request, "width", default=96)
-        height = parse_integer(request, "height", default=96)
-        identicon_bytes = self.generate_identicon(name, width, height)
-        request.setHeader(b"Content-Type", b"image/png")
-        request.setHeader(
-            b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
-        )
-        return identicon_bytes
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index a828ff4438..e117836e9a 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -14,18 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cgi
 import errno
 import logging
 import os
 import shutil
 
-from six import PY3, iteritems
-from six.moves.urllib import parse as urlparse
+from six import iteritems
 
 import twisted.internet.error
 import twisted.web.http
-from twisted.internet import defer, threads
+from twisted.internet import defer
 from twisted.web.resource import Resource
 
 from synapse.api.errors import (
@@ -34,18 +32,21 @@ from synapse.api.errors import (
     NotFoundError,
     SynapseError,
 )
-from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.util import logcontext
 from synapse.util.async_helpers import Linearizer
-from synapse.util.logcontext import make_deferred_yieldable
 from synapse.util.retryutils import NotRetryingDestination
-from synapse.util.stringutils import is_ascii, random_string
+from synapse.util.stringutils import random_string
 
-from ._base import FileInfo, respond_404, respond_with_responder
+from ._base import (
+    FileInfo,
+    get_filename_from_headers,
+    respond_404,
+    respond_with_responder,
+)
 from .config_resource import MediaConfigResource
 from .download_resource import DownloadResource
 from .filepath import MediaFilePaths
-from .identicon_resource import IdenticonResource
 from .media_storage import MediaStorage
 from .preview_url_resource import PreviewUrlResource
 from .storage_provider import StorageProviderWrapper
@@ -63,7 +64,7 @@ class MediaRepository(object):
     def __init__(self, hs):
         self.hs = hs
         self.auth = hs.get_auth()
-        self.client = MatrixFederationHttpClient(hs)
+        self.client = hs.get_http_client()
         self.clock = hs.get_clock()
         self.server_name = hs.hostname
         self.store = hs.get_datastore()
@@ -398,39 +399,9 @@ class MediaRepository(object):
             yield finish()
 
         media_type = headers[b"Content-Type"][0].decode('ascii')
-
+        upload_name = get_filename_from_headers(headers)
         time_now_ms = self.clock.time_msec()
 
-        content_disposition = headers.get(b"Content-Disposition", None)
-        if content_disposition:
-            _, params = cgi.parse_header(content_disposition[0].decode('ascii'),)
-            upload_name = None
-
-            # First check if there is a valid UTF-8 filename
-            upload_name_utf8 = params.get("filename*", None)
-            if upload_name_utf8:
-                if upload_name_utf8.lower().startswith("utf-8''"):
-                    upload_name = upload_name_utf8[7:]
-
-            # If there isn't check for an ascii name.
-            if not upload_name:
-                upload_name_ascii = params.get("filename", None)
-                if upload_name_ascii and is_ascii(upload_name_ascii):
-                    upload_name = upload_name_ascii
-
-            if upload_name:
-                if PY3:
-                    upload_name = urlparse.unquote(upload_name)
-                else:
-                    upload_name = urlparse.unquote(upload_name.encode('ascii'))
-                try:
-                    if isinstance(upload_name, bytes):
-                        upload_name = upload_name.decode("utf-8")
-                except UnicodeDecodeError:
-                    upload_name = None
-        else:
-            upload_name = None
-
         logger.info("Stored remote media in file %r", fname)
 
         yield self.store.store_cached_remote_media(
@@ -492,10 +463,11 @@ class MediaRepository(object):
         ))
 
         thumbnailer = Thumbnailer(input_path)
-        t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+        t_byte_source = yield logcontext.defer_to_thread(
+            self.hs.get_reactor(),
             self._generate_thumbnail,
             thumbnailer, t_width, t_height, t_method, t_type
-        ))
+        )
 
         if t_byte_source:
             try:
@@ -534,10 +506,11 @@ class MediaRepository(object):
         ))
 
         thumbnailer = Thumbnailer(input_path)
-        t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+        t_byte_source = yield logcontext.defer_to_thread(
+            self.hs.get_reactor(),
             self._generate_thumbnail,
             thumbnailer, t_width, t_height, t_method, t_type
-        ))
+        )
 
         if t_byte_source:
             try:
@@ -620,15 +593,17 @@ class MediaRepository(object):
         for (t_width, t_height, t_type), t_method in iteritems(thumbnails):
             # Generate the thumbnail
             if t_method == "crop":
-                t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+                t_byte_source = yield logcontext.defer_to_thread(
+                    self.hs.get_reactor(),
                     thumbnailer.crop,
                     t_width, t_height, t_type,
-                ))
+                )
             elif t_method == "scale":
-                t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
+                t_byte_source = yield logcontext.defer_to_thread(
+                    self.hs.get_reactor(),
                     thumbnailer.scale,
                     t_width, t_height, t_type,
-                ))
+                )
             else:
                 logger.error("Unrecognized method: %r", t_method)
                 continue
@@ -765,7 +740,6 @@ class MediaRepositoryResource(Resource):
         self.putChild(b"thumbnail", ThumbnailResource(
             hs, media_repo, media_repo.media_storage,
         ))
-        self.putChild(b"identicon", IdenticonResource())
         if hs.config.url_preview_enabled:
             self.putChild(b"preview_url", PreviewUrlResource(
                 hs, media_repo, media_repo.media_storage,
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index a6189224ee..896078fe76 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -21,9 +21,10 @@ import sys
 
 import six
 
-from twisted.internet import defer, threads
+from twisted.internet import defer
 from twisted.protocols.basic import FileSender
 
+from synapse.util import logcontext
 from synapse.util.file_consumer import BackgroundFileConsumer
 from synapse.util.logcontext import make_deferred_yieldable
 
@@ -64,9 +65,10 @@ class MediaStorage(object):
 
         with self.store_into_file(file_info) as (f, fname, finish_cb):
             # Write to the main repository
-            yield make_deferred_yieldable(threads.deferToThread(
+            yield logcontext.defer_to_thread(
+                self.hs.get_reactor(),
                 _write_file_synchronously, source, f,
-            ))
+            )
             yield finish_cb()
 
         defer.returnValue(fname)
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index af01040a38..d0ecf241b6 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import cgi
+
 import datetime
 import errno
 import fnmatch
@@ -24,6 +24,7 @@ import shutil
 import sys
 import traceback
 
+import six
 from six import string_types
 from six.moves import urllib_parse as urlparse
 
@@ -42,15 +43,19 @@ from synapse.http.server import (
 )
 from synapse.http.servlet import parse_integer, parse_string
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.rest.media.v1._base import get_filename_from_headers
 from synapse.util.async_helpers import ObservableDeferred
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.logcontext import make_deferred_yieldable, run_in_background
-from synapse.util.stringutils import is_ascii, random_string
+from synapse.util.stringutils import random_string
 
 from ._base import FileInfo
 
 logger = logging.getLogger(__name__)
 
+_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
+_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
+
 
 class PreviewUrlResource(Resource):
     isLeaf = True
@@ -98,7 +103,7 @@ class PreviewUrlResource(Resource):
         # XXX: if get_user_by_req fails, what should we do in an async render?
         requester = yield self.auth.get_user_by_req(request)
         url = parse_string(request, "url")
-        if "ts" in request.args:
+        if b"ts" in request.args:
             ts = parse_integer(request, "ts")
         else:
             ts = self.clock.time_msec()
@@ -180,7 +185,12 @@ class PreviewUrlResource(Resource):
             cache_result["expires_ts"] > ts and
             cache_result["response_code"] / 100 == 2
         ):
-            defer.returnValue(cache_result["og"])
+            # It may be stored as text in the database, not as bytes (such as
+            # PostgreSQL). If so, encode it back before handing it on.
+            og = cache_result["og"]
+            if isinstance(og, six.text_type):
+                og = og.encode('utf8')
+            defer.returnValue(og)
             return
 
         media_info = yield self._download_url(url, user)
@@ -213,15 +223,28 @@ class PreviewUrlResource(Resource):
         elif _is_html(media_info['media_type']):
             # TODO: somehow stop a big HTML tree from exploding synapse's RAM
 
-            file = open(media_info['filename'])
-            body = file.read()
-            file.close()
+            with open(media_info['filename'], 'rb') as file:
+                body = file.read()
+
+            encoding = None
+
+            # Let's try and figure out if it has an encoding set in a meta tag.
+            # Limit it to the first 1kb, since it ought to be in the meta tags
+            # at the top.
+            match = _charset_match.search(body[:1000])
+
+            # If we find a match, it should take precedence over the
+            # Content-Type header, so set it here.
+            if match:
+                encoding = match.group(1).decode('ascii')
 
-            # clobber the encoding from the content-type, or default to utf-8
-            # XXX: this overrides any <meta/> or XML charset headers in the body
-            # which may pose problems, but so far seems to work okay.
-            match = re.match(r'.*; *charset=(.*?)(;|$)', media_info['media_type'], re.I)
-            encoding = match.group(1) if match else "utf-8"
+            # If we don't find a match, we'll look at the HTTP Content-Type, and
+            # if that doesn't exist, we'll fall back to UTF-8.
+            if not encoding:
+                match = _content_type_match.match(
+                    media_info['media_type']
+                )
+                encoding = match.group(1) if match else "utf-8"
 
             og = decode_and_calc_og(body, media_info['uri'], encoding)
 
@@ -313,31 +336,7 @@ class PreviewUrlResource(Resource):
                 media_type = "application/octet-stream"
             time_now_ms = self.clock.time_msec()
 
-            content_disposition = headers.get(b"Content-Disposition", None)
-            if content_disposition:
-                _, params = cgi.parse_header(content_disposition[0],)
-                download_name = None
-
-                # First check if there is a valid UTF-8 filename
-                download_name_utf8 = params.get("filename*", None)
-                if download_name_utf8:
-                    if download_name_utf8.lower().startswith("utf-8''"):
-                        download_name = download_name_utf8[7:]
-
-                # If there isn't check for an ascii name.
-                if not download_name:
-                    download_name_ascii = params.get("filename", None)
-                    if download_name_ascii and is_ascii(download_name_ascii):
-                        download_name = download_name_ascii
-
-                if download_name:
-                    download_name = urlparse.unquote(download_name)
-                    try:
-                        download_name = download_name.decode("utf-8")
-                    except UnicodeDecodeError:
-                        download_name = None
-            else:
-                download_name = None
+            download_name = get_filename_from_headers(headers)
 
             yield self.store.store_local_media(
                 media_id=file_id,
@@ -596,10 +595,13 @@ def _iterate_over_text(tree, *tags_to_ignore):
     # to be returned.
     elements = iter([tree])
     while True:
-        el = next(elements)
+        el = next(elements, None)
+        if el is None:
+            return
+
         if isinstance(el, string_types):
             yield el
-        elif el is not None and el.tag not in tags_to_ignore:
+        elif el.tag not in tags_to_ignore:
             # el.text is the text before the first child, so we can immediately
             # return it if the text exists.
             if el.text:
@@ -671,7 +673,7 @@ def summarize_paragraphs(text_nodes, min_size=200, max_size=500):
         # This splits the paragraph into words, but keeping the
         # (preceeding) whitespace intact so we can easily concat
         # words back together.
-        for match in re.finditer("\s*\S+", description):
+        for match in re.finditer(r"\s*\S+", description):
             word = match.group()
 
             # Keep adding words while the total length is less than
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 7b9f8b4d79..5aa03031f6 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -17,9 +17,10 @@ import logging
 import os
 import shutil
 
-from twisted.internet import defer, threads
+from twisted.internet import defer
 
 from synapse.config._base import Config
+from synapse.util import logcontext
 from synapse.util.logcontext import run_in_background
 
 from .media_storage import FileResponder
@@ -120,7 +121,8 @@ class FileStorageProviderBackend(StorageProvider):
         if not os.path.exists(dirname):
             os.makedirs(dirname)
 
-        return threads.deferToThread(
+        return logcontext.defer_to_thread(
+            self.hs.get_reactor(),
             shutil.copyfile, primary_fname, backup_fname,
         )
 
diff --git a/synapse/server.py b/synapse/server.py
index 938a05f9dc..9985687b95 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -23,6 +23,7 @@ import abc
 import logging
 
 from twisted.enterprise import adbapi
+from twisted.mail.smtp import sendmail
 from twisted.web.client import BrowserLikePolicyForHTTPS
 
 from synapse.api.auth import Auth
@@ -51,6 +52,7 @@ from synapse.handlers.deactivate_account import DeactivateAccountHandler
 from synapse.handlers.device import DeviceHandler
 from synapse.handlers.devicemessage import DeviceMessageHandler
 from synapse.handlers.e2e_keys import E2eKeysHandler
+from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler
 from synapse.handlers.events import EventHandler, EventStreamHandler
 from synapse.handlers.groups_local import GroupsLocalHandler
 from synapse.handlers.initial_sync import InitialSyncHandler
@@ -130,6 +132,7 @@ class HomeServer(object):
         'auth_handler',
         'device_handler',
         'e2e_keys_handler',
+        'e2e_room_keys_handler',
         'event_handler',
         'event_stream_handler',
         'initial_sync_handler',
@@ -172,6 +175,7 @@ class HomeServer(object):
         'message_handler',
         'pagination_handler',
         'room_context_handler',
+        'sendmail',
     ]
 
     # This is overridden in derived application classes
@@ -205,6 +209,7 @@ class HomeServer(object):
         logger.info("Setting up.")
         with self.get_db_conn() as conn:
             self.datastore = self.DATASTORE_CLASS(conn, self)
+            conn.commit()
         logger.info("Finished setting up.")
 
     def get_reactor(self):
@@ -266,6 +271,9 @@ class HomeServer(object):
     def build_room_creation_handler(self):
         return RoomCreationHandler(self)
 
+    def build_sendmail(self):
+        return sendmail
+
     def build_state_handler(self):
         return StateHandler(self)
 
@@ -299,6 +307,9 @@ class HomeServer(object):
     def build_e2e_keys_handler(self):
         return E2eKeysHandler(self)
 
+    def build_e2e_room_keys_handler(self):
+        return E2eRoomKeysHandler(self)
+
     def build_application_service_api(self):
         return ApplicationServiceApi(self)
 
diff --git a/synapse/server.pyi b/synapse/server.pyi
index ce28486233..06cd083a74 100644
--- a/synapse/server.pyi
+++ b/synapse/server.pyi
@@ -7,6 +7,9 @@ import synapse.handlers.auth
 import synapse.handlers.deactivate_account
 import synapse.handlers.device
 import synapse.handlers.e2e_keys
+import synapse.handlers.room
+import synapse.handlers.room_member
+import synapse.handlers.message
 import synapse.handlers.set_password
 import synapse.rest.media.v1.media_repository
 import synapse.server_notices.server_notices_manager
@@ -50,6 +53,9 @@ class HomeServer(object):
     def get_room_creation_handler(self) -> synapse.handlers.room.RoomCreationHandler:
         pass
 
+    def get_room_member_handler(self) -> synapse.handlers.room_member.RoomMemberHandler:
+        pass
+
     def get_event_creation_handler(self) -> synapse.handlers.message.EventCreationHandler:
         pass
 
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index b22495c1f9..70048b0c09 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -19,13 +19,14 @@ from collections import namedtuple
 
 from six import iteritems, itervalues
 
+import attr
 from frozendict import frozendict
 
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, RoomVersions
 from synapse.events.snapshot import EventContext
-from synapse.state import v1
+from synapse.state import v1, v2
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -260,7 +261,7 @@ class StateHandler(object):
         logger.debug("calling resolve_state_groups from compute_event_context")
 
         entry = yield self.resolve_state_groups_for_events(
-            event.room_id, [e for e, _ in event.prev_events],
+            event.room_id, event.prev_event_ids(),
         )
 
         prev_state_ids = entry.state
@@ -372,15 +373,10 @@ class StateHandler(object):
 
         result = yield self._state_resolution_handler.resolve_state_groups(
             room_id, room_version, state_groups_ids, None,
-            self._state_map_factory,
+            state_res_store=StateResolutionStore(self.store),
         )
         defer.returnValue(result)
 
-    def _state_map_factory(self, ev_ids):
-        return self.store.get_events(
-            ev_ids, get_prev_content=False, check_redacted=False,
-        )
-
     @defer.inlineCallbacks
     def resolve_events(self, room_version, state_sets, event):
         logger.info(
@@ -398,10 +394,10 @@ class StateHandler(object):
         }
 
         with Measure(self.clock, "state._resolve_events"):
-            new_state = yield resolve_events_with_factory(
+            new_state = yield resolve_events_with_store(
                 room_version, state_set_ids,
                 event_map=state_map,
-                state_map_factory=self._state_map_factory
+                state_res_store=StateResolutionStore(self.store),
             )
 
         new_state = {
@@ -436,7 +432,7 @@ class StateResolutionHandler(object):
     @defer.inlineCallbacks
     @log_function
     def resolve_state_groups(
-        self, room_id, room_version, state_groups_ids, event_map, state_map_factory,
+        self, room_id, room_version, state_groups_ids, event_map, state_res_store,
     ):
         """Resolves conflicts between a set of state groups
 
@@ -454,9 +450,11 @@ class StateResolutionHandler(object):
                 a dict from event_id to event, for any events that we happen to
                 have in flight (eg, those currently being persisted). This will be
                 used as a starting point fof finding the state we need; any missing
-                events will be requested via state_map_factory.
+                events will be requested via state_res_store.
+
+                If None, all events will be fetched via state_res_store.
 
-                If None, all events will be fetched via state_map_factory.
+            state_res_store (StateResolutionStore)
 
         Returns:
             Deferred[_StateCacheEntry]: resolved state
@@ -480,10 +478,10 @@ class StateResolutionHandler(object):
 
             # start by assuming we won't have any conflicted state, and build up the new
             # state map by iterating through the state groups. If we discover a conflict,
-            # we give up and instead use `resolve_events_with_factory`.
+            # we give up and instead use `resolve_events_with_store`.
             #
             # XXX: is this actually worthwhile, or should we just let
-            # resolve_events_with_factory do it?
+            # resolve_events_with_store do it?
             new_state = {}
             conflicted_state = False
             for st in itervalues(state_groups_ids):
@@ -498,11 +496,11 @@ class StateResolutionHandler(object):
             if conflicted_state:
                 logger.info("Resolving conflicted state for %r", room_id)
                 with Measure(self.clock, "state._resolve_events"):
-                    new_state = yield resolve_events_with_factory(
+                    new_state = yield resolve_events_with_store(
                         room_version,
                         list(itervalues(state_groups_ids)),
                         event_map=event_map,
-                        state_map_factory=state_map_factory,
+                        state_res_store=state_res_store,
                     )
 
             # if the new state matches any of the input state groups, we can
@@ -583,7 +581,7 @@ def _make_state_cache_entry(
     )
 
 
-def resolve_events_with_factory(room_version, state_sets, event_map, state_map_factory):
+def resolve_events_with_store(room_version, state_sets, event_map, state_res_store):
     """
     Args:
         room_version(str): Version of the room
@@ -599,17 +597,19 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
 
             If None, all events will be fetched via state_map_factory.
 
-        state_map_factory(func): will be called
-            with a list of event_ids that are needed, and should return with
-            a Deferred of dict of event_id to event.
+        state_res_store (StateResolutionStore)
 
     Returns
         Deferred[dict[(str, str), str]]:
             a map from (type, state_key) to event_id.
     """
-    if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
-        return v1.resolve_events_with_factory(
-            state_sets, event_map, state_map_factory,
+    if room_version == RoomVersions.V1:
+        return v1.resolve_events_with_store(
+            state_sets, event_map, state_res_store.get_events,
+        )
+    elif room_version in (RoomVersions.VDH_TEST, RoomVersions.STATE_V2_TEST):
+        return v2.resolve_events_with_store(
+            state_sets, event_map, state_res_store,
         )
     else:
         # This should only happen if we added a version but forgot to add it to
@@ -617,3 +617,54 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
         raise Exception(
             "No state resolution algorithm defined for version %r" % (room_version,)
         )
+
+
+@attr.s
+class StateResolutionStore(object):
+    """Interface that allows state resolution algorithms to access the database
+    in well defined way.
+
+    Args:
+        store (DataStore)
+    """
+
+    store = attr.ib()
+
+    def get_events(self, event_ids, allow_rejected=False):
+        """Get events from the database
+
+        Args:
+            event_ids (list): The event_ids of the events to fetch
+            allow_rejected (bool): If True return rejected events.
+
+        Returns:
+            Deferred[dict[str, FrozenEvent]]: Dict from event_id to event.
+        """
+
+        return self.store.get_events(
+            event_ids,
+            check_redacted=False,
+            get_prev_content=False,
+            allow_rejected=allow_rejected,
+        )
+
+    def get_auth_chain(self, event_ids):
+        """Gets the full auth chain for a set of events (including rejected
+        events).
+
+        Includes the given event IDs in the result.
+
+        Note that:
+            1. All events must be state events.
+            2. For v1 rooms this may not have the full auth chain in the
+               presence of rejected events
+
+        Args:
+            event_ids (list): The event IDs of the events to fetch the auth
+                chain for. Must be state events.
+
+        Returns:
+            Deferred[list[str]]: List of event IDs of the auth chain.
+        """
+
+        return self.store.get_auth_chain_ids(event_ids, include_given=True)
diff --git a/synapse/state/v1.py b/synapse/state/v1.py
index 7a7157b352..19e091ce3b 100644
--- a/synapse/state/v1.py
+++ b/synapse/state/v1.py
@@ -31,7 +31,7 @@ POWER_KEY = (EventTypes.PowerLevels, "")
 
 
 @defer.inlineCallbacks
-def resolve_events_with_factory(state_sets, event_map, state_map_factory):
+def resolve_events_with_store(state_sets, event_map, state_map_factory):
     """
     Args:
         state_sets(list): List of dicts of (type, state_key) -> event_id,
@@ -298,6 +298,8 @@ def _resolve_normal_events(events, auth_events):
 
 def _ordered_events(events):
     def key_func(e):
-        return -int(e.depth), hashlib.sha1(e.event_id.encode('ascii')).hexdigest()
+        # we have to use utf-8 rather than ascii here because it turns out we allow
+        # people to send us events with non-ascii event IDs :/
+        return -int(e.depth), hashlib.sha1(e.event_id.encode('utf-8')).hexdigest()
 
     return sorted(events, key=key_func)
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
new file mode 100644
index 0000000000..3573bb0028
--- /dev/null
+++ b/synapse/state/v2.py
@@ -0,0 +1,548 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import heapq
+import itertools
+import logging
+
+from six import iteritems, itervalues
+
+from twisted.internet import defer
+
+from synapse import event_auth
+from synapse.api.constants import EventTypes
+from synapse.api.errors import AuthError
+
+logger = logging.getLogger(__name__)
+
+
+@defer.inlineCallbacks
+def resolve_events_with_store(state_sets, event_map, state_res_store):
+    """Resolves the state using the v2 state resolution algorithm
+
+    Args:
+        state_sets(list): List of dicts of (type, state_key) -> event_id,
+            which are the different state groups to resolve.
+
+        event_map(dict[str,FrozenEvent]|None):
+            a dict from event_id to event, for any events that we happen to
+            have in flight (eg, those currently being persisted). This will be
+            used as a starting point fof finding the state we need; any missing
+            events will be requested via state_res_store.
+
+            If None, all events will be fetched via state_res_store.
+
+        state_res_store (StateResolutionStore)
+
+    Returns
+        Deferred[dict[(str, str), str]]:
+            a map from (type, state_key) to event_id.
+    """
+
+    logger.debug("Computing conflicted state")
+
+    # We use event_map as a cache, so if its None we need to initialize it
+    if event_map is None:
+        event_map = {}
+
+    # First split up the un/conflicted state
+    unconflicted_state, conflicted_state = _seperate(state_sets)
+
+    if not conflicted_state:
+        defer.returnValue(unconflicted_state)
+
+    logger.debug("%d conflicted state entries", len(conflicted_state))
+    logger.debug("Calculating auth chain difference")
+
+    # Also fetch all auth events that appear in only some of the state sets'
+    # auth chains.
+    auth_diff = yield _get_auth_chain_difference(
+        state_sets, event_map, state_res_store,
+    )
+
+    full_conflicted_set = set(itertools.chain(
+        itertools.chain.from_iterable(itervalues(conflicted_state)),
+        auth_diff,
+    ))
+
+    events = yield state_res_store.get_events([
+        eid for eid in full_conflicted_set
+        if eid not in event_map
+    ], allow_rejected=True)
+    event_map.update(events)
+
+    full_conflicted_set = set(eid for eid in full_conflicted_set if eid in event_map)
+
+    logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
+
+    # Get and sort all the power events (kicks/bans/etc)
+    power_events = (
+        eid for eid in full_conflicted_set
+        if _is_power_event(event_map[eid])
+    )
+
+    sorted_power_events = yield _reverse_topological_power_sort(
+        power_events,
+        event_map,
+        state_res_store,
+        full_conflicted_set,
+    )
+
+    logger.debug("sorted %d power events", len(sorted_power_events))
+
+    # Now sequentially auth each one
+    resolved_state = yield _iterative_auth_checks(
+        sorted_power_events, unconflicted_state, event_map,
+        state_res_store,
+    )
+
+    logger.debug("resolved power events")
+
+    # OK, so we've now resolved the power events. Now sort the remaining
+    # events using the mainline of the resolved power level.
+
+    leftover_events = [
+        ev_id
+        for ev_id in full_conflicted_set
+        if ev_id not in sorted_power_events
+    ]
+
+    logger.debug("sorting %d remaining events", len(leftover_events))
+
+    pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
+    leftover_events = yield _mainline_sort(
+        leftover_events, pl, event_map, state_res_store,
+    )
+
+    logger.debug("resolving remaining events")
+
+    resolved_state = yield _iterative_auth_checks(
+        leftover_events, resolved_state, event_map,
+        state_res_store,
+    )
+
+    logger.debug("resolved")
+
+    # We make sure that unconflicted state always still applies.
+    resolved_state.update(unconflicted_state)
+
+    logger.debug("done")
+
+    defer.returnValue(resolved_state)
+
+
+@defer.inlineCallbacks
+def _get_power_level_for_sender(event_id, event_map, state_res_store):
+    """Return the power level of the sender of the given event according to
+    their auth events.
+
+    Args:
+        event_id (str)
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[int]
+    """
+    event = yield _get_event(event_id, event_map, state_res_store)
+
+    pl = None
+    for aid in event.auth_event_ids():
+        aev = yield _get_event(aid, event_map, state_res_store)
+        if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
+            pl = aev
+            break
+
+    if pl is None:
+        # Couldn't find power level. Check if they're the creator of the room
+        for aid in event.auth_event_ids():
+            aev = yield _get_event(aid, event_map, state_res_store)
+            if (aev.type, aev.state_key) == (EventTypes.Create, ""):
+                if aev.content.get("creator") == event.sender:
+                    defer.returnValue(100)
+                break
+        defer.returnValue(0)
+
+    level = pl.content.get("users", {}).get(event.sender)
+    if level is None:
+        level = pl.content.get("users_default", 0)
+
+    if level is None:
+        defer.returnValue(0)
+    else:
+        defer.returnValue(int(level))
+
+
+@defer.inlineCallbacks
+def _get_auth_chain_difference(state_sets, event_map, state_res_store):
+    """Compare the auth chains of each state set and return the set of events
+    that only appear in some but not all of the auth chains.
+
+    Args:
+        state_sets (list)
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[set[str]]: Set of event IDs
+    """
+    common = set(itervalues(state_sets[0])).intersection(
+        *(itervalues(s) for s in state_sets[1:])
+    )
+
+    auth_sets = []
+    for state_set in state_sets:
+        auth_ids = set(
+            eid
+            for key, eid in iteritems(state_set)
+            if (key[0] in (
+                EventTypes.Member,
+                EventTypes.ThirdPartyInvite,
+            ) or key in (
+                (EventTypes.PowerLevels, ''),
+                (EventTypes.Create, ''),
+                (EventTypes.JoinRules, ''),
+            )) and eid not in common
+        )
+
+        auth_chain = yield state_res_store.get_auth_chain(auth_ids)
+        auth_ids.update(auth_chain)
+
+        auth_sets.append(auth_ids)
+
+    intersection = set(auth_sets[0]).intersection(*auth_sets[1:])
+    union = set().union(*auth_sets)
+
+    defer.returnValue(union - intersection)
+
+
+def _seperate(state_sets):
+    """Return the unconflicted and conflicted state. This is different than in
+    the original algorithm, as this defines a key to be conflicted if one of
+    the state sets doesn't have that key.
+
+    Args:
+        state_sets (list)
+
+    Returns:
+        tuple[dict, dict]: A tuple of unconflicted and conflicted state. The
+        conflicted state dict is a map from type/state_key to set of event IDs
+    """
+    unconflicted_state = {}
+    conflicted_state = {}
+
+    for key in set(itertools.chain.from_iterable(state_sets)):
+        event_ids = set(state_set.get(key) for state_set in state_sets)
+        if len(event_ids) == 1:
+            unconflicted_state[key] = event_ids.pop()
+        else:
+            event_ids.discard(None)
+            conflicted_state[key] = event_ids
+
+    return unconflicted_state, conflicted_state
+
+
+def _is_power_event(event):
+    """Return whether or not the event is a "power event", as defined by the
+    v2 state resolution algorithm
+
+    Args:
+        event (FrozenEvent)
+
+    Returns:
+        boolean
+    """
+    if (event.type, event.state_key) in (
+        (EventTypes.PowerLevels, ""),
+        (EventTypes.JoinRules, ""),
+        (EventTypes.Create, ""),
+    ):
+        return True
+
+    if event.type == EventTypes.Member:
+        if event.membership in ('leave', 'ban'):
+            return event.sender != event.state_key
+
+    return False
+
+
+@defer.inlineCallbacks
+def _add_event_and_auth_chain_to_graph(graph, event_id, event_map,
+                                       state_res_store, auth_diff):
+    """Helper function for _reverse_topological_power_sort that add the event
+    and its auth chain (that is in the auth diff) to the graph
+
+    Args:
+        graph (dict[str, set[str]]): A map from event ID to the events auth
+            event IDs
+        event_id (str): Event to add to the graph
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+        auth_diff (set[str]): Set of event IDs that are in the auth difference.
+    """
+
+    state = [event_id]
+    while state:
+        eid = state.pop()
+        graph.setdefault(eid, set())
+
+        event = yield _get_event(eid, event_map, state_res_store)
+        for aid in event.auth_event_ids():
+            if aid in auth_diff:
+                if aid not in graph:
+                    state.append(aid)
+
+                graph.setdefault(eid, set()).add(aid)
+
+
+@defer.inlineCallbacks
+def _reverse_topological_power_sort(event_ids, event_map, state_res_store, auth_diff):
+    """Returns a list of the event_ids sorted by reverse topological ordering,
+    and then by power level and origin_server_ts
+
+    Args:
+        event_ids (list[str]): The events to sort
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+        auth_diff (set[str]): Set of event IDs that are in the auth difference.
+
+    Returns:
+        Deferred[list[str]]: The sorted list
+    """
+
+    graph = {}
+    for event_id in event_ids:
+        yield _add_event_and_auth_chain_to_graph(
+            graph, event_id, event_map, state_res_store, auth_diff,
+        )
+
+    event_to_pl = {}
+    for event_id in graph:
+        pl = yield _get_power_level_for_sender(event_id, event_map, state_res_store)
+        event_to_pl[event_id] = pl
+
+    def _get_power_order(event_id):
+        ev = event_map[event_id]
+        pl = event_to_pl[event_id]
+
+        return -pl, ev.origin_server_ts, event_id
+
+    # Note: graph is modified during the sort
+    it = lexicographical_topological_sort(
+        graph,
+        key=_get_power_order,
+    )
+    sorted_events = list(it)
+
+    defer.returnValue(sorted_events)
+
+
+@defer.inlineCallbacks
+def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store):
+    """Sequentially apply auth checks to each event in given list, updating the
+    state as it goes along.
+
+    Args:
+        event_ids (list[str]): Ordered list of events to apply auth checks to
+        base_state (dict[tuple[str, str], str]): The set of state to start with
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[dict[tuple[str, str], str]]: Returns the final updated state
+    """
+    resolved_state = base_state.copy()
+
+    for event_id in event_ids:
+        event = event_map[event_id]
+
+        auth_events = {}
+        for aid in event.auth_event_ids():
+            ev = yield _get_event(aid, event_map, state_res_store)
+
+            if ev.rejected_reason is None:
+                auth_events[(ev.type, ev.state_key)] = ev
+
+        for key in event_auth.auth_types_for_event(event):
+            if key in resolved_state:
+                ev_id = resolved_state[key]
+                ev = yield _get_event(ev_id, event_map, state_res_store)
+
+                if ev.rejected_reason is None:
+                    auth_events[key] = event_map[ev_id]
+
+        try:
+            event_auth.check(
+                event, auth_events,
+                do_sig_check=False,
+                do_size_check=False
+            )
+
+            resolved_state[(event.type, event.state_key)] = event_id
+        except AuthError:
+            pass
+
+    defer.returnValue(resolved_state)
+
+
+@defer.inlineCallbacks
+def _mainline_sort(event_ids, resolved_power_event_id, event_map,
+                   state_res_store):
+    """Returns a sorted list of event_ids sorted by mainline ordering based on
+    the given event resolved_power_event_id
+
+    Args:
+        event_ids (list[str]): Events to sort
+        resolved_power_event_id (str): The final resolved power level event ID
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[list[str]]: The sorted list
+    """
+    mainline = []
+    pl = resolved_power_event_id
+    while pl:
+        mainline.append(pl)
+        pl_ev = yield _get_event(pl, event_map, state_res_store)
+        auth_events = pl_ev.auth_event_ids()
+        pl = None
+        for aid in auth_events:
+            ev = yield _get_event(aid, event_map, state_res_store)
+            if (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""):
+                pl = aid
+                break
+
+    mainline_map = {ev_id: i + 1 for i, ev_id in enumerate(reversed(mainline))}
+
+    event_ids = list(event_ids)
+
+    order_map = {}
+    for ev_id in event_ids:
+        depth = yield _get_mainline_depth_for_event(
+            event_map[ev_id], mainline_map,
+            event_map, state_res_store,
+        )
+        order_map[ev_id] = (depth, event_map[ev_id].origin_server_ts, ev_id)
+
+    event_ids.sort(key=lambda ev_id: order_map[ev_id])
+
+    defer.returnValue(event_ids)
+
+
+@defer.inlineCallbacks
+def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_store):
+    """Get the mainline depths for the given event based on the mainline map
+
+    Args:
+        event (FrozenEvent)
+        mainline_map (dict[str, int]): Map from event_id to mainline depth for
+            events in the mainline.
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[int]
+    """
+
+    # We do an iterative search, replacing `event with the power level in its
+    # auth events (if any)
+    while event:
+        depth = mainline_map.get(event.event_id)
+        if depth is not None:
+            defer.returnValue(depth)
+
+        auth_events = event.auth_event_ids()
+        event = None
+
+        for aid in auth_events:
+            aev = yield _get_event(aid, event_map, state_res_store)
+            if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
+                event = aev
+                break
+
+    # Didn't find a power level auth event, so we just return 0
+    defer.returnValue(0)
+
+
+@defer.inlineCallbacks
+def _get_event(event_id, event_map, state_res_store):
+    """Helper function to look up event in event_map, falling back to looking
+    it up in the store
+
+    Args:
+        event_id (str)
+        event_map (dict[str,FrozenEvent])
+        state_res_store (StateResolutionStore)
+
+    Returns:
+        Deferred[FrozenEvent]
+    """
+    if event_id not in event_map:
+        events = yield state_res_store.get_events([event_id], allow_rejected=True)
+        event_map.update(events)
+    defer.returnValue(event_map[event_id])
+
+
+def lexicographical_topological_sort(graph, key):
+    """Performs a lexicographic reverse topological sort on the graph.
+
+    This returns a reverse topological sort (i.e. if node A references B then B
+    appears before A in the sort), with ties broken lexicographically based on
+    return value of the `key` function.
+
+    NOTE: `graph` is modified during the sort.
+
+    Args:
+        graph (dict[str, set[str]]): A representation of the graph where each
+            node is a key in the dict and its value are the nodes edges.
+        key (func): A function that takes a node and returns a value that is
+            comparable and used to order nodes
+
+    Yields:
+        str: The next node in the topological sort
+    """
+
+    # Note, this is basically Kahn's algorithm except we look at nodes with no
+    # outgoing edges, c.f.
+    # https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
+    outdegree_map = graph
+    reverse_graph = {}
+
+    # Lists of nodes with zero out degree. Is actually a tuple of
+    # `(key(node), node)` so that sorting does the right thing
+    zero_outdegree = []
+
+    for node, edges in iteritems(graph):
+        if len(edges) == 0:
+            zero_outdegree.append((key(node), node))
+
+        reverse_graph.setdefault(node, set())
+        for edge in edges:
+            reverse_graph.setdefault(edge, set()).add(node)
+
+    # heapq is a built in implementation of a sorted queue.
+    heapq.heapify(zero_outdegree)
+
+    while zero_outdegree:
+        _, node = heapq.heappop(zero_outdegree)
+
+        for parent in reverse_graph[node]:
+            out = outdegree_map[parent]
+            out.discard(node)
+            if len(out) == 0:
+                heapq.heappush(zero_outdegree, (key(parent), parent))
+
+        yield node
diff --git a/synapse/static/client/login/index.html b/synapse/static/client/login/index.html
index 96c8723cab..bcb6bc6bb7 100644
--- a/synapse/static/client/login/index.html
+++ b/synapse/static/client/login/index.html
@@ -12,35 +12,30 @@
         <h1>Log in with one of the following methods</h1>
 
         <span id="feedback" style="color: #f00"></span>
-        <br/>
-        <br/>
 
         <div id="loading">
             <img src="spinner.gif" />
         </div>
 
-        <div id="cas_flow" class="login_flow" style="display:none"
-                onclick="gotoCas(); return false;">
-            CAS Authentication: <button id="cas_button" style="margin: 10px">Log in</button>
+        <div id="sso_flow" class="login_flow" style="display:none">
+            Single-sign on:
+            <form id="sso_form" action="/_matrix/client/r0/login/sso/redirect" method="get">
+                <input id="sso_redirect_url" type="hidden" name="redirectUrl" value=""/>
+                <input type="submit" value="Log in"/>
+            </form>
         </div>
 
-        <br/>
-
-        <form id="password_form" class="login_flow" style="display:none"
-                onsubmit="matrixLogin.password_login(); return false;">
-            <div>
-                Password Authentication:<br/>
-
-                <div style="text-align: center">
-                    <input id="user_id" size="32" type="text" placeholder="Matrix ID (e.g. bob)" autocapitalize="off" autocorrect="off" />
-                    <br/>
-                    <input id="password" size="32" type="password" placeholder="Password"/>
-                    <br/>
+        <div id="password_flow" class="login_flow" style="display:none">
+            Password Authentication:
+            <form onsubmit="matrixLogin.password_login(); return false;">
+                <input id="user_id" size="32" type="text" placeholder="Matrix ID (e.g. bob)" autocapitalize="off" autocorrect="off" />
+                <br/>
+                <input id="password" size="32" type="password" placeholder="Password"/>
+                <br/>
 
-                    <button type="submit" style="margin: 10px">Log in</button>
-                </div>
-            </div>
-        </form>
+                <input type="submit" value="Log in"/>
+            </form>
+        </div>
 
         <div id="no_login_types" type="button" class="login_flow" style="display:none">
             Log in currently unavailable.
diff --git a/synapse/static/client/login/js/login.js b/synapse/static/client/login/js/login.js
index bfb7386035..3a958749a1 100644
--- a/synapse/static/client/login/js/login.js
+++ b/synapse/static/client/login/js/login.js
@@ -1,7 +1,8 @@
 window.matrixLogin = {
-    endpoint: location.origin + "/_matrix/client/api/v1/login",
+    endpoint: location.origin + "/_matrix/client/r0/login",
     serverAcceptsPassword: false,
-    serverAcceptsCas: false
+    serverAcceptsCas: false,
+    serverAcceptsSso: false,
 };
 
 var submitPassword = function(user, pwd) {
@@ -40,12 +41,6 @@ var errorFunc = function(err) {
     }
 };
 
-var gotoCas = function() {
-    var this_page = window.location.origin + window.location.pathname;
-    var redirect_url = matrixLogin.endpoint + "/cas/redirect?redirectUrl=" + encodeURIComponent(this_page);
-    window.location.replace(redirect_url);
-}
-
 var setFeedbackString = function(text) {
     $("#feedback").text(text);
 };
@@ -53,12 +48,18 @@ var setFeedbackString = function(text) {
 var show_login = function() {
     $("#loading").hide();
 
+    var this_page = window.location.origin + window.location.pathname;
+    $("#sso_redirect_url").val(encodeURIComponent(this_page));
+
     if (matrixLogin.serverAcceptsPassword) {
-        $("#password_form").show();
+        $("#password_flow").show();
     }
 
-    if (matrixLogin.serverAcceptsCas) {
-        $("#cas_flow").show();
+    if (matrixLogin.serverAcceptsSso) {
+        $("#sso_flow").show();
+    } else if (matrixLogin.serverAcceptsCas) {
+        $("#sso_form").attr("action", "/_matrix/client/r0/login/cas/redirect");
+        $("#sso_flow").show();
     }
 
     if (!matrixLogin.serverAcceptsPassword && !matrixLogin.serverAcceptsCas) {
@@ -67,8 +68,8 @@ var show_login = function() {
 };
 
 var show_spinner = function() {
-    $("#password_form").hide();
-    $("#cas_flow").hide();
+    $("#password_flow").hide();
+    $("#sso_flow").hide();
     $("#no_login_types").hide();
     $("#loading").show();
 };
@@ -84,7 +85,10 @@ var fetch_info = function(cb) {
                 matrixLogin.serverAcceptsCas = true;
                 console.log("Server accepts CAS");
             }
-
+            if ("m.login.sso" === flow.type) {
+                matrixLogin.serverAcceptsSso = true;
+                console.log("Server accepts SSO");
+            }
             if ("m.login.password" === flow.type) {
                 matrixLogin.serverAcceptsPassword = true;
                 console.log("Server accepts password");
diff --git a/synapse/static/client/login/style.css b/synapse/static/client/login/style.css
index 73da0b5117..1cce5ed950 100644
--- a/synapse/static/client/login/style.css
+++ b/synapse/static/client/login/style.css
@@ -19,30 +19,23 @@ a:hover   { color: #000; }
 a:active  { color: #000; }
 
 input {
-   width: 90%
-}
-
-textarea, input {
-   font-family: inherit;
-   font-size: inherit;
    margin: 5px;
 }
 
-.smallPrint {
-    color: #888;
-    font-size: 9pt ! important;
-    font-style: italic ! important;
+textbox, input[type="text"], input[type="password"] {
+   width: 90%;
 }
 
-.g-recaptcha div {
-    margin: auto;
+form {
+    text-align: center;
+    margin: 10px 0 0 0;
 }
 
 .login_flow {
+    width: 300px;
     text-align: left;
     padding: 10px;
     margin-bottom: 40px;
-    display: inline-block;
 
     -webkit-border-radius: 10px;
     -moz-border-radius: 10px;
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 23b4a8d76d..b23fb7e56c 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -30,6 +30,7 @@ from .appservice import ApplicationServiceStore, ApplicationServiceTransactionSt
 from .client_ips import ClientIpStore
 from .deviceinbox import DeviceInboxStore
 from .directory import DirectoryStore
+from .e2e_room_keys import EndToEndRoomKeyStore
 from .end_to_end_keys import EndToEndKeyStore
 from .engines import PostgresEngine
 from .event_federation import EventFederationStore
@@ -77,6 +78,7 @@ class DataStore(RoomMemberStore, RoomStore,
                 ApplicationServiceTransactionStore,
                 ReceiptsStore,
                 EndToEndKeyStore,
+                EndToEndRoomKeyStore,
                 SearchStore,
                 TagsStore,
                 AccountDataStore,
@@ -117,7 +119,6 @@ class DataStore(RoomMemberStore, RoomStore,
             db_conn, "device_lists_stream", "stream_id",
         )
 
-        self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id")
         self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
         self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
         self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index be61147b9b..1d3069b143 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -18,7 +18,7 @@ import threading
 import time
 
 from six import PY2, iteritems, iterkeys, itervalues
-from six.moves import intern, range
+from six.moves import builtins, intern, range
 
 from canonicaljson import json
 from prometheus_client import Histogram
@@ -29,6 +29,7 @@ from synapse.api.errors import StoreError
 from synapse.storage.engines import PostgresEngine
 from synapse.util.caches.descriptors import Cache
 from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
+from synapse.util.stringutils import exception_to_unicode
 
 logger = logging.getLogger(__name__)
 
@@ -249,32 +250,32 @@ class SQLBaseStore(object):
                 except self.database_engine.module.OperationalError as e:
                     # This can happen if the database disappears mid
                     # transaction.
-                    logger.warn(
+                    logger.warning(
                         "[TXN OPERROR] {%s} %s %d/%d",
-                        name, e, i, N
+                        name, exception_to_unicode(e), i, N
                     )
                     if i < N:
                         i += 1
                         try:
                             conn.rollback()
                         except self.database_engine.module.Error as e1:
-                            logger.warn(
+                            logger.warning(
                                 "[TXN EROLL] {%s} %s",
-                                name, e1,
+                                name, exception_to_unicode(e1),
                             )
                         continue
                     raise
                 except self.database_engine.module.DatabaseError as e:
                     if self.database_engine.is_deadlock(e):
-                        logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
+                        logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
                         if i < N:
                             i += 1
                             try:
                                 conn.rollback()
                             except self.database_engine.module.Error as e1:
-                                logger.warn(
+                                logger.warning(
                                     "[TXN EROLL] {%s} %s",
-                                    name, e1,
+                                    name, exception_to_unicode(e1),
                                 )
                             continue
                     raise
@@ -849,9 +850,9 @@ class SQLBaseStore(object):
         rowcount = cls._simple_update_txn(txn, table, keyvalues, updatevalues)
 
         if rowcount == 0:
-            raise StoreError(404, "No row found")
+            raise StoreError(404, "No row found (%s)" % (table,))
         if rowcount > 1:
-            raise StoreError(500, "More than one row matched")
+            raise StoreError(500, "More than one row matched (%s)" % (table,))
 
     @staticmethod
     def _simple_select_one_txn(txn, table, keyvalues, retcols,
@@ -868,9 +869,9 @@ class SQLBaseStore(object):
         if not row:
             if allow_none:
                 return None
-            raise StoreError(404, "No row found")
+            raise StoreError(404, "No row found (%s)" % (table,))
         if txn.rowcount > 1:
-            raise StoreError(500, "More than one row matched")
+            raise StoreError(500, "More than one row matched (%s)" % (table,))
 
         return dict(zip(retcols, row))
 
@@ -902,9 +903,9 @@ class SQLBaseStore(object):
 
         txn.execute(sql, list(keyvalues.values()))
         if txn.rowcount == 0:
-            raise StoreError(404, "No row found")
+            raise StoreError(404, "No row found (%s)" % (table,))
         if txn.rowcount > 1:
-            raise StoreError(500, "more than one row matched")
+            raise StoreError(500, "More than one row matched (%s)" % (table,))
 
     def _simple_delete(self, table, keyvalues, desc):
         return self.runInteraction(
@@ -1233,7 +1234,7 @@ def db_to_json(db_content):
 
     # psycopg2 on Python 2 returns buffer objects, which we need to cast to
     # bytes to decode
-    if PY2 and isinstance(db_content, buffer):
+    if PY2 and isinstance(db_content, builtins.buffer):
         db_content = bytes(db_content)
 
     # Decode it to a Unicode string before feeding it to json.loads, so we
diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py
index d10ff9e4b9..ecdab34e7d 100644
--- a/synapse/storage/devices.py
+++ b/synapse/storage/devices.py
@@ -22,14 +22,19 @@ from twisted.internet import defer
 
 from synapse.api.errors import StoreError
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
 
-from ._base import Cache, SQLBaseStore, db_to_json
+from ._base import Cache, db_to_json
 
 logger = logging.getLogger(__name__)
 
+DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
+    "drop_device_list_streams_non_unique_indexes"
+)
 
-class DeviceStore(SQLBaseStore):
+
+class DeviceStore(BackgroundUpdateStore):
     def __init__(self, db_conn, hs):
         super(DeviceStore, self).__init__(db_conn, hs)
 
@@ -52,6 +57,30 @@ class DeviceStore(SQLBaseStore):
             columns=["user_id", "device_id"],
         )
 
+        # create a unique index on device_lists_remote_cache
+        self.register_background_index_update(
+            "device_lists_remote_cache_unique_idx",
+            index_name="device_lists_remote_cache_unique_id",
+            table="device_lists_remote_cache",
+            columns=["user_id", "device_id"],
+            unique=True,
+        )
+
+        # And one on device_lists_remote_extremeties
+        self.register_background_index_update(
+            "device_lists_remote_extremeties_unique_idx",
+            index_name="device_lists_remote_extremeties_unique_idx",
+            table="device_lists_remote_extremeties",
+            columns=["user_id"],
+            unique=True,
+        )
+
+        # once they complete, we can remove the old non-unique indexes.
+        self.register_background_update_handler(
+            DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES,
+            self._drop_device_list_streams_non_unique_indexes,
+        )
+
     @defer.inlineCallbacks
     def store_device(self, user_id, device_id,
                      initial_device_display_name):
@@ -239,7 +268,19 @@ class DeviceStore(SQLBaseStore):
 
     def update_remote_device_list_cache_entry(self, user_id, device_id, content,
                                               stream_id):
-        """Updates a single user's device in the cache.
+        """Updates a single device in the cache of a remote user's devicelist.
+
+        Note: assumes that we are the only thread that can be updating this user's
+        device list.
+
+        Args:
+            user_id (str): User to update device list for
+            device_id (str): ID of decivice being updated
+            content (dict): new data on this device
+            stream_id (int): the version of the device list
+
+        Returns:
+            Deferred[None]
         """
         return self.runInteraction(
             "update_remote_device_list_cache_entry",
@@ -272,7 +313,11 @@ class DeviceStore(SQLBaseStore):
                 },
                 values={
                     "content": json.dumps(content),
-                }
+                },
+
+                # we don't need to lock, because we assume we are the only thread
+                # updating this user's devices.
+                lock=False,
             )
 
         txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,))
@@ -289,11 +334,26 @@ class DeviceStore(SQLBaseStore):
             },
             values={
                 "stream_id": stream_id,
-            }
+            },
+
+            # again, we can assume we are the only thread updating this user's
+            # extremity.
+            lock=False,
         )
 
     def update_remote_device_list_cache(self, user_id, devices, stream_id):
-        """Replace the cache of the remote user's devices.
+        """Replace the entire cache of the remote user's devices.
+
+        Note: assumes that we are the only thread that can be updating this user's
+        device list.
+
+        Args:
+            user_id (str): User to update device list for
+            devices (list[dict]): list of device objects supplied over federation
+            stream_id (int): the version of the device list
+
+        Returns:
+            Deferred[None]
         """
         return self.runInteraction(
             "update_remote_device_list_cache",
@@ -338,7 +398,11 @@ class DeviceStore(SQLBaseStore):
             },
             values={
                 "stream_id": stream_id,
-            }
+            },
+
+            # we don't need to lock, because we can assume we are the only thread
+            # updating this user's extremity.
+            lock=False,
         )
 
     def get_devices_by_remote(self, destination, from_stream_id):
@@ -589,10 +653,14 @@ class DeviceStore(SQLBaseStore):
         combined list of changes to devices, and which destinations need to be
         poked. `destination` may be None if no destinations need to be poked.
         """
+        # We do a group by here as there can be a large number of duplicate
+        # entries, since we throw away device IDs.
         sql = """
-            SELECT stream_id, user_id, destination FROM device_lists_stream
+            SELECT MAX(stream_id) AS stream_id, user_id, destination
+            FROM device_lists_stream
             LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id)
             WHERE ? < stream_id AND stream_id <= ?
+            GROUP BY user_id, destination
         """
         return self._execute(
             "get_all_device_list_changes_for_remotes", None,
@@ -718,3 +786,19 @@ class DeviceStore(SQLBaseStore):
             "_prune_old_outbound_device_pokes",
             _prune_txn,
         )
+
+    @defer.inlineCallbacks
+    def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size):
+        def f(conn):
+            txn = conn.cursor()
+            txn.execute(
+                "DROP INDEX IF EXISTS device_lists_remote_cache_id"
+            )
+            txn.execute(
+                "DROP INDEX IF EXISTS device_lists_remote_extremeties_id"
+            )
+            txn.close()
+
+        yield self.runWithConnection(f)
+        yield self._end_background_update(DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES)
+        defer.returnValue(1)
diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py
index cfb687cb53..61a029a53c 100644
--- a/synapse/storage/directory.py
+++ b/synapse/storage/directory.py
@@ -90,7 +90,7 @@ class DirectoryWorkerStore(SQLBaseStore):
 class DirectoryStore(DirectoryWorkerStore):
     @defer.inlineCallbacks
     def create_room_alias_association(self, room_alias, room_id, servers, creator=None):
-        """ Creates an associatin between  a room alias and room_id/servers
+        """ Creates an association between a room alias and room_id/servers
 
         Args:
             room_alias (RoomAlias)
diff --git a/synapse/storage/e2e_room_keys.py b/synapse/storage/e2e_room_keys.py
new file mode 100644
index 0000000000..16b7f005aa
--- /dev/null
+++ b/synapse/storage/e2e_room_keys.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+from twisted.internet import defer
+
+from synapse.api.errors import StoreError
+
+from ._base import SQLBaseStore
+
+
+class EndToEndRoomKeyStore(SQLBaseStore):
+
+    @defer.inlineCallbacks
+    def get_e2e_room_key(self, user_id, version, room_id, session_id):
+        """Get the encrypted E2E room key for a given session from a given
+        backup version of room_keys.  We only store the 'best' room key for a given
+        session at a given time, as determined by the handler.
+
+        Args:
+            user_id(str): the user whose backup we're querying
+            version(str): the version ID of the backup for the set of keys we're querying
+            room_id(str): the ID of the room whose keys we're querying.
+                This is a bit redundant as it's implied by the session_id, but
+                we include for consistency with the rest of the API.
+            session_id(str): the session whose room_key we're querying.
+
+        Returns:
+            A deferred dict giving the session_data and message metadata for
+            this room key.
+        """
+
+        row = yield self._simple_select_one(
+            table="e2e_room_keys",
+            keyvalues={
+                "user_id": user_id,
+                "version": version,
+                "room_id": room_id,
+                "session_id": session_id,
+            },
+            retcols=(
+                "first_message_index",
+                "forwarded_count",
+                "is_verified",
+                "session_data",
+            ),
+            desc="get_e2e_room_key",
+        )
+
+        row["session_data"] = json.loads(row["session_data"])
+
+        defer.returnValue(row)
+
+    @defer.inlineCallbacks
+    def set_e2e_room_key(self, user_id, version, room_id, session_id, room_key):
+        """Replaces or inserts the encrypted E2E room key for a given session in
+        a given backup
+
+        Args:
+            user_id(str): the user whose backup we're setting
+            version(str): the version ID of the backup we're updating
+            room_id(str): the ID of the room whose keys we're setting
+            session_id(str): the session whose room_key we're setting
+            room_key(dict): the room_key being set
+        Raises:
+            StoreError
+        """
+
+        yield self._simple_upsert(
+            table="e2e_room_keys",
+            keyvalues={
+                "user_id": user_id,
+                "room_id": room_id,
+                "session_id": session_id,
+            },
+            values={
+                "version": version,
+                "first_message_index": room_key['first_message_index'],
+                "forwarded_count": room_key['forwarded_count'],
+                "is_verified": room_key['is_verified'],
+                "session_data": json.dumps(room_key['session_data']),
+            },
+            lock=False,
+        )
+
+    @defer.inlineCallbacks
+    def get_e2e_room_keys(
+        self, user_id, version, room_id=None, session_id=None
+    ):
+        """Bulk get the E2E room keys for a given backup, optionally filtered to a given
+        room, or a given session.
+
+        Args:
+            user_id(str): the user whose backup we're querying
+            version(str): the version ID of the backup for the set of keys we're querying
+            room_id(str): Optional. the ID of the room whose keys we're querying, if any.
+                If not specified, we return the keys for all the rooms in the backup.
+            session_id(str): Optional. the session whose room_key we're querying, if any.
+                If specified, we also require the room_id to be specified.
+                If not specified, we return all the keys in this version of
+                the backup (or for the specified room)
+
+        Returns:
+            A deferred list of dicts giving the session_data and message metadata for
+            these room keys.
+        """
+
+        try:
+            version = int(version)
+        except ValueError:
+            defer.returnValue({'rooms': {}})
+
+        keyvalues = {
+            "user_id": user_id,
+            "version": version,
+        }
+        if room_id:
+            keyvalues['room_id'] = room_id
+            if session_id:
+                keyvalues['session_id'] = session_id
+
+        rows = yield self._simple_select_list(
+            table="e2e_room_keys",
+            keyvalues=keyvalues,
+            retcols=(
+                "user_id",
+                "room_id",
+                "session_id",
+                "first_message_index",
+                "forwarded_count",
+                "is_verified",
+                "session_data",
+            ),
+            desc="get_e2e_room_keys",
+        )
+
+        sessions = {'rooms': {}}
+        for row in rows:
+            room_entry = sessions['rooms'].setdefault(row['room_id'], {"sessions": {}})
+            room_entry['sessions'][row['session_id']] = {
+                "first_message_index": row["first_message_index"],
+                "forwarded_count": row["forwarded_count"],
+                "is_verified": row["is_verified"],
+                "session_data": json.loads(row["session_data"]),
+            }
+
+        defer.returnValue(sessions)
+
+    @defer.inlineCallbacks
+    def delete_e2e_room_keys(
+        self, user_id, version, room_id=None, session_id=None
+    ):
+        """Bulk delete the E2E room keys for a given backup, optionally filtered to a given
+        room or a given session.
+
+        Args:
+            user_id(str): the user whose backup we're deleting from
+            version(str): the version ID of the backup for the set of keys we're deleting
+            room_id(str): Optional. the ID of the room whose keys we're deleting, if any.
+                If not specified, we delete the keys for all the rooms in the backup.
+            session_id(str): Optional. the session whose room_key we're querying, if any.
+                If specified, we also require the room_id to be specified.
+                If not specified, we delete all the keys in this version of
+                the backup (or for the specified room)
+
+        Returns:
+            A deferred of the deletion transaction
+        """
+
+        keyvalues = {
+            "user_id": user_id,
+            "version": version,
+        }
+        if room_id:
+            keyvalues['room_id'] = room_id
+            if session_id:
+                keyvalues['session_id'] = session_id
+
+        yield self._simple_delete(
+            table="e2e_room_keys",
+            keyvalues=keyvalues,
+            desc="delete_e2e_room_keys",
+        )
+
+    @staticmethod
+    def _get_current_version(txn, user_id):
+        txn.execute(
+            "SELECT MAX(version) FROM e2e_room_keys_versions "
+            "WHERE user_id=? AND deleted=0",
+            (user_id,)
+        )
+        row = txn.fetchone()
+        if not row:
+            raise StoreError(404, 'No current backup version')
+        return row[0]
+
+    def get_e2e_room_keys_version_info(self, user_id, version=None):
+        """Get info metadata about a version of our room_keys backup.
+
+        Args:
+            user_id(str): the user whose backup we're querying
+            version(str): Optional. the version ID of the backup we're querying about
+                If missing, we return the information about the current version.
+        Raises:
+            StoreError: with code 404 if there are no e2e_room_keys_versions present
+        Returns:
+            A deferred dict giving the info metadata for this backup version, with
+            fields including:
+                version(str)
+                algorithm(str)
+                auth_data(object): opaque dict supplied by the client
+        """
+
+        def _get_e2e_room_keys_version_info_txn(txn):
+            if version is None:
+                this_version = self._get_current_version(txn, user_id)
+            else:
+                try:
+                    this_version = int(version)
+                except ValueError:
+                    # Our versions are all ints so if we can't convert it to an integer,
+                    # it isn't there.
+                    raise StoreError(404, "No row found")
+
+            result = self._simple_select_one_txn(
+                txn,
+                table="e2e_room_keys_versions",
+                keyvalues={
+                    "user_id": user_id,
+                    "version": this_version,
+                    "deleted": 0,
+                },
+                retcols=(
+                    "version",
+                    "algorithm",
+                    "auth_data",
+                ),
+            )
+            result["auth_data"] = json.loads(result["auth_data"])
+            result["version"] = str(result["version"])
+            return result
+
+        return self.runInteraction(
+            "get_e2e_room_keys_version_info",
+            _get_e2e_room_keys_version_info_txn
+        )
+
+    def create_e2e_room_keys_version(self, user_id, info):
+        """Atomically creates a new version of this user's e2e_room_keys store
+        with the given version info.
+
+        Args:
+            user_id(str): the user whose backup we're creating a version
+            info(dict): the info about the backup version to be created
+
+        Returns:
+            A deferred string for the newly created version ID
+        """
+
+        def _create_e2e_room_keys_version_txn(txn):
+            txn.execute(
+                "SELECT MAX(version) FROM e2e_room_keys_versions WHERE user_id=?",
+                (user_id,)
+            )
+            current_version = txn.fetchone()[0]
+            if current_version is None:
+                current_version = '0'
+
+            new_version = str(int(current_version) + 1)
+
+            self._simple_insert_txn(
+                txn,
+                table="e2e_room_keys_versions",
+                values={
+                    "user_id": user_id,
+                    "version": new_version,
+                    "algorithm": info["algorithm"],
+                    "auth_data": json.dumps(info["auth_data"]),
+                },
+            )
+
+            return new_version
+
+        return self.runInteraction(
+            "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn
+        )
+
+    def delete_e2e_room_keys_version(self, user_id, version=None):
+        """Delete a given backup version of the user's room keys.
+        Doesn't delete their actual key data.
+
+        Args:
+            user_id(str): the user whose backup version we're deleting
+            version(str): Optional. the version ID of the backup version we're deleting
+                If missing, we delete the current backup version info.
+        Raises:
+            StoreError: with code 404 if there are no e2e_room_keys_versions present,
+                or if the version requested doesn't exist.
+        """
+
+        def _delete_e2e_room_keys_version_txn(txn):
+            if version is None:
+                this_version = self._get_current_version(txn, user_id)
+            else:
+                this_version = version
+
+            return self._simple_update_one_txn(
+                txn,
+                table="e2e_room_keys_versions",
+                keyvalues={
+                    "user_id": user_id,
+                    "version": this_version,
+                },
+                updatevalues={
+                    "deleted": 1,
+                }
+            )
+
+        return self.runInteraction(
+            "delete_e2e_room_keys_version",
+            _delete_e2e_room_keys_version_txn
+        )
diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py
index 1f1721e820..2a0f6cfca9 100644
--- a/synapse/storage/end_to_end_keys.py
+++ b/synapse/storage/end_to_end_keys.py
@@ -40,7 +40,10 @@ class EndToEndKeyStore(SQLBaseStore):
                 allow_none=True,
             )
 
-            new_key_json = encode_canonical_json(device_keys)
+            # In py3 we need old_key_json to match new_key_json type. The DB
+            # returns unicode while encode_canonical_json returns bytes.
+            new_key_json = encode_canonical_json(device_keys).decode("utf-8")
+
             if old_key_json == new_key_json:
                 return False
 
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 24345b20a6..d3b9dea1d6 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -376,33 +376,25 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
 
     @defer.inlineCallbacks
     def get_missing_events(self, room_id, earliest_events, latest_events,
-                           limit, min_depth):
+                           limit):
         ids = yield self.runInteraction(
             "get_missing_events",
             self._get_missing_events,
-            room_id, earliest_events, latest_events, limit, min_depth
+            room_id, earliest_events, latest_events, limit,
         )
-
         events = yield self._get_events(ids)
-
-        events = sorted(
-            [ev for ev in events if ev.depth >= min_depth],
-            key=lambda e: e.depth,
-        )
-
-        defer.returnValue(events[:limit])
+        defer.returnValue(events)
 
     def _get_missing_events(self, txn, room_id, earliest_events, latest_events,
-                            limit, min_depth):
-
-        earliest_events = set(earliest_events)
-        front = set(latest_events) - earliest_events
+                            limit):
 
-        event_results = set()
+        seen_events = set(earliest_events)
+        front = set(latest_events) - seen_events
+        event_results = []
 
         query = (
             "SELECT prev_event_id FROM event_edges "
-            "WHERE event_id = ? AND is_state = ? "
+            "WHERE room_id = ? AND event_id = ? AND is_state = ? "
             "LIMIT ?"
         )
 
@@ -411,18 +403,20 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
             for event_id in front:
                 txn.execute(
                     query,
-                    (event_id, False, limit - len(event_results))
+                    (room_id, event_id, False, limit - len(event_results))
                 )
 
-                for e_id, in txn:
-                    new_front.add(e_id)
+                new_results = set(t[0] for t in txn) - seen_events
 
-            new_front -= earliest_events
-            new_front -= event_results
+                new_front |= new_results
+                seen_events |= new_results
+                event_results.extend(new_results)
 
             front = new_front
-            event_results |= new_front
 
+        # we built the list working backwards from latest_events; we now need to
+        # reverse it so that the events are approximately chronological.
+        event_results.reverse()
         return event_results
 
 
@@ -483,7 +477,7 @@ class EventFederationStore(EventFederationWorkerStore):
                     "is_state": False,
                 }
                 for ev in events
-                for e_id, _ in ev.prev_events
+                for e_id in ev.prev_event_ids()
             ],
         )
 
@@ -516,7 +510,7 @@ class EventFederationStore(EventFederationWorkerStore):
 
         txn.executemany(query, [
             (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False)
-            for ev in events for e_id, _ in ev.prev_events
+            for ev in events for e_id in ev.prev_event_ids()
             if not ev.internal_metadata.is_outlier()
         ])
 
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index e7487311ce..2047110b1d 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -34,10 +34,13 @@ from synapse.api.errors import SynapseError
 from synapse.events import EventBase  # noqa: F401
 from synapse.events.snapshot import EventContext  # noqa: F401
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.state import StateResolutionStore
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.event_federation import EventFederationStore
 from synapse.storage.events_worker import EventsWorkerStore
+from synapse.storage.state import StateGroupWorkerStore
 from synapse.types import RoomStreamToken, get_domain_from_id
+from synapse.util import batch_iter
 from synapse.util.async_helpers import ObservableDeferred
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
 from synapse.util.frozenutils import frozendict_json_encoder
@@ -203,7 +206,8 @@ def _retry_on_integrity_error(func):
 
 # inherits from EventFederationStore so that we can call _update_backward_extremities
 # and _handle_mult_prev_events (though arguably those could both be moved in here)
-class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore):
+class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore,
+                  BackgroundUpdateStore):
     EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
     EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
 
@@ -386,12 +390,10 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                             )
 
                         for room_id, ev_ctx_rm in iteritems(events_by_room):
-                            # Work out new extremities by recursively adding and removing
-                            # the new events.
                             latest_event_ids = yield self.get_latest_event_ids_in_room(
                                 room_id
                             )
-                            new_latest_event_ids = yield self._calculate_new_extremeties(
+                            new_latest_event_ids = yield self._calculate_new_extremities(
                                 room_id, ev_ctx_rm, latest_event_ids
                             )
 
@@ -400,6 +402,12 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                                 # No change in extremities, so no change in state
                                 continue
 
+                            # there should always be at least one forward extremity.
+                            # (except during the initial persistence of the send_join
+                            # results, in which case there will be no existing
+                            # extremities, so we'll `continue` above and skip this bit.)
+                            assert new_latest_event_ids, "No forward extremities left!"
+
                             new_forward_extremeties[room_id] = new_latest_event_ids
 
                             len_1 = (
@@ -408,7 +416,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                             )
                             if len_1:
                                 all_single_prev_not_state = all(
-                                    len(event.prev_events) == 1
+                                    len(event.prev_event_ids()) == 1
                                     and not event.is_state()
                                     for event, ctx in ev_ctx_rm
                                 )
@@ -432,7 +440,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                                 # guess this by looking at the prev_events and checking
                                 # if they match the current forward extremities.
                                 for ev, _ in ev_ctx_rm:
-                                    prev_event_ids = set(e for e, _ in ev.prev_events)
+                                    prev_event_ids = set(ev.prev_event_ids())
                                     if latest_event_ids == prev_event_ids:
                                         state_delta_reuse_delta_counter.inc()
                                         break
@@ -517,44 +525,79 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                     )
 
     @defer.inlineCallbacks
-    def _calculate_new_extremeties(self, room_id, event_contexts, latest_event_ids):
-        """Calculates the new forward extremeties for a room given events to
+    def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids):
+        """Calculates the new forward extremities for a room given events to
         persist.
 
         Assumes that we are only persisting events for one room at a time.
         """
-        new_latest_event_ids = set(latest_event_ids)
-        # First, add all the new events to the list
-        new_latest_event_ids.update(
-            event.event_id for event, ctx in event_contexts
+
+        # we're only interested in new events which aren't outliers and which aren't
+        # being rejected.
+        new_events = [
+            event for event, ctx in event_contexts
             if not event.internal_metadata.is_outlier() and not ctx.rejected
+        ]
+
+        # start with the existing forward extremities
+        result = set(latest_event_ids)
+
+        # add all the new events to the list
+        result.update(
+            event.event_id for event in new_events
         )
-        # Now remove all events that are referenced by the to-be-added events
-        new_latest_event_ids.difference_update(
+
+        # Now remove all events which are prev_events of any of the new events
+        result.difference_update(
             e_id
-            for event, ctx in event_contexts
-            for e_id, _ in event.prev_events
-            if not event.internal_metadata.is_outlier() and not ctx.rejected
+            for event in new_events
+            for e_id in event.prev_event_ids()
         )
 
-        # And finally remove any events that are referenced by previously added
-        # events.
-        rows = yield self._simple_select_many_batch(
-            table="event_edges",
-            column="prev_event_id",
-            iterable=list(new_latest_event_ids),
-            retcols=["prev_event_id"],
-            keyvalues={
-                "is_state": False,
-            },
-            desc="_calculate_new_extremeties",
-        )
+        # Finally, remove any events which are prev_events of any existing events.
+        existing_prevs = yield self._get_events_which_are_prevs(result)
+        result.difference_update(existing_prevs)
 
-        new_latest_event_ids.difference_update(
-            row["prev_event_id"] for row in rows
-        )
+        defer.returnValue(result)
 
-        defer.returnValue(new_latest_event_ids)
+    @defer.inlineCallbacks
+    def _get_events_which_are_prevs(self, event_ids):
+        """Filter the supplied list of event_ids to get those which are prev_events of
+        existing (non-outlier/rejected) events.
+
+        Args:
+            event_ids (Iterable[str]): event ids to filter
+
+        Returns:
+            Deferred[List[str]]: filtered event ids
+        """
+        results = []
+
+        def _get_events(txn, batch):
+            sql = """
+            SELECT prev_event_id
+            FROM event_edges
+                INNER JOIN events USING (event_id)
+                LEFT JOIN rejections USING (event_id)
+            WHERE
+                prev_event_id IN (%s)
+                AND NOT events.outlier
+                AND rejections.event_id IS NULL
+            """ % (
+                ",".join("?" for _ in batch),
+            )
+
+            txn.execute(sql, batch)
+            results.extend(r[0] for r in txn)
+
+        for chunk in batch_iter(event_ids, 100):
+            yield self.runInteraction(
+                "_get_events_which_are_prevs",
+                _get_events,
+                chunk,
+            )
+
+        defer.returnValue(results)
 
     @defer.inlineCallbacks
     def _get_new_state_after_events(self, room_id, events_context, old_latest_event_ids,
@@ -586,10 +629,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
             the new current state is only returned if we've already calculated
             it.
         """
-
-        if not new_latest_event_ids:
-            return
-
         # map from state_group to ((type, key) -> event_id) state map
         state_groups_map = {}
 
@@ -695,11 +734,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
 
         # Ok, we need to defer to the state handler to resolve our state sets.
 
-        def get_events(ev_ids):
-            return self.get_events(
-                ev_ids, get_prev_content=False, check_redacted=False,
-            )
-
         state_groups = {
             sg: state_groups_map[sg] for sg in new_state_groups
         }
@@ -709,7 +743,8 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
 
         logger.debug("calling resolve_state_groups from preserve_events")
         res = yield self._state_resolution_handler.resolve_state_groups(
-            room_id, room_version, state_groups, events_map, get_events
+            room_id, room_version, state_groups, events_map,
+            state_res_store=StateResolutionStore(self)
         )
 
         defer.returnValue((res.state, None))
@@ -818,6 +853,27 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         # Insert into event_to_state_groups.
         self._store_event_state_mappings_txn(txn, events_and_contexts)
 
+        # We want to store event_auth mappings for rejected events, as they're
+        # used in state res v2.
+        # This is only necessary if the rejected event appears in an accepted
+        # event's auth chain, but its easier for now just to store them (and
+        # it doesn't take much storage compared to storing the entire event
+        # anyway).
+        self._simple_insert_many_txn(
+            txn,
+            table="event_auth",
+            values=[
+                {
+                    "event_id": event.event_id,
+                    "room_id": event.room_id,
+                    "auth_id": auth_id,
+                }
+                for event, _ in events_and_contexts
+                for auth_id in event.auth_event_ids()
+                if event.is_state()
+            ],
+        )
+
         # _store_rejected_events_txn filters out any events which were
         # rejected, and returns the filtered list.
         events_and_contexts = self._store_rejected_events_txn(
@@ -1293,21 +1349,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                     txn, event.room_id, event.redacts
                 )
 
-        self._simple_insert_many_txn(
-            txn,
-            table="event_auth",
-            values=[
-                {
-                    "event_id": event.event_id,
-                    "room_id": event.room_id,
-                    "auth_id": auth_id,
-                }
-                for event, _ in events_and_contexts
-                for auth_id, _ in event.auth_events
-                if event.is_state()
-            ],
-        )
-
         # Update the event_forward_extremities, event_backward_extremities and
         # event_edges tables.
         self._handle_mult_prev_events(
@@ -1995,62 +2036,44 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
 
         logger.info("[purge] finding redundant state groups")
 
-        # Get all state groups that are only referenced by events that are
-        # to be deleted.
-        # This works by first getting state groups that we may want to delete,
-        # joining against event_to_state_groups to get events that use that
-        # state group, then left joining against events_to_purge again. Any
-        # state group where the left join produce *no nulls* are referenced
-        # only by events that are going to be purged.
+        # Get all state groups that are referenced by events that are to be
+        # deleted. We then go and check if they are referenced by other events
+        # or state groups, and if not we delete them.
         txn.execute("""
-            SELECT state_group FROM
-            (
-                SELECT DISTINCT state_group FROM events_to_purge
-                INNER JOIN event_to_state_groups USING (event_id)
-            ) AS sp
-            INNER JOIN event_to_state_groups USING (state_group)
-            LEFT JOIN events_to_purge AS ep USING (event_id)
-            GROUP BY state_group
-            HAVING SUM(CASE WHEN ep.event_id IS NULL THEN 1 ELSE 0 END) = 0
+            SELECT DISTINCT state_group FROM events_to_purge
+            INNER JOIN event_to_state_groups USING (event_id)
         """)
 
-        state_rows = txn.fetchall()
-        logger.info("[purge] found %i redundant state groups", len(state_rows))
-
-        # make a set of the redundant state groups, so that we can look them up
-        # efficiently
-        state_groups_to_delete = set([sg for sg, in state_rows])
-
-        # Now we get all the state groups that rely on these state groups
-        logger.info("[purge] finding state groups which depend on redundant"
-                    " state groups")
-        remaining_state_groups = []
-        for i in range(0, len(state_rows), 100):
-            chunk = [sg for sg, in state_rows[i:i + 100]]
-            # look for state groups whose prev_state_group is one we are about
-            # to delete
-            rows = self._simple_select_many_txn(
-                txn,
-                table="state_group_edges",
-                column="prev_state_group",
-                iterable=chunk,
-                retcols=["state_group"],
-                keyvalues={},
-            )
-            remaining_state_groups.extend(
-                row["state_group"] for row in rows
+        referenced_state_groups = set(sg for sg, in txn)
+        logger.info(
+            "[purge] found %i referenced state groups",
+            len(referenced_state_groups),
+        )
+
+        logger.info("[purge] finding state groups that can be deleted")
 
-                # exclude state groups we are about to delete: no point in
-                # updating them
-                if row["state_group"] not in state_groups_to_delete
+        state_groups_to_delete, remaining_state_groups = (
+            self._find_unreferenced_groups_during_purge(
+                txn, referenced_state_groups,
             )
+        )
+
+        logger.info(
+            "[purge] found %i state groups to delete",
+            len(state_groups_to_delete),
+        )
+
+        logger.info(
+            "[purge] de-delta-ing %i remaining state groups",
+            len(remaining_state_groups),
+        )
 
         # Now we turn the state groups that reference to-be-deleted state
         # groups to non delta versions.
         for sg in remaining_state_groups:
             logger.info("[purge] de-delta-ing remaining state group %s", sg)
             curr_state = self._get_state_groups_from_groups_txn(
-                txn, [sg], types=None
+                txn, [sg],
             )
             curr_state = curr_state[sg]
 
@@ -2088,11 +2111,11 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         logger.info("[purge] removing redundant state groups")
         txn.executemany(
             "DELETE FROM state_groups_state WHERE state_group = ?",
-            state_rows
+            ((sg,) for sg in state_groups_to_delete),
         )
         txn.executemany(
             "DELETE FROM state_groups WHERE id = ?",
-            state_rows
+            ((sg,) for sg in state_groups_to_delete),
         )
 
         logger.info("[purge] removing events from event_to_state_groups")
@@ -2188,6 +2211,85 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
 
         logger.info("[purge] done")
 
+    def _find_unreferenced_groups_during_purge(self, txn, state_groups):
+        """Used when purging history to figure out which state groups can be
+        deleted and which need to be de-delta'ed (due to one of its prev groups
+        being scheduled for deletion).
+
+        Args:
+            txn
+            state_groups (set[int]): Set of state groups referenced by events
+                that are going to be deleted.
+
+        Returns:
+            tuple[set[int], set[int]]: The set of state groups that can be
+            deleted and the set of state groups that need to be de-delta'ed
+        """
+        # Graph of state group -> previous group
+        graph = {}
+
+        # Set of events that we have found to be referenced by events
+        referenced_groups = set()
+
+        # Set of state groups we've already seen
+        state_groups_seen = set(state_groups)
+
+        # Set of state groups to handle next.
+        next_to_search = set(state_groups)
+        while next_to_search:
+            # We bound size of groups we're looking up at once, to stop the
+            # SQL query getting too big
+            if len(next_to_search) < 100:
+                current_search = next_to_search
+                next_to_search = set()
+            else:
+                current_search = set(itertools.islice(next_to_search, 100))
+                next_to_search -= current_search
+
+            # Check if state groups are referenced
+            sql = """
+                SELECT DISTINCT state_group FROM event_to_state_groups
+                LEFT JOIN events_to_purge AS ep USING (event_id)
+                WHERE state_group IN (%s) AND ep.event_id IS NULL
+            """ % (",".join("?" for _ in current_search),)
+            txn.execute(sql, list(current_search))
+
+            referenced = set(sg for sg, in txn)
+            referenced_groups |= referenced
+
+            # We don't continue iterating up the state group graphs for state
+            # groups that are referenced.
+            current_search -= referenced
+
+            rows = self._simple_select_many_txn(
+                txn,
+                table="state_group_edges",
+                column="prev_state_group",
+                iterable=current_search,
+                keyvalues={},
+                retcols=("prev_state_group", "state_group",),
+            )
+
+            prevs = set(row["state_group"] for row in rows)
+            # We don't bother re-handling groups we've already seen
+            prevs -= state_groups_seen
+            next_to_search |= prevs
+            state_groups_seen |= prevs
+
+            for row in rows:
+                # Note: Each state group can have at most one prev group
+                graph[row["state_group"]] = row["prev_state_group"]
+
+        to_delete = state_groups_seen - referenced_groups
+
+        to_dedelta = set()
+        for sg in referenced_groups:
+            prev_sg = graph.get(sg)
+            if prev_sg and prev_sg in to_delete:
+                to_dedelta.add(sg)
+
+        return to_delete, to_dedelta
+
     @defer.inlineCallbacks
     def is_event_after(self, event_id1, event_id2):
         """Returns True if event_id1 is after event_id2 in the stream
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index a1331c1a61..8af17921e3 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)
 # py2 sqlite has buffer hardcoded as only binary type, so we must use it,
 # despite being deprecated and removed in favor of memoryview
 if six.PY2:
-    db_binary_type = buffer
+    db_binary_type = six.moves.builtins.buffer
 else:
     db_binary_type = memoryview
 
diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py
index 0fe8c8e24c..479e01ddc1 100644
--- a/synapse/storage/monthly_active_users.py
+++ b/synapse/storage/monthly_active_users.py
@@ -33,19 +33,30 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         self._clock = hs.get_clock()
         self.hs = hs
         self.reserved_users = ()
+        # Do not add more reserved users than the total allowable number
+        self._new_transaction(
+            dbconn, "initialise_mau_threepids", [], [],
+            self._initialise_reserved_users,
+            hs.config.mau_limits_reserved_threepids[:self.hs.config.max_mau_value],
+        )
 
-    @defer.inlineCallbacks
-    def initialise_reserved_users(self, threepids):
-        store = self.hs.get_datastore()
+    def _initialise_reserved_users(self, txn, threepids):
+        """Ensures that reserved threepids are accounted for in the MAU table, should
+        be called on start up.
+
+        Args:
+            txn (cursor):
+            threepids (list[dict]): List of threepid dicts to reserve
+        """
         reserved_user_list = []
 
-        # Do not add more reserved users than the total allowable number
-        for tp in threepids[:self.hs.config.max_mau_value]:
-            user_id = yield store.get_user_id_by_threepid(
+        for tp in threepids:
+            user_id = self.get_user_id_by_threepid_txn(
+                txn,
                 tp["medium"], tp["address"]
             )
             if user_id:
-                yield self.upsert_monthly_active_user(user_id)
+                self.upsert_monthly_active_user_txn(txn, user_id)
                 reserved_user_list.append(user_id)
             else:
                 logger.warning(
@@ -55,8 +66,7 @@ class MonthlyActiveUsersStore(SQLBaseStore):
 
     @defer.inlineCallbacks
     def reap_monthly_active_users(self):
-        """
-        Cleans out monthly active user table to ensure that no stale
+        """Cleans out monthly active user table to ensure that no stale
         entries exist.
 
         Returns:
@@ -87,37 +97,38 @@ class MonthlyActiveUsersStore(SQLBaseStore):
 
             txn.execute(sql, query_args)
 
-            # If MAU user count still exceeds the MAU threshold, then delete on
-            # a least recently active basis.
-            # Note it is not possible to write this query using OFFSET due to
-            # incompatibilities in how sqlite and postgres support the feature.
-            # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
-            # While Postgres does not require 'LIMIT', but also does not support
-            # negative LIMIT values. So there is no way to write it that both can
-            # support
-            safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
-            # Must be greater than zero for postgres
-            safe_guard = safe_guard if safe_guard > 0 else 0
-            query_args = [safe_guard]
-
-            base_sql = """
-                DELETE FROM monthly_active_users
-                WHERE user_id NOT IN (
-                    SELECT user_id FROM monthly_active_users
-                    ORDER BY timestamp DESC
-                    LIMIT ?
+            if self.hs.config.limit_usage_by_mau:
+                # If MAU user count still exceeds the MAU threshold, then delete on
+                # a least recently active basis.
+                # Note it is not possible to write this query using OFFSET due to
+                # incompatibilities in how sqlite and postgres support the feature.
+                # sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
+                # While Postgres does not require 'LIMIT', but also does not support
+                # negative LIMIT values. So there is no way to write it that both can
+                # support
+                safe_guard = self.hs.config.max_mau_value - len(self.reserved_users)
+                # Must be greater than zero for postgres
+                safe_guard = safe_guard if safe_guard > 0 else 0
+                query_args = [safe_guard]
+
+                base_sql = """
+                    DELETE FROM monthly_active_users
+                    WHERE user_id NOT IN (
+                        SELECT user_id FROM monthly_active_users
+                        ORDER BY timestamp DESC
+                        LIMIT ?
+                        )
+                    """
+                # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
+                # when len(reserved_users) == 0. Works fine on sqlite.
+                if len(self.reserved_users) > 0:
+                    query_args.extend(self.reserved_users)
+                    sql = base_sql + """ AND user_id NOT IN ({})""".format(
+                        ','.join(questionmarks)
                     )
-                """
-            # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
-            # when len(reserved_users) == 0. Works fine on sqlite.
-            if len(self.reserved_users) > 0:
-                query_args.extend(self.reserved_users)
-                sql = base_sql + """ AND user_id NOT IN ({})""".format(
-                    ','.join(questionmarks)
-                )
-            else:
-                sql = base_sql
-            txn.execute(sql, query_args)
+                else:
+                    sql = base_sql
+                txn.execute(sql, query_args)
 
         yield self.runInteraction("reap_monthly_active_users", _reap_users)
         # It seems poor to invalidate the whole cache, Postgres supports
@@ -165,19 +176,44 @@ class MonthlyActiveUsersStore(SQLBaseStore):
 
     @defer.inlineCallbacks
     def upsert_monthly_active_user(self, user_id):
+        """Updates or inserts the user into the monthly active user table, which
+        is used to track the current MAU usage of the server
+
+        Args:
+            user_id (str): user to add/update
         """
-            Updates or inserts monthly active user member
-            Arguments:
-                user_id (str): user to add/update
-            Deferred[bool]: True if a new entry was created, False if an
-                existing one was updated.
+        is_insert = yield self.runInteraction(
+            "upsert_monthly_active_user", self.upsert_monthly_active_user_txn,
+            user_id
+        )
+
+        if is_insert:
+            self.user_last_seen_monthly_active.invalidate((user_id,))
+            self.get_monthly_active_count.invalidate(())
+
+    def upsert_monthly_active_user_txn(self, txn, user_id):
+        """Updates or inserts monthly active user member
+
+        Note that, after calling this method, it will generally be necessary
+        to invalidate the caches on user_last_seen_monthly_active and
+        get_monthly_active_count. We can't do that here, because we are running
+        in a database thread rather than the main thread, and we can't call
+        txn.call_after because txn may not be a LoggingTransaction.
+
+        Args:
+            txn (cursor):
+            user_id (str): user to add/update
+
+        Returns:
+            bool: True if a new entry was created, False if an
+            existing one was updated.
         """
         # Am consciously deciding to lock the table on the basis that is ought
         # never be a big table and alternative approaches (batching multiple
         # upserts into a single txn) introduced a lot of extra complexity.
         # See https://github.com/matrix-org/synapse/issues/3854 for more
-        is_insert = yield self._simple_upsert(
-            desc="upsert_monthly_active_user",
+        is_insert = self._simple_upsert_txn(
+            txn,
             table="monthly_active_users",
             keyvalues={
                 "user_id": user_id,
@@ -186,9 +222,8 @@ class MonthlyActiveUsersStore(SQLBaseStore):
                 "timestamp": int(self._clock.time_msec()),
             },
         )
-        if is_insert:
-            self.user_last_seen_monthly_active.invalidate((user_id,))
-            self.get_monthly_active_count.invalidate(())
+
+        return is_insert
 
     @cached(num_args=1)
     def user_last_seen_monthly_active(self, user_id):
@@ -219,8 +254,7 @@ class MonthlyActiveUsersStore(SQLBaseStore):
         Args:
             user_id(str): the user_id to query
         """
-
-        if self.hs.config.limit_usage_by_mau:
+        if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only:
             # Trial users and guests should not be included as part of MAU group
             is_guest = yield self.is_guest(user_id)
             if is_guest:
@@ -238,8 +272,14 @@ class MonthlyActiveUsersStore(SQLBaseStore):
             # but only update if we have not previously seen the user for
             # LAST_SEEN_GRANULARITY ms
             if last_seen_timestamp is None:
-                count = yield self.get_monthly_active_count()
-                if count < self.hs.config.max_mau_value:
+                # In the case where mau_stats_only is True and limit_usage_by_mau is
+                # False, there is no point in checking get_monthly_active_count - it
+                # adds no value and will break the logic if max_mau_value is exceeded.
+                if not self.hs.config.limit_usage_by_mau:
                     yield self.upsert_monthly_active_user(user_id)
+                else:
+                    count = yield self.get_monthly_active_count()
+                    if count < self.hs.config.max_mau_value:
+                        yield self.upsert_monthly_active_user(user_id)
             elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
                 yield self.upsert_monthly_active_user(user_id)
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index b364719312..fa36daac52 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
 
 # Remember to update this number every time a change is made to database
 # schema files, so the users will be informed on server restarts.
-SCHEMA_VERSION = 51
+SCHEMA_VERSION = 53
 
 dir_path = os.path.abspath(os.path.dirname(__file__))
 
@@ -257,7 +257,7 @@ def _upgrade_existing_database(cur, current_version, applied_delta_files,
                 module.run_create(cur, database_engine)
                 if not is_empty:
                     module.run_upgrade(cur, database_engine, config=config)
-            elif ext == ".pyc":
+            elif ext == ".pyc" or file_name == "__pycache__":
                 # Sometimes .pyc files turn up anyway even though we've
                 # disabled their generation; e.g. from distribution package
                 # installers. Silently skip it
diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py
index c7987bfcdd..2743b52bad 100644
--- a/synapse/storage/pusher.py
+++ b/synapse/storage/pusher.py
@@ -29,7 +29,7 @@ from ._base import SQLBaseStore
 logger = logging.getLogger(__name__)
 
 if six.PY2:
-    db_binary_type = buffer
+    db_binary_type = six.moves.builtins.buffer
 else:
     db_binary_type = memoryview
 
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 26b429e307..80d76bf9d7 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -474,17 +474,44 @@ class RegistrationStore(RegistrationWorkerStore,
 
     @defer.inlineCallbacks
     def get_user_id_by_threepid(self, medium, address):
-        ret = yield self._simple_select_one(
+        """Returns user id from threepid
+
+        Args:
+            medium (str): threepid medium e.g. email
+            address (str): threepid address e.g. me@example.com
+
+        Returns:
+            Deferred[str|None]: user id or None if no user id/threepid mapping exists
+        """
+        user_id = yield self.runInteraction(
+            "get_user_id_by_threepid", self.get_user_id_by_threepid_txn,
+            medium, address
+        )
+        defer.returnValue(user_id)
+
+    def get_user_id_by_threepid_txn(self, txn, medium, address):
+        """Returns user id from threepid
+
+        Args:
+            txn (cursor):
+            medium (str): threepid medium e.g. email
+            address (str): threepid address e.g. me@example.com
+
+        Returns:
+            str|None: user id or None if no user id/threepid mapping exists
+        """
+        ret = self._simple_select_one_txn(
+            txn,
             "user_threepids",
             {
                 "medium": medium,
                 "address": address
             },
-            ['user_id'], True, 'get_user_id_by_threepid'
+            ['user_id'], True
         )
         if ret:
-            defer.returnValue(ret['user_id'])
-        defer.returnValue(None)
+            return ret['user_id']
+        return None
 
     def user_delete_threepid(self, user_id, medium, address):
         return self._simple_delete(
@@ -567,7 +594,7 @@ class RegistrationStore(RegistrationWorkerStore,
         def _find_next_generated_user_id(txn):
             txn.execute("SELECT name FROM users")
 
-            regex = re.compile("^@(\d+):")
+            regex = re.compile(r"^@(\d+):")
 
             found = set()
 
diff --git a/synapse/storage/room.py b/synapse/storage/room.py
index 61013b8919..41c65e112a 100644
--- a/synapse/storage/room.py
+++ b/synapse/storage/room.py
@@ -47,7 +47,7 @@ class RoomWorkerStore(SQLBaseStore):
         Args:
             room_id (str): The ID of the room to retrieve.
         Returns:
-            A namedtuple containing the room information, or an empty list.
+            A dict containing the room information, or None if the room is unknown.
         """
         return self._simple_select_one(
             table="rooms",
diff --git a/synapse/storage/schema/delta/34/sent_txn_purge.py b/synapse/storage/schema/delta/34/sent_txn_purge.py
deleted file mode 100644
index 0ffab10b6f..0000000000
--- a/synapse/storage/schema/delta/34/sent_txn_purge.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from synapse.storage.engines import PostgresEngine
-
-logger = logging.getLogger(__name__)
-
-
-def run_create(cur, database_engine, *args, **kwargs):
-    if isinstance(database_engine, PostgresEngine):
-        cur.execute("TRUNCATE sent_transactions")
-    else:
-        cur.execute("DELETE FROM sent_transactions")
-
-    cur.execute("CREATE INDEX sent_transactions_ts ON sent_transactions(ts)")
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/delta/40/device_list_streams.sql b/synapse/storage/schema/delta/40/device_list_streams.sql
index 54841b3843..dd6dcb65f1 100644
--- a/synapse/storage/schema/delta/40/device_list_streams.sql
+++ b/synapse/storage/schema/delta/40/device_list_streams.sql
@@ -20,9 +20,6 @@ CREATE TABLE device_lists_remote_cache (
     content TEXT NOT NULL
 );
 
-CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id);
-
-
 -- The last update we got for a user. Empty if we're not receiving updates for
 -- that user.
 CREATE TABLE device_lists_remote_extremeties (
@@ -30,7 +27,11 @@ CREATE TABLE device_lists_remote_extremeties (
     stream_id TEXT NOT NULL
 );
 
-CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id);
+-- we used to create non-unique indexes on these tables, but as of update 52 we create
+-- unique indexes concurrently:
+--
+-- CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id);
+-- CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id);
 
 
 -- Stream of device lists updates. Includes both local and remotes
diff --git a/synapse/storage/schema/delta/51/e2e_room_keys.sql b/synapse/storage/schema/delta/51/e2e_room_keys.sql
new file mode 100644
index 0000000000..c0e66a697d
--- /dev/null
+++ b/synapse/storage/schema/delta/51/e2e_room_keys.sql
@@ -0,0 +1,39 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- users' optionally backed up encrypted e2e sessions
+CREATE TABLE e2e_room_keys (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    session_id TEXT NOT NULL,
+    version TEXT NOT NULL,
+    first_message_index INT,
+    forwarded_count INT,
+    is_verified BOOLEAN,
+    session_data TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
+
+-- the metadata for each generation of encrypted e2e session backups
+CREATE TABLE e2e_room_keys_versions (
+    user_id TEXT NOT NULL,
+    version TEXT NOT NULL,
+    algorithm TEXT NOT NULL,
+    auth_data TEXT NOT NULL,
+    deleted SMALLINT DEFAULT 0 NOT NULL
+);
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
diff --git a/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql
new file mode 100644
index 0000000000..91e03d13e1
--- /dev/null
+++ b/synapse/storage/schema/delta/52/add_event_to_state_group_index.sql
@@ -0,0 +1,19 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This is needed to efficiently check for unreferenced state groups during
+-- purge. Added events_to_state_group(state_group) index
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('event_to_state_groups_sg_index', '{}');
diff --git a/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql
new file mode 100644
index 0000000000..bfa49e6f92
--- /dev/null
+++ b/synapse/storage/schema/delta/52/device_list_streams_unique_idx.sql
@@ -0,0 +1,36 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- register a background update which will create a unique index on
+-- device_lists_remote_cache
+INSERT into background_updates (update_name, progress_json)
+    VALUES ('device_lists_remote_cache_unique_idx', '{}');
+
+-- and one on device_lists_remote_extremeties
+INSERT into background_updates (update_name, progress_json, depends_on)
+    VALUES (
+        'device_lists_remote_extremeties_unique_idx', '{}',
+
+        -- doesn't really depend on this, but we need to make sure both happen
+        -- before we drop the old indexes.
+        'device_lists_remote_cache_unique_idx'
+    );
+
+-- once they complete, we can drop the old indexes.
+INSERT into background_updates (update_name, progress_json, depends_on)
+    VALUES (
+        'drop_device_list_streams_non_unique_indexes', '{}',
+        'device_lists_remote_extremeties_unique_idx'
+    );
diff --git a/synapse/storage/schema/delta/52/e2e_room_keys.sql b/synapse/storage/schema/delta/52/e2e_room_keys.sql
new file mode 100644
index 0000000000..db687cccae
--- /dev/null
+++ b/synapse/storage/schema/delta/52/e2e_room_keys.sql
@@ -0,0 +1,53 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Change version column to an integer so we can do MAX() sensibly
+ */
+CREATE TABLE e2e_room_keys_versions_new (
+    user_id TEXT NOT NULL,
+    version BIGINT NOT NULL,
+    algorithm TEXT NOT NULL,
+    auth_data TEXT NOT NULL,
+    deleted SMALLINT DEFAULT 0 NOT NULL
+);
+
+INSERT INTO e2e_room_keys_versions_new
+    SELECT user_id, CAST(version as BIGINT), algorithm, auth_data, deleted FROM e2e_room_keys_versions;
+
+DROP TABLE e2e_room_keys_versions;
+ALTER TABLE e2e_room_keys_versions_new RENAME TO e2e_room_keys_versions;
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
+
+/* Change e2e_rooms_keys to match
+ */
+CREATE TABLE e2e_room_keys_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    session_id TEXT NOT NULL,
+    version BIGINT NOT NULL,
+    first_message_index INT,
+    forwarded_count INT,
+    is_verified BOOLEAN,
+    session_data TEXT NOT NULL
+);
+
+INSERT INTO e2e_room_keys_new
+    SELECT user_id, room_id, session_id, CAST(version as BIGINT), first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys;
+
+DROP TABLE e2e_room_keys;
+ALTER TABLE e2e_room_keys_new RENAME TO e2e_room_keys;
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
diff --git a/synapse/storage/schema/delta/11/v11.sql b/synapse/storage/schema/delta/53/drop_sent_transactions.sql
index e7b4f90127..e372f5a44a 100644
--- a/synapse/storage/schema/delta/11/v11.sql
+++ b/synapse/storage/schema/delta/53/drop_sent_transactions.sql
@@ -1,4 +1,4 @@
-/* Copyright 2015, 2016 OpenMarket Ltd
+/* Copyright 2018 New Vector Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,4 +13,4 @@
  * limitations under the License.
  */
 
-CREATE INDEX IF NOT EXISTS sent_transaction_txn_id ON sent_transactions(transaction_id);
\ No newline at end of file
+DROP TABLE IF EXISTS sent_transactions;
diff --git a/synapse/storage/schema/full_schemas/11/transactions.sql b/synapse/storage/schema/full_schemas/11/transactions.sql
index a3f4a0a790..f6a058832e 100644
--- a/synapse/storage/schema/full_schemas/11/transactions.sql
+++ b/synapse/storage/schema/full_schemas/11/transactions.sql
@@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions(
 
 CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0;
 
-
--- Stores what transactions we've sent, what their response was (if we got one) and whether we have
--- since referenced the transaction in another outgoing transaction
-CREATE TABLE IF NOT EXISTS sent_transactions(
-    id INTEGER PRIMARY KEY AUTOINCREMENT, -- This is used to apply insertion ordering
-    transaction_id TEXT,
-    destination TEXT,
-    response_code INTEGER DEFAULT 0,
-    response_json TEXT,
-    ts BIGINT
-);
-
-CREATE INDEX sent_transaction_dest ON sent_transactions(destination);
-CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id);
--- So that we can do an efficient look up of all transactions that have yet to be successfully
--- sent.
-CREATE INDEX sent_transaction_sent ON sent_transactions(response_code);
-
-
 -- For sent transactions only.
 CREATE TABLE IF NOT EXISTS transaction_id_to_pdu(
     transaction_id INTEGER,
diff --git a/synapse/storage/schema/full_schemas/16/transactions.sql b/synapse/storage/schema/full_schemas/16/transactions.sql
index 14b67cce25..17e67bedac 100644
--- a/synapse/storage/schema/full_schemas/16/transactions.sql
+++ b/synapse/storage/schema/full_schemas/16/transactions.sql
@@ -25,25 +25,6 @@ CREATE TABLE IF NOT EXISTS received_transactions(
 
 CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0;
 
-
--- Stores what transactions we've sent, what their response was (if we got one) and whether we have
--- since referenced the transaction in another outgoing transaction
-CREATE TABLE IF NOT EXISTS sent_transactions(
-    id BIGINT PRIMARY KEY, -- This is used to apply insertion ordering
-    transaction_id TEXT,
-    destination TEXT,
-    response_code INTEGER DEFAULT 0,
-    response_json TEXT,
-    ts BIGINT
-);
-
-CREATE INDEX sent_transaction_dest ON sent_transactions(destination);
-CREATE INDEX sent_transaction_txn_id ON sent_transactions(transaction_id);
--- So that we can do an efficient look up of all transactions that have yet to be successfully
--- sent.
-CREATE INDEX sent_transaction_sent ON sent_transactions(response_code);
-
-
 -- For sent transactions only.
 CREATE TABLE IF NOT EXISTS transaction_id_to_pdu(
     transaction_id INTEGER,
diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index d5b5df93e6..c6420b2374 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -45,6 +45,10 @@ class SearchStore(BackgroundUpdateStore):
 
     def __init__(self, db_conn, hs):
         super(SearchStore, self).__init__(db_conn, hs)
+
+        if not hs.config.enable_search:
+            return
+
         self.register_background_update_handler(
             self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search
         )
@@ -316,6 +320,8 @@ class SearchStore(BackgroundUpdateStore):
             entries (iterable[SearchEntry]):
                 entries to be added to the table
         """
+        if not self.hs.config.enable_search:
+            return
         if isinstance(self.database_engine, PostgresEngine):
             sql = (
                 "INSERT INTO event_search"
diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py
index 5623391f6e..158e9dbe7b 100644
--- a/synapse/storage/signatures.py
+++ b/synapse/storage/signatures.py
@@ -27,7 +27,7 @@ from ._base import SQLBaseStore
 # py2 sqlite has buffer hardcoded as only binary type, so we must use it,
 # despite being deprecated and removed in favor of memoryview
 if six.PY2:
-    db_binary_type = buffer
+    db_binary_type = six.moves.builtins.buffer
 else:
     db_binary_type = memoryview
 
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 3f4cbd61c4..d737bd6778 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -19,6 +19,8 @@ from collections import namedtuple
 from six import iteritems, itervalues
 from six.moves import range
 
+import attr
+
 from twisted.internet import defer
 
 from synapse.api.constants import EventTypes
@@ -48,6 +50,318 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt
         return len(self.delta_ids) if self.delta_ids else 0
 
 
+@attr.s(slots=True)
+class StateFilter(object):
+    """A filter used when querying for state.
+
+    Attributes:
+        types (dict[str, set[str]|None]): Map from type to set of state keys (or
+            None). This specifies which state_keys for the given type to fetch
+            from the DB. If None then all events with that type are fetched. If
+            the set is empty then no events with that type are fetched.
+        include_others (bool): Whether to fetch events with types that do not
+            appear in `types`.
+    """
+
+    types = attr.ib()
+    include_others = attr.ib(default=False)
+
+    def __attrs_post_init__(self):
+        # If `include_others` is set we canonicalise the filter by removing
+        # wildcards from the types dictionary
+        if self.include_others:
+            self.types = {
+                k: v for k, v in iteritems(self.types)
+                if v is not None
+            }
+
+    @staticmethod
+    def all():
+        """Creates a filter that fetches everything.
+
+        Returns:
+            StateFilter
+        """
+        return StateFilter(types={}, include_others=True)
+
+    @staticmethod
+    def none():
+        """Creates a filter that fetches nothing.
+
+        Returns:
+            StateFilter
+        """
+        return StateFilter(types={}, include_others=False)
+
+    @staticmethod
+    def from_types(types):
+        """Creates a filter that only fetches the given types
+
+        Args:
+            types (Iterable[tuple[str, str|None]]): A list of type and state
+                keys to fetch. A state_key of None fetches everything for
+                that type
+
+        Returns:
+            StateFilter
+        """
+        type_dict = {}
+        for typ, s in types:
+            if typ in type_dict:
+                if type_dict[typ] is None:
+                    continue
+
+            if s is None:
+                type_dict[typ] = None
+                continue
+
+            type_dict.setdefault(typ, set()).add(s)
+
+        return StateFilter(types=type_dict)
+
+    @staticmethod
+    def from_lazy_load_member_list(members):
+        """Creates a filter that returns all non-member events, plus the member
+        events for the given users
+
+        Args:
+            members (iterable[str]): Set of user IDs
+
+        Returns:
+            StateFilter
+        """
+        return StateFilter(
+            types={EventTypes.Member: set(members)},
+            include_others=True,
+        )
+
+    def return_expanded(self):
+        """Creates a new StateFilter where type wild cards have been removed
+        (except for memberships). The returned filter is a superset of the
+        current one, i.e. anything that passes the current filter will pass
+        the returned filter.
+
+        This helps the caching as the DictionaryCache knows if it has *all* the
+        state, but does not know if it has all of the keys of a particular type,
+        which makes wildcard lookups expensive unless we have a complete cache.
+        Hence, if we are doing a wildcard lookup, populate the cache fully so
+        that we can do an efficient lookup next time.
+
+        Note that since we have two caches, one for membership events and one for
+        other events, we can be a bit more clever than simply returning
+        `StateFilter.all()` if `has_wildcards()` is True.
+
+        We return a StateFilter where:
+            1. the list of membership events to return is the same
+            2. if there is a wildcard that matches non-member events we
+               return all non-member events
+
+        Returns:
+            StateFilter
+        """
+
+        if self.is_full():
+            # If we're going to return everything then there's nothing to do
+            return self
+
+        if not self.has_wildcards():
+            # If there are no wild cards, there's nothing to do
+            return self
+
+        if EventTypes.Member in self.types:
+            get_all_members = self.types[EventTypes.Member] is None
+        else:
+            get_all_members = self.include_others
+
+        has_non_member_wildcard = self.include_others or any(
+            state_keys is None
+            for t, state_keys in iteritems(self.types)
+            if t != EventTypes.Member
+        )
+
+        if not has_non_member_wildcard:
+            # If there are no non-member wild cards we can just return ourselves
+            return self
+
+        if get_all_members:
+            # We want to return everything.
+            return StateFilter.all()
+        else:
+            # We want to return all non-members, but only particular
+            # memberships
+            return StateFilter(
+                types={EventTypes.Member: self.types[EventTypes.Member]},
+                include_others=True,
+            )
+
+    def make_sql_filter_clause(self):
+        """Converts the filter to an SQL clause.
+
+        For example:
+
+            f = StateFilter.from_types([("m.room.create", "")])
+            clause, args = f.make_sql_filter_clause()
+            clause == "(type = ? AND state_key = ?)"
+            args == ['m.room.create', '']
+
+
+        Returns:
+            tuple[str, list]: The SQL string (may be empty) and arguments. An
+            empty SQL string is returned when the filter matches everything
+            (i.e. is "full").
+        """
+
+        where_clause = ""
+        where_args = []
+
+        if self.is_full():
+            return where_clause, where_args
+
+        if not self.include_others and not self.types:
+            # i.e. this is an empty filter, so we need to return a clause that
+            # will match nothing
+            return "1 = 2", []
+
+        # First we build up a lost of clauses for each type/state_key combo
+        clauses = []
+        for etype, state_keys in iteritems(self.types):
+            if state_keys is None:
+                clauses.append("(type = ?)")
+                where_args.append(etype)
+                continue
+
+            for state_key in state_keys:
+                clauses.append("(type = ? AND state_key = ?)")
+                where_args.extend((etype, state_key))
+
+        # This will match anything that appears in `self.types`
+        where_clause = " OR ".join(clauses)
+
+        # If we want to include stuff that's not in the types dict then we add
+        # a `OR type NOT IN (...)` clause to the end.
+        if self.include_others:
+            if where_clause:
+                where_clause += " OR "
+
+            where_clause += "type NOT IN (%s)" % (
+                ",".join(["?"] * len(self.types)),
+            )
+            where_args.extend(self.types)
+
+        return where_clause, where_args
+
+    def max_entries_returned(self):
+        """Returns the maximum number of entries this filter will return if
+        known, otherwise returns None.
+
+        For example a simple state filter asking for `("m.room.create", "")`
+        will return 1, whereas the default state filter will return None.
+
+        This is used to bail out early if the right number of entries have been
+        fetched.
+        """
+        if self.has_wildcards():
+            return None
+
+        return len(self.concrete_types())
+
+    def filter_state(self, state_dict):
+        """Returns the state filtered with by this StateFilter
+
+        Args:
+            state (dict[tuple[str, str], Any]): The state map to filter
+
+        Returns:
+            dict[tuple[str, str], Any]: The filtered state map
+        """
+        if self.is_full():
+            return dict(state_dict)
+
+        filtered_state = {}
+        for k, v in iteritems(state_dict):
+            typ, state_key = k
+            if typ in self.types:
+                state_keys = self.types[typ]
+                if state_keys is None or state_key in state_keys:
+                    filtered_state[k] = v
+            elif self.include_others:
+                filtered_state[k] = v
+
+        return filtered_state
+
+    def is_full(self):
+        """Whether this filter fetches everything or not
+
+        Returns:
+            bool
+        """
+        return self.include_others and not self.types
+
+    def has_wildcards(self):
+        """Whether the filter includes wildcards or is attempting to fetch
+        specific state.
+
+        Returns:
+            bool
+        """
+
+        return (
+            self.include_others
+            or any(
+                state_keys is None
+                for state_keys in itervalues(self.types)
+            )
+        )
+
+    def concrete_types(self):
+        """Returns a list of concrete type/state_keys (i.e. not None) that
+        will be fetched. This will be a complete list if `has_wildcards`
+        returns False, but otherwise will be a subset (or even empty).
+
+        Returns:
+            list[tuple[str,str]]
+        """
+        return [
+            (t, s)
+            for t, state_keys in iteritems(self.types)
+            if state_keys is not None
+            for s in state_keys
+        ]
+
+    def get_member_split(self):
+        """Return the filter split into two: one which assumes it's exclusively
+        matching against member state, and one which assumes it's matching
+        against non member state.
+
+        This is useful due to the returned filters giving correct results for
+        `is_full()`, `has_wildcards()`, etc, when operating against maps that
+        either exclusively contain member events or only contain non-member
+        events. (Which is the case when dealing with the member vs non-member
+        state caches).
+
+        Returns:
+            tuple[StateFilter, StateFilter]: The member and non member filters
+        """
+
+        if EventTypes.Member in self.types:
+            state_keys = self.types[EventTypes.Member]
+            if state_keys is None:
+                member_filter = StateFilter.all()
+            else:
+                member_filter = StateFilter({EventTypes.Member: state_keys})
+        elif self.include_others:
+            member_filter = StateFilter.all()
+        else:
+            member_filter = StateFilter.none()
+
+        non_member_filter = StateFilter(
+            types={k: v for k, v in iteritems(self.types) if k != EventTypes.Member},
+            include_others=self.include_others,
+        )
+
+        return member_filter, non_member_filter
+
+
 # this inherits from EventsWorkerStore because it calls self.get_events
 class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
     """The parts of StateGroupStore that can be called from workers.
@@ -152,61 +466,41 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
     # FIXME: how should this be cached?
-    def get_filtered_current_state_ids(self, room_id, types, filtered_types=None):
+    def get_filtered_current_state_ids(self, room_id, state_filter=StateFilter.all()):
         """Get the current state event of a given type for a room based on the
         current_state_events table.  This may not be as up-to-date as the result
         of doing a fresh state resolution as per state_handler.get_current_state
+
         Args:
             room_id (str)
-            types (list[(Str, (Str|None))]): List of (type, state_key) tuples
-                which are used to filter the state fetched. `state_key` may be
-                None, which matches any `state_key`
-            filtered_types (list[Str]|None): List of types to apply the above filter to.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
+
         Returns:
-            deferred: dict of (type, state_key) -> event
+            Deferred[dict[tuple[str, str], str]]: Map from type/state_key to
+            event ID.
         """
 
-        include_other_types = False if filtered_types is None else True
-
         def _get_filtered_current_state_ids_txn(txn):
             results = {}
-            sql = """SELECT type, state_key, event_id FROM current_state_events
-                     WHERE room_id = ? %s"""
-            # Turns out that postgres doesn't like doing a list of OR's and
-            # is about 1000x slower, so we just issue a query for each specific
-            # type seperately.
-            if types:
-                clause_to_args = [
-                    (
-                        "AND type = ? AND state_key = ?",
-                        (etype, state_key)
-                    ) if state_key is not None else (
-                        "AND type = ?",
-                        (etype,)
-                    )
-                    for etype, state_key in types
-                ]
-
-                if include_other_types:
-                    unique_types = set(filtered_types)
-                    clause_to_args.append(
-                        (
-                            "AND type <> ? " * len(unique_types),
-                            list(unique_types)
-                        )
-                    )
-            else:
-                # If types is None we fetch all the state, and so just use an
-                # empty where clause with no extra args.
-                clause_to_args = [("", [])]
-            for where_clause, where_args in clause_to_args:
-                args = [room_id]
-                args.extend(where_args)
-                txn.execute(sql % (where_clause,), args)
-                for row in txn:
-                    typ, state_key, event_id = row
-                    key = (intern_string(typ), intern_string(state_key))
-                    results[key] = event_id
+            sql = """
+                SELECT type, state_key, event_id FROM current_state_events
+                WHERE room_id = ?
+            """
+
+            where_clause, where_args = state_filter.make_sql_filter_clause()
+
+            if where_clause:
+                sql += " AND (%s)" % (where_clause,)
+
+            args = [room_id]
+            args.extend(where_args)
+            txn.execute(sql, args)
+            for row in txn:
+                typ, state_key, event_id = row
+                key = (intern_string(typ), intern_string(state_key))
+                results[key] = event_id
+
             return results
 
         return self.runInteraction(
@@ -322,20 +616,14 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         })
 
     @defer.inlineCallbacks
-    def _get_state_groups_from_groups(self, groups, types, members=None):
+    def _get_state_groups_from_groups(self, groups, state_filter):
         """Returns the state groups for a given set of groups, filtering on
         types of state events.
 
         Args:
             groups(list[int]): list of state group IDs to query
-            types (Iterable[str, str|None]|None): list of 2-tuples of the form
-                (`type`, `state_key`), where a `state_key` of `None` matches all
-                state_keys for the `type`. If None, all types are returned.
-            members (bool|None): If not None, then, in addition to any filtering
-                implied by types, the results are also filtered to only include
-                member events (if True), or to exclude member events (if False)
-
-        Returns:
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
         Returns:
             Deferred[dict[int, dict[tuple[str, str], str]]]:
                 dict of state_group_id -> (dict of (type, state_key) -> event id)
@@ -346,19 +634,23 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         for chunk in chunks:
             res = yield self.runInteraction(
                 "_get_state_groups_from_groups",
-                self._get_state_groups_from_groups_txn, chunk, types, members,
+                self._get_state_groups_from_groups_txn, chunk, state_filter,
             )
             results.update(res)
 
         defer.returnValue(results)
 
     def _get_state_groups_from_groups_txn(
-        self, txn, groups, types=None, members=None,
+        self, txn, groups, state_filter=StateFilter.all(),
     ):
         results = {group: {} for group in groups}
 
-        if types is not None:
-            types = list(set(types))  # deduplicate types list
+        where_clause, where_args = state_filter.make_sql_filter_clause()
+
+        # Unless the filter clause is empty, we're going to append it after an
+        # existing where clause
+        if where_clause:
+            where_clause = " AND (%s)" % (where_clause,)
 
         if isinstance(self.database_engine, PostgresEngine):
             # Temporarily disable sequential scans in this transaction. This is
@@ -374,79 +666,33 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             # group for the given type, state_key.
             # This may return multiple rows per (type, state_key), but last_value
             # should be the same.
-            sql = ("""
+            sql = """
                 WITH RECURSIVE state(state_group) AS (
                     VALUES(?::bigint)
                     UNION ALL
                     SELECT prev_state_group FROM state_group_edges e, state s
                     WHERE s.state_group = e.state_group
                 )
-                SELECT type, state_key, last_value(event_id) OVER (
+                SELECT DISTINCT type, state_key, last_value(event_id) OVER (
                     PARTITION BY type, state_key ORDER BY state_group ASC
                     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                 ) AS event_id FROM state_groups_state
                 WHERE state_group IN (
                     SELECT state_group FROM state
                 )
-                %s
-            """)
-
-            if members is True:
-                sql += " AND type = '%s'" % (EventTypes.Member,)
-            elif members is False:
-                sql += " AND type <> '%s'" % (EventTypes.Member,)
-
-            # Turns out that postgres doesn't like doing a list of OR's and
-            # is about 1000x slower, so we just issue a query for each specific
-            # type seperately.
-            if types is not None:
-                clause_to_args = [
-                    (
-                        "AND type = ? AND state_key = ?",
-                        (etype, state_key)
-                    ) if state_key is not None else (
-                        "AND type = ?",
-                        (etype,)
-                    )
-                    for etype, state_key in types
-                ]
-            else:
-                # If types is None we fetch all the state, and so just use an
-                # empty where clause with no extra args.
-                clause_to_args = [("", [])]
+            """
 
-            for where_clause, where_args in clause_to_args:
-                for group in groups:
-                    args = [group]
-                    args.extend(where_args)
+            for group in groups:
+                args = [group]
+                args.extend(where_args)
 
-                    txn.execute(sql % (where_clause,), args)
-                    for row in txn:
-                        typ, state_key, event_id = row
-                        key = (typ, state_key)
-                        results[group][key] = event_id
+                txn.execute(sql + where_clause, args)
+                for row in txn:
+                    typ, state_key, event_id = row
+                    key = (typ, state_key)
+                    results[group][key] = event_id
         else:
-            where_args = []
-            where_clauses = []
-            wildcard_types = False
-            if types is not None:
-                for typ in types:
-                    if typ[1] is None:
-                        where_clauses.append("(type = ?)")
-                        where_args.append(typ[0])
-                        wildcard_types = True
-                    else:
-                        where_clauses.append("(type = ? AND state_key = ?)")
-                        where_args.extend([typ[0], typ[1]])
-
-                where_clause = "AND (%s)" % (" OR ".join(where_clauses))
-            else:
-                where_clause = ""
-
-            if members is True:
-                where_clause += " AND type = '%s'" % EventTypes.Member
-            elif members is False:
-                where_clause += " AND type <> '%s'" % EventTypes.Member
+            max_entries_returned = state_filter.max_entries_returned()
 
             # We don't use WITH RECURSIVE on sqlite3 as there are distributions
             # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
@@ -460,12 +706,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
                     # without the right indices (which we can't add until
                     # after we finish deduping state, which requires this func)
                     args = [next_group]
-                    if types:
-                        args.extend(where_args)
+                    args.extend(where_args)
 
                     txn.execute(
                         "SELECT type, state_key, event_id FROM state_groups_state"
-                        " WHERE state_group = ? %s" % (where_clause,),
+                        " WHERE state_group = ? " + where_clause,
                         args
                     )
                     results[group].update(
@@ -481,9 +726,8 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
                     # wildcards (i.e. Nones) in which case we have to do an exhaustive
                     # search
                     if (
-                        types is not None and
-                        not wildcard_types and
-                        len(results[group]) == len(types)
+                        max_entries_returned is not None and
+                        len(results[group]) == max_entries_returned
                     ):
                         break
 
@@ -498,20 +742,14 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         return results
 
     @defer.inlineCallbacks
-    def get_state_for_events(self, event_ids, types, filtered_types=None):
+    def get_state_for_events(self, event_ids, state_filter=StateFilter.all()):
         """Given a list of event_ids and type tuples, return a list of state
-        dicts for each event. The state dicts will only have the type/state_keys
-        that are in the `types` list.
+        dicts for each event.
 
         Args:
             event_ids (list[string])
-            types (list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             deferred: A dict of (event_id) -> (type, state_key) -> [state_events]
@@ -521,7 +759,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
         groups = set(itervalues(event_to_groups))
-        group_to_state = yield self._get_state_for_groups(groups, types, filtered_types)
+        group_to_state = yield self._get_state_for_groups(groups, state_filter)
 
         state_event_map = yield self.get_events(
             [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)],
@@ -540,20 +778,15 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         defer.returnValue({event: event_to_state[event] for event in event_ids})
 
     @defer.inlineCallbacks
-    def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None):
+    def get_state_ids_for_events(self, event_ids, state_filter=StateFilter.all()):
         """
         Get the state dicts corresponding to a list of events, containing the event_ids
         of the state events (as opposed to the events themselves)
 
         Args:
             event_ids(list(str)): events whose state should be returned
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             A deferred dict from event_id -> (type, state_key) -> event_id
@@ -563,7 +796,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
         groups = set(itervalues(event_to_groups))
-        group_to_state = yield self._get_state_for_groups(groups, types, filtered_types)
+        group_to_state = yield self._get_state_for_groups(groups, state_filter)
 
         event_to_state = {
             event_id: group_to_state[group]
@@ -573,45 +806,35 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         defer.returnValue({event: event_to_state[event] for event in event_ids})
 
     @defer.inlineCallbacks
-    def get_state_for_event(self, event_id, types=None, filtered_types=None):
+    def get_state_for_event(self, event_id, state_filter=StateFilter.all()):
         """
         Get the state dict corresponding to a particular event
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
         """
-        state_map = yield self.get_state_for_events([event_id], types, filtered_types)
+        state_map = yield self.get_state_for_events([event_id], state_filter)
         defer.returnValue(state_map[event_id])
 
     @defer.inlineCallbacks
-    def get_state_ids_for_event(self, event_id, types=None, filtered_types=None):
+    def get_state_ids_for_event(self, event_id, state_filter=StateFilter.all()):
         """
         Get the state dict corresponding to a particular event
 
         Args:
             event_id(str): event whose state should be returned
-            types(list[(str, str|None)]|None): List of (type, state_key) tuples
-                which are used to filter the state fetched. If `state_key` is None,
-                all events are returned of the given type.
-                May be None, which matches any key.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
             A deferred dict from (type, state_key) -> state_event
         """
-        state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types)
+        state_map = yield self.get_state_ids_for_events([event_id], state_filter)
         defer.returnValue(state_map[event_id])
 
     @cached(max_entries=50000)
@@ -642,18 +865,14 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         defer.returnValue({row["event_id"]: row["state_group"] for row in rows})
 
-    def _get_some_state_from_cache(self, cache, group, types, filtered_types=None):
+    def _get_state_for_group_using_cache(self, cache, group, state_filter):
         """Checks if group is in cache. See `_get_state_for_groups`
 
         Args:
             cache(DictionaryCache): the state group cache to use
             group(int): The state group to lookup
-            types(list[str, str|None]): List of 2-tuples of the form
-                (`type`, `state_key`), where a `state_key` of `None` matches all
-                state_keys for the `type`.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns 2-tuple (`state_dict`, `got_all`).
         `got_all` is a bool indicating if we successfully retrieved all
@@ -662,124 +881,102 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         """
         is_all, known_absent, state_dict_ids = cache.get(group)
 
-        type_to_key = {}
+        if is_all or state_filter.is_full():
+            # Either we have everything or want everything, either way
+            # `is_all` tells us whether we've gotten everything.
+            return state_filter.filter_state(state_dict_ids), is_all
 
         # tracks whether any of our requested types are missing from the cache
         missing_types = False
 
-        for typ, state_key in types:
-            key = (typ, state_key)
-
-            if (
-                state_key is None or
-                (filtered_types is not None and typ not in filtered_types)
-            ):
-                type_to_key[typ] = None
-                # we mark the type as missing from the cache because
-                # when the cache was populated it might have been done with a
-                # restricted set of state_keys, so the wildcard will not work
-                # and the cache may be incomplete.
-                missing_types = True
-            else:
-                if type_to_key.get(typ, object()) is not None:
-                    type_to_key.setdefault(typ, set()).add(state_key)
-
+        if state_filter.has_wildcards():
+            # We don't know if we fetched all the state keys for the types in
+            # the filter that are wildcards, so we have to assume that we may
+            # have missed some.
+            missing_types = True
+        else:
+            # There aren't any wild cards, so `concrete_types()` returns the
+            # complete list of event types we're wanting.
+            for key in state_filter.concrete_types():
                 if key not in state_dict_ids and key not in known_absent:
                     missing_types = True
+                    break
 
-        sentinel = object()
-
-        def include(typ, state_key):
-            valid_state_keys = type_to_key.get(typ, sentinel)
-            if valid_state_keys is sentinel:
-                return filtered_types is not None and typ not in filtered_types
-            if valid_state_keys is None:
-                return True
-            if state_key in valid_state_keys:
-                return True
-            return False
-
-        got_all = is_all
-        if not got_all:
-            # the cache is incomplete. We may still have got all the results we need, if
-            # we don't have any wildcards in the match list.
-            if not missing_types and filtered_types is None:
-                got_all = True
-
-        return {
-            k: v for k, v in iteritems(state_dict_ids)
-            if include(k[0], k[1])
-        }, got_all
-
-    def _get_all_state_from_cache(self, cache, group):
-        """Checks if group is in cache. See `_get_state_for_groups`
-
-        Returns 2-tuple (`state_dict`, `got_all`). `got_all` is a bool
-        indicating if we successfully retrieved all requests state from the
-        cache, if False we need to query the DB for the missing state.
-
-        Args:
-            cache(DictionaryCache): the state group cache to use
-            group: The state group to lookup
-        """
-        is_all, _, state_dict_ids = cache.get(group)
-
-        return state_dict_ids, is_all
+        return state_filter.filter_state(state_dict_ids), not missing_types
 
     @defer.inlineCallbacks
-    def _get_state_for_groups(self, groups, types=None, filtered_types=None):
+    def _get_state_for_groups(self, groups, state_filter=StateFilter.all()):
         """Gets the state at each of a list of state groups, optionally
         filtering by type/state_key
 
         Args:
             groups (iterable[int]): list of state groups for which we want
                 to get the state.
-            types (None|iterable[(str, None|str)]):
-                indicates the state type/keys required. If None, the whole
-                state is fetched and returned.
-
-                Otherwise, each entry should be a `(type, state_key)` tuple to
-                include in the response. A `state_key` of None is a wildcard
-                meaning that we require all state with that type.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
-
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
         Returns:
             Deferred[dict[int, dict[tuple[str, str], str]]]:
                 dict of state_group_id -> (dict of (type, state_key) -> event id)
         """
-        if types is not None:
-            non_member_types = [t for t in types if t[0] != EventTypes.Member]
-
-            if filtered_types is not None and EventTypes.Member not in filtered_types:
-                # we want all of the membership events
-                member_types = None
-            else:
-                member_types = [t for t in types if t[0] == EventTypes.Member]
 
-        else:
-            non_member_types = None
-            member_types = None
+        member_filter, non_member_filter = state_filter.get_member_split()
 
-        non_member_state = yield self._get_state_for_groups_using_cache(
-            groups, self._state_group_cache, non_member_types, filtered_types,
+        # Now we look them up in the member and non-member caches
+        non_member_state, incomplete_groups_nm, = (
+            yield self._get_state_for_groups_using_cache(
+                groups, self._state_group_cache,
+                state_filter=non_member_filter,
+            )
         )
-        # XXX: we could skip this entirely if member_types is []
-        member_state = yield self._get_state_for_groups_using_cache(
-            # we set filtered_types=None as member_state only ever contain members.
-            groups, self._state_group_members_cache, member_types, None,
+
+        member_state, incomplete_groups_m, = (
+            yield self._get_state_for_groups_using_cache(
+                groups, self._state_group_members_cache,
+                state_filter=member_filter,
+            )
         )
 
-        state = non_member_state
+        state = dict(non_member_state)
         for group in groups:
             state[group].update(member_state[group])
 
+        # Now fetch any missing groups from the database
+
+        incomplete_groups = incomplete_groups_m | incomplete_groups_nm
+
+        if not incomplete_groups:
+            defer.returnValue(state)
+
+        cache_sequence_nm = self._state_group_cache.sequence
+        cache_sequence_m = self._state_group_members_cache.sequence
+
+        # Help the cache hit ratio by expanding the filter a bit
+        db_state_filter = state_filter.return_expanded()
+
+        group_to_state_dict = yield self._get_state_groups_from_groups(
+            list(incomplete_groups),
+            state_filter=db_state_filter,
+        )
+
+        # Now lets update the caches
+        self._insert_into_cache(
+            group_to_state_dict,
+            db_state_filter,
+            cache_seq_num_members=cache_sequence_m,
+            cache_seq_num_non_members=cache_sequence_nm,
+        )
+
+        # And finally update the result dict, by filtering out any extra
+        # stuff we pulled out of the database.
+        for group, group_state_dict in iteritems(group_to_state_dict):
+            # We just replace any existing entries, as we will have loaded
+            # everything we need from the database anyway.
+            state[group] = state_filter.filter_state(group_state_dict)
+
         defer.returnValue(state)
 
-    @defer.inlineCallbacks
     def _get_state_for_groups_using_cache(
-        self, groups, cache, types=None, filtered_types=None
+        self, groups, cache, state_filter,
     ):
         """Gets the state at each of a list of state groups, optionally
         filtering by type/state_key, querying from a specific cache.
@@ -790,89 +987,85 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             cache (DictionaryCache): the cache of group ids to state dicts which
                 we will pass through - either the normal state cache or the specific
                 members state cache.
-            types (None|iterable[(str, None|str)]):
-                indicates the state type/keys required. If None, the whole
-                state is fetched and returned.
-
-                Otherwise, each entry should be a `(type, state_key)` tuple to
-                include in the response. A `state_key` of None is a wildcard
-                meaning that we require all state with that type.
-            filtered_types(list[str]|None): Only apply filtering via `types` to this
-                list of event types.  Other types of events are returned unfiltered.
-                If None, `types` filtering is applied to all events.
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
 
         Returns:
-            Deferred[dict[int, dict[tuple[str, str], str]]]:
-                dict of state_group_id -> (dict of (type, state_key) -> event id)
+            tuple[dict[int, dict[tuple[str, str], str]], set[int]]: Tuple of
+            dict of state_group_id -> (dict of (type, state_key) -> event id)
+            of entries in the cache, and the state group ids either missing
+            from the cache or incomplete.
         """
-        if types:
-            types = frozenset(types)
         results = {}
-        missing_groups = []
-        if types is not None:
-            for group in set(groups):
-                state_dict_ids, got_all = self._get_some_state_from_cache(
-                    cache, group, types, filtered_types
-                )
-                results[group] = state_dict_ids
+        incomplete_groups = set()
+        for group in set(groups):
+            state_dict_ids, got_all = self._get_state_for_group_using_cache(
+                cache, group, state_filter
+            )
+            results[group] = state_dict_ids
 
-                if not got_all:
-                    missing_groups.append(group)
-        else:
-            for group in set(groups):
-                state_dict_ids, got_all = self._get_all_state_from_cache(
-                    cache, group
-                )
+            if not got_all:
+                incomplete_groups.add(group)
 
-                results[group] = state_dict_ids
+        return results, incomplete_groups
 
-                if not got_all:
-                    missing_groups.append(group)
+    def _insert_into_cache(self, group_to_state_dict, state_filter,
+                           cache_seq_num_members, cache_seq_num_non_members):
+        """Inserts results from querying the database into the relevant cache.
 
-        if missing_groups:
-            # Okay, so we have some missing_types, let's fetch them.
-            cache_seq_num = cache.sequence
+        Args:
+            group_to_state_dict (dict): The new entries pulled from database.
+                Map from state group to state dict
+            state_filter (StateFilter): The state filter used to fetch state
+                from the database.
+            cache_seq_num_members (int): Sequence number of member cache since
+                last lookup in cache
+            cache_seq_num_non_members (int): Sequence number of member cache since
+                last lookup in cache
+        """
 
-            # the DictionaryCache knows if it has *all* the state, but
-            # does not know if it has all of the keys of a particular type,
-            # which makes wildcard lookups expensive unless we have a complete
-            # cache. Hence, if we are doing a wildcard lookup, populate the
-            # cache fully so that we can do an efficient lookup next time.
+        # We need to work out which types we've fetched from the DB for the
+        # member vs non-member caches. This should be as accurate as possible,
+        # but can be an underestimate (e.g. when we have wild cards)
 
-            if filtered_types or (types and any(k is None for (t, k) in types)):
-                types_to_fetch = None
-            else:
-                types_to_fetch = types
+        member_filter, non_member_filter = state_filter.get_member_split()
+        if member_filter.is_full():
+            # We fetched all member events
+            member_types = None
+        else:
+            # `concrete_types()` will only return a subset when there are wild
+            # cards in the filter, but that's fine.
+            member_types = member_filter.concrete_types()
 
-            group_to_state_dict = yield self._get_state_groups_from_groups(
-                missing_groups, types_to_fetch, cache == self._state_group_members_cache,
-            )
+        if non_member_filter.is_full():
+            # We fetched all non member events
+            non_member_types = None
+        else:
+            non_member_types = non_member_filter.concrete_types()
+
+        for group, group_state_dict in iteritems(group_to_state_dict):
+            state_dict_members = {}
+            state_dict_non_members = {}
 
-            for group, group_state_dict in iteritems(group_to_state_dict):
-                state_dict = results[group]
-
-                # update the result, filtering by `types`.
-                if types:
-                    for k, v in iteritems(group_state_dict):
-                        (typ, _) = k
-                        if (
-                            (k in types or (typ, None) in types) or
-                            (filtered_types and typ not in filtered_types)
-                        ):
-                            state_dict[k] = v
+            for k, v in iteritems(group_state_dict):
+                if k[0] == EventTypes.Member:
+                    state_dict_members[k] = v
                 else:
-                    state_dict.update(group_state_dict)
-
-                # update the cache with all the things we fetched from the
-                # database.
-                cache.update(
-                    cache_seq_num,
-                    key=group,
-                    value=group_state_dict,
-                    fetched_keys=types_to_fetch,
-                )
+                    state_dict_non_members[k] = v
 
-        defer.returnValue(results)
+            self._state_group_members_cache.update(
+                cache_seq_num_members,
+                key=group,
+                value=state_dict_members,
+                fetched_keys=member_types,
+            )
+
+            self._state_group_cache.update(
+                cache_seq_num_non_members,
+                key=group,
+                value=state_dict_non_members,
+                fetched_keys=non_member_types,
+            )
 
     def store_state_group(self, event_id, room_id, prev_group, delta_ids,
                           current_state_ids):
@@ -1064,6 +1257,7 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
     STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
     STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
     CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
+    EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
 
     def __init__(self, db_conn, hs):
         super(StateStore, self).__init__(db_conn, hs)
@@ -1082,6 +1276,12 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
             columns=["state_key"],
             where_clause="type='m.room.member'",
         )
+        self.register_background_index_update(
+            self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME,
+            index_name="event_to_state_groups_sg_index",
+            table="event_to_state_groups",
+            columns=["state_group"],
+        )
 
     def _store_event_state_mappings_txn(self, txn, events_and_contexts):
         state_groups = {}
@@ -1181,12 +1381,12 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
                         continue
 
                     prev_state = self._get_state_groups_from_groups_txn(
-                        txn, [prev_group], types=None
+                        txn, [prev_group],
                     )
                     prev_state = prev_state[prev_group]
 
                     curr_state = self._get_state_groups_from_groups_txn(
-                        txn, [state_group], types=None
+                        txn, [state_group],
                     )
                     curr_state = curr_state[state_group]
 
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index 4c296d72c0..d6cfdba519 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -630,7 +630,21 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
 
     @defer.inlineCallbacks
     def get_all_new_events_stream(self, from_id, current_id, limit):
-        """Get all new events"""
+        """Get all new events
+
+         Returns all events with from_id < stream_ordering <= current_id.
+
+         Args:
+             from_id (int):  the stream_ordering of the last event we processed
+             current_id (int):  the stream_ordering of the most recently processed event
+             limit (int): the maximum number of events to return
+
+         Returns:
+             Deferred[Tuple[int, list[FrozenEvent]]]: A tuple of (next_id, events), where
+             `next_id` is the next value to pass as `from_id` (it will either be the
+             stream_ordering of the last returned event, or, if fewer than `limit` events
+             were found, `current_id`.
+         """
 
         def get_all_new_events_stream_txn(txn):
             sql = (
diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py
index a3032cdce9..d8bf953ec0 100644
--- a/synapse/storage/transactions.py
+++ b/synapse/storage/transactions.py
@@ -30,7 +30,7 @@ from ._base import SQLBaseStore, db_to_json
 # py2 sqlite has buffer hardcoded as only binary type, so we must use it,
 # despite being deprecated and removed in favor of memoryview
 if six.PY2:
-    db_binary_type = buffer
+    db_binary_type = six.moves.builtins.buffer
 else:
     db_binary_type = memoryview
 
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index 680ea928c7..0ae7e2ef3b 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+import re
 from itertools import islice
 
 import attr
@@ -68,7 +69,10 @@ class Clock(object):
         """
         call = task.LoopingCall(f)
         call.clock = self._reactor
-        call.start(msec / 1000.0, now=False)
+        d = call.start(msec / 1000.0, now=False)
+        d.addErrback(
+            log_failure, "Looping call died", consumeErrors=False,
+        )
         return call
 
     def call_later(self, delay, callback, *args, **kwargs):
@@ -109,3 +113,53 @@ def batch_iter(iterable, size):
     sourceiter = iter(iterable)
     # call islice until it returns an empty tuple
     return iter(lambda: tuple(islice(sourceiter, size)), ())
+
+
+def log_failure(failure, msg, consumeErrors=True):
+    """Creates a function suitable for passing to `Deferred.addErrback` that
+    logs any failures that occur.
+
+    Args:
+        msg (str): Message to log
+        consumeErrors (bool): If true consumes the failure, otherwise passes
+            on down the callback chain
+
+    Returns:
+        func(Failure)
+    """
+
+    logger.error(
+        msg,
+        exc_info=(
+            failure.type,
+            failure.value,
+            failure.getTracebackObject()
+        )
+    )
+
+    if not consumeErrors:
+        return failure
+
+
+def glob_to_regex(glob):
+    """Converts a glob to a compiled regex object.
+
+    The regex is anchored at the beginning and end of the string.
+
+    Args:
+        glob (str)
+
+    Returns:
+        re.RegexObject
+    """
+    res = ''
+    for c in glob:
+        if c == '*':
+            res = res + '.*'
+        elif c == '?':
+            res = res + '.'
+        else:
+            res = res + re.escape(c)
+
+    # \A anchors at start of string, \Z at end of string
+    return re.compile(r"\A" + res + r"\Z", re.IGNORECASE)
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index f2bde74dc5..625aedc940 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -15,6 +15,8 @@
 
 import logging
 
+from six import integer_types
+
 from sortedcontainers import SortedDict
 
 from synapse.util import caches
@@ -47,7 +49,7 @@ class StreamChangeCache(object):
     def has_entity_changed(self, entity, stream_pos):
         """Returns True if the entity may have been updated since stream_pos
         """
-        assert type(stream_pos) is int or type(stream_pos) is long
+        assert type(stream_pos) in integer_types
 
         if stream_pos < self._earliest_known_stream_pos:
             self.metrics.inc_misses()
diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 89224b26cc..4c6e92beb8 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -25,7 +25,7 @@ See doc/log_contexts.rst for details on how this works.
 import logging
 import threading
 
-from twisted.internet import defer
+from twisted.internet import defer, threads
 
 logger = logging.getLogger(__name__)
 
@@ -562,58 +562,76 @@ def _set_context_cb(result, context):
     return result
 
 
-# modules to ignore in `logcontext_tracer`
-_to_ignore = [
-    "synapse.util.logcontext",
-    "synapse.http.server",
-    "synapse.storage._base",
-    "synapse.util.async_helpers",
-]
+def defer_to_thread(reactor, f, *args, **kwargs):
+    """
+    Calls the function `f` using a thread from the reactor's default threadpool and
+    returns the result as a Deferred.
+
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.
+
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked, and whose threadpool we should use for the
+            function.
+
+            Normally this will be hs.get_reactor().
+
+        f (callable): The function to call.
 
+        args: positional arguments to pass to f.
 
-def logcontext_tracer(frame, event, arg):
-    """A tracer that logs whenever a logcontext "unexpectedly" changes within
-    a function. Probably inaccurate.
+        kwargs: keyword arguments to pass to f.
 
-    Use by calling `sys.settrace(logcontext_tracer)` in the main thread.
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
     """
-    if event == 'call':
-        name = frame.f_globals["__name__"]
-        if name.startswith("synapse"):
-            if name == "synapse.util.logcontext":
-                if frame.f_code.co_name in ["__enter__", "__exit__"]:
-                    tracer = frame.f_back.f_trace
-                    if tracer:
-                        tracer.just_changed = True
-
-            tracer = frame.f_trace
-            if tracer:
-                return tracer
-
-            if not any(name.startswith(ig) for ig in _to_ignore):
-                return LineTracer()
-
-
-class LineTracer(object):
-    __slots__ = ["context", "just_changed"]
-
-    def __init__(self):
-        self.context = LoggingContext.current_context()
-        self.just_changed = False
-
-    def __call__(self, frame, event, arg):
-        if event in 'line':
-            if self.just_changed:
-                self.context = LoggingContext.current_context()
-                self.just_changed = False
-            else:
-                c = LoggingContext.current_context()
-                if c != self.context:
-                    logger.info(
-                        "Context changed! %s -> %s, %s, %s",
-                        self.context, c,
-                        frame.f_code.co_filename, frame.f_lineno
-                    )
-                    self.context = c
+    return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)
 
-        return self
+
+def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
+    """
+    A wrapper for twisted.internet.threads.deferToThreadpool, which handles
+    logcontexts correctly.
+
+    Calls the function `f` using a thread from the given threadpool and returns
+    the result as a Deferred.
+
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.
+
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked. Normally this will be hs.get_reactor().
+
+        threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
+            running `f`. Normally this will be hs.get_reactor().getThreadPool().
+
+        f (callable): The function to call.
+
+        args: positional arguments to pass to f.
+
+        kwargs: keyword arguments to pass to f.
+
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
+    """
+    logcontext = LoggingContext.current_context()
+
+    def g():
+        with LoggingContext(parent_context=logcontext):
+            return f(*args, **kwargs)
+
+    return make_deferred_yieldable(
+        threads.deferToThreadPool(reactor, threadpool, g)
+    )
diff --git a/synapse/util/manhole.py b/synapse/util/manhole.py
index 8d0f2a8918..9cb7e9c9ab 100644
--- a/synapse/util/manhole.py
+++ b/synapse/util/manhole.py
@@ -70,6 +70,8 @@ def manhole(username, password, globals):
     Returns:
         twisted.internet.protocol.Factory: A factory to pass to ``listenTCP``
     """
+    if not isinstance(password, bytes):
+        password = password.encode('ascii')
 
     checker = checkers.InMemoryUsernamePasswordDatabaseDontUse(
         **{username: password}
@@ -82,7 +84,7 @@ def manhole(username, password, globals):
     )
 
     factory = manhole_ssh.ConchFactory(portal.Portal(rlm, [checker]))
-    factory.publicKeys['ssh-rsa'] = Key.fromString(PUBLIC_KEY)
-    factory.privateKeys['ssh-rsa'] = Key.fromString(PRIVATE_KEY)
+    factory.publicKeys[b'ssh-rsa'] = Key.fromString(PUBLIC_KEY)
+    factory.privateKeys[b'ssh-rsa'] = Key.fromString(PRIVATE_KEY)
 
     return factory
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 6f318c6a29..fdcb375f95 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -16,7 +16,8 @@
 import random
 import string
 
-from six import PY3
+import six
+from six import PY2, PY3
 from six.moves import range
 
 _string_with_symbols = (
@@ -71,3 +72,39 @@ def to_ascii(s):
         return s.encode("ascii")
     except UnicodeEncodeError:
         return s
+
+
+def exception_to_unicode(e):
+    """Helper function to extract the text of an exception as a unicode string
+
+    Args:
+        e (Exception): exception to be stringified
+
+    Returns:
+        unicode
+    """
+    # urgh, this is a mess. The basic problem here is that psycopg2 constructs its
+    # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will
+    # then produce the raw byte sequence. Under Python 2, this will then cause another
+    # error if it gets mixed with a `unicode` object, as per
+    # https://github.com/matrix-org/synapse/issues/4252
+
+    # First of all, if we're under python3, everything is fine because it will sort this
+    # nonsense out for us.
+    if not PY2:
+        return str(e)
+
+    # otherwise let's have a stab at decoding the exception message. We'll circumvent
+    # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii')
+    # and instead look at what is in the args member.
+
+    if len(e.args) == 0:
+        return u""
+    elif len(e.args) > 1:
+        return six.text_type(repr(e.args))
+
+    msg = e.args[0]
+    if isinstance(msg, bytes):
+        return msg.decode('utf-8', errors='replace')
+    else:
+        return msg
diff --git a/synapse/visibility.py b/synapse/visibility.py
index d4680863d3..0281a7c919 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -23,6 +23,7 @@ from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.events.utils import prune_event
+from synapse.storage.state import StateFilter
 from synapse.types import get_domain_from_id
 
 logger = logging.getLogger(__name__)
@@ -72,7 +73,7 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
     )
     event_id_to_state = yield store.get_state_for_events(
         frozenset(e.event_id for e in events),
-        types=types,
+        state_filter=StateFilter.from_types(types),
     )
 
     ignore_dict_content = yield store.get_global_account_data_by_type_for_user(
@@ -219,7 +220,7 @@ def filter_events_for_server(store, server_name, events):
     # Whatever else we do, we need to check for senders which have requested
     # erasure of their data.
     erased_senders = yield store.are_users_erased(
-        e.sender for e in events,
+        (e.sender for e in events),
     )
 
     def redact_disallowed(event, state):
@@ -273,8 +274,8 @@ def filter_events_for_server(store, server_name, events):
     # need to check membership (as we know the server is in the room).
     event_to_state_ids = yield store.get_state_ids_for_events(
         frozenset(e.event_id for e in events),
-        types=(
-            (EventTypes.RoomHistoryVisibility, ""),
+        state_filter=StateFilter.from_types(
+            types=((EventTypes.RoomHistoryVisibility, ""),),
         )
     )
 
@@ -314,9 +315,11 @@ def filter_events_for_server(store, server_name, events):
     # of the history vis and membership state at those events.
     event_to_state_ids = yield store.get_state_ids_for_events(
         frozenset(e.event_id for e in events),
-        types=(
-            (EventTypes.RoomHistoryVisibility, ""),
-            (EventTypes.Member, None),
+        state_filter=StateFilter.from_types(
+            types=(
+                (EventTypes.RoomHistoryVisibility, ""),
+                (EventTypes.Member, None),
+            ),
         )
     )
 
@@ -324,14 +327,13 @@ def filter_events_for_server(store, server_name, events):
     # server's domain.
     #
     # event_to_state_ids contains lots of duplicates, so it turns out to be
-    # cheaper to build a complete set of unique
-    # ((type, state_key), event_id) tuples, and then filter out the ones we
-    # don't want.
+    # cheaper to build a complete event_id => (type, state_key) dict, and then
+    # filter out the ones we don't want
     #
-    state_key_to_event_id_set = {
-        e
+    event_id_to_state_key = {
+        event_id: key
         for key_to_eid in itervalues(event_to_state_ids)
-        for e in key_to_eid.items()
+        for key, event_id in iteritems(key_to_eid)
     }
 
     def include(typ, state_key):
@@ -346,7 +348,7 @@ def filter_events_for_server(store, server_name, events):
 
     event_map = yield store.get_events([
         e_id
-        for key, e_id in state_key_to_event_id_set
+        for e_id, key in iteritems(event_id_to_state_key)
         if include(key[0], key[1])
     ])