Diffstat (limited to 'synapse')
-rw-r--r--  synapse/__init__.py | 6
-rw-r--r--  synapse/_scripts/review_recent_signups.py | 18
-rw-r--r--  synapse/api/constants.py | 2
-rw-r--r--  synapse/api/room_versions.py | 30
-rw-r--r--  synapse/api/urls.py | 1
-rw-r--r--  synapse/app/_base.py | 17
-rw-r--r--  synapse/app/homeserver.py | 46
-rw-r--r--  synapse/appservice/__init__.py | 3
-rw-r--r--  synapse/appservice/api.py | 29
-rw-r--r--  synapse/appservice/scheduler.py | 97
-rw-r--r--  synapse/config/api.py | 19
-rw-r--r--  synapse/config/emailconfig.py | 24
-rw-r--r--  synapse/config/experimental.py | 9
-rw-r--r--  synapse/config/modules.py | 10
-rw-r--r--  synapse/config/oidc.py | 9
-rw-r--r--  synapse/config/ratelimiting.py | 15
-rw-r--r--  synapse/config/registration.py | 12
-rw-r--r--  synapse/config/server.py | 96
-rw-r--r--  synapse/config/workers.py | 24
-rw-r--r--  synapse/crypto/keyring.py | 18
-rw-r--r--  synapse/events/__init__.py | 13
-rw-r--r--  synapse/events/snapshot.py | 20
-rw-r--r--  synapse/events/utils.py | 61
-rw-r--r--  synapse/events/validator.py | 4
-rw-r--r--  synapse/federation/federation_client.py | 23
-rw-r--r--  synapse/federation/federation_server.py | 3
-rw-r--r--  synapse/federation/persistence.py | 3
-rw-r--r--  synapse/federation/sender/per_destination_queue.py | 12
-rw-r--r--  synapse/federation/sender/transaction_manager.py | 12
-rw-r--r--  synapse/federation/transport/client.py | 48
-rw-r--r--  synapse/federation/transport/server/__init__.py | 16
-rw-r--r--  synapse/federation/transport/server/_base.py | 81
-rw-r--r--  synapse/federation/transport/server/federation.py | 39
-rw-r--r--  synapse/federation/transport/server/groups_local.py | 8
-rw-r--r--  synapse/federation/transport/server/groups_server.py | 8
-rw-r--r--  synapse/handlers/admin.py | 56
-rw-r--r--  synapse/handlers/appservice.py | 136
-rw-r--r--  synapse/handlers/auth.py | 74
-rw-r--r--  synapse/handlers/deactivate_account.py | 3
-rw-r--r--  synapse/handlers/e2e_keys.py | 10
-rw-r--r--  synapse/handlers/events.py | 2
-rw-r--r--  synapse/handlers/federation.py | 6
-rw-r--r--  synapse/handlers/federation_event.py | 3
-rw-r--r--  synapse/handlers/pagination.py | 2
-rw-r--r--  synapse/handlers/presence.py | 2
-rw-r--r--  synapse/handlers/profile.py | 67
-rw-r--r--  synapse/handlers/register.py | 26
-rw-r--r--  synapse/handlers/room.py | 73
-rw-r--r--  synapse/handlers/room_member.py | 15
-rw-r--r--  synapse/handlers/room_summary.py | 3
-rw-r--r--  synapse/handlers/search.py | 45
-rw-r--r--  synapse/handlers/sso.py | 32
-rw-r--r--  synapse/handlers/sync.py | 19
-rw-r--r--  synapse/handlers/ui_auth/__init__.py | 2
-rw-r--r--  synapse/http/client.py | 15
-rw-r--r--  synapse/http/connectproxyclient.py | 4
-rw-r--r--  synapse/http/matrixfederationclient.py | 18
-rw-r--r--  synapse/http/site.py | 9
-rw-r--r--  synapse/logging/_remote.py | 10
-rw-r--r--  synapse/logging/context.py | 20
-rw-r--r--  synapse/logging/opentracing.py | 45
-rw-r--r--  synapse/logging/scopecontextmanager.py | 76
-rw-r--r--  synapse/logging/utils.py | 76
-rw-r--r--  synapse/metrics/__init__.py | 284
-rw-r--r--  synapse/metrics/_gc.py | 203
-rw-r--r--  synapse/metrics/_reactor_metrics.py | 83
-rw-r--r--  synapse/module_api/__init__.py | 48
-rw-r--r--  synapse/notifier.py | 21
-rw-r--r--  synapse/push/__init__.py | 38
-rw-r--r--  synapse/push/baserules.py | 219
-rw-r--r--  synapse/push/bulk_push_rule_evaluator.py | 18
-rw-r--r--  synapse/push/mailer.py | 6
-rw-r--r--  synapse/python_dependencies.py | 4
-rw-r--r--  synapse/replication/slave/storage/events.py | 2
-rw-r--r--  synapse/replication/tcp/streams/events.py | 34
-rw-r--r--  synapse/rest/admin/__init__.py | 8
-rw-r--r--  synapse/rest/admin/background_updates.py | 44
-rw-r--r--  synapse/rest/admin/federation.py | 100
-rw-r--r--  synapse/rest/admin/rooms.py | 44
-rw-r--r--  synapse/rest/admin/users.py | 13
-rw-r--r--  synapse/rest/client/account_data.py | 2
-rw-r--r--  synapse/rest/client/push_rule.py | 13
-rw-r--r--  synapse/rest/client/register.py | 30
-rw-r--r--  synapse/rest/client/relations.py | 4
-rw-r--r--  synapse/rest/client/room.py | 46
-rw-r--r--  synapse/rest/client/sync.py | 20
-rw-r--r--  synapse/rest/media/v1/media_storage.py | 6
-rw-r--r--  synapse/rest/media/v1/oembed.py | 10
-rw-r--r--  synapse/rest/media/v1/preview_html.py | 31
-rw-r--r--  synapse/rest/media/v1/preview_url_resource.py | 259
-rw-r--r--  synapse/state/__init__.py | 10
-rw-r--r--  synapse/storage/_base.py | 11
-rw-r--r--  synapse/storage/database.py | 73
-rw-r--r--  synapse/storage/databases/main/account_data.py | 141
-rw-r--r--  synapse/storage/databases/main/appservice.py | 26
-rw-r--r--  synapse/storage/databases/main/cache.py | 16
-rw-r--r--  synapse/storage/databases/main/deviceinbox.py | 306
-rw-r--r--  synapse/storage/databases/main/devices.py | 157
-rw-r--r--  synapse/storage/databases/main/directory.py | 6
-rw-r--r--  synapse/storage/databases/main/e2e_room_keys.py | 34
-rw-r--r--  synapse/storage/databases/main/end_to_end_keys.py | 48
-rw-r--r--  synapse/storage/databases/main/event_federation.py | 29
-rw-r--r--  synapse/storage/databases/main/event_push_actions.py | 21
-rw-r--r--  synapse/storage/databases/main/events.py | 187
-rw-r--r--  synapse/storage/databases/main/events_bg_updates.py | 42
-rw-r--r--  synapse/storage/databases/main/presence.py | 33
-rw-r--r--  synapse/storage/databases/main/purge_events.py | 1
-rw-r--r--  synapse/storage/databases/main/push_rule.py | 20
-rw-r--r--  synapse/storage/databases/main/pusher.py | 8
-rw-r--r--  synapse/storage/databases/main/registration.py | 18
-rw-r--r--  synapse/storage/databases/main/relations.py | 129
-rw-r--r--  synapse/storage/databases/main/room.py | 18
-rw-r--r--  synapse/storage/databases/main/roommember.py | 6
-rw-r--r--  synapse/storage/databases/main/signatures.py | 54
-rw-r--r--  synapse/storage/databases/main/stream.py | 22
-rw-r--r--  synapse/storage/databases/main/transactions.py | 48
-rw-r--r--  synapse/storage/databases/main/ui_auth.py | 12
-rw-r--r--  synapse/storage/databases/main/user_directory.py | 12
-rw-r--r--  synapse/storage/databases/state/bg_updates.py | 15
-rw-r--r--  synapse/storage/databases/state/store.py | 27
-rw-r--r--  synapse/storage/engines/_base.py | 19
-rw-r--r--  synapse/storage/engines/postgres.py | 33
-rw-r--r--  synapse/storage/engines/sqlite.py | 7
-rw-r--r--  synapse/storage/keys.py | 6
-rw-r--r--  synapse/storage/prepare_database.py | 15
-rw-r--r--  synapse/storage/relations.py | 20
-rw-r--r--  synapse/storage/schema/__init__.py | 11
-rw-r--r--  synapse/storage/schema/main/delta/67/01drop_public_room_list_stream.sql | 18
-rw-r--r--  synapse/storage/schema/main/delta/68/01event_columns.sql | 26
-rw-r--r--  synapse/storage/schema/main/delta/68/02_msc2409_add_device_id_appservice_stream_type.sql | 21
-rw-r--r--  synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql | 20
-rw-r--r--  synapse/storage/state.py | 20
-rw-r--r--  synapse/storage/util/id_generators.py | 8
-rw-r--r--  synapse/streams/config.py | 10
-rw-r--r--  synapse/types.py | 75
-rw-r--r--  synapse/util/async_helpers.py | 6
-rw-r--r--  synapse/util/caches/deferred_cache.py | 5
-rw-r--r--  synapse/util/caches/descriptors.py | 8
-rw-r--r--  synapse/util/caches/dictionary_cache.py | 11
-rw-r--r--  synapse/util/caches/lrucache.py | 6
-rw-r--r--  synapse/visibility.py | 2
141 files changed, 3191 insertions, 1961 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 92aec334e6..5e65033061 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -21,8 +21,8 @@ import os
 import sys
 
 # Check that we're not running on an unsupported Python version.
-if sys.version_info < (3, 6):
-    print("Synapse requires Python 3.6 or above.")
+if sys.version_info < (3, 7):
+    print("Synapse requires Python 3.7 or above.")
     sys.exit(1)
 
 # Twisted and canonicaljson will fail to import when this file is executed to
@@ -47,7 +47,7 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.50.0rc1"
+__version__ = "1.52.0rc1"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/_scripts/review_recent_signups.py b/synapse/_scripts/review_recent_signups.py
index 093af4327a..e207f154f3 100644
--- a/synapse/_scripts/review_recent_signups.py
+++ b/synapse/_scripts/review_recent_signups.py
@@ -46,7 +46,9 @@ class UserInfo:
     ips: List[str] = attr.Factory(list)
 
 
-def get_recent_users(txn: LoggingTransaction, since_ms: int) -> List[UserInfo]:
+def get_recent_users(
+    txn: LoggingTransaction, since_ms: int, exclude_app_service: bool
+) -> List[UserInfo]:
     """Fetches recently registered users and some info on them."""
 
     sql = """
@@ -56,6 +58,9 @@ def get_recent_users(txn: LoggingTransaction, since_ms: int) -> List[UserInfo]:
             AND deactivated = 0
     """
 
+    if exclude_app_service:
+        sql += " AND appservice_id IS NULL"
+
     txn.execute(sql, (since_ms / 1000,))
 
     user_infos = [UserInfo(user_id, creation_ts) for user_id, creation_ts in txn]
@@ -113,7 +118,7 @@ def main() -> None:
         "-e",
         "--exclude-emails",
         action="store_true",
-        help="Exclude users that have validated email addresses",
+        help="Exclude users that have validated email addresses.",
     )
     parser.add_argument(
         "-u",
@@ -121,6 +126,12 @@ def main() -> None:
         action="store_true",
         help="Only print user IDs that match.",
     )
+    parser.add_argument(
+        "-a",
+        "--exclude-app-service",
+        help="Exclude appservice users.",
+        action="store_true",
+    )
 
     config = ReviewConfig()
 
@@ -133,6 +144,7 @@ def main() -> None:
 
     since_ms = time.time() * 1000 - Config.parse_duration(config_args.since)
     exclude_users_with_email = config_args.exclude_emails
+    exclude_users_with_appservice = config_args.exclude_app_service
     include_context = not config_args.only_users
 
     for database_config in config.database.databases:
@@ -143,7 +155,7 @@ def main() -> None:
 
     with make_conn(database_config, engine, "review_recent_signups") as db_conn:
         # This generates a type of Cursor, not LoggingTransaction.
-        user_infos = get_recent_users(db_conn.cursor(), since_ms)  # type: ignore[arg-type]
+        user_infos = get_recent_users(db_conn.cursor(), since_ms, exclude_users_with_appservice)  # type: ignore[arg-type]
 
     for user_info in user_infos:
         if exclude_users_with_email and user_info.emails:
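
The new flag just appends one predicate to the query. A minimal, self-contained sketch of the conditional SQL assembly, run against an in-memory SQLite stand-in (the three-column `users` table here is an illustrative assumption; the real schema has more columns):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT, creation_ts INTEGER, appservice_id TEXT)")
conn.execute(
    "INSERT INTO users VALUES ('@human:hs', 100, NULL), ('@bot:hs', 200, 'bridge')"
)

exclude_app_service = True
sql = "SELECT name, creation_ts FROM users WHERE creation_ts > ?"
if exclude_app_service:
    sql += " AND appservice_id IS NULL"

rows = conn.execute(sql, (0,)).fetchall()
assert rows == [("@human:hs", 100)]  # the appservice-registered user is filtered out
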
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 52c083a20b..36ace7c613 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -81,7 +81,7 @@ class LoginType:
     TERMS: Final = "m.login.terms"
     SSO: Final = "m.login.sso"
     DUMMY: Final = "m.login.dummy"
-    REGISTRATION_TOKEN: Final = "org.matrix.msc3231.login.registration_token"
+    REGISTRATION_TOKEN: Final = "m.login.registration_token"
 
 
 # This is used in the `type` parameter for /register when called by
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 0a895bba48..a747a40814 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -46,41 +46,41 @@ class RoomDisposition:
     UNSTABLE = "unstable"
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class RoomVersion:
     """An object which describes the unique attributes of a room version."""
 
-    identifier = attr.ib(type=str)  # the identifier for this version
-    disposition = attr.ib(type=str)  # one of the RoomDispositions
-    event_format = attr.ib(type=int)  # one of the EventFormatVersions
-    state_res = attr.ib(type=int)  # one of the StateResolutionVersions
-    enforce_key_validity = attr.ib(type=bool)
+    identifier: str  # the identifier for this version
+    disposition: str  # one of the RoomDispositions
+    event_format: int  # one of the EventFormatVersions
+    state_res: int  # one of the StateResolutionVersions
+    enforce_key_validity: bool
 
     # Before MSC2432, m.room.aliases had special auth rules and redaction rules
-    special_case_aliases_auth = attr.ib(type=bool)
+    special_case_aliases_auth: bool
     # Strictly enforce canonicaljson, do not allow:
     # * Integers outside the range of [-2 ^ 53 + 1, 2 ^ 53 - 1]
     # * Floats
     # * NaN, Infinity, -Infinity
-    strict_canonicaljson = attr.ib(type=bool)
+    strict_canonicaljson: bool
     # MSC2209: Check 'notifications' key while verifying
     # m.room.power_levels auth rules.
-    limit_notifications_power_levels = attr.ib(type=bool)
+    limit_notifications_power_levels: bool
     # MSC2174/MSC2176: Apply updated redaction rules algorithm.
-    msc2176_redaction_rules = attr.ib(type=bool)
+    msc2176_redaction_rules: bool
     # MSC3083: Support the 'restricted' join_rule.
-    msc3083_join_rules = attr.ib(type=bool)
+    msc3083_join_rules: bool
     # MSC3375: Support for the proper redaction rules for MSC3083. This mustn't
     #          be enabled if MSC3083 is not.
-    msc3375_redaction_rules = attr.ib(type=bool)
+    msc3375_redaction_rules: bool
     # MSC2403: Allows join_rules to be set to 'knock', changes auth rules to allow sending
     # m.room.membership event with membership 'knock'.
-    msc2403_knocking = attr.ib(type=bool)
+    msc2403_knocking: bool
     # MSC2716: Adds m.room.power_levels -> content.historical field to control
     # whether "insertion", "chunk", "marker" events can be sent
-    msc2716_historical = attr.ib(type=bool)
+    msc2716_historical: bool
     # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events
-    msc2716_redactions = attr.ib(type=bool)
+    msc2716_redactions: bool
 
 
 class RoomVersions:
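
The same `attr.ib(type=...)` to `auto_attribs=True` conversion recurs throughout this diff (emailconfig, keyring, snapshot, workers). Both spellings generate the same class; a minimal sketch of the equivalence (class names here are illustrative):

import attr

@attr.s(slots=True, frozen=True)
class OldStyle:
    identifier = attr.ib(type=str)  # type carried by attr.ib()
    enforce_key_validity = attr.ib(type=bool)

@attr.s(slots=True, frozen=True, auto_attribs=True)
class NewStyle:
    identifier: str  # plain annotations, picked up because auto_attribs=True
    enforce_key_validity: bool

# Both get identical generated __init__/__repr__/__eq__ and the same fields:
assert [f.name for f in attr.fields(OldStyle)] == [f.name for f in attr.fields(NewStyle)]
assert NewStyle("9", True).identifier == "9"
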
diff --git a/synapse/api/urls.py b/synapse/api/urls.py
index f9f9467dc1..bd49fa6a5f 100644
--- a/synapse/api/urls.py
+++ b/synapse/api/urls.py
@@ -28,7 +28,6 @@ FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1"
 FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2"
 FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable"
 STATIC_PREFIX = "/_matrix/static"
-WEB_CLIENT_PREFIX = "/_matrix/client"
 SERVER_KEY_V2_PREFIX = "/_matrix/key/v2"
 MEDIA_R0_PREFIX = "/_matrix/media/r0"
 MEDIA_V3_PREFIX = "/_matrix/media/v3"
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 5fc59c1be1..bbab8a052a 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -16,7 +16,6 @@ import atexit
 import gc
 import logging
 import os
-import platform
 import signal
 import socket
 import sys
@@ -60,7 +59,7 @@ from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
 from synapse.logging.context import PreserveLoggingContext
-from synapse.metrics import register_threadpool
+from synapse.metrics import install_gc_manager, register_threadpool
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.metrics.jemalloc import setup_jemalloc_stats
 from synapse.types import ISynapseReactor
@@ -159,6 +158,7 @@ def start_reactor(
         change_resource_limit(soft_file_limit)
         if gc_thresholds:
             gc.set_threshold(*gc_thresholds)
+        install_gc_manager()
         run_command()
 
     # make sure that we run the reactor with the sentinel log context,
@@ -435,7 +435,8 @@ async def start(hs: "HomeServer") -> None:
     # before we start the listeners.
     module_api = hs.get_module_api()
     for module, config in hs.config.modules.loaded_modules:
-        module(config=config, api=module_api)
+        m = module(config=config, api=module_api)
+        logger.info("Loaded module %s", m)
 
     load_legacy_spam_checkers(hs)
     load_legacy_third_party_event_rules(hs)
@@ -467,15 +468,13 @@ async def start(hs: "HomeServer") -> None:
     # everything currently allocated are things that will be used for the
     # rest of time. Doing so means less work each GC (hopefully).
     #
-    # This only works on Python 3.7
-    if platform.python_implementation() == "CPython" and sys.version_info >= (3, 7):
+    # PyPy does not (yet?) implement gc.freeze()
+    if hasattr(gc, "freeze"):
         gc.collect()
         gc.freeze()
 
-    # Speed up shutdowns by freezing all allocated objects. This moves everything
-    # into the permanent generation and excludes them from the final GC.
-    # Unfortunately only works on Python 3.7
-    if platform.python_implementation() == "CPython" and sys.version_info >= (3, 7):
+        # Speed up shutdowns by freezing all allocated objects. This moves everything
+        # into the permanent generation and excludes them from the final GC.
         atexit.register(gc.freeze)
 
 
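The hunk above swaps a CPython-version check for feature detection. In isolation, the startup pattern is (a minimal sketch of the same logic):

import atexit
import gc

# gc.freeze() (CPython 3.7+) moves every object currently tracked by the
# collector into a "permanent generation" that later collections skip.
# PyPy does not implement it, hence hasattr() rather than a version check.
if hasattr(gc, "freeze"):
    gc.collect()  # drop garbage first so only live objects get frozen
    gc.freeze()   # exclude the survivors from future GC passes
    # Also skip them in the final collection at interpreter shutdown.
    atexit.register(gc.freeze)
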
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 177ce040e8..24d55b0494 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -21,20 +21,17 @@ from typing import Dict, Iterable, Iterator, List
 from twisted.internet.tcp import Port
 from twisted.web.resource import EncodingResourceWrapper, Resource
 from twisted.web.server import GzipEncoderFactory
-from twisted.web.static import File
 
 import synapse
 import synapse.config.logger
 from synapse import events
 from synapse.api.urls import (
-    CLIENT_API_PREFIX,
     FEDERATION_PREFIX,
     LEGACY_MEDIA_PREFIX,
     MEDIA_R0_PREFIX,
     MEDIA_V3_PREFIX,
     SERVER_KEY_V2_PREFIX,
     STATIC_PREFIX,
-    WEB_CLIENT_PREFIX,
 )
 from synapse.app import _base
 from synapse.app._base import (
@@ -54,7 +51,6 @@ from synapse.http.additional_resource import AdditionalResource
 from synapse.http.server import (
     OptionsResource,
     RootOptionsRedirectResource,
-    RootRedirect,
     StaticResource,
 )
 from synapse.http.site import SynapseSite
@@ -132,9 +128,15 @@ class SynapseHomeServer(HomeServer):
         resources.update(self._module_web_resources)
         self._module_web_resources_consumed = True
 
-        # try to find something useful to redirect '/' to
-        if WEB_CLIENT_PREFIX in resources:
-            root_resource: Resource = RootOptionsRedirectResource(WEB_CLIENT_PREFIX)
+        # Try to find something useful to serve at '/':
+        #
+        # 1. Redirect to the web client if it is an HTTP(S) URL.
+        # 2. Redirect to the static "Synapse is running" page.
+        # 3. Do not redirect and use a blank resource.
+        if self.config.server.web_client_location:
+            root_resource: Resource = RootOptionsRedirectResource(
+                self.config.server.web_client_location
+            )
         elif STATIC_PREFIX in resources:
             root_resource = RootOptionsRedirectResource(STATIC_PREFIX)
         else:
@@ -193,7 +195,13 @@ class SynapseHomeServer(HomeServer):
 
             resources.update(
                 {
-                    CLIENT_API_PREFIX: client_resource,
+                    "/_matrix/client/api/v1": client_resource,
+                    "/_matrix/client/r0": client_resource,
+                    "/_matrix/client/v1": client_resource,
+                    "/_matrix/client/v3": client_resource,
+                    "/_matrix/client/unstable": client_resource,
+                    "/_matrix/client/v2_alpha": client_resource,
+                    "/_matrix/client/versions": client_resource,
                     "/.well-known": well_known_resource(self),
                     "/_synapse/admin": AdminRestResource(self),
                     **build_synapse_client_resource_tree(self),
@@ -256,28 +264,6 @@ class SynapseHomeServer(HomeServer):
         if name in ["keys", "federation"]:
             resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
 
-        if name == "webclient":
-            webclient_loc = self.config.server.web_client_location
-
-            if webclient_loc is None:
-                logger.warning(
-                    "Not enabling webclient resource, as web_client_location is unset."
-                )
-            elif webclient_loc.startswith("http://") or webclient_loc.startswith(
-                "https://"
-            ):
-                resources[WEB_CLIENT_PREFIX] = RootRedirect(webclient_loc)
-            else:
-                logger.warning(
-                    "Running webclient on the same domain is not recommended: "
-                    "https://github.com/matrix-org/synapse#security-note - "
-                    "after you move webclient to different host you can set "
-                    "web_client_location to its full URL to enable redirection."
-                )
-                # GZip is disabled here due to
-                # https://twistedmatrix.com/trac/ticket/7678
-                resources[WEB_CLIENT_PREFIX] = File(webclient_loc)
-
         if name == "metrics" and self.config.metrics.enable_metrics:
             resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
 
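The '/' handling above is a three-step precedence chain. A minimal standalone sketch of the selection logic (the string return values merely stand in for the Resource objects):

from typing import Dict, Optional

def pick_root(web_client_location: Optional[str], resources: Dict[str, object]) -> str:
    # 1. An explicitly configured web client URL wins.
    if web_client_location:
        return f"redirect -> {web_client_location}"
    # 2. Otherwise fall back to the static "Synapse is running" page.
    if "/_matrix/static" in resources:
        return "redirect -> /_matrix/static"
    # 3. Otherwise serve a blank resource at '/'.
    return "blank"

assert pick_root("https://app.example.org", {}) == "redirect -> https://app.example.org"
assert pick_root(None, {"/_matrix/static": object()}) == "redirect -> /_matrix/static"
assert pick_root(None, {}) == "blank"
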
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 8c9ff93b2c..7dbebd97b5 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -351,11 +351,13 @@ class AppServiceTransaction:
         id: int,
         events: List[EventBase],
         ephemeral: List[JsonDict],
+        to_device_messages: List[JsonDict],
     ):
         self.service = service
         self.id = id
         self.events = events
         self.ephemeral = ephemeral
+        self.to_device_messages = to_device_messages
 
     async def send(self, as_api: "ApplicationServiceApi") -> bool:
         """Sends this transaction using the provided AS API interface.
@@ -369,6 +371,7 @@ class AppServiceTransaction:
             service=self.service,
             events=self.events,
             ephemeral=self.ephemeral,
+            to_device_messages=self.to_device_messages,
             txn_id=self.id,
         )
 
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index def4424af0..73be7ff3d4 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -218,8 +218,23 @@ class ApplicationServiceApi(SimpleHttpClient):
         service: "ApplicationService",
         events: List[EventBase],
         ephemeral: List[JsonDict],
+        to_device_messages: List[JsonDict],
         txn_id: Optional[int] = None,
     ) -> bool:
+        """
+        Push data to an application service.
+
+        Args:
+            service: The application service to send to.
+            events: The persistent events to send.
+            ephemeral: The ephemeral events to send.
+            to_device_messages: The to-device messages to send.
+            txn_id: A unique ID to assign to this transaction. Application services should
+                deduplicate transactions received with identical IDs.
+
+        Returns:
+            True if the task succeeded, False if it failed.
+        """
         if service.url is None:
             return True
 
@@ -237,13 +252,15 @@ class ApplicationServiceApi(SimpleHttpClient):
         uri = service.url + ("/transactions/%s" % urllib.parse.quote(str(txn_id)))
 
         # Never send ephemeral events to appservices that do not support it
+        body: Dict[str, List[JsonDict]] = {"events": serialized_events}
         if service.supports_ephemeral:
-            body = {
-                "events": serialized_events,
-                "de.sorunome.msc2409.ephemeral": ephemeral,
-            }
-        else:
-            body = {"events": serialized_events}
+            body.update(
+                {
+                    # TODO: Update to stable prefixes once MSC2409 completes FCP merge.
+                    "de.sorunome.msc2409.ephemeral": ephemeral,
+                    "de.sorunome.msc2409.to_device": to_device_messages,
+                }
+            )
 
         try:
             await self.put_json(
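
Putting push_bulk together: for a service with `supports_ephemeral` set, the JSON body PUT to `/transactions/{txn_id}` now carries three lists. A sketch with illustrative payloads (the unstable MSC2409 field names are taken from the hunk above):

serialized_events = [{"type": "m.room.message", "room_id": "!room:example.org"}]
ephemeral = [{"type": "m.typing", "room_id": "!room:example.org"}]
to_device_messages = [{"type": "m.example.event", "to_user_id": "@user:example.org"}]

body = {"events": serialized_events}
supports_ephemeral = True  # i.e. service.supports_ephemeral
if supports_ephemeral:
    body.update(
        {
            "de.sorunome.msc2409.ephemeral": ephemeral,
            "de.sorunome.msc2409.to_device": to_device_messages,
        }
    )
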
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 185e3a5278..c42fa32fff 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -48,7 +48,16 @@ This is all tied together by the AppServiceScheduler which DIs the required
 components.
 """
 import logging
-from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Set
+from typing import (
+    TYPE_CHECKING,
+    Awaitable,
+    Callable,
+    Collection,
+    Dict,
+    List,
+    Optional,
+    Set,
+)
 
 from synapse.appservice import ApplicationService, ApplicationServiceState
 from synapse.appservice.api import ApplicationServiceApi
@@ -71,6 +80,9 @@ MAX_PERSISTENT_EVENTS_PER_TRANSACTION = 100
 # Maximum number of ephemeral events to provide in an AS transaction.
 MAX_EPHEMERAL_EVENTS_PER_TRANSACTION = 100
 
+# Maximum number of to-device messages to provide in an AS transaction.
+MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION = 100
+
 
 class ApplicationServiceScheduler:
     """Public facing API for this module. Does the required DI to tie the
@@ -97,15 +109,40 @@ class ApplicationServiceScheduler:
         for service in services:
             self.txn_ctrl.start_recoverer(service)
 
-    def submit_event_for_as(
-        self, service: ApplicationService, event: EventBase
+    def enqueue_for_appservice(
+        self,
+        appservice: ApplicationService,
+        events: Optional[Collection[EventBase]] = None,
+        ephemeral: Optional[Collection[JsonDict]] = None,
+        to_device_messages: Optional[Collection[JsonDict]] = None,
     ) -> None:
-        self.queuer.enqueue_event(service, event)
+        """
+        Enqueue some data to be sent off to an application service.
 
-    def submit_ephemeral_events_for_as(
-        self, service: ApplicationService, events: List[JsonDict]
-    ) -> None:
-        self.queuer.enqueue_ephemeral(service, events)
+        Args:
+            appservice: The application service to create and send a transaction to.
+            events: The persistent room events to send.
+            ephemeral: The ephemeral events to send.
+            to_device_messages: The to-device messages to send. These differ from normal
+                to-device messages sent to clients, as they have 'to_device_id' and
+                'to_user_id' fields.
+        """
+        # We purposefully allow this method to run with empty events/ephemeral
+        # collections, so that callers do not need to check iterable size themselves.
+        if not events and not ephemeral and not to_device_messages:
+            return
+
+        if events:
+            self.queuer.queued_events.setdefault(appservice.id, []).extend(events)
+        if ephemeral:
+            self.queuer.queued_ephemeral.setdefault(appservice.id, []).extend(ephemeral)
+        if to_device_messages:
+            self.queuer.queued_to_device_messages.setdefault(appservice.id, []).extend(
+                to_device_messages
+            )
+
+        # Kick off a new application service transaction
+        self.queuer.start_background_request(appservice)
 
 
 class _ServiceQueuer:
@@ -121,13 +158,15 @@ class _ServiceQueuer:
         self.queued_events: Dict[str, List[EventBase]] = {}
         # dict of {service_id: [events]}
         self.queued_ephemeral: Dict[str, List[JsonDict]] = {}
+        # dict of {service_id: [to_device_message_json]}
+        self.queued_to_device_messages: Dict[str, List[JsonDict]] = {}
 
         # the appservices which currently have a transaction in flight
         self.requests_in_flight: Set[str] = set()
         self.txn_ctrl = txn_ctrl
         self.clock = clock
 
-    def _start_background_request(self, service: ApplicationService) -> None:
+    def start_background_request(self, service: ApplicationService) -> None:
         # start a sender for this appservice if we don't already have one
         if service.id in self.requests_in_flight:
             return
@@ -136,16 +175,6 @@ class _ServiceQueuer:
             "as-sender-%s" % (service.id,), self._send_request, service
         )
 
-    def enqueue_event(self, service: ApplicationService, event: EventBase) -> None:
-        self.queued_events.setdefault(service.id, []).append(event)
-        self._start_background_request(service)
-
-    def enqueue_ephemeral(
-        self, service: ApplicationService, events: List[JsonDict]
-    ) -> None:
-        self.queued_ephemeral.setdefault(service.id, []).extend(events)
-        self._start_background_request(service)
-
     async def _send_request(self, service: ApplicationService) -> None:
         # sanity-check: we shouldn't get here if this service already has a sender
         # running.
@@ -162,11 +191,21 @@ class _ServiceQueuer:
                 ephemeral = all_events_ephemeral[:MAX_EPHEMERAL_EVENTS_PER_TRANSACTION]
                 del all_events_ephemeral[:MAX_EPHEMERAL_EVENTS_PER_TRANSACTION]
 
-                if not events and not ephemeral:
+                all_to_device_messages = self.queued_to_device_messages.get(
+                    service.id, []
+                )
+                to_device_messages_to_send = all_to_device_messages[
+                    :MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION
+                ]
+                del all_to_device_messages[:MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION]
+
+                if not events and not ephemeral and not to_device_messages_to_send:
                     return
 
                 try:
-                    await self.txn_ctrl.send(service, events, ephemeral)
+                    await self.txn_ctrl.send(
+                        service, events, ephemeral, to_device_messages_to_send
+                    )
                 except Exception:
                     logger.exception("AS request failed")
         finally:
@@ -198,10 +237,24 @@ class _TransactionController:
         service: ApplicationService,
         events: List[EventBase],
         ephemeral: Optional[List[JsonDict]] = None,
+        to_device_messages: Optional[List[JsonDict]] = None,
     ) -> None:
+        """
+        Create a transaction with the given data and send to the provided
+        application service.
+
+        Args:
+            service: The application service to send the transaction to.
+            events: The persistent events to include in the transaction.
+            ephemeral: The ephemeral events to include in the transaction.
+            to_device_messages: The to-device messages to include in the transaction.
+        """
         try:
             txn = await self.store.create_appservice_txn(
-                service=service, events=events, ephemeral=ephemeral or []
+                service=service,
+                events=events,
+                ephemeral=ephemeral or [],
+                to_device_messages=to_device_messages or [],
             )
             service_is_up = await self._is_service_up(service)
             if service_is_up:
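
Each transaction is capped at 100 persistent events, 100 ephemeral events and, with this change, 100 to-device messages. A minimal sketch of the slice-then-delete batching idiom `_send_request` applies to each per-service queue:

from typing import Dict, List

MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION = 100

queued_to_device_messages: Dict[str, List[dict]] = {
    "as1": [{"n": i} for i in range(250)]
}

all_msgs = queued_to_device_messages.get("as1", [])
batch = all_msgs[:MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION]
del all_msgs[:MAX_TO_DEVICE_MESSAGES_PER_TRANSACTION]
# `batch` (100 messages) goes into this transaction; the 150 left in the
# queue are picked up on the next iteration of the send loop.
assert len(batch) == 100 and len(all_msgs) == 150
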
diff --git a/synapse/config/api.py b/synapse/config/api.py
index bdbe9f0fa2..8133b6b624 100644
--- a/synapse/config/api.py
+++ b/synapse/config/api.py
@@ -61,11 +61,20 @@ class ApiConfig(Config):
            #additional_event_types:
            #  - org.example.custom.event.type
 
-        # If enabled, puppeted user IP's can also be tracked. By default when
-        # puppeting another user, the user who has created the access token
-        # for puppeting is tracked. If this is enabled, both requests are tracked.
-        # Implicitly enables MAU tracking for puppeted users.
-        #track_puppeted_user_ips: false
+        # We record the IP address of clients used to access the API for various
+        # reasons, including displaying it to the user in the "Where you're signed in"
+        # dialog.
+        #
+        # By default, when puppeting another user via the admin API, the client IP
+        # address is recorded against the user who created the access token (ie, the
+        # admin user), and *not* the puppeted user.
+        #
+        # Uncomment the following to also record the IP address against the puppeted
+        # user. (This also means that the puppeted user will count as an "active" user
+        # for the purpose of monthly active user tracking - see 'limit_usage_by_mau' etc
+        # above.)
+        #
+        #track_puppeted_user_ips: true
         """ % {
             "formatted_default_state_types": formatted_default_state_types
         }
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index 510b647c63..949d7dd5ac 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -55,19 +55,19 @@ https://matrix-org.github.io/synapse/latest/templates.html
 ---------------------------------------------------------------------------------------"""
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class EmailSubjectConfig:
-    message_from_person_in_room = attr.ib(type=str)
-    message_from_person = attr.ib(type=str)
-    messages_from_person = attr.ib(type=str)
-    messages_in_room = attr.ib(type=str)
-    messages_in_room_and_others = attr.ib(type=str)
-    messages_from_person_and_others = attr.ib(type=str)
-    invite_from_person = attr.ib(type=str)
-    invite_from_person_to_room = attr.ib(type=str)
-    invite_from_person_to_space = attr.ib(type=str)
-    password_reset = attr.ib(type=str)
-    email_validation = attr.ib(type=str)
+    message_from_person_in_room: str
+    message_from_person: str
+    messages_from_person: str
+    messages_in_room: str
+    messages_in_room_and_others: str
+    messages_from_person_and_others: str
+    invite_from_person: str
+    invite_from_person_to_room: str
+    invite_from_person_to_space: str
+    password_reset: str
+    email_validation: str
 
 
 class EmailConfig(Config):
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index dbaeb10918..e4719d19b8 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -24,8 +24,6 @@ class ExperimentalConfig(Config):
     def read_config(self, config: JsonDict, **kwargs):
         experimental = config.get("experimental_features") or {}
 
-        # Whether to enable experimental MSC1849 (aka relations) support
-        self.msc1849_enabled = config.get("experimental_msc1849_support_enabled", True)
         # MSC3440 (thread relation)
         self.msc3440_enabled: bool = experimental.get("msc3440_enabled", False)
 
@@ -54,3 +52,10 @@ class ExperimentalConfig(Config):
         self.msc3202_device_masquerading_enabled: bool = experimental.get(
             "msc3202_device_masquerading", False
         )
+
+        # MSC2409 (this setting only relates to optionally sending to-device messages).
+        # Presence, typing and read receipt EDUs are already sent to application services that
+        # have opted in to receive them. If enabled, this adds to-device messages to that list.
+        self.msc2409_to_device_messages_enabled: bool = experimental.get(
+            "msc2409_to_device_messages_enabled", False
+        )
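
A minimal sketch of how the flag above is resolved, with a plain dict standing in for the parsed homeserver.yaml:

# Stand-in for a parsed homeserver.yaml that opts in to MSC2409 to-device
# message delivery for appservices:
config = {"experimental_features": {"msc2409_to_device_messages_enabled": True}}

experimental = config.get("experimental_features") or {}
msc2409_to_device_messages_enabled: bool = experimental.get(
    "msc2409_to_device_messages_enabled", False
)
assert msc2409_to_device_messages_enabled is True
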
diff --git a/synapse/config/modules.py b/synapse/config/modules.py
index 85fb05890d..2ef02b8f55 100644
--- a/synapse/config/modules.py
+++ b/synapse/config/modules.py
@@ -41,9 +41,9 @@ class ModulesConfig(Config):
             # documentation on how to configure or create custom modules for Synapse.
             #
             modules:
-                # - module: my_super_module.MySuperClass
-                #   config:
-                #       do_thing: true
-                # - module: my_other_super_module.SomeClass
-                #   config: {}
+              #- module: my_super_module.MySuperClass
+              #  config:
+              #    do_thing: true
+              #- module: my_other_super_module.SomeClass
+              #  config: {}
             """
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index 79c400fe30..e783b11315 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -148,10 +148,13 @@ class OIDCConfig(Config):
         #       Defaults to false. Avoid this in production.
         #
         #   user_profile_method: Whether to fetch the user profile from the userinfo
-        #       endpoint. Valid values are: 'auto' or 'userinfo_endpoint'.
+        #       endpoint, or to rely on the data returned in the id_token from the
+        #       token_endpoint.
         #
-        #       Defaults to 'auto', which fetches the userinfo endpoint if 'openid' is
-        #       included in 'scopes'. Set to 'userinfo_endpoint' to always fetch the
+        #       Valid values are: 'auto' or 'userinfo_endpoint'.
+        #
+        #       Defaults to 'auto', which uses the userinfo endpoint if 'openid' is
+        #       not included in 'scopes'. Set to 'userinfo_endpoint' to always use the
         #       userinfo endpoint.
         #
         #   allow_existing_users: set to 'true' to allow a user logging in via OIDC to
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 36636ab07e..e9ccf1bd62 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -134,6 +134,14 @@ class RatelimitConfig(Config):
             defaults={"per_second": 0.003, "burst_count": 5},
         )
 
+        self.rc_third_party_invite = RateLimitConfig(
+            config.get("rc_third_party_invite", {}),
+            defaults={
+                "per_second": self.rc_message.per_second,
+                "burst_count": self.rc_message.burst_count,
+            },
+        )
+
     def generate_config_section(self, **kwargs):
         return """\
         ## Ratelimiting ##
@@ -168,6 +176,9 @@ class RatelimitConfig(Config):
         #   - one for ratelimiting how often a user or IP can attempt to validate a 3PID.
         #   - two for ratelimiting how often invites can be sent in a room or to a
         #     specific user.
+        #   - one for ratelimiting 3PID invites (i.e. invites sent to a third-party ID
+        #     such as an email address or a phone number) based on the account that's
+        #     sending the invite.
         #
         # The defaults are as shown below.
         #
@@ -217,6 +228,10 @@ class RatelimitConfig(Config):
         #  per_user:
         #    per_second: 0.003
         #    burst_count: 5
+        #
+        #rc_third_party_invite:
+        #  per_second: 0.2
+        #  burst_count: 10
 
         # Ratelimiting settings for incoming federation
         #
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index 7a059c6dec..ea9b50fe97 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -190,6 +190,8 @@ class RegistrationConfig(Config):
         # The success template used during fallback auth.
         self.fallback_success_template = self.read_template("auth_success.html")
 
+        self.inhibit_user_in_use_error = config.get("inhibit_user_in_use_error", False)
+
     def generate_config_section(self, generate_secrets=False, **kwargs):
         if generate_secrets:
             registration_shared_secret = 'registration_shared_secret: "%s"' % (
@@ -446,6 +448,16 @@ class RegistrationConfig(Config):
         # Defaults to true.
         #
         #auto_join_rooms_for_guests: false
+
+        # Whether to inhibit errors raised when registering a new account if the user ID
+        # already exists. If turned on, requests to /register/available will always
+        # show a user ID as available, and Synapse won't raise an error when starting
+        # a registration with a user ID that already exists. However, Synapse will still
+        # raise an error if the registration completes and the username conflicts.
+        #
+        # Defaults to false.
+        #
+        #inhibit_user_in_use_error: true
         """
             % locals()
         )
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 1de2dea9b0..7bc9624546 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -179,7 +179,6 @@ KNOWN_RESOURCES = {
     "openid",
     "replication",
     "static",
-    "webclient",
 }
 
 
@@ -200,8 +199,8 @@ class HttpListenerConfig:
     """Object describing the http-specific parts of the config of a listener"""
 
     x_forwarded: bool = False
-    resources: List[HttpResourceConfig] = attr.ib(factory=list)
-    additional_resources: Dict[str, dict] = attr.ib(factory=dict)
+    resources: List[HttpResourceConfig] = attr.Factory(list)
+    additional_resources: Dict[str, dict] = attr.Factory(dict)
     tag: Optional[str] = None
 
 
@@ -259,7 +258,6 @@ class ServerConfig(Config):
             raise ConfigError(str(e))
 
         self.pid_file = self.abspath(config.get("pid_file"))
-        self.web_client_location = config.get("web_client_location", None)
         self.soft_file_limit = config.get("soft_file_limit", 0)
         self.daemonize = config.get("daemonize")
         self.print_pidfile = config.get("print_pidfile")
@@ -490,6 +488,19 @@ class ServerConfig(Config):
         # events with profile information that differ from the target's global profile.
         self.allow_per_room_profiles = config.get("allow_per_room_profiles", True)
 
+        # The maximum size an avatar can have, in bytes.
+        self.max_avatar_size = config.get("max_avatar_size")
+        if self.max_avatar_size is not None:
+            self.max_avatar_size = self.parse_size(self.max_avatar_size)
+
+        # The MIME types allowed for an avatar.
+        self.allowed_avatar_mimetypes = config.get("allowed_avatar_mimetypes")
+        if self.allowed_avatar_mimetypes and not isinstance(
+            self.allowed_avatar_mimetypes,
+            list,
+        ):
+            raise ConfigError("allowed_avatar_mimetypes must be a list")
+
         self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])]
 
         # no_tls is not really supported any more, but let's grandfather it in
@@ -506,8 +517,13 @@ class ServerConfig(Config):
                     l2.append(listener)
             self.listeners = l2
 
-        if not self.web_client_location:
-            _warn_if_webclient_configured(self.listeners)
+        self.web_client_location = config.get("web_client_location", None)
+        # Non-HTTP(S) web client location is not supported.
+        if self.web_client_location and not (
+            self.web_client_location.startswith("http://")
+            or self.web_client_location.startswith("https://")
+        ):
+            raise ConfigError("web_client_location must point to an HTTP(S) URL.")
 
         self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None))
         self.gc_seconds = self.read_gc_intervals(config.get("gc_min_interval", None))
@@ -635,19 +651,6 @@ class ServerConfig(Config):
             False,
         )
 
-        # List of users trialing the new experimental default push rules. This setting is
-        # not included in the sample configuration file on purpose as it's a temporary
-        # hack, so that some users can trial the new defaults without impacting every
-        # user on the homeserver.
-        users_new_default_push_rules: list = (
-            config.get("users_new_default_push_rules") or []
-        )
-        if not isinstance(users_new_default_push_rules, list):
-            raise ConfigError("'users_new_default_push_rules' must be a list")
-
-        # Turn the list into a set to improve lookup speed.
-        self.users_new_default_push_rules: set = set(users_new_default_push_rules)
-
         # Whitelist of domain names that given next_link parameters must have
         next_link_domain_whitelist: Optional[List[str]] = config.get(
             "next_link_domain_whitelist"
@@ -793,13 +796,7 @@ class ServerConfig(Config):
         #
         pid_file: %(pid_file)s
 
-        # The absolute URL to the web client which /_matrix/client will redirect
-        # to if 'webclient' is configured under the 'listeners' configuration.
-        #
-        # This option can be also set to the filesystem path to the web client
-        # which will be served at /_matrix/client/ if 'webclient' is configured
-        # under the 'listeners' configuration, however this is a security risk:
-        # https://github.com/matrix-org/synapse#security-note
+        # The absolute URL to the web client which / will redirect to.
         #
         #web_client_location: https://riot.example.com/
 
@@ -883,7 +880,7 @@ class ServerConfig(Config):
         # The default room version for newly created rooms.
         #
         # Known room versions are listed here:
-        # https://matrix.org/docs/spec/#complete-list-of-room-versions
+        # https://spec.matrix.org/latest/rooms/#complete-list-of-room-versions
         #
         # For example, for room version 1, default_room_version should be set
         # to "1".
@@ -1011,8 +1008,6 @@ class ServerConfig(Config):
         #   static: static resources under synapse/static (/_matrix/static). (Mostly
         #       useful for 'fallback authentication'.)
         #
-        #   webclient: A web client. Requires web_client_location to be set.
-        #
         listeners:
           # TLS-enabled listener: for when matrix traffic is sent directly to synapse.
           #
@@ -1168,6 +1163,20 @@ class ServerConfig(Config):
         #
         #allow_per_room_profiles: false
 
+        # The largest allowed file size for a user avatar. Defaults to no restriction.
+        #
+        # Note that user avatar changes will not work if this is set without
+        # using Synapse's media repository.
+        #
+        #max_avatar_size: 10M
+
+        # The MIME types allowed for user avatars. Defaults to no restriction.
+        #
+        # Note that user avatar changes will not work if this is set without
+        # using Synapse's media repository.
+        #
+        #allowed_avatar_mimetypes: ["image/png", "image/jpeg", "image/gif"]
+
         # How long to keep redacted events in unredacted form in the database. After
         # this period redacted events get replaced with their redacted form in the DB.
         #
@@ -1337,11 +1346,16 @@ def parse_listener_def(listener: Any) -> ListenerConfig:
 
     http_config = None
     if listener_type == "http":
+        try:
+            resources = [
+                HttpResourceConfig(**res) for res in listener.get("resources", [])
+            ]
+        except ValueError as e:
+            raise ConfigError("Unknown listener resource") from e
+
         http_config = HttpListenerConfig(
             x_forwarded=listener.get("x_forwarded", False),
-            resources=[
-                HttpResourceConfig(**res) for res in listener.get("resources", [])
-            ],
+            resources=resources,
             additional_resources=listener.get("additional_resources", {}),
             tag=listener.get("tag"),
         )
@@ -1349,24 +1363,6 @@ def parse_listener_def(listener: Any) -> ListenerConfig:
     return ListenerConfig(port, bind_addresses, listener_type, tls, http_config)
 
 
-NO_MORE_WEB_CLIENT_WARNING = """
-Synapse no longer includes a web client. To enable a web client, configure
-web_client_location. To remove this warning, remove 'webclient' from the 'listeners'
-configuration.
-"""
-
-
-def _warn_if_webclient_configured(listeners: Iterable[ListenerConfig]) -> None:
-    for listener in listeners:
-        if not listener.http_options:
-            continue
-        for res in listener.http_options.resources:
-            for name in res.names:
-                if name == "webclient":
-                    logger.warning(NO_MORE_WEB_CLIENT_WARNING)
-                    return
-
-
 _MANHOLE_SETTINGS_SCHEMA = {
     "type": "object",
     "properties": {
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 576f519188..bdaba6db37 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -51,12 +51,12 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
     return obj
 
 
-@attr.s
+@attr.s(auto_attribs=True)
 class InstanceLocationConfig:
     """The host and port to talk to an instance via HTTP replication."""
 
-    host = attr.ib(type=str)
-    port = attr.ib(type=int)
+    host: str
+    port: int
 
 
 @attr.s
@@ -77,34 +77,28 @@ class WriterLocations:
             can only be a single instance.
     """
 
-    events = attr.ib(
+    events: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
-    typing = attr.ib(
+    typing: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
-    to_device = attr.ib(
+    to_device: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
-    account_data = attr.ib(
+    account_data: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
-    receipts = attr.ib(
+    receipts: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
-    presence = attr.ib(
+    presence: List[str] = attr.ib(
         default=["master"],
-        type=List[str],
         converter=_instance_to_list_converter,
     )
 
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 993b04099e..72d4a69aac 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -58,7 +58,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-@attr.s(slots=True, cmp=False)
+@attr.s(slots=True, frozen=True, cmp=False, auto_attribs=True)
 class VerifyJsonRequest:
     """
     A request to verify a JSON object.
@@ -78,10 +78,10 @@ class VerifyJsonRequest:
         key_ids: The set of key_ids to that could be used to verify the JSON object
     """
 
-    server_name = attr.ib(type=str)
-    get_json_object = attr.ib(type=Callable[[], JsonDict])
-    minimum_valid_until_ts = attr.ib(type=int)
-    key_ids = attr.ib(type=List[str])
+    server_name: str
+    get_json_object: Callable[[], JsonDict]
+    minimum_valid_until_ts: int
+    key_ids: List[str]
 
     @staticmethod
     def from_json_object(
@@ -124,7 +124,7 @@ class KeyLookupError(ValueError):
     pass
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class _FetchKeyRequest:
     """A request for keys for a given server.
 
@@ -138,9 +138,9 @@ class _FetchKeyRequest:
         key_ids: The IDs of the keys to attempt to fetch
     """
 
-    server_name = attr.ib(type=str)
-    minimum_valid_until_ts = attr.ib(type=int)
-    key_ids = attr.ib(type=List[str])
+    server_name: str
+    minimum_valid_until_ts: int
+    key_ids: List[str]
 
 
 class Keyring:
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 38f3cf4d33..9acb3c0cc4 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -315,10 +315,11 @@ class EventBase(metaclass=abc.ABCMeta):
     redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
     room_id: DictProperty[str] = DictProperty("room_id")
     sender: DictProperty[str] = DictProperty("sender")
-    # TODO state_key should be Optional[str], this is generally asserted in Synapse
-    # by calling is_state() first (which ensures this), but it is hard (not possible?)
+    # TODO state_key should be Optional[str]. This is generally asserted in Synapse
+    # by calling is_state() first (which ensures it is not None), but it is hard (not possible?)
     # to properly annotate that calling is_state() asserts that state_key exists
-    # and is non-None.
+    # and is non-None. It would be better to replace such direct references with
+    # get_state_key() (and a check for None).
     state_key: DictProperty[str] = DictProperty("state_key")
     type: DictProperty[str] = DictProperty("type")
     user_id: DictProperty[str] = DictProperty("sender")
@@ -332,7 +333,11 @@ class EventBase(metaclass=abc.ABCMeta):
         return self.content["membership"]
 
     def is_state(self) -> bool:
-        return hasattr(self, "state_key") and self.state_key is not None
+        return self.get_state_key() is not None
+
+    def get_state_key(self) -> Optional[str]:
+        """Get the state key of this event, or None if it's not a state event"""
+        return self._dict.get("state_key")
 
     def get_dict(self) -> JsonDict:
         d = dict(self._dict)
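
A hedged usage sketch of the new accessor (`describe` is a hypothetical caller, not part of the diff): callers can branch on `get_state_key()` instead of probing `state_key` with `hasattr()`:

from typing import Optional

def describe(event: "EventBase") -> str:  # hypothetical helper
    state_key: Optional[str] = event.get_state_key()
    if state_key is None:
        return f"non-state event of type {event.type}"
    # In this branch state_key is known to be a str; no hasattr() dance.
    return f"state event {event.type} with state_key {state_key!r}"
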
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index f251402ed8..5833fee25f 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -28,7 +28,7 @@ if TYPE_CHECKING:
     from synapse.storage.databases.main import DataStore
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class EventContext:
     """
     Holds information relevant to persisting an event
@@ -103,15 +103,15 @@ class EventContext:
             accessed via get_prev_state_ids.
     """
 
-    rejected = attr.ib(default=False, type=Union[bool, str])
-    _state_group = attr.ib(default=None, type=Optional[int])
-    state_group_before_event = attr.ib(default=None, type=Optional[int])
-    prev_group = attr.ib(default=None, type=Optional[int])
-    delta_ids = attr.ib(default=None, type=Optional[StateMap[str]])
-    app_service = attr.ib(default=None, type=Optional[ApplicationService])
+    rejected: Union[bool, str] = False
+    _state_group: Optional[int] = None
+    state_group_before_event: Optional[int] = None
+    prev_group: Optional[int] = None
+    delta_ids: Optional[StateMap[str]] = None
+    app_service: Optional[ApplicationService] = None
 
-    _current_state_ids = attr.ib(default=None, type=Optional[StateMap[str]])
-    _prev_state_ids = attr.ib(default=None, type=Optional[StateMap[str]])
+    _current_state_ids: Optional[StateMap[str]] = None
+    _prev_state_ids: Optional[StateMap[str]] = None
 
     @staticmethod
     def with_state(
@@ -163,7 +163,7 @@ class EventContext:
         return {
             "prev_state_id": prev_state_id,
             "event_type": event.type,
-            "event_state_key": event.state_key if event.is_state() else None,
+            "event_state_key": event.get_state_key(),
             "state_group": self._state_group,
             "state_group_before_event": self.state_group_before_event,
             "rejected": self.rejected,
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index de0e0c1731..243696b357 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -14,7 +14,17 @@
 # limitations under the License.
 import collections.abc
 import re
-from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Union,
+)
 
 from frozendict import frozendict
 
@@ -26,6 +36,10 @@ from synapse.util.frozenutils import unfreeze
 
 from . import EventBase
 
+if TYPE_CHECKING:
+    from synapse.storage.databases.main.relations import BundledAggregations
+
+
 # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
 # (?<!stuff) matches if the current position in the string is not preceded
 # by a match for 'stuff'.
@@ -376,7 +390,7 @@ class EventClientSerializer:
         event: Union[JsonDict, EventBase],
         time_now: int,
         *,
-        bundle_aggregations: Optional[Dict[str, JsonDict]] = None,
+        bundle_aggregations: Optional[Dict[str, "BundledAggregations"]] = None,
         **kwargs: Any,
     ) -> JsonDict:
         """Serializes a single event.
@@ -402,7 +416,7 @@ class EventClientSerializer:
         if bundle_aggregations:
             event_aggregations = bundle_aggregations.get(event.event_id)
             if event_aggregations:
-                self._injected_bundled_aggregations(
+                self._inject_bundled_aggregations(
                     event,
                     time_now,
                     bundle_aggregations[event.event_id],
@@ -411,11 +425,11 @@ class EventClientSerializer:
 
         return serialized_event
 
-    def _injected_bundled_aggregations(
+    def _inject_bundled_aggregations(
         self,
         event: EventBase,
         time_now: int,
-        aggregations: JsonDict,
+        aggregations: "BundledAggregations",
         serialized_event: JsonDict,
     ) -> None:
         """Potentially injects bundled aggregations into the unsigned portion of the serialized event.
@@ -427,13 +441,18 @@ class EventClientSerializer:
             serialized_event: The serialized event which may be modified.
 
         """
-        # Make a copy in-case the object is cached.
-        aggregations = aggregations.copy()
+        serialized_aggregations = {}
+
+        if aggregations.annotations:
+            serialized_aggregations[RelationTypes.ANNOTATION] = aggregations.annotations
+
+        if aggregations.references:
+            serialized_aggregations[RelationTypes.REFERENCE] = aggregations.references
 
-        if RelationTypes.REPLACE in aggregations:
+        if aggregations.replace:
             # If there is an edit replace the content, preserving existing
             # relations.
-            edit = aggregations[RelationTypes.REPLACE]
+            edit = aggregations.replace
 
             # Ensure we take copies of the edit content, otherwise we risk modifying
             # the original event.
@@ -451,24 +470,28 @@ class EventClientSerializer:
             else:
                 serialized_event["content"].pop("m.relates_to", None)
 
-            aggregations[RelationTypes.REPLACE] = {
+            serialized_aggregations[RelationTypes.REPLACE] = {
                 "event_id": edit.event_id,
                 "origin_server_ts": edit.origin_server_ts,
                 "sender": edit.sender,
             }
 
         # If this event is the start of a thread, include a summary of the replies.
-        if RelationTypes.THREAD in aggregations:
-            # Serialize the latest thread event.
-            latest_thread_event = aggregations[RelationTypes.THREAD]["latest_event"]
-
-            # Don't bundle aggregations as this could recurse forever.
-            aggregations[RelationTypes.THREAD]["latest_event"] = self.serialize_event(
-                latest_thread_event, time_now, bundle_aggregations=None
-            )
+        if aggregations.thread:
+            serialized_aggregations[RelationTypes.THREAD] = {
+                # Don't bundle aggregations as this could recurse forever.
+                "latest_event": self.serialize_event(
+                    aggregations.thread.latest_event, time_now, bundle_aggregations=None
+                ),
+                "count": aggregations.thread.count,
+                "current_user_participated": aggregations.thread.current_user_participated,
+            }
 
         # Include the bundled aggregations in the event.
-        serialized_event["unsigned"].setdefault("m.relations", {}).update(aggregations)
+        if serialized_aggregations:
+            serialized_event["unsigned"].setdefault("m.relations", {}).update(
+                serialized_aggregations
+            )
 
     def serialize_events(
         self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any
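
Note: with this change, bundled aggregations are no longer passed around as a raw JsonDict that has to be defensively copied; they arrive as a typed container and the serializer assembles a fresh dict, so the cached object is never mutated (which is what the removed "Make a copy in-case the object is cached" workaround was guarding against). A sketch of the container shape implied by the serializer above; the canonical attrs definition lives in synapse/storage/databases/main/relations.py and may differ in detail:

import attr
from typing import Optional

from synapse.events import EventBase
from synapse.types import JsonDict

@attr.s(slots=True, auto_attribs=True)
class _ThreadAggregation:
    latest_event: EventBase
    count: int
    current_user_participated: bool

@attr.s(slots=True, auto_attribs=True)
class BundledAggregations:
    annotations: Optional[JsonDict] = None
    references: Optional[JsonDict] = None
    replace: Optional[EventBase] = None  # the most recent edit event, if any
    thread: Optional[_ThreadAggregation] = None
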
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index cf86934968..360d24274a 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections.abc
-from typing import Iterable, Union
+from typing import Iterable, Type, Union
 
 import jsonschema
 
@@ -246,7 +246,7 @@ POWER_LEVELS_SCHEMA = {
 
 # This could return something newer than Draft 7, but that's the current "latest"
 # validator.
-def _create_power_level_validator() -> jsonschema.Draft7Validator:
+def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
     validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
 
     # by default jsonschema does not consider a frozendict to be an object so
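
Note: the return-type fix above reflects that jsonschema.validators.validator_for returns a validator class (chosen from the schema's "$schema" field), not an instance. A quick illustration:

import jsonschema

# validator_for returns a class...
validator_cls = jsonschema.validators.validator_for({"type": "object"})
# ...which is instantiated with the schema before use.
validator = validator_cls({"type": "object"})
validator.validate({})  # no error
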
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 6ea4edfc71..74f17aa4da 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -56,7 +56,6 @@ from synapse.api.room_versions import (
 from synapse.events import EventBase, builder
 from synapse.federation.federation_base import FederationBase, event_from_pdu_json
 from synapse.federation.transport.client import SendJoinResponse
-from synapse.logging.utils import log_function
 from synapse.types import JsonDict, get_domain_from_id
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -119,7 +118,8 @@ class FederationClient(FederationBase):
         # It is a map of (room ID, suggested-only) -> the response of
         # get_room_hierarchy.
         self._get_room_hierarchy_cache: ExpiringCache[
-            Tuple[str, bool], Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]
+            Tuple[str, bool],
+            Tuple[JsonDict, Sequence[JsonDict], Sequence[JsonDict], Sequence[str]],
         ] = ExpiringCache(
             cache_name="get_room_hierarchy_cache",
             clock=self._clock,
@@ -144,7 +144,6 @@ class FederationClient(FederationBase):
             if destination_dict:
                 self.pdu_destination_tried[event_id] = destination_dict
 
-    @log_function
     async def make_query(
         self,
         destination: str,
@@ -178,7 +177,6 @@ class FederationClient(FederationBase):
             ignore_backoff=ignore_backoff,
         )
 
-    @log_function
     async def query_client_keys(
         self, destination: str, content: JsonDict, timeout: int
     ) -> JsonDict:
@@ -196,7 +194,6 @@ class FederationClient(FederationBase):
             destination, content, timeout
         )
 
-    @log_function
     async def query_user_devices(
         self, destination: str, user_id: str, timeout: int = 30000
     ) -> JsonDict:
@@ -208,7 +205,6 @@ class FederationClient(FederationBase):
             destination, user_id, timeout
         )
 
-    @log_function
     async def claim_client_keys(
         self, destination: str, content: JsonDict, timeout: int
     ) -> JsonDict:
@@ -1338,7 +1334,7 @@ class FederationClient(FederationBase):
         destinations: Iterable[str],
         room_id: str,
         suggested_only: bool,
-    ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]:
+    ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[JsonDict], Sequence[str]]:
         """
         Call other servers to get a hierarchy of the given room.
 
@@ -1353,7 +1349,8 @@ class FederationClient(FederationBase):
 
         Returns:
             A tuple of:
-                The room as a JSON dictionary.
+                The room as a JSON dictionary, without a "children_state" key.
+                A list of `m.space.child` state events.
                 A list of children rooms, as JSON dictionaries.
                 A list of inaccessible children room IDs.
 
@@ -1368,7 +1365,7 @@ class FederationClient(FederationBase):
 
         async def send_request(
             destination: str,
-        ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[str]]:
+        ) -> Tuple[JsonDict, Sequence[JsonDict], Sequence[JsonDict], Sequence[str]]:
             try:
                 res = await self.transport_layer.get_room_hierarchy(
                     destination=destination,
@@ -1397,7 +1394,7 @@ class FederationClient(FederationBase):
                 raise InvalidResponseError("'room' must be a dict")
 
             # Validate children_state of the room.
-            children_state = room.get("children_state", [])
+            children_state = room.pop("children_state", [])
             if not isinstance(children_state, Sequence):
                 raise InvalidResponseError("'room.children_state' must be a list")
             if any(not isinstance(e, dict) for e in children_state):
@@ -1426,7 +1423,7 @@ class FederationClient(FederationBase):
                     "Invalid room ID in 'inaccessible_children' list"
                 )
 
-            return room, children, inaccessible_children
+            return room, children_state, children, inaccessible_children
 
         try:
             result = await self._try_destination_list(
@@ -1474,8 +1471,6 @@ class FederationClient(FederationBase):
                 if event.room_id == room_id:
                     children_events.append(event.data)
                     children_room_ids.add(event.state_key)
-            # And add them under the requested room.
-            requested_room["children_state"] = children_events
 
             # Find the children rooms.
             children = []
@@ -1485,7 +1480,7 @@ class FederationClient(FederationBase):
 
             # It isn't clear from the response whether some of the rooms are
             # not accessible.
-            result = (requested_room, children, ())
+            result = (requested_room, children_events, children, ())
 
         # Cache the result to avoid fetching data over federation every time.
         self._get_room_hierarchy_cache[(room_id, suggested_only)] = result
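
Note: after this change, callers of get_room_hierarchy unpack four elements rather than three, and the cached value changes shape to match. A sketch of the caller side (assuming a FederationClient instance; names otherwise illustrative):

async def fetch_hierarchy(federation_client, destinations, room_id):
    # The response now arrives as four parts: the room summary (stripped of
    # its "children_state" key), its m.space.child events, the child room
    # summaries, and the IDs of children the remote could not include.
    room, children_state, children, inaccessible = (
        await federation_client.get_room_hierarchy(
            destinations, room_id, suggested_only=False
        )
    )
    return room, children_state, children, inaccessible
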
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index ee71f289c8..af9cb98f67 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -58,7 +58,6 @@ from synapse.logging.context import (
     run_in_background,
 )
 from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace
-from synapse.logging.utils import log_function
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.replication.http.federation import (
     ReplicationFederationSendEduRestServlet,
@@ -859,7 +858,6 @@ class FederationServer(FederationBase):
             res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]}
         return 200, res
 
-    @log_function
     async def on_query_client_keys(
         self, origin: str, content: Dict[str, str]
     ) -> Tuple[int, Dict[str, Any]]:
@@ -940,7 +938,6 @@ class FederationServer(FederationBase):
 
         return {"events": [ev.get_pdu_json(time_now) for ev in missing_events]}
 
-    @log_function
     async def on_openid_userinfo(self, token: str) -> Optional[str]:
         ts_now_ms = self._clock.time_msec()
         return await self.store.get_user_id_for_open_id_token(token, ts_now_ms)
diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py
index 523ab1c51e..60e2e6cf01 100644
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
@@ -23,7 +23,6 @@ import logging
 from typing import Optional, Tuple
 
 from synapse.federation.units import Transaction
-from synapse.logging.utils import log_function
 from synapse.storage.databases.main import DataStore
 from synapse.types import JsonDict
 
@@ -36,7 +35,6 @@ class TransactionActions:
     def __init__(self, datastore: DataStore):
         self.store = datastore
 
-    @log_function
     async def have_responded(
         self, origin: str, transaction: Transaction
     ) -> Optional[Tuple[int, JsonDict]]:
@@ -53,7 +51,6 @@ class TransactionActions:
 
         return await self.store.get_received_txn_response(transaction_id, origin)
 
-    @log_function
     async def set_response(
         self, origin: str, transaction: Transaction, code: int, response: JsonDict
     ) -> None:
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 391b30fbb5..8152e80b88 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -607,18 +607,18 @@ class PerDestinationQueue:
         self._pending_pdus = []
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _TransactionQueueManager:
     """A helper async context manager for pulling stuff off the queues and
     tracking what was last successfully sent, etc.
     """
 
-    queue = attr.ib(type=PerDestinationQueue)
+    queue: PerDestinationQueue
 
-    _device_stream_id = attr.ib(type=Optional[int], default=None)
-    _device_list_id = attr.ib(type=Optional[int], default=None)
-    _last_stream_ordering = attr.ib(type=Optional[int], default=None)
-    _pdus = attr.ib(type=List[EventBase], factory=list)
+    _device_stream_id: Optional[int] = None
+    _device_list_id: Optional[int] = None
+    _last_stream_ordering: Optional[int] = None
+    _pdus: List[EventBase] = attr.Factory(list)
 
     async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]:
         # First we calculate the EDUs we want to send, if any.
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index ab935e5a7e..742ee57255 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
     import synapse.server
 
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 last_pdu_ts_metric = Gauge(
     "synapse_federation_last_sent_pdu_time",
@@ -124,6 +125,17 @@ class TransactionManager:
                 len(pdus),
                 len(edus),
             )
+            if issue_8631_logger.isEnabledFor(logging.DEBUG):
+                DEVICE_UPDATE_EDUS = {"m.device_list_update", "m.signing_key_update"}
+                device_list_updates = [
+                    edu.content for edu in edus if edu.edu_type in DEVICE_UPDATE_EDUS
+                ]
+                if device_list_updates:
+                    issue_8631_logger.debug(
+                        "about to send txn [%s] including device list updates: %s",
+                        transaction.transaction_id,
+                        device_list_updates,
+                    )
 
             # Actually send the transaction
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 9fc4c31c93..8782586cd6 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -44,7 +44,6 @@ from synapse.api.urls import (
 from synapse.events import EventBase, make_event_from_dict
 from synapse.federation.units import Transaction
 from synapse.http.matrixfederationclient import ByteParser
-from synapse.logging.utils import log_function
 from synapse.types import JsonDict
 
 logger = logging.getLogger(__name__)
@@ -62,7 +61,6 @@ class TransportLayerClient:
         self.server_name = hs.hostname
         self.client = hs.get_federation_http_client()
 
-    @log_function
     async def get_room_state_ids(
         self, destination: str, room_id: str, event_id: str
     ) -> JsonDict:
@@ -88,7 +86,6 @@ class TransportLayerClient:
             try_trailing_slash_on_400=True,
         )
 
-    @log_function
     async def get_event(
         self, destination: str, event_id: str, timeout: Optional[int] = None
     ) -> JsonDict:
@@ -111,7 +108,6 @@ class TransportLayerClient:
             destination, path=path, timeout=timeout, try_trailing_slash_on_400=True
         )
 
-    @log_function
     async def backfill(
         self, destination: str, room_id: str, event_tuples: Collection[str], limit: int
     ) -> Optional[JsonDict]:
@@ -149,7 +145,6 @@ class TransportLayerClient:
             destination, path=path, args=args, try_trailing_slash_on_400=True
         )
 
-    @log_function
     async def timestamp_to_event(
         self, destination: str, room_id: str, timestamp: int, direction: str
     ) -> Union[JsonDict, List]:
@@ -185,7 +180,6 @@ class TransportLayerClient:
 
         return remote_response
 
-    @log_function
     async def send_transaction(
         self,
         transaction: Transaction,
@@ -234,7 +228,6 @@ class TransportLayerClient:
             try_trailing_slash_on_400=True,
         )
 
-    @log_function
     async def make_query(
         self,
         destination: str,
@@ -254,7 +247,6 @@ class TransportLayerClient:
             ignore_backoff=ignore_backoff,
         )
 
-    @log_function
     async def make_membership_event(
         self,
         destination: str,
@@ -317,7 +309,6 @@ class TransportLayerClient:
             ignore_backoff=ignore_backoff,
         )
 
-    @log_function
     async def send_join_v1(
         self,
         room_version: RoomVersion,
@@ -336,7 +327,6 @@ class TransportLayerClient:
             max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN,
         )
 
-    @log_function
     async def send_join_v2(
         self,
         room_version: RoomVersion,
@@ -355,7 +345,6 @@ class TransportLayerClient:
             max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN,
         )
 
-    @log_function
     async def send_leave_v1(
         self, destination: str, room_id: str, event_id: str, content: JsonDict
     ) -> Tuple[int, JsonDict]:
@@ -372,7 +361,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def send_leave_v2(
         self, destination: str, room_id: str, event_id: str, content: JsonDict
     ) -> JsonDict:
@@ -389,7 +377,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def send_knock_v1(
         self,
         destination: str,
@@ -423,7 +410,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content
         )
 
-    @log_function
     async def send_invite_v1(
         self, destination: str, room_id: str, event_id: str, content: JsonDict
     ) -> Tuple[int, JsonDict]:
@@ -433,7 +419,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def send_invite_v2(
         self, destination: str, room_id: str, event_id: str, content: JsonDict
     ) -> JsonDict:
@@ -443,7 +428,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def get_public_rooms(
         self,
         remote_server: str,
@@ -516,7 +500,6 @@ class TransportLayerClient:
 
         return response
 
-    @log_function
     async def exchange_third_party_invite(
         self, destination: str, room_id: str, event_dict: JsonDict
     ) -> JsonDict:
@@ -526,7 +509,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=event_dict
         )
 
-    @log_function
     async def get_event_auth(
         self, destination: str, room_id: str, event_id: str
     ) -> JsonDict:
@@ -534,7 +516,6 @@ class TransportLayerClient:
 
         return await self.client.get_json(destination=destination, path=path)
 
-    @log_function
     async def query_client_keys(
         self, destination: str, query_content: JsonDict, timeout: int
     ) -> JsonDict:
@@ -576,7 +557,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=query_content, timeout=timeout
         )
 
-    @log_function
     async def query_user_devices(
         self, destination: str, user_id: str, timeout: int
     ) -> JsonDict:
@@ -616,7 +596,6 @@ class TransportLayerClient:
             destination=destination, path=path, timeout=timeout
         )
 
-    @log_function
     async def claim_client_keys(
         self, destination: str, query_content: JsonDict, timeout: int
     ) -> JsonDict:
@@ -655,7 +634,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=query_content, timeout=timeout
         )
 
-    @log_function
     async def get_missing_events(
         self,
         destination: str,
@@ -680,7 +658,6 @@ class TransportLayerClient:
             timeout=timeout,
         )
 
-    @log_function
     async def get_group_profile(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -694,7 +671,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def update_group_profile(
         self, destination: str, group_id: str, requester_user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -716,7 +692,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_group_summary(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -730,7 +705,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_rooms_in_group(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -798,7 +772,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_users_in_group(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -812,7 +785,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_invited_users_in_group(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -826,7 +798,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def accept_group_invite(
         self, destination: str, group_id: str, user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -837,7 +808,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     def join_group(
         self, destination: str, group_id: str, user_id: str, content: JsonDict
     ) -> Awaitable[JsonDict]:
@@ -848,7 +818,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def invite_to_group(
         self,
         destination: str,
@@ -868,7 +837,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def invite_to_group_notification(
         self, destination: str, group_id: str, user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -882,7 +850,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def remove_user_from_group(
         self,
         destination: str,
@@ -902,7 +869,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def remove_user_from_group_notification(
         self, destination: str, group_id: str, user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -916,7 +882,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def renew_group_attestation(
         self, destination: str, group_id: str, user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -930,7 +895,6 @@ class TransportLayerClient:
             destination=destination, path=path, data=content, ignore_backoff=True
         )
 
-    @log_function
     async def update_group_summary_room(
         self,
         destination: str,
@@ -959,7 +923,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def delete_group_summary_room(
         self,
         destination: str,
@@ -986,7 +949,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_group_categories(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -1000,7 +962,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_group_category(
         self, destination: str, group_id: str, requester_user_id: str, category_id: str
     ) -> JsonDict:
@@ -1014,7 +975,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def update_group_category(
         self,
         destination: str,
@@ -1034,7 +994,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def delete_group_category(
         self, destination: str, group_id: str, requester_user_id: str, category_id: str
     ) -> JsonDict:
@@ -1048,7 +1007,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_group_roles(
         self, destination: str, group_id: str, requester_user_id: str
     ) -> JsonDict:
@@ -1062,7 +1020,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def get_group_role(
         self, destination: str, group_id: str, requester_user_id: str, role_id: str
     ) -> JsonDict:
@@ -1076,7 +1033,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def update_group_role(
         self,
         destination: str,
@@ -1096,7 +1052,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def delete_group_role(
         self, destination: str, group_id: str, requester_user_id: str, role_id: str
     ) -> JsonDict:
@@ -1110,7 +1065,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def update_group_summary_user(
         self,
         destination: str,
@@ -1136,7 +1090,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def set_group_join_policy(
         self, destination: str, group_id: str, requester_user_id: str, content: JsonDict
     ) -> JsonDict:
@@ -1151,7 +1104,6 @@ class TransportLayerClient:
             ignore_backoff=True,
         )
 
-    @log_function
     async def delete_group_summary_user(
         self,
         destination: str,
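
Note: the deletions throughout this file remove the @log_function entry-logging decorator from every transport method; opentracing spans now provide this visibility without logging each call's arguments. For reference, a minimal reconstruction of what such a decorator does (illustrative, not the exact removed helper):

import functools
import logging

logger = logging.getLogger(__name__)

def log_function(f):
    """Log every invocation of f at DEBUG, with its arguments."""
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        logger.debug("Invoked '%s' with args: %s kwargs: %s", f.__name__, args, kwargs)
        return f(*args, **kwargs)
    return wrapper
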
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 77b936361a..db4fe2c798 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import Dict, Iterable, List, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type
 
 from typing_extensions import Literal
 
@@ -36,17 +36,19 @@ from synapse.http.servlet import (
     parse_integer_from_args,
     parse_string_from_args,
 )
-from synapse.server import HomeServer
 from synapse.types import JsonDict, ThirdPartyInstanceID
 from synapse.util.ratelimitutils import FederationRateLimiter
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
 
 
 class TransportLayerServer(JsonResource):
     """Handles incoming federation HTTP requests"""
 
-    def __init__(self, hs: HomeServer, servlet_groups: Optional[List[str]] = None):
+    def __init__(self, hs: "HomeServer", servlet_groups: Optional[List[str]] = None):
         """Initialize the TransportLayerServer
 
         Will by default register all servlets. For custom behaviour, pass in
@@ -113,7 +115,7 @@ class PublicRoomList(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -203,7 +205,7 @@ class FederationGroupsRenewAttestaionServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -251,7 +253,7 @@ class OpenIdUserInfo(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -297,7 +299,7 @@ DEFAULT_SERVLET_GROUPS: Dict[str, Iterable[Type[BaseFederationServlet]]] = {
 
 
 def register_servlets(
-    hs: HomeServer,
+    hs: "HomeServer",
     resource: HttpServer,
     authenticator: Authenticator,
     ratelimiter: FederationRateLimiter,
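
Note: the recurring edit in this file, and in the transport modules below, moves the HomeServer import behind TYPE_CHECKING and quotes the annotation. This is the standard recipe for breaking an import cycle while keeping type checking intact:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only while type checking, never at runtime, so the
    # circular import of synapse.server is avoided.
    from synapse.server import HomeServer

class ExampleServlet:  # illustrative class name
    def __init__(self, hs: "HomeServer") -> None:  # quoted: resolved lazily
        self.hs = hs
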
diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py
index da1fbf8b63..dff2b68359 100644
--- a/synapse/federation/transport/server/_base.py
+++ b/synapse/federation/transport/server/_base.py
@@ -15,7 +15,8 @@
 import functools
 import logging
 import re
-from typing import Any, Awaitable, Callable, Optional, Tuple, cast
+import time
+from typing import TYPE_CHECKING, Any, Awaitable, Callable, Optional, Tuple, cast
 
 from synapse.api.errors import Codes, FederationDeniedError, SynapseError
 from synapse.api.urls import FEDERATION_V1_PREFIX
@@ -24,16 +25,20 @@ from synapse.http.servlet import parse_json_object_from_request
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import run_in_background
 from synapse.logging.opentracing import (
+    active_span,
     set_tag,
     span_context_from_request,
+    start_active_span,
     start_active_span_follows_from,
     whitelisted_homeserver,
 )
-from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.stringutils import parse_and_validate_server_name
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
 
 
@@ -46,7 +51,7 @@ class NoAuthenticationError(AuthenticationError):
 
 
 class Authenticator:
-    def __init__(self, hs: HomeServer):
+    def __init__(self, hs: "HomeServer"):
         self._clock = hs.get_clock()
         self.keyring = hs.get_keyring()
         self.server_name = hs.hostname
@@ -114,11 +119,11 @@ class Authenticator:
         # alive
         retry_timings = await self.store.get_destination_retry_timings(origin)
         if retry_timings and retry_timings.retry_last_ts:
-            run_in_background(self._reset_retry_timings, origin)
+            run_in_background(self.reset_retry_timings, origin)
 
         return origin
 
-    async def _reset_retry_timings(self, origin: str) -> None:
+    async def reset_retry_timings(self, origin: str) -> None:
         try:
             logger.info("Marking origin %r as up", origin)
             await self.store.set_destination_retry_timings(origin, None, 0, 0)
@@ -227,7 +232,7 @@ class BaseFederationServlet:
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -263,9 +268,10 @@ class BaseFederationServlet:
                 content = parse_json_object_from_request(request)
 
             try:
-                origin: Optional[str] = await authenticator.authenticate_request(
-                    request, content
-                )
+                with start_active_span("authenticate_request"):
+                    origin: Optional[str] = await authenticator.authenticate_request(
+                        request, content
+                    )
             except NoAuthenticationError:
                 origin = None
                 if self.REQUIRE_AUTH:
@@ -280,32 +286,57 @@ class BaseFederationServlet:
             # update the active opentracing span with the authenticated entity
             set_tag("authenticated_entity", origin)
 
-            # if the origin is authenticated and whitelisted, link to its span context
+            # if the origin is authenticated and whitelisted, use its span context
+            # as the parent.
             context = None
             if origin and whitelisted_homeserver(origin):
                 context = span_context_from_request(request)
 
-            scope = start_active_span_follows_from(
-                "incoming-federation-request", contexts=(context,) if context else ()
-            )
+            if context:
+                servlet_span = active_span()
+                # a scope which uses the origin's context as a parent
+                processing_start_time = time.time()
+                scope = start_active_span_follows_from(
+                    "incoming-federation-request",
+                    child_of=context,
+                    contexts=(servlet_span,),
+                    start_time=processing_start_time,
+                )
 
-            with scope:
-                if origin and self.RATELIMIT:
-                    with ratelimiter.ratelimit(origin) as d:
-                        await d
-                        if request._disconnected:
-                            logger.warning(
-                                "client disconnected before we started processing "
-                                "request"
+            else:
+                # just use our context as a parent
+                scope = start_active_span(
+                    "incoming-federation-request",
+                )
+
+            try:
+                with scope:
+                    if origin and self.RATELIMIT:
+                        with ratelimiter.ratelimit(origin) as d:
+                            await d
+                            if request._disconnected:
+                                logger.warning(
+                                    "client disconnected before we started processing "
+                                    "request"
+                                )
+                                return None
+                            response = await func(
+                                origin, content, request.args, *args, **kwargs
                             )
-                            return None
+                    else:
                         response = await func(
                             origin, content, request.args, *args, **kwargs
                         )
-                else:
-                    response = await func(
-                        origin, content, request.args, *args, **kwargs
+            finally:
+                # if we used the origin's context as the parent, add a new span using
+                # the servlet span as a parent, so that we have a link
+                if context:
+                    scope2 = start_active_span_follows_from(
+                        "process-federation_request",
+                        contexts=(scope.span,),
+                        start_time=processing_start_time,
                     )
+                    scope2.close()
 
             return response
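
Note: the tracing rework above produces two linked spans per request: the main "incoming-federation-request" span parented on the remote caller's context (when whitelisted), plus a zero-length "process-federation_request" span that follows from it, so the request stays reachable from the local servlet span. A hedged sketch of the same shape using the bare opentracing API; Synapse's helpers in synapse.logging.opentracing wrap this, and handle_request/remote_span_context below are placeholders:

import time
import opentracing

tracer = opentracing.global_tracer()  # no-op tracer unless one is installed

def handle_request() -> None:
    """Placeholder for the actual servlet work."""

# In Synapse this is extracted from the request headers; None here.
remote_span_context = None

processing_start_time = time.time()

# Main span, parented on the remote server's span context.
with tracer.start_active_span(
    "incoming-federation-request",
    child_of=remote_span_context,
    start_time=processing_start_time,
) as scope:
    handle_request()

# Zero-length span linking the work back into the local trace.
tracer.start_active_span(
    "process-federation_request",
    references=[opentracing.follows_from(scope.span.context)],
    start_time=processing_start_time,
).close()
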
 
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 77bfd88ad0..d86dfede4e 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -12,7 +12,17 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 import logging
-from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 
 from typing_extensions import Literal
 
@@ -30,12 +40,15 @@ from synapse.http.servlet import (
     parse_string_from_args,
     parse_strings_from_args,
 )
-from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.versionstring import get_version_string
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 
 class BaseFederationServerServlet(BaseFederationServlet):
@@ -46,7 +59,7 @@ class BaseFederationServerServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -95,6 +108,20 @@ class FederationSendServlet(BaseFederationServerServlet):
                 len(transaction_data.get("edus", [])),
             )
 
+            if issue_8631_logger.isEnabledFor(logging.DEBUG):
+                DEVICE_UPDATE_EDUS = ["m.device_list_update", "m.signing_key_update"]
+                device_list_updates = [
+                    edu.content
+                    for edu in transaction_data.get("edus", [])
+                    if edu.get("edu_type") in DEVICE_UPDATE_EDUS
+                ]
+                if device_list_updates:
+                    issue_8631_logger.debug(
+                        "received transaction [%s] including device list updates: %s",
+                        transaction_id,
+                        device_list_updates,
+                    )
+
         except Exception as e:
             logger.exception(e)
             return 400, {"error": "Invalid transaction"}
@@ -581,7 +608,7 @@ class FederationSpaceSummaryServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -655,7 +682,7 @@ class FederationRoomHierarchyServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
@@ -691,7 +718,7 @@ class RoomComplexityServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
diff --git a/synapse/federation/transport/server/groups_local.py b/synapse/federation/transport/server/groups_local.py
index a12cd18d58..496472e1dc 100644
--- a/synapse/federation/transport/server/groups_local.py
+++ b/synapse/federation/transport/server/groups_local.py
@@ -11,7 +11,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-from typing import Dict, List, Tuple, Type
+from typing import TYPE_CHECKING, Dict, List, Tuple, Type
 
 from synapse.api.errors import SynapseError
 from synapse.federation.transport.server._base import (
@@ -19,10 +19,12 @@ from synapse.federation.transport.server._base import (
     BaseFederationServlet,
 )
 from synapse.handlers.groups_local import GroupsLocalHandler
-from synapse.server import HomeServer
 from synapse.types import JsonDict, get_domain_from_id
 from synapse.util.ratelimitutils import FederationRateLimiter
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 
 class BaseGroupsLocalServlet(BaseFederationServlet):
     """Abstract base class for federation servlet classes which provides a groups local handler.
@@ -32,7 +34,7 @@ class BaseGroupsLocalServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
diff --git a/synapse/federation/transport/server/groups_server.py b/synapse/federation/transport/server/groups_server.py
index b30e92a5eb..851b50152e 100644
--- a/synapse/federation/transport/server/groups_server.py
+++ b/synapse/federation/transport/server/groups_server.py
@@ -11,7 +11,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-from typing import Dict, List, Tuple, Type
+from typing import TYPE_CHECKING, Dict, List, Tuple, Type
 
 from typing_extensions import Literal
 
@@ -22,10 +22,12 @@ from synapse.federation.transport.server._base import (
     BaseFederationServlet,
 )
 from synapse.http.servlet import parse_string_from_args
-from synapse.server import HomeServer
 from synapse.types import JsonDict, get_domain_from_id
 from synapse.util.ratelimitutils import FederationRateLimiter
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 
 class BaseGroupsServerServlet(BaseFederationServlet):
     """Abstract base class for federation servlet classes which provides a groups server handler.
@@ -35,7 +37,7 @@ class BaseGroupsServerServlet(BaseFederationServlet):
 
     def __init__(
         self,
-        hs: HomeServer,
+        hs: "HomeServer",
         authenticator: Authenticator,
         ratelimiter: FederationRateLimiter,
         server_name: str,
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 85157a138b..00ab5e79bf 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -55,21 +55,47 @@ class AdminHandler:
 
     async def get_user(self, user: UserID) -> Optional[JsonDict]:
         """Function to get user details"""
-        ret = await self.store.get_user_by_id(user.to_string())
-        if ret:
-            profile = await self.store.get_profileinfo(user.localpart)
-            threepids = await self.store.user_get_threepids(user.to_string())
-            external_ids = [
-                ({"auth_provider": auth_provider, "external_id": external_id})
-                for auth_provider, external_id in await self.store.get_external_ids_by_user(
-                    user.to_string()
-                )
-            ]
-            ret["displayname"] = profile.display_name
-            ret["avatar_url"] = profile.avatar_url
-            ret["threepids"] = threepids
-            ret["external_ids"] = external_ids
-        return ret
+        user_info_dict = await self.store.get_user_by_id(user.to_string())
+        if user_info_dict is None:
+            return None
+
+        # Restrict returned information to a known set of fields. This prevents additional
+        # fields added to get_user_by_id from modifying Synapse's external API surface.
+        user_info_to_return = {
+            "name",
+            "admin",
+            "deactivated",
+            "shadow_banned",
+            "creation_ts",
+            "appservice_id",
+            "consent_server_notice_sent",
+            "consent_version",
+            "user_type",
+            "is_guest",
+        }
+
+        # Restrict returned keys to a known set.
+        user_info_dict = {
+            key: value
+            for key, value in user_info_dict.items()
+            if key in user_info_to_return
+        }
+
+        # Add additional user metadata
+        profile = await self.store.get_profileinfo(user.localpart)
+        threepids = await self.store.user_get_threepids(user.to_string())
+        external_ids = [
+            ({"auth_provider": auth_provider, "external_id": external_id})
+            for auth_provider, external_id in await self.store.get_external_ids_by_user(
+                user.to_string()
+            )
+        ]
+        user_info_dict["displayname"] = profile.display_name
+        user_info_dict["avatar_url"] = profile.avatar_url
+        user_info_dict["threepids"] = threepids
+        user_info_dict["external_ids"] = external_ids
+
+        return user_info_dict
 
     async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any:
         """Write all data we have on the user to the given writer.
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 7833e77e2b..0fb919acf6 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -55,6 +55,9 @@ class ApplicationServicesHandler:
         self.clock = hs.get_clock()
         self.notify_appservices = hs.config.appservice.notify_appservices
         self.event_sources = hs.get_event_sources()
+        self._msc2409_to_device_messages_enabled = (
+            hs.config.experimental.msc2409_to_device_messages_enabled
+        )
 
         self.current_max = 0
         self.is_processing = False
@@ -132,7 +135,9 @@ class ApplicationServicesHandler:
 
                         # Fork off pushes to these services
                         for service in services:
-                            self.scheduler.submit_event_for_as(service, event)
+                            self.scheduler.enqueue_for_appservice(
+                                service, events=[event]
+                            )
 
                         now = self.clock.time_msec()
                         ts = await self.store.get_received_ts(event.event_id)
@@ -199,8 +204,9 @@ class ApplicationServicesHandler:
         Args:
             stream_key: The stream the event came from.
 
-                `stream_key` can be "typing_key", "receipt_key" or "presence_key". Any other
-                value for `stream_key` will cause this function to return early.
+                `stream_key` can be "typing_key", "receipt_key", "presence_key" or
+                "to_device_key". Any other value for `stream_key` will cause this function
+                to return early.
 
                 Ephemeral events will only be pushed to appservices that have opted into
                 receiving them by setting `push_ephemeral` to true in their registration
@@ -216,8 +222,15 @@ class ApplicationServicesHandler:
         if not self.notify_appservices:
             return
 
-        # Ignore any unsupported streams
-        if stream_key not in ("typing_key", "receipt_key", "presence_key"):
+        # Notify appservices of updates in ephemeral event streams.
+        # Only the following streams are currently supported.
+        # FIXME: We should use constants for these values.
+        if stream_key not in (
+            "typing_key",
+            "receipt_key",
+            "presence_key",
+            "to_device_key",
+        ):
             return
 
         # Assert that new_token is an integer (and not a RoomStreamToken).
@@ -233,6 +246,13 @@ class ApplicationServicesHandler:
         # Additional context: https://github.com/matrix-org/synapse/pull/11137
         assert isinstance(new_token, int)
 
+        # Ignore to-device messages if the feature flag is not enabled
+        if (
+            stream_key == "to_device_key"
+            and not self._msc2409_to_device_messages_enabled
+        ):
+            return
+
         # Check whether there are any appservices which have registered to receive
         # ephemeral events.
         #
@@ -266,7 +286,7 @@ class ApplicationServicesHandler:
         with Measure(self.clock, "notify_interested_services_ephemeral"):
             for service in services:
                 if stream_key == "typing_key":
-                    # Note that we don't persist the token (via set_type_stream_id_for_appservice)
+                    # Note that we don't persist the token (via set_appservice_stream_type_pos)
                     # for typing_key due to performance reasons and due to their highly
                     # ephemeral nature.
                     #
@@ -274,7 +294,7 @@ class ApplicationServicesHandler:
                     # and, if they apply to this application service, send it off.
                     events = await self._handle_typing(service, new_token)
                     if events:
-                        self.scheduler.submit_ephemeral_events_for_as(service, events)
+                        self.scheduler.enqueue_for_appservice(service, ephemeral=events)
                     continue
 
                 # Since we read/update the stream position for this AS/stream
@@ -285,28 +305,37 @@ class ApplicationServicesHandler:
                 ):
                     if stream_key == "receipt_key":
                         events = await self._handle_receipts(service, new_token)
-                        if events:
-                            self.scheduler.submit_ephemeral_events_for_as(
-                                service, events
-                            )
+                        self.scheduler.enqueue_for_appservice(service, ephemeral=events)
 
                         # Persist the latest handled stream token for this appservice
-                        await self.store.set_type_stream_id_for_appservice(
+                        await self.store.set_appservice_stream_type_pos(
                             service, "read_receipt", new_token
                         )
 
                     elif stream_key == "presence_key":
                         events = await self._handle_presence(service, users, new_token)
-                        if events:
-                            self.scheduler.submit_ephemeral_events_for_as(
-                                service, events
-                            )
+                        self.scheduler.enqueue_for_appservice(service, ephemeral=events)
 
                         # Persist the latest handled stream token for this appservice
-                        await self.store.set_type_stream_id_for_appservice(
+                        await self.store.set_appservice_stream_type_pos(
                             service, "presence", new_token
                         )
 
+                    elif stream_key == "to_device_key":
+                        # Retrieve the to-device messages that this appservice
+                        # should receive, up to the new stream token.
+                        to_device_messages = await self._get_to_device_messages(
+                            service, new_token, users
+                        )
+                        self.scheduler.enqueue_for_appservice(
+                            service, to_device_messages=to_device_messages
+                        )
+
+                        # Persist the latest handled stream token for this appservice
+                        await self.store.set_appservice_stream_type_pos(
+                            service, "to_device", new_token
+                        )
+
     async def _handle_typing(
         self, service: ApplicationService, new_token: int
     ) -> List[JsonDict]:
@@ -440,6 +469,79 @@ class ApplicationServicesHandler:
 
         return events
 
+    async def _get_to_device_messages(
+        self,
+        service: ApplicationService,
+        new_token: int,
+        users: Collection[Union[str, UserID]],
+    ) -> List[JsonDict]:
+        """
+        Given an application service, determine which events it should receive
+        from those between the last-recorded to-device message stream token for this
+        appservice and the given stream token.
+
+        Args:
+            service: The application service to retrieve to-device messages for.
+            new_token: The latest to-device event stream token.
+            users: The users to be notified for the new to-device messages
+                (ie, the recipients of the messages).
+
+        Returns:
+            A list of JSON dictionaries containing data derived from the to-device events
+                that should be sent to the given application service.
+        """
+        # Get the stream token that this application service has processed up until
+        from_key = await self.store.get_type_stream_id_for_appservice(
+            service, "to_device"
+        )
+
+        # Filter out users that this appservice is not interested in
+        users_appservice_is_interested_in: List[str] = []
+        for user in users:
+            # FIXME: We should do this farther up the call stack. We currently repeat
+            #  this operation in _handle_presence.
+            if isinstance(user, UserID):
+                user = user.to_string()
+
+            if service.is_interested_in_user(user):
+                users_appservice_is_interested_in.append(user)
+
+        if not users_appservice_is_interested_in:
+            # Return early if the AS was not interested in any of these users
+            return []
+
+        # Retrieve the to-device messages for each user
+        recipient_device_to_messages = await self.store.get_messages_for_user_devices(
+            users_appservice_is_interested_in,
+            from_key,
+            new_token,
+        )
+
+        # According to MSC2409, we'll need to add 'to_user_id' and 'to_device_id' fields
+        # to the event JSON so that the application service will know which user/device
+        # combination each message was intended for.
+        #
+        # So we mangle this dict into a flat list of to-device messages with the relevant
+        # user ID and device ID embedded inside each message dict.
+        message_payload: List[JsonDict] = []
+        for (
+            user_id,
+            device_id,
+        ), messages in recipient_device_to_messages.items():
+            for message_json in messages:
+                # Remove 'message_id' from the to-device message, as it's an internal ID
+                message_json.pop("message_id", None)
+
+                message_payload.append(
+                    {
+                        "to_user_id": user_id,
+                        "to_device_id": device_id,
+                        **message_json,
+                    }
+                )
+
+        return message_payload
+
     async def query_user_exists(self, user_id: str) -> bool:
         """Check if any application service knows this user_id exists.
 
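
Note: the "mangling" step described in the comments above is easiest to see with concrete data. The store returns messages keyed by (user, device), and the handler flattens them with the recipient embedded in each message per MSC2409 (the sample data below is invented):

recipient_device_to_messages = {
    ("@alice:example.org", "DEVICEID"): [
        {"type": "m.room_key_request", "message_id": "internal-abc"},
    ],
}

message_payload = []
for (user_id, device_id), messages in recipient_device_to_messages.items():
    for message_json in messages:
        message_json.pop("message_id", None)  # internal-only identifier
        message_payload.append(
            {"to_user_id": user_id, "to_device_id": device_id, **message_json}
        )

# [{'to_user_id': '@alice:example.org', 'to_device_id': 'DEVICEID',
#   'type': 'm.room_key_request'}]
print(message_payload)
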
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 84724b207c..e32c93e234 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -168,25 +168,25 @@ def login_id_phone_to_thirdparty(identifier: JsonDict) -> Dict[str, str]:
     }
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class SsoLoginExtraAttributes:
     """Data we track about SAML2 sessions"""
 
     # time the session was created, in milliseconds
-    creation_time = attr.ib(type=int)
-    extra_attributes = attr.ib(type=JsonDict)
+    creation_time: int
+    extra_attributes: JsonDict
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class LoginTokenAttributes:
     """Data we store in a short-term login token"""
 
-    user_id = attr.ib(type=str)
+    user_id: str
 
-    auth_provider_id = attr.ib(type=str)
+    auth_provider_id: str
     """The SSO Identity Provider that the user authenticated with, to get this token."""
 
-    auth_provider_session_id = attr.ib(type=Optional[str])
+    auth_provider_session_id: Optional[str]
     """The session ID advertised by the SSO Identity Provider."""
 
 
@@ -2060,6 +2060,10 @@ CHECK_AUTH_CALLBACK = Callable[
         Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]
     ],
 ]
+GET_USERNAME_FOR_REGISTRATION_CALLBACK = Callable[
+    [JsonDict, JsonDict],
+    Awaitable[Optional[str]],
+]
 
 
 class PasswordAuthProvider:
@@ -2072,6 +2076,9 @@ class PasswordAuthProvider:
         # lists of callbacks
         self.check_3pid_auth_callbacks: List[CHECK_3PID_AUTH_CALLBACK] = []
         self.on_logged_out_callbacks: List[ON_LOGGED_OUT_CALLBACK] = []
+        self.get_username_for_registration_callbacks: List[
+            GET_USERNAME_FOR_REGISTRATION_CALLBACK
+        ] = []
 
         # Mapping from login type to login parameters
         self._supported_login_types: Dict[str, Iterable[str]] = {}
@@ -2086,6 +2093,9 @@ class PasswordAuthProvider:
         auth_checkers: Optional[
             Dict[Tuple[str, Tuple[str, ...]], CHECK_AUTH_CALLBACK]
         ] = None,
+        get_username_for_registration: Optional[
+            GET_USERNAME_FOR_REGISTRATION_CALLBACK
+        ] = None,
     ) -> None:
         # Register check_3pid_auth callback
         if check_3pid_auth is not None:
@@ -2130,6 +2140,11 @@ class PasswordAuthProvider:
                 # Add the new method to the list of auth_checker_callbacks for this login type
                 self.auth_checker_callbacks.setdefault(login_type, []).append(callback)
 
+        if get_username_for_registration is not None:
+            self.get_username_for_registration_callbacks.append(
+                get_username_for_registration,
+            )
+
     def get_supported_login_types(self) -> Mapping[str, Iterable[str]]:
         """Get the login types supported by this password provider
 
@@ -2281,7 +2296,50 @@ class PasswordAuthProvider:
         # call all of the on_logged_out callbacks
         for callback in self.on_logged_out_callbacks:
             try:
-                callback(user_id, device_id, access_token)
+                await callback(user_id, device_id, access_token)
             except Exception as e:
                 logger.warning("Failed to run module API callback %s: %s", callback, e)
                 continue
+
+    async def get_username_for_registration(
+        self,
+        uia_results: JsonDict,
+        params: JsonDict,
+    ) -> Optional[str]:
+        """Defines the username to use when registering the user, using the credentials
+        and parameters provided during the UIA flow.
+
+        Stops at the first callback that returns a string.
+
+        Args:
+            uia_results: The credentials provided during the UIA flow.
+            params: The parameters provided by the registration request.
+
+        Returns:
+            The localpart to use when registering this user, or None if no module
+            returned a localpart.
+        """
+        for callback in self.get_username_for_registration_callbacks:
+            try:
+                res = await callback(uia_results, params)
+
+                if isinstance(res, str):
+                    return res
+                elif res is not None:
+                    # mypy complains that this line is unreachable because it assumes the
+                    # data returned by the module fits the expected type. We just want
+                    # to make sure this is the case.
+                    logger.warning(  # type: ignore[unreachable]
+                        "Ignoring non-string value returned by"
+                        " get_username_for_registration callback %s: %s",
+                        callback,
+                        res,
+                    )
+            except Exception as e:
+                logger.error(
+                    "Module raised an exception in get_username_for_registration: %s",
+                    e,
+                )
+                raise SynapseError(code=500, msg="Internal Server Error")
+
+        return None
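
A hedged sketch of a module using the new hook; the module class, its config handling and the "nickname" parameter are illustrative, with the callback registered through the module API's register_password_auth_provider_callbacks:

from typing import Optional

from synapse.module_api import ModuleApi


class UsernamePicker:
    def __init__(self, config: dict, api: ModuleApi):
        api.register_password_auth_provider_callbacks(
            get_username_for_registration=self.pick_username,
        )

    async def pick_username(self, uia_results: dict, params: dict) -> Optional[str]:
        # "nickname" is a hypothetical client-supplied registration parameter;
        # returning None defers to the next callback, or to Synapse's default
        # behaviour if no callback returns a string.
        nickname = params.get("nickname")
        return nickname.lower() if isinstance(nickname, str) else None
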
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index bee62cf360..7a13d76a68 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -157,6 +157,9 @@ class DeactivateAccountHandler:
         # Mark the user as deactivated.
         await self.store.set_user_deactivated_status(user_id, True)
 
+        # Remove account data (including ignored users and push rules).
+        await self.store.purge_account_data_for_user(user_id)
+
         return identity_server_supports_unbinding
 
     async def _reject_pending_invites_for_user(self, user_id: str) -> None:
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 14360b4e40..d4dfddf63f 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -1321,14 +1321,14 @@ def _one_time_keys_match(old_key_json: str, new_key: JsonDict) -> bool:
     return old_key == new_key_copy
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class SignatureListItem:
     """An item in the signature list as used by upload_signatures_for_device_keys."""
 
-    signing_key_id = attr.ib(type=str)
-    target_user_id = attr.ib(type=str)
-    target_device_id = attr.ib(type=str)
-    signature = attr.ib(type=JsonDict)
+    signing_key_id: str
+    target_user_id: str
+    target_device_id: str
+    signature: JsonDict
 
 
 class SigningKeyEduUpdater:
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index a3add8a586..bac5de0526 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -20,7 +20,6 @@ from synapse.api.constants import EduTypes, EventTypes, Membership
 from synapse.api.errors import AuthError, SynapseError
 from synapse.events import EventBase
 from synapse.handlers.presence import format_user_presence_state
-from synapse.logging.utils import log_function
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, UserID
 from synapse.visibility import filter_events_for_client
@@ -43,7 +42,6 @@ class EventStreamHandler:
         self._server_notices_sender = hs.get_server_notices_sender()
         self._event_serializer = hs.get_event_client_serializer()
 
-    @log_function
     async def get_stream(
         self,
         auth_user_id: str,
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 43785de79d..c0f642005f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -51,7 +51,6 @@ from synapse.logging.context import (
     preserve_fn,
     run_in_background,
 )
-from synapse.logging.utils import log_function
 from synapse.replication.http.federation import (
     ReplicationCleanRoomRestServlet,
     ReplicationStoreRoomOnOutlierMembershipRestServlet,
@@ -562,7 +561,6 @@ class FederationHandler:
 
             run_in_background(self._handle_queued_pdus, room_queue)
 
-    @log_function
     async def do_knock(
         self,
         target_hosts: List[str],
@@ -934,7 +932,6 @@ class FederationHandler:
 
         return event
 
-    @log_function
     async def on_make_knock_request(
         self, origin: str, room_id: str, user_id: str
     ) -> EventBase:
@@ -1045,7 +1042,6 @@ class FederationHandler:
         else:
             return []
 
-    @log_function
     async def on_backfill_request(
         self, origin: str, room_id: str, pdu_list: List[str], limit: int
     ) -> List[EventBase]:
@@ -1075,7 +1071,6 @@ class FederationHandler:
 
         return events
 
-    @log_function
     async def get_persisted_pdu(
         self, origin: str, event_id: str
     ) -> Optional[EventBase]:
@@ -1137,7 +1132,6 @@ class FederationHandler:
 
         return missing_events
 
-    @log_function
     async def exchange_third_party_invite(
         self, sender_user_id: str, target_user_id: str, room_id: str, signed: JsonDict
     ) -> None:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index b37bf98690..9edc7369d6 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -56,7 +56,6 @@ from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.logging.context import nested_logging_context, run_in_background
-from synapse.logging.utils import log_function
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
 from synapse.replication.http.federation import (
@@ -275,7 +274,6 @@ class FederationEventHandler:
 
         await self._process_received_pdu(origin, pdu, state=None)
 
-    @log_function
     async def on_send_membership_event(
         self, origin: str, event: EventBase
     ) -> Tuple[EventBase, EventContext]:
@@ -472,7 +470,6 @@ class FederationEventHandler:
 
             return await self.persist_events_and_notify(room_id, [(event, context)])
 
-    @log_function
     async def backfill(
         self, dest: str, room_id: str, limit: int, extremities: Collection[str]
     ) -> None:
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 472688f045..973f262964 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -537,7 +537,7 @@ class PaginationHandler:
                 state_dict = await self.store.get_events(list(state_ids.values()))
                 state = state_dict.values()
 
-        aggregations = await self.store.get_bundled_aggregations(events)
+        aggregations = await self.store.get_bundled_aggregations(events, user_id)
 
         time_now = self.clock.time_msec()
 
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index c781fefb1b..067c43ae47 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -55,7 +55,6 @@ from synapse.api.presence import UserPresenceState
 from synapse.appservice import ApplicationService
 from synapse.events.presence_router import PresenceRouter
 from synapse.logging.context import run_in_background
-from synapse.logging.utils import log_function
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.presence import (
@@ -1542,7 +1541,6 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         self.clock = hs.get_clock()
         self.store = hs.get_datastore()
 
-    @log_function
     async def get_new_events(
         self,
         user: UserID,
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 6b5a6ded8b..36e3ad2ba9 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -31,6 +31,8 @@ from synapse.types import (
     create_requester,
     get_domain_from_id,
 )
+from synapse.util.caches.descriptors import cached
+from synapse.util.stringutils import parse_and_validate_mxc_uri
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -64,6 +66,11 @@ class ProfileHandler:
         self.user_directory_handler = hs.get_user_directory_handler()
         self.request_ratelimiter = hs.get_request_ratelimiter()
 
+        self.max_avatar_size = hs.config.server.max_avatar_size
+        self.allowed_avatar_mimetypes = hs.config.server.allowed_avatar_mimetypes
+
+        self.server_name = hs.config.server.server_name
+
         if hs.config.worker.run_background_tasks:
             self.clock.looping_call(
                 self._update_remote_profile_cache, self.PROFILE_UPDATE_MS
@@ -286,6 +293,9 @@ class ProfileHandler:
                 400, "Avatar URL is too long (max %i)" % (MAX_AVATAR_URL_LEN,)
             )
 
+        if not await self.check_avatar_size_and_mime_type(new_avatar_url):
+            raise SynapseError(403, "This avatar is not allowed", Codes.FORBIDDEN)
+
         avatar_url_to_set: Optional[str] = new_avatar_url
         if new_avatar_url == "":
             avatar_url_to_set = None
@@ -307,6 +317,63 @@ class ProfileHandler:
 
         await self._update_join_states(requester, target_user)
 
+    @cached()
+    async def check_avatar_size_and_mime_type(self, mxc: str) -> bool:
+        """Check that the size and content type of the avatar at the given MXC URI are
+        within the configured limits.
+
+        Args:
+            mxc: The MXC URI at which the avatar can be found.
+
+        Returns:
+            A boolean indicating whether the file can be allowed to be set as an avatar.
+        """
+        if not self.max_avatar_size and not self.allowed_avatar_mimetypes:
+            return True
+
+        server_name, _, media_id = parse_and_validate_mxc_uri(mxc)
+
+        if server_name == self.server_name:
+            media_info = await self.store.get_local_media(media_id)
+        else:
+            media_info = await self.store.get_cached_remote_media(server_name, media_id)
+
+        if media_info is None:
+            # Both configuration options need to access the file's metadata, and
+            # retrieving remote avatars just for this becomes a bit of a faff, especially
+            # if e.g. the file is too big. It's also generally safe to assume that most
+            # files used as avatars are uploaded locally, or that, if the upload didn't
+            # happen as part of a PUT request on /avatar_url, the file was at least
+            # previewed by the user locally (and therefore downloaded to the remote
+            # media cache).
+            logger.warning("Forbidding avatar change to %s: avatar not on server", mxc)
+            return False
+
+        if self.max_avatar_size:
+            # Ensure avatar does not exceed max allowed avatar size
+            if media_info["media_length"] > self.max_avatar_size:
+                logger.warning(
+                    "Forbidding avatar change to %s: %d bytes is above the allowed size "
+                    "limit",
+                    mxc,
+                    media_info["media_length"],
+                )
+                return False
+
+        if self.allowed_avatar_mimetypes:
+            # Ensure the avatar's file type is allowed
+            if media_info["media_type"] not in self.allowed_avatar_mimetypes:
+                logger.warning(
+                    "Forbidding avatar change to %s: mimetype %s not allowed",
+                    mxc,
+                    media_info["media_type"],
+                )
+                return False
+
+        return True
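
For reference, a standalone sketch of the gate this method implements, assuming the media_info dict shape used above ("media_length" in bytes, "media_type" a MIME type string); unset options mean "no restriction":

from typing import Collection, Optional


def avatar_allowed(
    media_info: dict,
    max_size: Optional[int] = None,
    allowed_mimetypes: Optional[Collection[str]] = None,
) -> bool:
    if max_size and media_info["media_length"] > max_size:
        return False
    if allowed_mimetypes and media_info["media_type"] not in allowed_mimetypes:
        return False
    return True


assert avatar_allowed(
    {"media_length": 1024, "media_type": "image/png"},
    max_size=10_000,
    allowed_mimetypes=("image/png",),
)
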
+
     async def on_profile_query(self, args: JsonDict) -> JsonDict:
         """Handles federation profile query requests."""
 
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index f08a516a75..a719d5eef3 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -132,6 +132,7 @@ class RegistrationHandler:
         localpart: str,
         guest_access_token: Optional[str] = None,
         assigned_user_id: Optional[str] = None,
+        inhibit_user_in_use_error: bool = False,
     ) -> None:
         if types.contains_invalid_mxid_characters(localpart):
             raise SynapseError(
@@ -171,21 +172,22 @@ class RegistrationHandler:
 
         users = await self.store.get_users_by_id_case_insensitive(user_id)
         if users:
-            if not guest_access_token:
+            if not inhibit_user_in_use_error and not guest_access_token:
                 raise SynapseError(
                     400, "User ID already taken.", errcode=Codes.USER_IN_USE
                 )
-            user_data = await self.auth.get_user_by_access_token(guest_access_token)
-            if (
-                not user_data.is_guest
-                or UserID.from_string(user_data.user_id).localpart != localpart
-            ):
-                raise AuthError(
-                    403,
-                    "Cannot register taken user ID without valid guest "
-                    "credentials for that user.",
-                    errcode=Codes.FORBIDDEN,
-                )
+            if guest_access_token:
+                user_data = await self.auth.get_user_by_access_token(guest_access_token)
+                if (
+                    not user_data.is_guest
+                    or UserID.from_string(user_data.user_id).localpart != localpart
+                ):
+                    raise AuthError(
+                        403,
+                        "Cannot register taken user ID without valid guest "
+                        "credentials for that user.",
+                        errcode=Codes.FORBIDDEN,
+                    )
 
         if guest_access_token is None:
             try:
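
The net effect of the restructured guard, as a standalone sketch (all names here are illustrative):

def should_raise_user_in_use(
    taken: bool, has_guest_token: bool, inhibit: bool
) -> bool:
    # USER_IN_USE is only raised for a taken localpart when the caller has not
    # asked to inhibit the error and no guest token was supplied; a supplied
    # guest token is instead validated against the existing guest account.
    return taken and not inhibit and not has_guest_token
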
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 3d47163f25..1420d67729 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -30,6 +30,7 @@ from typing import (
     Tuple,
 )
 
+import attr
 from typing_extensions import TypedDict
 
 from synapse.api.constants import (
@@ -60,6 +61,7 @@ from synapse.events.utils import copy_power_levels_contents
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.handlers.federation import get_domains_from_state
 from synapse.rest.admin._base import assert_user_is_admin
+from synapse.storage.databases.main.relations import BundledAggregations
 from synapse.storage.state import StateFilter
 from synapse.streams import EventSource
 from synapse.types import (
@@ -90,6 +92,17 @@ id_server_scheme = "https://"
 FIVE_MINUTES_IN_MS = 5 * 60 * 1000
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class EventContext:
+    events_before: List[EventBase]
+    event: EventBase
+    events_after: List[EventBase]
+    state: List[EventBase]
+    aggregations: Dict[str, BundledAggregations]
+    start: str
+    end: str
+
+
 class RoomCreationHandler:
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastore()
@@ -1119,7 +1132,7 @@ class RoomContextHandler:
         limit: int,
         event_filter: Optional[Filter],
         use_admin_priviledge: bool = False,
-    ) -> Optional[JsonDict]:
+    ) -> Optional[EventContext]:
         """Retrieves events, pagination tokens and state around a given event
         in a room.
 
@@ -1167,32 +1180,28 @@ class RoomContextHandler:
         results = await self.store.get_events_around(
             room_id, event_id, before_limit, after_limit, event_filter
         )
+        events_before = results.events_before
+        events_after = results.events_after
 
         if event_filter:
-            results["events_before"] = await event_filter.filter(
-                results["events_before"]
-            )
-            results["events_after"] = await event_filter.filter(results["events_after"])
+            events_before = await event_filter.filter(events_before)
+            events_after = await event_filter.filter(events_after)
 
-        results["events_before"] = await filter_evts(results["events_before"])
-        results["events_after"] = await filter_evts(results["events_after"])
+        events_before = await filter_evts(events_before)
+        events_after = await filter_evts(events_after)
         # filter_evts can return a pruned event in case the user is allowed to see that
         # there's something there but not see the content, so use the event that's in
         # `filtered` rather than the event we retrieved from the datastore.
-        results["event"] = filtered[0]
+        event = filtered[0]
 
         # Fetch the aggregations.
-        aggregations = await self.store.get_bundled_aggregations([results["event"]])
-        aggregations.update(
-            await self.store.get_bundled_aggregations(results["events_before"])
-        )
-        aggregations.update(
-            await self.store.get_bundled_aggregations(results["events_after"])
+        aggregations = await self.store.get_bundled_aggregations(
+            itertools.chain(events_before, (event,), events_after),
+            user.to_string(),
         )
-        results["aggregations"] = aggregations
 
-        if results["events_after"]:
-            last_event_id = results["events_after"][-1].event_id
+        if events_after:
+            last_event_id = events_after[-1].event_id
         else:
             last_event_id = event_id
 
@@ -1200,9 +1209,9 @@ class RoomContextHandler:
             state_filter = StateFilter.from_lazy_load_member_list(
                 ev.sender
                 for ev in itertools.chain(
-                    results["events_before"],
-                    (results["event"],),
-                    results["events_after"],
+                    events_before,
+                    (event,),
+                    events_after,
                 )
             )
         else:
@@ -1220,21 +1229,23 @@ class RoomContextHandler:
         if event_filter:
             state_events = await event_filter.filter(state_events)
 
-        results["state"] = await filter_evts(state_events)
-
         # We use a dummy token here as we only care about the room portion of
         # the token, which we replace.
         token = StreamToken.START
 
-        results["start"] = await token.copy_and_replace(
-            "room_key", results["start"]
-        ).to_string(self.store)
-
-        results["end"] = await token.copy_and_replace(
-            "room_key", results["end"]
-        ).to_string(self.store)
-
-        return results
+        return EventContext(
+            events_before=events_before,
+            event=event,
+            events_after=events_after,
+            state=await filter_evts(state_events),
+            aggregations=aggregations,
+            start=await token.copy_and_replace("room_key", results.start).to_string(
+                self.store
+            ),
+            end=await token.copy_and_replace("room_key", results.end).to_string(
+                self.store
+            ),
+        )
 
 
 class TimestampLookupHandler:
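
Callers now receive the frozen EventContext defined above instead of a dict; a hedged consumption sketch (the handler variable, method name and argument list are assumptions from context):

context = await room_context_handler.get_event_context(
    user, room_id, event_id, limit, event_filter
)
if context is not None:
    # Typed attribute access replaces the old results["events_before"] lookups.
    surrounding = [*context.events_before, context.event, *context.events_after]
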
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index bc6b72b41b..bf1a47efb0 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -116,6 +116,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count,
         )
 
+        self._third_party_invite_limiter = Ratelimiter(
+            store=self.store,
+            clock=self.clock,
+            rate_hz=hs.config.ratelimiting.rc_third_party_invite.per_second,
+            burst_count=hs.config.ratelimiting.rc_third_party_invite.burst_count,
+        )
+
         self.request_ratelimiter = hs.get_request_ratelimiter()
 
     @abc.abstractmethod
@@ -607,6 +614,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 errcode=Codes.BAD_JSON,
             )
 
+        if "avatar_url" in content:
+            if not await self.profile_handler.check_avatar_size_and_mime_type(
+                content["avatar_url"],
+            ):
+                raise SynapseError(403, "This avatar is not allowed", Codes.FORBIDDEN)
+
         # The event content should *not* include the authorising user as
         # it won't be properly signed. Strip it out since it might come
         # back from a client updating a display name / avatar.
@@ -1307,7 +1320,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
 
         # We need to rate limit *before* we send out any 3PID invites, so we
         # can't just rely on the standard ratelimiting of events.
-        await self.request_ratelimiter.ratelimit(requester)
+        await self._third_party_invite_limiter.ratelimit(requester)
 
         can_invite = await self.third_party_event_rules.check_threepid_can_be_invited(
             medium, address, room_id
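
3PID invites now draw from a dedicated bucket rather than the generic request limiter; a hedged sketch of the limiter in isolation (the rate values are illustrative stand-ins for the new rc_third_party_invite config):

limiter = Ratelimiter(
    store=store,
    clock=clock,
    rate_hz=0.2,     # rc_third_party_invite.per_second
    burst_count=10,  # rc_third_party_invite.burst_count
)
# Raises LimitExceededError once the burst is exhausted.
await limiter.ratelimit(requester)
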
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index 7c60cb0bdd..4844b69a03 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -780,6 +780,7 @@ class RoomSummaryHandler:
         try:
             (
                 room_response,
+                children_state_events,
                 children,
                 inaccessible_children,
             ) = await self._federation_client.get_room_hierarchy(
@@ -804,7 +805,7 @@ class RoomSummaryHandler:
         }
 
         return (
-            _RoomEntry(room_id, room_response, room_response.pop("children_state", ())),
+            _RoomEntry(room_id, room_response, children_state_events),
             children_by_room_id,
             set(inaccessible_children),
         )
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 0b153a6822..02bb5ae72f 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -361,36 +361,37 @@ class SearchHandler:
 
                 logger.info(
                     "Context for search returned %d and %d events",
-                    len(res["events_before"]),
-                    len(res["events_after"]),
+                    len(res.events_before),
+                    len(res.events_after),
                 )
 
-                res["events_before"] = await filter_events_for_client(
-                    self.storage, user.to_string(), res["events_before"]
+                events_before = await filter_events_for_client(
+                    self.storage, user.to_string(), res.events_before
                 )
 
-                res["events_after"] = await filter_events_for_client(
-                    self.storage, user.to_string(), res["events_after"]
+                events_after = await filter_events_for_client(
+                    self.storage, user.to_string(), res.events_after
                 )
 
-                res["start"] = await now_token.copy_and_replace(
-                    "room_key", res["start"]
-                ).to_string(self.store)
-
-                res["end"] = await now_token.copy_and_replace(
-                    "room_key", res["end"]
-                ).to_string(self.store)
+                context = {
+                    "events_before": events_before,
+                    "events_after": events_after,
+                    "start": await now_token.copy_and_replace(
+                        "room_key", res.start
+                    ).to_string(self.store),
+                    "end": await now_token.copy_and_replace(
+                        "room_key", res.end
+                    ).to_string(self.store),
+                }
 
                 if include_profile:
                     senders = {
                         ev.sender
-                        for ev in itertools.chain(
-                            res["events_before"], [event], res["events_after"]
-                        )
+                        for ev in itertools.chain(events_before, [event], events_after)
                     }
 
-                    if res["events_after"]:
-                        last_event_id = res["events_after"][-1].event_id
+                    if events_after:
+                        last_event_id = events_after[-1].event_id
                     else:
                         last_event_id = event.event_id
 
@@ -402,7 +403,7 @@ class SearchHandler:
                         last_event_id, state_filter
                     )
 
-                    res["profile_info"] = {
+                    context["profile_info"] = {
                         s.state_key: {
                             "displayname": s.content.get("displayname", None),
                             "avatar_url": s.content.get("avatar_url", None),
@@ -411,7 +412,7 @@ class SearchHandler:
                         if s.type == EventTypes.Member and s.state_key in senders
                     }
 
-                contexts[event.event_id] = res
+                contexts[event.event_id] = context
         else:
             contexts = {}
 
@@ -421,10 +422,10 @@ class SearchHandler:
 
         for context in contexts.values():
             context["events_before"] = self._event_serializer.serialize_events(
-                context["events_before"], time_now
+                context["events_before"], time_now  # type: ignore[arg-type]
             )
             context["events_after"] = self._event_serializer.serialize_events(
-                context["events_after"], time_now
+                context["events_after"], time_now  # type: ignore[arg-type]
             )
 
         state_results = {}
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 65c27bc64a..0bb8b0929e 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -126,45 +126,45 @@ class SsoIdentityProvider(Protocol):
         raise NotImplementedError()
 
 
-@attr.s
+@attr.s(auto_attribs=True)
 class UserAttributes:
     # the localpart of the mxid that the mapper has assigned to the user.
     # if `None`, the mapper has not picked a userid, and the user should be prompted to
     # enter one.
-    localpart = attr.ib(type=Optional[str])
-    display_name = attr.ib(type=Optional[str], default=None)
-    emails = attr.ib(type=Collection[str], default=attr.Factory(list))
+    localpart: Optional[str]
+    display_name: Optional[str] = None
+    emails: Collection[str] = attr.Factory(list)
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class UsernameMappingSession:
     """Data we track about SSO sessions"""
 
     # A unique identifier for this SSO provider, e.g.  "oidc" or "saml".
-    auth_provider_id = attr.ib(type=str)
+    auth_provider_id: str
 
     # user ID on the IdP server
-    remote_user_id = attr.ib(type=str)
+    remote_user_id: str
 
     # attributes returned by the ID mapper
-    display_name = attr.ib(type=Optional[str])
-    emails = attr.ib(type=Collection[str])
+    display_name: Optional[str]
+    emails: Collection[str]
 
     # An optional dictionary of extra attributes to be provided to the client in the
     # login response.
-    extra_login_attributes = attr.ib(type=Optional[JsonDict])
+    extra_login_attributes: Optional[JsonDict]
 
     # where to redirect the client back to
-    client_redirect_url = attr.ib(type=str)
+    client_redirect_url: str
 
     # expiry time for the session, in milliseconds
-    expiry_time_ms = attr.ib(type=int)
+    expiry_time_ms: int
 
     # choices made by the user
-    chosen_localpart = attr.ib(type=Optional[str], default=None)
-    use_display_name = attr.ib(type=bool, default=True)
-    emails_to_use = attr.ib(type=Collection[str], default=())
-    terms_accepted_version = attr.ib(type=Optional[str], default=None)
+    chosen_localpart: Optional[str] = None
+    use_display_name: bool = True
+    emails_to_use: Collection[str] = ()
+    terms_accepted_version: Optional[str] = None
 
 
 # the HTTP cookie used to track the mapping session id
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4b3f1ea059..aa9a76f8a9 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -37,6 +37,7 @@ from synapse.logging.context import current_context
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.databases.main.event_push_actions import NotifCounts
+from synapse.storage.databases.main.relations import BundledAggregations
 from synapse.storage.roommember import MemberSummary
 from synapse.storage.state import StateFilter
 from synapse.types import (
@@ -98,6 +99,9 @@ class TimelineBatch:
     prev_batch: StreamToken
     events: List[EventBase]
     limited: bool
+    # A mapping of event ID to the bundled aggregations for the above events.
+    # This is only calculated if limited is true.
+    bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None
 
     def __bool__(self) -> bool:
         """Make the result appear empty if there are no updates. This is used
@@ -630,10 +634,19 @@ class SyncHandler:
 
             prev_batch_token = now_token.copy_and_replace("room_key", room_key)
 
+        # Don't bother to bundle aggregations if the timeline is unlimited,
+        # as clients will have all the necessary information.
+        bundled_aggregations = None
+        if limited or newly_joined_room:
+            bundled_aggregations = await self.store.get_bundled_aggregations(
+                recents, sync_config.user.to_string()
+            )
+
         return TimelineBatch(
             events=recents,
             prev_batch=prev_batch_token,
             limited=limited or newly_joined_room,
+            bundled_aggregations=bundled_aggregations,
         )
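
A hedged sketch of how a consumer might use the new field (the serialization step is illustrative):

# batch is the TimelineBatch returned above; bundled_aggregations is None
# unless the timeline was limited (or the room newly joined).
for event in batch.events:
    aggs = (batch.bundled_aggregations or {}).get(event.event_id)
    if aggs is not None:
        ...  # e.g. attach the aggregations under the event's "unsigned" key
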
 
     async def get_state_after_event(
@@ -1335,8 +1348,8 @@ class SyncHandler:
         if sync_result_builder.since_token is not None:
             since_stream_id = int(sync_result_builder.since_token.to_device_key)
 
-        if since_stream_id != int(now_token.to_device_key):
-            messages, stream_id = await self.store.get_new_messages_for_device(
+        if device_id is not None and since_stream_id != int(now_token.to_device_key):
+            messages, stream_id = await self.store.get_messages_for_device(
                 user_id, device_id, since_stream_id, now_token.to_device_key
             )
 
@@ -1607,7 +1620,7 @@ class SyncHandler:
         # TODO: Can we `SELECT ignored_user_id FROM ignored_users WHERE ignorer_user_id=?;` instead?
         ignored_account_data = (
             await self.store.get_global_account_data_by_type_for_user(
-                AccountDataTypes.IGNORED_USER_LIST, user_id=user_id
+                user_id=user_id, data_type=AccountDataTypes.IGNORED_USER_LIST
             )
         )
 
diff --git a/synapse/handlers/ui_auth/__init__.py b/synapse/handlers/ui_auth/__init__.py
index 13b0c61d2e..56eee4057f 100644
--- a/synapse/handlers/ui_auth/__init__.py
+++ b/synapse/handlers/ui_auth/__init__.py
@@ -38,4 +38,4 @@ class UIAuthSessionDataConstants:
     # used during registration to store the registration token used (if required) so that:
     # - we can prevent a token being used twice by one session
     # - we can 'use up' the token after registration has successfully completed
-    REGISTRATION_TOKEN = "org.matrix.msc3231.login.registration_token"
+    REGISTRATION_TOKEN = "m.login.registration_token"
diff --git a/synapse/http/client.py b/synapse/http/client.py
index ca33b45cb2..743a7ffcb1 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -731,15 +731,24 @@ class SimpleHttpClient:
         # straight back in again
 
         try:
-            length = await make_deferred_yieldable(
-                read_body_with_max_size(response, output_stream, max_size)
-            )
+            d = read_body_with_max_size(response, output_stream, max_size)
+
+            # Ensure that the body is not read forever.
+            d = timeout_deferred(d, 30, self.hs.get_reactor())
+
+            length = await make_deferred_yieldable(d)
         except BodyExceededMaxSize:
             raise SynapseError(
                 HTTPStatus.BAD_GATEWAY,
                 "Requested file is too large > %r bytes" % (max_size,),
                 Codes.TOO_LARGE,
             )
+        except defer.TimeoutError:
+            raise SynapseError(
+                HTTPStatus.BAD_GATEWAY,
+                "Requested file took too long to download",
+                Codes.TOO_LARGE,
+            )
         except Exception as e:
             raise SynapseError(
                 HTTPStatus.BAD_GATEWAY, ("Failed to download remote body: %s" % e)
diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py
index fbafffd69b..203e995bb7 100644
--- a/synapse/http/connectproxyclient.py
+++ b/synapse/http/connectproxyclient.py
@@ -32,9 +32,9 @@ class ProxyConnectError(ConnectError):
     pass
 
 
-@attr.s
+@attr.s(auto_attribs=True)
 class ProxyCredentials:
-    username_password = attr.ib(type=bytes)
+    username_password: bytes
 
     def as_proxy_authorization_value(self) -> bytes:
         """
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index deedde0b5b..2e668363b2 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -123,37 +123,37 @@ class ByteParser(ByteWriteable, Generic[T], abc.ABC):
         pass
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class MatrixFederationRequest:
-    method = attr.ib(type=str)
+    method: str
     """HTTP method
     """
 
-    path = attr.ib(type=str)
+    path: str
     """HTTP path
     """
 
-    destination = attr.ib(type=str)
+    destination: str
     """The remote server to send the HTTP request to.
     """
 
-    json = attr.ib(default=None, type=Optional[JsonDict])
+    json: Optional[JsonDict] = None
     """JSON to send in the body.
     """
 
-    json_callback = attr.ib(default=None, type=Optional[Callable[[], JsonDict]])
+    json_callback: Optional[Callable[[], JsonDict]] = None
     """A callback to generate the JSON.
     """
 
-    query = attr.ib(default=None, type=Optional[dict])
+    query: Optional[dict] = None
     """Query arguments.
     """
 
-    txn_id = attr.ib(default=None, type=Optional[str])
+    txn_id: Optional[str] = None
     """Unique ID for this request (for logging)
     """
 
-    uri = attr.ib(init=False, type=bytes)
+    uri: bytes = attr.ib(init=False)
     """The URI of this request
     """
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 80f7a2ff58..40f6c04894 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -407,7 +407,10 @@ class SynapseRequest(Request):
 
         user_agent = get_request_user_agent(self, "-")
 
-        code = str(self.code)
+        # int(self.code) looks redundant, because self.code is already an int,
+        # but self.code might be an HTTPStatus, which inherits from int yet has
+        # a different string representation. So ensure we really have an integer.
+        code = str(int(self.code))
         if not self.finished:
             # we didn't send the full response before we gave up (presumably because
             # the connection dropped)
@@ -534,9 +537,9 @@ class XForwardedForRequest(SynapseRequest):
 
 
 @implementer(IAddress)
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class _XForwardedForAddress:
-    host = attr.ib(type=str)
+    host: str
 
 
 class SynapseSite(Site):
diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
index 8202d0494d..475756f1db 100644
--- a/synapse/logging/_remote.py
+++ b/synapse/logging/_remote.py
@@ -39,7 +39,7 @@ from twisted.python.failure import Failure
 logger = logging.getLogger(__name__)
 
 
-@attr.s
+@attr.s(slots=True, auto_attribs=True)
 @implementer(IPushProducer)
 class LogProducer:
     """
@@ -54,10 +54,10 @@ class LogProducer:
 
     # This is essentially ITCPTransport, but that is missing certain fields
     # (connected and registerProducer) which are part of the implementation.
-    transport = attr.ib(type=Connection)
-    _format = attr.ib(type=Callable[[logging.LogRecord], str])
-    _buffer = attr.ib(type=deque)
-    _paused = attr.ib(default=False, type=bool, init=False)
+    transport: Connection
+    _format: Callable[[logging.LogRecord], str]
+    _buffer: Deque[logging.LogRecord]
+    _paused: bool = attr.ib(default=False, init=False)
 
     def pauseProducing(self):
         self._paused = True
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index d4ee893376..c31c2960ad 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -193,7 +193,7 @@ class ContextResourceUsage:
         return res
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class ContextRequest:
     """
     A bundle of attributes from the SynapseRequest object.
@@ -205,15 +205,15 @@ class ContextRequest:
       their children.
     """
 
-    request_id = attr.ib(type=str)
-    ip_address = attr.ib(type=str)
-    site_tag = attr.ib(type=str)
-    requester = attr.ib(type=Optional[str])
-    authenticated_entity = attr.ib(type=Optional[str])
-    method = attr.ib(type=str)
-    url = attr.ib(type=str)
-    protocol = attr.ib(type=str)
-    user_agent = attr.ib(type=str)
+    request_id: str
+    ip_address: str
+    site_tag: str
+    requester: Optional[str]
+    authenticated_entity: Optional[str]
+    method: str
+    url: str
+    protocol: str
+    user_agent: str
 
 
 LoggingContextOrSentinel = Union["LoggingContext", "_Sentinel"]
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 622445e9f4..3ebed5c161 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -247,11 +247,11 @@ try:
         class BaseReporter:  # type: ignore[no-redef]
             pass
 
-    @attr.s(slots=True, frozen=True)
+    @attr.s(slots=True, frozen=True, auto_attribs=True)
     class _WrappedRustReporter(BaseReporter):
         """Wrap the reporter to ensure `report_span` never throws."""
 
-        _reporter = attr.ib(type=Reporter, default=attr.Factory(Reporter))
+        _reporter: Reporter = attr.Factory(Reporter)
 
         def set_process(self, *args, **kwargs):
             return self._reporter.set_process(*args, **kwargs)
@@ -443,10 +443,14 @@ def start_active_span(
     start_time=None,
     ignore_active_span=False,
     finish_on_close=True,
+    *,
+    tracer=None,
 ):
-    """Starts an active opentracing span. Note, the scope doesn't become active
-    until it has been entered, however, the span starts from the time this
-    message is called.
+    """Starts an active opentracing span.
+
+    Records the start time for the span, and sets it as the "active span" in the
+    scope manager.
+
     Args:
         See opentracing.tracer
     Returns:
@@ -456,7 +460,11 @@ def start_active_span(
     if opentracing is None:
         return noop_context_manager()  # type: ignore[unreachable]
 
-    return opentracing.tracer.start_active_span(
+    if tracer is None:
+        # use the global tracer by default
+        tracer = opentracing.tracer
+
+    return tracer.start_active_span(
         operation_name,
         child_of=child_of,
         references=references,
@@ -468,21 +476,42 @@ def start_active_span(
 
 
 def start_active_span_follows_from(
-    operation_name: str, contexts: Collection, inherit_force_tracing=False
+    operation_name: str,
+    contexts: Collection,
+    child_of=None,
+    start_time: Optional[float] = None,
+    *,
+    inherit_force_tracing=False,
+    tracer=None,
 ):
     """Starts an active opentracing span, with additional references to previous spans
 
     Args:
         operation_name: name of the operation represented by the new span
         contexts: the previous spans to inherit from
+
+        child_of: optionally override the parent span. If unset, the currently active
+           span will be the parent. (If there is no currently active span, the first
+           span in `contexts` will be the parent.)
+
+        start_time: optional override for the start time of the created span. Seconds
+            since the epoch.
+
         inherit_force_tracing: if set, and any of the previous contexts have had tracing
            forced, the new span will also have tracing forced.
+        tracer: override the opentracing tracer. By default the global tracer is used.
     """
     if opentracing is None:
         return noop_context_manager()  # type: ignore[unreachable]
 
     references = [opentracing.follows_from(context) for context in contexts]
-    scope = start_active_span(operation_name, references=references)
+    scope = start_active_span(
+        operation_name,
+        child_of=child_of,
+        references=references,
+        start_time=start_time,
+        tracer=tracer,
+    )
 
     if inherit_force_tracing and any(
         is_context_forced_tracing(ctx) for ctx in contexts
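
A hedged usage sketch for the extended signature (the operation name, contexts and timestamp are illustrative):

with start_active_span_follows_from(
    "persist_events",
    contexts=prior_span_contexts,    # previously-extracted span contexts
    start_time=batch_start_seconds,  # seconds since the epoch
    inherit_force_tracing=True,
):
    ...  # work recorded under the new span
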
diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py
index db8ca2c049..d57e7c5324 100644
--- a/synapse/logging/scopecontextmanager.py
+++ b/synapse/logging/scopecontextmanager.py
@@ -28,8 +28,9 @@ class LogContextScopeManager(ScopeManager):
     The LogContextScopeManager tracks the active scope in opentracing
     by using the log contexts which are native to synapse. This is so
     that the basic opentracing api can be used across twisted Deferreds.
-    (I would love to break logcontexts and this into an OS package. but
-    let's wait for twisted's contexts to be released.)
+
+    It would be nice just to use opentracing's ContextVarsScopeManager,
+    but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301.
     """
 
     def __init__(self, config):
@@ -65,29 +66,45 @@ class LogContextScopeManager(ScopeManager):
             Scope.close() on the returned instance.
         """
 
-        enter_logcontext = False
         ctx = current_context()
 
         if not ctx:
-            # We don't want this scope to affect.
             logger.error("Tried to activate scope outside of loggingcontext")
             return Scope(None, span)  # type: ignore[arg-type]
-        elif ctx.scope is not None:
-            # We want the logging scope to look exactly the same so we give it
-            # a blank suffix
+
+        if ctx.scope is not None:
+            # start a new logging context as a child of the existing one.
+            # Doing so -- rather than updating the existing logcontext -- means that
+            # creating several concurrent spans under the same logcontext works
+            # correctly.
             ctx = nested_logging_context("")
             enter_logcontext = True
+        else:
+            # if there is no span currently associated with the current logcontext, we
+            # just store the scope in it.
+            #
+            # This feels a bit dubious, but it does hack around a problem where a
+            # span outlasts its parent logcontext (which would otherwise lead to
+            # "Re-starting finished log context" errors).
+            enter_logcontext = False
 
         scope = _LogContextScope(self, span, ctx, enter_logcontext, finish_on_close)
         ctx.scope = scope
+        if enter_logcontext:
+            ctx.__enter__()
+
         return scope
 
 
 class _LogContextScope(Scope):
     """
-    A custom opentracing scope. The only significant difference is that it will
-    close the log context it's related to if the logcontext was created specifically
-    for this scope.
+    A custom opentracing scope, associated with a LogContext
+
+      * filters out _DefGen_Return exceptions which arise from calling
+        `defer.returnValue` in Twisted code
+
+      * When the scope is closed, the logcontext's active scope is reset to None.
+        and - if enter_logcontext was set - the logcontext is finished too.
     """
 
     def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close):
@@ -101,8 +118,7 @@ class _LogContextScope(Scope):
             logcontext (LogContext):
                 the logcontext to which this scope is attached.
             enter_logcontext (Boolean):
-                if True the logcontext will be entered and exited when the scope
-                is entered and exited respectively
+                if True, the logcontext will be exited when the scope is finished
             finish_on_close (Boolean):
                 if True finish the span when the scope is closed
         """
@@ -111,26 +127,28 @@ class _LogContextScope(Scope):
         self._finish_on_close = finish_on_close
         self._enter_logcontext = enter_logcontext
 
-    def __enter__(self):
-        if self._enter_logcontext:
-            self.logcontext.__enter__()
+    def __exit__(self, exc_type, value, traceback):
+        if exc_type == twisted.internet.defer._DefGen_Return:
+            # filter out defer.returnValue() calls
+            exc_type = value = traceback = None
+        super().__exit__(exc_type, value, traceback)
 
-        return self
-
-    def __exit__(self, type, value, traceback):
-        if type == twisted.internet.defer._DefGen_Return:
-            super().__exit__(None, None, None)
-        else:
-            super().__exit__(type, value, traceback)
-        if self._enter_logcontext:
-            self.logcontext.__exit__(type, value, traceback)
-        else:  # the logcontext existed before the creation of the scope
-            self.logcontext.scope = None
+    def __str__(self):
+        return f"Scope<{self.span}>"
 
     def close(self):
-        if self.manager.active is not self:
-            logger.error("Tried to close a non-active scope!")
-            return
+        active_scope = self.manager.active
+        if active_scope is not self:
+            logger.error(
+                "Closing scope %s which is not the currently-active one %s",
+                self,
+                active_scope,
+            )
 
         if self._finish_on_close:
             self.span.finish()
+
+        self.logcontext.scope = None
+
+        if self._enter_logcontext:
+            self.logcontext.__exit__(None, None, None)
diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py
deleted file mode 100644
index 4a01b902c2..0000000000
--- a/synapse/logging/utils.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import logging
-from functools import wraps
-from inspect import getcallargs
-from typing import Callable, TypeVar, cast
-
-_TIME_FUNC_ID = 0
-
-
-def _log_debug_as_f(f, msg, msg_args):
-    name = f.__module__
-    logger = logging.getLogger(name)
-
-    if logger.isEnabledFor(logging.DEBUG):
-        lineno = f.__code__.co_firstlineno
-        pathname = f.__code__.co_filename
-
-        record = logger.makeRecord(
-            name=name,
-            level=logging.DEBUG,
-            fn=pathname,
-            lno=lineno,
-            msg=msg,
-            args=msg_args,
-            exc_info=None,
-        )
-
-        logger.handle(record)
-
-
-F = TypeVar("F", bound=Callable)
-
-
-def log_function(f: F) -> F:
-    """Function decorator that logs every call to that function."""
-    func_name = f.__name__
-
-    @wraps(f)
-    def wrapped(*args, **kwargs):
-        name = f.__module__
-        logger = logging.getLogger(name)
-        level = logging.DEBUG
-
-        if logger.isEnabledFor(level):
-            bound_args = getcallargs(f, *args, **kwargs)
-
-            def format(value):
-                r = str(value)
-                if len(r) > 50:
-                    r = r[:50] + "..."
-                return r
-
-            func_args = ["%s=%s" % (k, format(v)) for k, v in bound_args.items()]
-
-            msg_args = {"func_name": func_name, "args": ", ".join(func_args)}
-
-            _log_debug_as_f(f, "Invoked '%(func_name)s' with args: %(args)s", msg_args)
-
-        return f(*args, **kwargs)
-
-    wrapped.__name__ = func_name
-    return cast(F, wrapped)
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index ceef57ad88..cca084c18c 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -12,16 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import functools
-import gc
 import itertools
 import logging
 import os
 import platform
 import threading
-import time
 from typing import (
-    Any,
     Callable,
     Dict,
     Generic,
@@ -41,34 +37,31 @@ import attr
 from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram, Metric
 from prometheus_client.core import (
     REGISTRY,
-    CounterMetricFamily,
     GaugeHistogramMetricFamily,
     GaugeMetricFamily,
 )
 
-from twisted.internet import reactor
-from twisted.internet.base import ReactorBase
 from twisted.python.threadpool import ThreadPool
 
-import synapse
+import synapse.metrics._reactor_metrics
 from synapse.metrics._exposition import (
     MetricsResource,
     generate_latest,
     start_http_server,
 )
+from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager
 from synapse.util.versionstring import get_version_string
 
 logger = logging.getLogger(__name__)
 
 METRICS_PREFIX = "/_synapse/metrics"
 
-running_on_pypy = platform.python_implementation() == "PyPy"
 all_gauges: "Dict[str, Union[LaterGauge, InFlightGauge]]" = {}
 
 HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
 
 
-class RegistryProxy:
+class _RegistryProxy:
     @staticmethod
     def collect() -> Iterable[Metric]:
         for metric in REGISTRY.collect():
@@ -76,19 +69,24 @@ class RegistryProxy:
                 yield metric
 
 
-@attr.s(slots=True, hash=True)
+# A little bit nasty, but collect() above is static so a Protocol doesn't work.
+# _RegistryProxy matches the signature of a CollectorRegistry instance enough
+# for it to be usable in the contexts in which we use it.
+# TODO Do something nicer about this.
+RegistryProxy = cast(CollectorRegistry, _RegistryProxy)
+
+
+@attr.s(slots=True, hash=True, auto_attribs=True)
 class LaterGauge:
 
-    name = attr.ib(type=str)
-    desc = attr.ib(type=str)
-    labels = attr.ib(hash=False, type=Optional[Iterable[str]])
+    name: str
+    desc: str
+    labels: Optional[Iterable[str]] = attr.ib(hash=False)
     # callback: should either return a value (if there are no labels for this metric),
     # or dict mapping from a label tuple to a value
-    caller = attr.ib(
-        type=Callable[
-            [], Union[Mapping[Tuple[str, ...], Union[int, float]], Union[int, float]]
-        ]
-    )
+    caller: Callable[
+        [], Union[Mapping[Tuple[str, ...], Union[int, float]], Union[int, float]]
+    ]
 
     def collect(self) -> Iterable[Metric]:
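
A hedged usage sketch for LaterGauge after the conversion (the metric name and callback are illustrative; the gauge registers itself on construction):

LaterGauge(
    name="synapse_example_pending_items",
    desc="Number of items waiting in an example queue",
    labels=None,
    caller=lambda: len(pending_items),  # pending_items is a stand-in
)
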
 
@@ -157,7 +155,9 @@ class InFlightGauge(Generic[MetricsEntry]):
         # Create a class which have the sub_metrics values as attributes, which
         # default to 0 on initialization. Used to pass to registered callbacks.
         self._metrics_class: Type[MetricsEntry] = attr.make_class(
-            "_MetricsEntry", attrs={x: attr.ib(0) for x in sub_metrics}, slots=True
+            "_MetricsEntry",
+            attrs={x: attr.ib(default=0) for x in sub_metrics},
+            slots=True,
         )
 
         # Counts number of in flight blocks for a given set of label values
@@ -369,136 +369,6 @@ class CPUMetrics:
 
 REGISTRY.register(CPUMetrics())
 
-#
-# Python GC metrics
-#
-
-gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
-gc_time = Histogram(
-    "python_gc_time",
-    "Time taken to GC (sec)",
-    ["gen"],
-    buckets=[
-        0.0025,
-        0.005,
-        0.01,
-        0.025,
-        0.05,
-        0.10,
-        0.25,
-        0.50,
-        1.00,
-        2.50,
-        5.00,
-        7.50,
-        15.00,
-        30.00,
-        45.00,
-        60.00,
-    ],
-)
-
-
-class GCCounts:
-    def collect(self) -> Iterable[Metric]:
-        cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
-        for n, m in enumerate(gc.get_count()):
-            cm.add_metric([str(n)], m)
-
-        yield cm
-
-
-if not running_on_pypy:
-    REGISTRY.register(GCCounts())
-
-
-#
-# PyPy GC / memory metrics
-#
-
-
-class PyPyGCStats:
-    def collect(self) -> Iterable[Metric]:
-
-        # @stats is a pretty-printer object with __str__() returning a nice table,
-        # plus some fields that contain data from that table.
-        # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB').
-        stats = gc.get_stats(memory_pressure=False)  # type: ignore
-        # @s contains same fields as @stats, but as actual integers.
-        s = stats._s  # type: ignore
-
-        # also note that field naming is completely braindead
-        # and only vaguely correlates with the pretty-printed table.
-        # >>>> gc.get_stats(False)
-        # Total memory consumed:
-        #     GC used:            8.7MB (peak: 39.0MB)        # s.total_gc_memory, s.peak_memory
-        #        in arenas:            3.0MB                  # s.total_arena_memory
-        #        rawmalloced:          1.7MB                  # s.total_rawmalloced_memory
-        #        nursery:              4.0MB                  # s.nursery_size
-        #     raw assembler used: 31.0kB                      # s.jit_backend_used
-        #     -----------------------------
-        #     Total:              8.8MB                       # stats.memory_used_sum
-        #
-        #     Total memory allocated:
-        #     GC allocated:            38.7MB (peak: 41.1MB)  # s.total_allocated_memory, s.peak_allocated_memory
-        #        in arenas:            30.9MB                 # s.peak_arena_memory
-        #        rawmalloced:          4.1MB                  # s.peak_rawmalloced_memory
-        #        nursery:              4.0MB                  # s.nursery_size
-        #     raw assembler allocated: 1.0MB                  # s.jit_backend_allocated
-        #     -----------------------------
-        #     Total:                   39.7MB                 # stats.memory_allocated_sum
-        #
-        #     Total time spent in GC:  0.073                  # s.total_gc_time
-
-        pypy_gc_time = CounterMetricFamily(
-            "pypy_gc_time_seconds_total",
-            "Total time spent in PyPy GC",
-            labels=[],
-        )
-        pypy_gc_time.add_metric([], s.total_gc_time / 1000)
-        yield pypy_gc_time
-
-        pypy_mem = GaugeMetricFamily(
-            "pypy_memory_bytes",
-            "Memory tracked by PyPy allocator",
-            labels=["state", "class", "kind"],
-        )
-        # memory used by JIT assembler
-        pypy_mem.add_metric(["used", "", "jit"], s.jit_backend_used)
-        pypy_mem.add_metric(["allocated", "", "jit"], s.jit_backend_allocated)
-        # memory used by GCed objects
-        pypy_mem.add_metric(["used", "", "arenas"], s.total_arena_memory)
-        pypy_mem.add_metric(["allocated", "", "arenas"], s.peak_arena_memory)
-        pypy_mem.add_metric(["used", "", "rawmalloced"], s.total_rawmalloced_memory)
-        pypy_mem.add_metric(["allocated", "", "rawmalloced"], s.peak_rawmalloced_memory)
-        pypy_mem.add_metric(["used", "", "nursery"], s.nursery_size)
-        pypy_mem.add_metric(["allocated", "", "nursery"], s.nursery_size)
-        # totals
-        pypy_mem.add_metric(["used", "totals", "gc"], s.total_gc_memory)
-        pypy_mem.add_metric(["allocated", "totals", "gc"], s.total_allocated_memory)
-        pypy_mem.add_metric(["used", "totals", "gc_peak"], s.peak_memory)
-        pypy_mem.add_metric(["allocated", "totals", "gc_peak"], s.peak_allocated_memory)
-        yield pypy_mem
-
-
-if running_on_pypy:
-    REGISTRY.register(PyPyGCStats())
-
-
-#
-# Twisted reactor metrics
-#
-
-tick_time = Histogram(
-    "python_twisted_reactor_tick_time",
-    "Tick time of the Twisted reactor (sec)",
-    buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5],
-)
-pending_calls_metric = Histogram(
-    "python_twisted_reactor_pending_calls",
-    "Pending calls",
-    buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000],
-)
 
 #
 # Federation Metrics
@@ -551,8 +421,6 @@ build_info.labels(
     " ".join([platform.system(), platform.release()]),
 ).set(1)
 
-last_ticked = time.time()
-
 # 3PID send info
 threepid_send_requests = Histogram(
     "synapse_threepid_send_requests_with_tries",
@@ -600,116 +468,6 @@ def register_threadpool(name: str, threadpool: ThreadPool) -> None:
     )
 
 
-class ReactorLastSeenMetric:
-    def collect(self) -> Iterable[Metric]:
-        cm = GaugeMetricFamily(
-            "python_twisted_reactor_last_seen",
-            "Seconds since the Twisted reactor was last seen",
-        )
-        cm.add_metric([], time.time() - last_ticked)
-        yield cm
-
-
-REGISTRY.register(ReactorLastSeenMetric())
-
-# The minimum time in seconds between GCs for each generation, regardless of the current GC
-# thresholds and counts.
-MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
-
-# The time (in seconds since the epoch) of the last time we did a GC for each generation.
-_last_gc = [0.0, 0.0, 0.0]
-
-
-F = TypeVar("F", bound=Callable[..., Any])
-
-
-def runUntilCurrentTimer(reactor: ReactorBase, func: F) -> F:
-    @functools.wraps(func)
-    def f(*args: Any, **kwargs: Any) -> Any:
-        now = reactor.seconds()
-        num_pending = 0
-
-        # _newTimedCalls is one long list of *all* pending calls. Below loop
-        # is based off of impl of reactor.runUntilCurrent
-        for delayed_call in reactor._newTimedCalls:
-            if delayed_call.time > now:
-                break
-
-            if delayed_call.delayed_time > 0:
-                continue
-
-            num_pending += 1
-
-        num_pending += len(reactor.threadCallQueue)
-        start = time.time()
-        ret = func(*args, **kwargs)
-        end = time.time()
-
-        # record the amount of wallclock time spent running pending calls.
-        # This is a proxy for the actual amount of time between reactor polls,
-        # since about 25% of time is actually spent running things triggered by
-        # I/O events, but that is harder to capture without rewriting half the
-        # reactor.
-        tick_time.observe(end - start)
-        pending_calls_metric.observe(num_pending)
-
-        # Update the time we last ticked, for the metric to test whether
-        # Synapse's reactor has frozen
-        global last_ticked
-        last_ticked = end
-
-        if running_on_pypy:
-            return ret
-
-        # Check if we need to do a manual GC (since its been disabled), and do
-        # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
-        # promote an object into gen 2, and we don't want to handle the same
-        # object multiple times.
-        threshold = gc.get_threshold()
-        counts = gc.get_count()
-        for i in (2, 1, 0):
-            # We check if we need to do one based on a straightforward
-            # comparison between the threshold and count. We also do an extra
-            # check to make sure that we don't a GC too often.
-            if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
-                if i == 0:
-                    logger.debug("Collecting gc %d", i)
-                else:
-                    logger.info("Collecting gc %d", i)
-
-                start = time.time()
-                unreachable = gc.collect(i)
-                end = time.time()
-
-                _last_gc[i] = end
-
-                gc_time.labels(i).observe(end - start)
-                gc_unreachable.labels(i).set(unreachable)
-
-        return ret
-
-    return cast(F, f)
-
-
-try:
-    # Ensure the reactor has all the attributes we expect
-    reactor.seconds  # type: ignore
-    reactor.runUntilCurrent  # type: ignore
-    reactor._newTimedCalls  # type: ignore
-    reactor.threadCallQueue  # type: ignore
-
-    # runUntilCurrent is called when we have pending calls. It is called once
-    # per iteratation after fd polling.
-    reactor.runUntilCurrent = runUntilCurrentTimer(reactor, reactor.runUntilCurrent)  # type: ignore
-
-    # We manually run the GC each reactor tick so that we can get some metrics
-    # about time spent doing GC,
-    if not running_on_pypy:
-        gc.disable()
-except AttributeError:
-    pass
-
-
 __all__ = [
     "MetricsResource",
     "generate_latest",
@@ -717,4 +475,6 @@ __all__ = [
     "LaterGauge",
     "InFlightGauge",
     "GaugeBucketCollector",
+    "MIN_TIME_BETWEEN_GCS",
+    "install_gc_manager",
 ]
diff --git a/synapse/metrics/_gc.py b/synapse/metrics/_gc.py
new file mode 100644
index 0000000000..2bc909efa0
--- /dev/null
+++ b/synapse/metrics/_gc.py
@@ -0,0 +1,203 @@
+# Copyright 2015-2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import gc
+import logging
+import platform
+import time
+from typing import Iterable
+
+from prometheus_client.core import (
+    REGISTRY,
+    CounterMetricFamily,
+    Gauge,
+    GaugeMetricFamily,
+    Histogram,
+    Metric,
+)
+
+from twisted.internet import task
+
+"""Prometheus metrics for garbage collection"""
+
+
+logger = logging.getLogger(__name__)
+
+# The minimum time in seconds between GCs for each generation, regardless of the current GC
+# thresholds and counts.
+MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
+
+running_on_pypy = platform.python_implementation() == "PyPy"
+
+#
+# Python GC metrics
+#
+
+gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
+gc_time = Histogram(
+    "python_gc_time",
+    "Time taken to GC (sec)",
+    ["gen"],
+    buckets=[
+        0.0025,
+        0.005,
+        0.01,
+        0.025,
+        0.05,
+        0.10,
+        0.25,
+        0.50,
+        1.00,
+        2.50,
+        5.00,
+        7.50,
+        15.00,
+        30.00,
+        45.00,
+        60.00,
+    ],
+)
+
+
+class GCCounts:
+    def collect(self) -> Iterable[Metric]:
+        cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
+        for n, m in enumerate(gc.get_count()):
+            cm.add_metric([str(n)], m)
+
+        yield cm
+
+
+def install_gc_manager() -> None:
+    """Disable automatic GC, and replace it with a task that runs every 100ms
+
+    This means that (a) we can limit how often GC runs; (b) we can get some metrics
+    about GC activity.
+
+    It does nothing on PyPy.
+    """
+
+    if running_on_pypy:
+        return
+
+    REGISTRY.register(GCCounts())
+
+    gc.disable()
+
+    # The time (in seconds since the epoch) of the last time we did a GC for each generation.
+    _last_gc = [0.0, 0.0, 0.0]
+
+    def _maybe_gc() -> None:
+        # Check if we need to do a manual GC (since it has been disabled), and do
+        # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
+        # promote an object into gen 2, and we don't want to handle the same
+        # object multiple times.
+        threshold = gc.get_threshold()
+        counts = gc.get_count()
+        end = time.time()
+        for i in (2, 1, 0):
+            # We check if we need to do one based on a straightforward
+            # comparison between the threshold and count. We also do an extra
+            # check to make sure that we don't do a GC too often.
+            if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
+                if i == 0:
+                    logger.debug("Collecting gc %d", i)
+                else:
+                    logger.info("Collecting gc %d", i)
+
+                start = time.time()
+                unreachable = gc.collect(i)
+                end = time.time()
+
+                _last_gc[i] = end
+
+                gc_time.labels(i).observe(end - start)
+                gc_unreachable.labels(i).set(unreachable)
+
+    gc_task = task.LoopingCall(_maybe_gc)
+    gc_task.start(0.1)
+
+
+#
+# PyPy GC / memory metrics
+#
+
+
+class PyPyGCStats:
+    def collect(self) -> Iterable[Metric]:
+
+        # @stats is a pretty-printer object with __str__() returning a nice table,
+        # plus some fields that contain data from that table.
+        # Unfortunately, the fields are themselves pretty-printed (e.g. '4.5MB').
+        stats = gc.get_stats(memory_pressure=False)  # type: ignore
+        # @s contains same fields as @stats, but as actual integers.
+        s = stats._s  # type: ignore
+
+        # Also note that the field naming is inconsistent
+        # and only vaguely correlates with the pretty-printed table.
+        # >>>> gc.get_stats(False)
+        # Total memory consumed:
+        #     GC used:            8.7MB (peak: 39.0MB)        # s.total_gc_memory, s.peak_memory
+        #        in arenas:            3.0MB                  # s.total_arena_memory
+        #        rawmalloced:          1.7MB                  # s.total_rawmalloced_memory
+        #        nursery:              4.0MB                  # s.nursery_size
+        #     raw assembler used: 31.0kB                      # s.jit_backend_used
+        #     -----------------------------
+        #     Total:              8.8MB                       # stats.memory_used_sum
+        #
+        #     Total memory allocated:
+        #     GC allocated:            38.7MB (peak: 41.1MB)  # s.total_allocated_memory, s.peak_allocated_memory
+        #        in arenas:            30.9MB                 # s.peak_arena_memory
+        #        rawmalloced:          4.1MB                  # s.peak_rawmalloced_memory
+        #        nursery:              4.0MB                  # s.nursery_size
+        #     raw assembler allocated: 1.0MB                  # s.jit_backend_allocated
+        #     -----------------------------
+        #     Total:                   39.7MB                 # stats.memory_allocated_sum
+        #
+        #     Total time spent in GC:  0.073                  # s.total_gc_time
+
+        pypy_gc_time = CounterMetricFamily(
+            "pypy_gc_time_seconds_total",
+            "Total time spent in PyPy GC",
+            labels=[],
+        )
+        pypy_gc_time.add_metric([], s.total_gc_time / 1000)
+        yield pypy_gc_time
+
+        pypy_mem = GaugeMetricFamily(
+            "pypy_memory_bytes",
+            "Memory tracked by PyPy allocator",
+            labels=["state", "class", "kind"],
+        )
+        # memory used by JIT assembler
+        pypy_mem.add_metric(["used", "", "jit"], s.jit_backend_used)
+        pypy_mem.add_metric(["allocated", "", "jit"], s.jit_backend_allocated)
+        # memory used by GCed objects
+        pypy_mem.add_metric(["used", "", "arenas"], s.total_arena_memory)
+        pypy_mem.add_metric(["allocated", "", "arenas"], s.peak_arena_memory)
+        pypy_mem.add_metric(["used", "", "rawmalloced"], s.total_rawmalloced_memory)
+        pypy_mem.add_metric(["allocated", "", "rawmalloced"], s.peak_rawmalloced_memory)
+        pypy_mem.add_metric(["used", "", "nursery"], s.nursery_size)
+        pypy_mem.add_metric(["allocated", "", "nursery"], s.nursery_size)
+        # totals
+        pypy_mem.add_metric(["used", "totals", "gc"], s.total_gc_memory)
+        pypy_mem.add_metric(["allocated", "totals", "gc"], s.total_allocated_memory)
+        pypy_mem.add_metric(["used", "totals", "gc_peak"], s.peak_memory)
+        pypy_mem.add_metric(["allocated", "totals", "gc_peak"], s.peak_allocated_memory)
+        yield pypy_mem
+
+
+if running_on_pypy:
+    REGISTRY.register(PyPyGCStats())
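
For context, a minimal sketch of how the new `install_gc_manager` hook would be wired up at startup; the surrounding startup code here is illustrative, not Synapse's actual entry point:

    # Hedged usage sketch: install the manual GC manager before starting the
    # reactor. Only `install_gc_manager` comes from the module above.
    from twisted.internet import reactor

    from synapse.metrics._gc import install_gc_manager

    install_gc_manager()  # disables automatic GC, schedules _maybe_gc every 100ms
    reactor.run()         # the LoopingCall now drives collections per generation
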
diff --git a/synapse/metrics/_reactor_metrics.py b/synapse/metrics/_reactor_metrics.py
new file mode 100644
index 0000000000..f38f798313
--- /dev/null
+++ b/synapse/metrics/_reactor_metrics.py
@@ -0,0 +1,83 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import select
+import time
+from typing import Any, Iterable, List, Tuple
+
+from prometheus_client import Histogram, Metric
+from prometheus_client.core import REGISTRY, GaugeMetricFamily
+
+from twisted.internet import reactor
+
+#
+# Twisted reactor metrics
+#
+
+tick_time = Histogram(
+    "python_twisted_reactor_tick_time",
+    "Tick time of the Twisted reactor (sec)",
+    buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5],
+)
+
+
+class EpollWrapper:
+    """a wrapper for an epoll object which records the time between polls"""
+
+    def __init__(self, poller: "select.epoll"):  # type: ignore[name-defined]
+        self.last_polled = time.time()
+        self._poller = poller
+
+    def poll(self, *args, **kwargs) -> List[Tuple[int, int]]:  # type: ignore[no-untyped-def]
+        # record the time since poll() was last called. This gives a good proxy for
+        # how long it takes to run everything in the reactor - ie, how long anything
+        # waiting for the next tick will have to wait.
+        tick_time.observe(time.time() - self.last_polled)
+
+        ret = self._poller.poll(*args, **kwargs)
+
+        self.last_polled = time.time()
+        return ret
+
+    def __getattr__(self, item: str) -> Any:
+        return getattr(self._poller, item)
+
+
+class ReactorLastSeenMetric:
+    def __init__(self, epoll_wrapper: EpollWrapper):
+        self._epoll_wrapper = epoll_wrapper
+
+    def collect(self) -> Iterable[Metric]:
+        cm = GaugeMetricFamily(
+            "python_twisted_reactor_last_seen",
+            "Seconds since the Twisted reactor was last seen",
+        )
+        cm.add_metric([], time.time() - self._epoll_wrapper.last_polled)
+        yield cm
+
+
+try:
+    # If the reactor has a `_poller` attribute which is an `epoll` object
+    # (i.e. it's an EPollReactor), we wrap the `epoll` with an object that
+    # measures the time between ticks.
+    from select import epoll  # type: ignore[attr-defined]
+
+    poller = reactor._poller  # type: ignore[attr-defined]
+except (AttributeError, ImportError):
+    pass
+else:
+    if isinstance(poller, epoll):
+        poller = EpollWrapper(poller)
+        reactor._poller = poller  # type: ignore[attr-defined]
+        REGISTRY.register(ReactorLastSeenMetric(poller))
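
The wrapper relies on `__getattr__` delegation so the reactor can keep treating the wrapped poller as a real `epoll` object. A minimal sketch of the same proxy pattern, independent of Twisted (all names here are illustrative):

    import time
    from typing import Any

    class TimedProxy:
        """Times calls to poll(); forwards every other attribute untouched."""

        def __init__(self, inner: Any):
            self._inner = inner
            self.last_polled = time.time()
            self.tick_gaps: list = []

        def poll(self, *args: Any, **kwargs: Any) -> Any:
            # The gap since the last poll() approximates one reactor tick.
            self.tick_gaps.append(time.time() - self.last_polled)
            ret = self._inner.poll(*args, **kwargs)
            self.last_polled = time.time()
            return ret

        def __getattr__(self, item: str) -> Any:
            # Attributes we don't define (fileno, close, ...) fall through.
            return getattr(self._inner, item)
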
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 662e60bc33..29fbc73c97 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -71,6 +71,7 @@ from synapse.handlers.account_validity import (
 from synapse.handlers.auth import (
     CHECK_3PID_AUTH_CALLBACK,
     CHECK_AUTH_CALLBACK,
+    GET_USERNAME_FOR_REGISTRATION_CALLBACK,
     ON_LOGGED_OUT_CALLBACK,
     AuthHandler,
 )
@@ -177,6 +178,7 @@ class ModuleApi:
         self._presence_stream = hs.get_event_sources().sources.presence
         self._state = hs.get_state_handler()
         self._clock: Clock = hs.get_clock()
+        self._registration_handler = hs.get_registration_handler()
         self._send_email_handler = hs.get_send_email_handler()
         self.custom_template_dir = hs.config.server.custom_template_directory
 
@@ -310,6 +312,9 @@ class ModuleApi:
         auth_checkers: Optional[
             Dict[Tuple[str, Tuple[str, ...]], CHECK_AUTH_CALLBACK]
         ] = None,
+        get_username_for_registration: Optional[
+            GET_USERNAME_FOR_REGISTRATION_CALLBACK
+        ] = None,
     ) -> None:
         """Registers callbacks for password auth provider capabilities.
 
@@ -319,6 +324,7 @@ class ModuleApi:
             check_3pid_auth=check_3pid_auth,
             on_logged_out=on_logged_out,
             auth_checkers=auth_checkers,
+            get_username_for_registration=get_username_for_registration,
         )
 
     def register_background_update_controller_callbacks(
@@ -395,6 +401,32 @@ class ModuleApi:
         """
         return self._hs.config.email.email_app_name
 
+    @property
+    def server_name(self) -> str:
+        """The server name for the local homeserver.
+
+        Added in Synapse v1.53.0.
+        """
+        return self._server_name
+
+    @property
+    def worker_name(self) -> Optional[str]:
+        """The name of the worker this specific instance is running as per the
+        "worker_name" configuration setting, or None if it's the main process.
+
+        Added in Synapse v1.53.0.
+        """
+        return self._hs.config.worker.worker_name
+
+    @property
+    def worker_app(self) -> Optional[str]:
+        """The name of the worker app this specific instance is running as per the
+        "worker_app" configuration setting, or None if it's the main process.
+
+        Added in Synapse v1.53.0.
+        """
+        return self._hs.config.worker.worker_app
+
     async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]:
         """Get user info by user_id
 
@@ -1202,6 +1234,22 @@ class ModuleApi:
         """
         return await defer_to_thread(self._hs.get_reactor(), f, *args, **kwargs)
 
+    async def check_username(self, username: str) -> None:
+        """Checks if the provided username uses the grammar defined in the Matrix
+        specification, and is already being used by an existing user.
+
+        Added in Synapse v1.52.0.
+
+        Args:
+            username: The username to check. This is the local part of the user's full
+                Matrix user ID, i.e. it's "alice" if the full user ID is "@alice:foo.com".
+
+        Raises:
+            SynapseError with the errcode "M_USER_IN_USE" if the username is already in
+            use.
+        """
+        await self._registration_handler.check_username(username)
+
 
 class PublicRoomListManager:
     """Contains methods for adding to, removing from and querying whether a room
diff --git a/synapse/notifier.py b/synapse/notifier.py
index bbabdb0587..5988c67d90 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -40,7 +40,6 @@ from synapse.handlers.presence import format_user_presence_state
 from synapse.logging import issue9533_logger
 from synapse.logging.context import PreserveLoggingContext
 from synapse.logging.opentracing import log_kv, start_active_span
-from synapse.logging.utils import log_function
 from synapse.metrics import LaterGauge
 from synapse.streams.config import PaginationConfig
 from synapse.types import (
@@ -193,15 +192,15 @@ class EventStreamResult:
         return bool(self.events)
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class _PendingRoomEventEntry:
-    event_pos = attr.ib(type=PersistedEventPosition)
-    extra_users = attr.ib(type=Collection[UserID])
+    event_pos: PersistedEventPosition
+    extra_users: Collection[UserID]
 
-    room_id = attr.ib(type=str)
-    type = attr.ib(type=str)
-    state_key = attr.ib(type=Optional[str])
-    membership = attr.ib(type=Optional[str])
+    room_id: str
+    type: str
+    state_key: Optional[str]
+    membership: Optional[str]
 
 
 class Notifier:
@@ -462,7 +461,9 @@ class Notifier:
                     users,
                 )
             except Exception:
-                logger.exception("Error notifying application services of event")
+                logger.exception(
+                    "Error notifying application services of ephemeral events"
+                )
 
     def on_new_replication_data(self) -> None:
         """Used to inform replication listeners that something has happened
@@ -686,7 +687,6 @@ class Notifier:
         else:
             return False
 
-    @log_function
     def remove_expired_streams(self) -> None:
         time_now_ms = self.clock.time_msec()
         expired_streams = []
@@ -700,7 +700,6 @@ class Notifier:
         for expired_stream in expired_streams:
             expired_stream.remove(self)
 
-    @log_function
     def _register_with_keys(self, user_stream: _NotifierUserStream):
         self.user_to_user_stream[user_stream.user_id] = user_stream
 
diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py
index 820f6f3f7e..5176a1c186 100644
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -23,25 +23,25 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class PusherConfig:
     """Parameters necessary to configure a pusher."""
 
-    id = attr.ib(type=Optional[str])
-    user_name = attr.ib(type=str)
-    access_token = attr.ib(type=Optional[int])
-    profile_tag = attr.ib(type=str)
-    kind = attr.ib(type=str)
-    app_id = attr.ib(type=str)
-    app_display_name = attr.ib(type=str)
-    device_display_name = attr.ib(type=str)
-    pushkey = attr.ib(type=str)
-    ts = attr.ib(type=int)
-    lang = attr.ib(type=Optional[str])
-    data = attr.ib(type=Optional[JsonDict])
-    last_stream_ordering = attr.ib(type=int)
-    last_success = attr.ib(type=Optional[int])
-    failing_since = attr.ib(type=Optional[int])
+    id: Optional[str]
+    user_name: str
+    access_token: Optional[int]
+    profile_tag: str
+    kind: str
+    app_id: str
+    app_display_name: str
+    device_display_name: str
+    pushkey: str
+    ts: int
+    lang: Optional[str]
+    data: Optional[JsonDict]
+    last_stream_ordering: int
+    last_success: Optional[int]
+    failing_since: Optional[int]
 
     def as_dict(self) -> Dict[str, Any]:
         """Information that can be retrieved about a pusher after creation."""
@@ -57,12 +57,12 @@ class PusherConfig:
         }
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class ThrottleParams:
     """Parameters for controlling the rate of sending pushes via email."""
 
-    last_sent_ts = attr.ib(type=int)
-    throttle_ms = attr.ib(type=int)
+    last_sent_ts: int
+    throttle_ms: int
 
 
 class Pusher(metaclass=abc.ABCMeta):
diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py
index 6211506990..910b05c0da 100644
--- a/synapse/push/baserules.py
+++ b/synapse/push/baserules.py
@@ -20,15 +20,11 @@ from typing import Any, Dict, List
 from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP
 
 
-def list_with_base_rules(
-    rawrules: List[Dict[str, Any]], use_new_defaults: bool = False
-) -> List[Dict[str, Any]]:
+def list_with_base_rules(rawrules: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     """Combine the list of rules set by the user with the default push rules
 
     Args:
         rawrules: The rules the user has modified or set.
-        use_new_defaults: Whether to use the new experimental default rules when
-            appending or prepending default rules.
 
     Returns:
         A new list with the rules set by the user combined with the defaults.
@@ -48,9 +44,7 @@ def list_with_base_rules(
 
     ruleslist.extend(
         make_base_prepend_rules(
-            PRIORITY_CLASS_INVERSE_MAP[current_prio_class],
-            modified_base_rules,
-            use_new_defaults,
+            PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules
         )
     )
 
@@ -61,7 +55,6 @@ def list_with_base_rules(
                     make_base_append_rules(
                         PRIORITY_CLASS_INVERSE_MAP[current_prio_class],
                         modified_base_rules,
-                        use_new_defaults,
                     )
                 )
                 current_prio_class -= 1
@@ -70,7 +63,6 @@ def list_with_base_rules(
                         make_base_prepend_rules(
                             PRIORITY_CLASS_INVERSE_MAP[current_prio_class],
                             modified_base_rules,
-                            use_new_defaults,
                         )
                     )
 
@@ -79,18 +71,14 @@ def list_with_base_rules(
     while current_prio_class > 0:
         ruleslist.extend(
             make_base_append_rules(
-                PRIORITY_CLASS_INVERSE_MAP[current_prio_class],
-                modified_base_rules,
-                use_new_defaults,
+                PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules
             )
         )
         current_prio_class -= 1
         if current_prio_class > 0:
             ruleslist.extend(
                 make_base_prepend_rules(
-                    PRIORITY_CLASS_INVERSE_MAP[current_prio_class],
-                    modified_base_rules,
-                    use_new_defaults,
+                    PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules
                 )
             )
 
@@ -98,24 +86,14 @@ def list_with_base_rules(
 
 
 def make_base_append_rules(
-    kind: str,
-    modified_base_rules: Dict[str, Dict[str, Any]],
-    use_new_defaults: bool = False,
+    kind: str, modified_base_rules: Dict[str, Dict[str, Any]]
 ) -> List[Dict[str, Any]]:
     rules = []
 
     if kind == "override":
-        rules = (
-            NEW_APPEND_OVERRIDE_RULES
-            if use_new_defaults
-            else BASE_APPEND_OVERRIDE_RULES
-        )
+        rules = BASE_APPEND_OVERRIDE_RULES
     elif kind == "underride":
-        rules = (
-            NEW_APPEND_UNDERRIDE_RULES
-            if use_new_defaults
-            else BASE_APPEND_UNDERRIDE_RULES
-        )
+        rules = BASE_APPEND_UNDERRIDE_RULES
     elif kind == "content":
         rules = BASE_APPEND_CONTENT_RULES
 
@@ -134,7 +112,6 @@ def make_base_append_rules(
 def make_base_prepend_rules(
     kind: str,
     modified_base_rules: Dict[str, Dict[str, Any]],
-    use_new_defaults: bool = False,
 ) -> List[Dict[str, Any]]:
     rules = []
 
@@ -301,135 +278,6 @@ BASE_APPEND_OVERRIDE_RULES = [
 ]
 
 
-NEW_APPEND_OVERRIDE_RULES = [
-    {
-        "rule_id": "global/override/.m.rule.encrypted",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.encrypted",
-                "_id": "_encrypted",
-            }
-        ],
-        "actions": ["notify"],
-    },
-    {
-        "rule_id": "global/override/.m.rule.suppress_notices",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.message",
-                "_id": "_suppress_notices_type",
-            },
-            {
-                "kind": "event_match",
-                "key": "content.msgtype",
-                "pattern": "m.notice",
-                "_id": "_suppress_notices",
-            },
-        ],
-        "actions": [],
-    },
-    {
-        "rule_id": "global/underride/.m.rule.suppress_edits",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "m.relates_to.m.rel_type",
-                "pattern": "m.replace",
-                "_id": "_suppress_edits",
-            }
-        ],
-        "actions": [],
-    },
-    {
-        "rule_id": "global/override/.m.rule.invite_for_me",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.member",
-                "_id": "_member",
-            },
-            {
-                "kind": "event_match",
-                "key": "content.membership",
-                "pattern": "invite",
-                "_id": "_invite_member",
-            },
-            {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"},
-        ],
-        "actions": ["notify", {"set_tweak": "sound", "value": "default"}],
-    },
-    {
-        "rule_id": "global/override/.m.rule.contains_display_name",
-        "conditions": [{"kind": "contains_display_name"}],
-        "actions": [
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight"},
-        ],
-    },
-    {
-        "rule_id": "global/override/.m.rule.tombstone",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.tombstone",
-                "_id": "_tombstone",
-            },
-            {
-                "kind": "event_match",
-                "key": "state_key",
-                "pattern": "",
-                "_id": "_tombstone_statekey",
-            },
-        ],
-        "actions": [
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight"},
-        ],
-    },
-    {
-        "rule_id": "global/override/.m.rule.roomnotif",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "content.body",
-                "pattern": "@room",
-                "_id": "_roomnotif_content",
-            },
-            {
-                "kind": "sender_notification_permission",
-                "key": "room",
-                "_id": "_roomnotif_pl",
-            },
-        ],
-        "actions": [
-            "notify",
-            {"set_tweak": "highlight"},
-            {"set_tweak": "sound", "value": "default"},
-        ],
-    },
-    {
-        "rule_id": "global/override/.m.rule.call",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.call.invite",
-                "_id": "_call",
-            }
-        ],
-        "actions": ["notify", {"set_tweak": "sound", "value": "ring"}],
-    },
-]
-
-
 BASE_APPEND_UNDERRIDE_RULES = [
     {
         "rule_id": "global/underride/.m.rule.call",
@@ -538,36 +386,6 @@ BASE_APPEND_UNDERRIDE_RULES = [
 ]
 
 
-NEW_APPEND_UNDERRIDE_RULES = [
-    {
-        "rule_id": "global/underride/.m.rule.room_one_to_one",
-        "conditions": [
-            {"kind": "room_member_count", "is": "2", "_id": "member_count"},
-            {
-                "kind": "event_match",
-                "key": "content.body",
-                "pattern": "*",
-                "_id": "body",
-            },
-        ],
-        "actions": ["notify", {"set_tweak": "sound", "value": "default"}],
-    },
-    {
-        "rule_id": "global/underride/.m.rule.message",
-        "conditions": [
-            {
-                "kind": "event_match",
-                "key": "content.body",
-                "pattern": "*",
-                "_id": "body",
-            },
-        ],
-        "actions": ["notify"],
-        "enabled": False,
-    },
-]
-
-
 BASE_RULE_IDS = set()
 
 for r in BASE_APPEND_CONTENT_RULES:
@@ -589,26 +407,3 @@ for r in BASE_APPEND_UNDERRIDE_RULES:
     r["priority_class"] = PRIORITY_CLASS_MAP["underride"]
     r["default"] = True
     BASE_RULE_IDS.add(r["rule_id"])
-
-
-NEW_RULE_IDS = set()
-
-for r in BASE_APPEND_CONTENT_RULES:
-    r["priority_class"] = PRIORITY_CLASS_MAP["content"]
-    r["default"] = True
-    NEW_RULE_IDS.add(r["rule_id"])
-
-for r in BASE_PREPEND_OVERRIDE_RULES:
-    r["priority_class"] = PRIORITY_CLASS_MAP["override"]
-    r["default"] = True
-    NEW_RULE_IDS.add(r["rule_id"])
-
-for r in NEW_APPEND_OVERRIDE_RULES:
-    r["priority_class"] = PRIORITY_CLASS_MAP["override"]
-    r["default"] = True
-    NEW_RULE_IDS.add(r["rule_id"])
-
-for r in NEW_APPEND_UNDERRIDE_RULES:
-    r["priority_class"] = PRIORITY_CLASS_MAP["underride"]
-    r["default"] = True
-    NEW_RULE_IDS.add(r["rule_id"])
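
With the experimental defaults removed, `list_with_base_rules` is back to a single code path. A hedged usage sketch; the rule dict follows the shape of the entries above:

    from synapse.push.baserules import list_with_base_rules

    # A user-modified copy of a default rule (priority_class 5 == "override").
    user_rules = [
        {
            "rule_id": "global/override/.m.rule.suppress_notices",
            "priority_class": 5,
            "conditions": [],
            "actions": [],
            "enabled": True,
            "default": False,
        }
    ]

    # Returns the defaults with the user's rules spliced in by priority class.
    combined = list_with_base_rules(user_rules)
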
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 009d8e77b0..bee660893b 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -298,7 +298,7 @@ RulesByUser = Dict[str, List[Rule]]
 StateGroup = Union[object, int]
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class RulesForRoomData:
     """The data stored in the cache by `RulesForRoom`.
 
@@ -307,29 +307,29 @@ class RulesForRoomData:
     """
 
     # event_id -> (user_id, state)
-    member_map = attr.ib(type=MemberMap, factory=dict)
+    member_map: MemberMap = attr.Factory(dict)
     # user_id -> rules
-    rules_by_user = attr.ib(type=RulesByUser, factory=dict)
+    rules_by_user: RulesByUser = attr.Factory(dict)
 
     # The last state group we updated the caches for. If the state_group of
     # a new event comes along, we know that we can just return the cached
     # result.
     # On invalidation of the rules themselves (if the user changes them),
     # we invalidate everything and set state_group to `object()`
-    state_group = attr.ib(type=StateGroup, factory=object)
+    state_group: StateGroup = attr.Factory(object)
 
     # A sequence number to keep track of when we're allowed to update the
     # cache. We bump the sequence number when we invalidate the cache. If
     # the sequence number changes while we're calculating stuff we should
     # not update the cache with it.
-    sequence = attr.ib(type=int, default=0)
+    sequence: int = 0
 
     # A cache of user_ids that we *know* aren't interesting, e.g. user_ids
     # owned by AS's, or remote users, etc. (I.e. users we will never need to
     # calculate push for)
     # These never need to be invalidated as we will never set up push for
     # them.
-    uninteresting_user_set = attr.ib(type=Set[str], factory=set)
+    uninteresting_user_set: Set[str] = attr.Factory(set)
 
 
 class RulesForRoom:
@@ -553,7 +553,7 @@ class RulesForRoom:
             self.data.state_group = state_group
 
 
-@attr.attrs(slots=True, frozen=True)
+@attr.attrs(slots=True, frozen=True, auto_attribs=True)
 class _Invalidation:
     # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules,
     # which means that it is stored on the bulk_get_push_rules cache entry. In order
@@ -564,8 +564,8 @@ class _Invalidation:
     # attrs provides suitable __hash__ and __eq__ methods, provided we remember to
     # set `frozen=True`.
 
-    cache = attr.ib(type=LruCache)
-    room_id = attr.ib(type=str)
+    cache: LruCache
+    room_id: str
 
     def __call__(self) -> None:
         rules_data = self.cache.get(self.room_id, None, update_metrics=False)
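
Note how `factory=dict` becomes `attr.Factory(dict)` under `auto_attribs`: the factory runs once per instance, avoiding the shared-mutable-default pitfall. A quick sketch:

    import attr

    @attr.s(slots=True, auto_attribs=True)
    class CacheData:
        # attr.Factory(dict) builds a fresh dict per instance, the
        # auto_attribs spelling of attr.ib(factory=dict).
        member_map: dict = attr.Factory(dict)

    a, b = CacheData(), CacheData()
    a.member_map["x"] = 1
    assert b.member_map == {}  # instances do not share the default
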
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index ff904c2b4a..3df8452eec 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -178,7 +178,7 @@ class Mailer:
         await self.send_email(
             email_address,
             self.email_subjects.email_validation
-            % {"server_name": self.hs.config.server.server_name},
+            % {"server_name": self.hs.config.server.server_name, "app": self.app_name},
             template_vars,
         )
 
@@ -209,7 +209,7 @@ class Mailer:
         await self.send_email(
             email_address,
             self.email_subjects.email_validation
-            % {"server_name": self.hs.config.server.server_name},
+            % {"server_name": self.hs.config.server.server_name, "app": self.app_name},
             template_vars,
         )
 
@@ -455,7 +455,7 @@ class Mailer:
         }
 
         the_events = await filter_events_for_client(
-            self.storage, user_id, results["events_before"]
+            self.storage, user_id, results.events_before
         )
         the_events.append(notif_event)
 
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index d844fbb3b3..22b4606ae0 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -70,7 +70,7 @@ REQUIREMENTS = [
     "pyasn1>=0.1.9",
     "pyasn1-modules>=0.0.7",
     "bcrypt>=3.1.0",
-    "pillow>=4.3.0",
+    "pillow>=5.4.0",
     "sortedcontainers>=1.4.4",
     "pymacaroons>=0.13.0",
     "msgpack>=0.5.2",
@@ -107,7 +107,7 @@ CONDITIONAL_REQUIREMENTS = {
     # `systemd.journal.JournalHandler`, as is documented in
     # `contrib/systemd/log_config.yaml`.
     "systemd": ["systemd-python>=231"],
-    "url_preview": ["lxml>=3.5.0"],
+    "url_preview": ["lxml>=4.2.0"],
     "sentry": ["sentry-sdk>=0.7.2"],
     "opentracing": ["jaeger-client>=4.0.0", "opentracing>=2.2.0"],
     "jwt": ["pyjwt>=1.6.4"],
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index 0f08372694..a72dad7464 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -52,8 +52,8 @@ class SlavedEventStore(
     EventPushActionsWorkerStore,
     StreamWorkerStore,
     StateGroupWorkerStore,
-    EventsWorkerStore,
     SignatureWorkerStore,
+    EventsWorkerStore,
     UserErasureWorkerStore,
     RelationsWorkerStore,
     BaseSlavedStore,
diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py
index a390cfcb74..4f4f1ad453 100644
--- a/synapse/replication/tcp/streams/events.py
+++ b/synapse/replication/tcp/streams/events.py
@@ -50,12 +50,12 @@ data part are:
 """
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class EventsStreamRow:
     """A parsed row from the events replication stream"""
 
-    type = attr.ib()  # str: the TypeId of one of the *EventsStreamRows
-    data = attr.ib()  # BaseEventsStreamRow
+    type: str  # the TypeId of one of the *EventsStreamRows
+    data: "BaseEventsStreamRow"
 
 
 class BaseEventsStreamRow:
@@ -79,28 +79,28 @@ class BaseEventsStreamRow:
         return cls(*data)
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class EventsStreamEventRow(BaseEventsStreamRow):
     TypeId = "ev"
 
-    event_id = attr.ib(type=str)
-    room_id = attr.ib(type=str)
-    type = attr.ib(type=str)
-    state_key = attr.ib(type=Optional[str])
-    redacts = attr.ib(type=Optional[str])
-    relates_to = attr.ib(type=Optional[str])
-    membership = attr.ib(type=Optional[str])
-    rejected = attr.ib(type=bool)
+    event_id: str
+    room_id: str
+    type: str
+    state_key: Optional[str]
+    redacts: Optional[str]
+    relates_to: Optional[str]
+    membership: Optional[str]
+    rejected: bool
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class EventsStreamCurrentStateRow(BaseEventsStreamRow):
     TypeId = "state"
 
-    room_id = attr.ib()  # str
-    type = attr.ib()  # str
-    state_key = attr.ib()  # str
-    event_id = attr.ib()  # str, optional
+    room_id: str
+    type: str
+    state_key: str
+    event_id: Optional[str]
 
 
 _EventRows: Tuple[Type[BaseEventsStreamRow], ...] = (
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 465e06772b..9be9e33c8e 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -41,7 +41,9 @@ from synapse.rest.admin.event_reports import (
     EventReportsRestServlet,
 )
 from synapse.rest.admin.federation import (
-    DestinationsRestServlet,
+    DestinationMembershipRestServlet,
+    DestinationResetConnectionRestServlet,
+    DestinationRestServlet,
     ListDestinationsRestServlet,
 )
 from synapse.rest.admin.groups import DeleteGroupAdminRestServlet
@@ -267,7 +269,9 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ListRegistrationTokensRestServlet(hs).register(http_server)
     NewRegistrationTokenRestServlet(hs).register(http_server)
     RegistrationTokenRestServlet(hs).register(http_server)
-    DestinationsRestServlet(hs).register(http_server)
+    DestinationMembershipRestServlet(hs).register(http_server)
+    DestinationResetConnectionRestServlet(hs).register(http_server)
+    DestinationRestServlet(hs).register(http_server)
     ListDestinationsRestServlet(hs).register(http_server)
 
     # Some servlets only get registered for the main process.
diff --git a/synapse/rest/admin/background_updates.py b/synapse/rest/admin/background_updates.py
index 6ec00ce0b9..e9bce22a34 100644
--- a/synapse/rest/admin/background_updates.py
+++ b/synapse/rest/admin/background_updates.py
@@ -123,34 +123,25 @@ class BackgroundUpdateStartJobRestServlet(RestServlet):
         job_name = body["job_name"]
 
         if job_name == "populate_stats_process_rooms":
-            jobs = [
-                {
-                    "update_name": "populate_stats_process_rooms",
-                    "progress_json": "{}",
-                },
-            ]
+            jobs = [("populate_stats_process_rooms", "{}", "")]
         elif job_name == "regenerate_directory":
             jobs = [
-                {
-                    "update_name": "populate_user_directory_createtables",
-                    "progress_json": "{}",
-                    "depends_on": "",
-                },
-                {
-                    "update_name": "populate_user_directory_process_rooms",
-                    "progress_json": "{}",
-                    "depends_on": "populate_user_directory_createtables",
-                },
-                {
-                    "update_name": "populate_user_directory_process_users",
-                    "progress_json": "{}",
-                    "depends_on": "populate_user_directory_process_rooms",
-                },
-                {
-                    "update_name": "populate_user_directory_cleanup",
-                    "progress_json": "{}",
-                    "depends_on": "populate_user_directory_process_users",
-                },
+                ("populate_user_directory_createtables", "{}", ""),
+                (
+                    "populate_user_directory_process_rooms",
+                    "{}",
+                    "populate_user_directory_createtables",
+                ),
+                (
+                    "populate_user_directory_process_users",
+                    "{}",
+                    "populate_user_directory_process_rooms",
+                ),
+                (
+                    "populate_user_directory_cleanup",
+                    "{}",
+                    "populate_user_directory_process_users",
+                ),
             ]
         else:
             raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid job_name")
@@ -158,6 +149,7 @@ class BackgroundUpdateStartJobRestServlet(RestServlet):
         try:
             await self._store.db_pool.simple_insert_many(
                 table="background_updates",
+                keys=("update_name", "progress_json", "depends_on"),
                 values=jobs,
                 desc=f"admin_api_run_{job_name}",
             )
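
`simple_insert_many` now takes the column names once via `keys` and each row as a tuple in the same order, instead of one dict per row. A hedged sketch of the new calling convention, mirroring the hunk above:

    async def start_stats_job(store) -> None:
        # Column names are passed once; each row is a positional tuple.
        await store.db_pool.simple_insert_many(
            table="background_updates",
            keys=("update_name", "progress_json", "depends_on"),
            values=[("populate_stats_process_rooms", "{}", "")],
            desc="admin_api_run_populate_stats_process_rooms",
        )
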
diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py
index 8cd3fa189e..d162e0081e 100644
--- a/synapse/rest/admin/federation.py
+++ b/synapse/rest/admin/federation.py
@@ -16,6 +16,7 @@ from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.errors import Codes, NotFoundError, SynapseError
+from synapse.federation.transport.server import Authenticator
 from synapse.http.servlet import RestServlet, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
@@ -90,7 +91,7 @@ class ListDestinationsRestServlet(RestServlet):
         return HTTPStatus.OK, response
 
 
-class DestinationsRestServlet(RestServlet):
+class DestinationRestServlet(RestServlet):
     """Get details of a destination.
     This needs user to have administrator access in Synapse.
 
@@ -145,3 +146,100 @@ class DestinationsRestServlet(RestServlet):
             }
 
         return HTTPStatus.OK, response
+
+
+class DestinationMembershipRestServlet(RestServlet):
+    """Get list of rooms of a destination.
+    This needs user to have administrator access in Synapse.
+
+    GET /_synapse/admin/v1/federation/destinations/<destination>/rooms?from=0&limit=10
+
+    returns:
+        200 OK with a list of rooms on success, otherwise an error.
+
+    The parameters `from` and `limit` are optional and used for pagination.
+    By default, a `limit` of 100 is used.
+    """
+
+    PATTERNS = admin_patterns("/federation/destinations/(?P<destination>[^/]*)/rooms$")
+
+    def __init__(self, hs: "HomeServer"):
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastore()
+
+    async def on_GET(
+        self, request: SynapseRequest, destination: str
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        if not await self._store.is_destination_known(destination):
+            raise NotFoundError("Unknown destination")
+
+        start = parse_integer(request, "from", default=0)
+        limit = parse_integer(request, "limit", default=100)
+
+        if start < 0:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Query parameter from must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        if limit < 0:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Query parameter limit must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        direction = parse_string(request, "dir", default="f", allowed_values=("f", "b"))
+
+        rooms, total = await self._store.get_destination_rooms_paginate(
+            destination, start, limit, direction
+        )
+        response = {"rooms": rooms, "total": total}
+        if (start + limit) < total:
+            response["next_token"] = str(start + len(rooms))
+
+        return HTTPStatus.OK, response
+
+
+class DestinationResetConnectionRestServlet(RestServlet):
+    """Reset destinations' connection timeouts and wake it up.
+    This needs user to have administrator access in Synapse.
+
+    POST /_synapse/admin/v1/federation/destinations/<destination>/reset_connection
+    {}
+
+    returns:
+        200 OK on success, otherwise an error.
+    """
+
+    PATTERNS = admin_patterns(
+        "/federation/destinations/(?P<destination>[^/]+)/reset_connection$"
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastore()
+        self._authenticator = Authenticator(hs)
+
+    async def on_POST(
+        self, request: SynapseRequest, destination: str
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        if not await self._store.is_destination_known(destination):
+            raise NotFoundError("Unknown destination")
+
+        retry_timings = await self._store.get_destination_retry_timings(destination)
+        if not (retry_timings and retry_timings.retry_last_ts):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "The retry timing does not need to be reset for this destination.",
+            )
+
+        # reset timings and wake up
+        await self._authenticator.reset_retry_timings(destination)
+
+        return HTTPStatus.OK, {}
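
A hedged sketch of driving the new reset endpoint from an admin client; the homeserver URL, token, and `requests` dependency are assumptions, not part of this change:

    import requests

    def reset_destination(base_url: str, admin_token: str, destination: str) -> None:
        # POST with an empty JSON body, as documented in the servlet above.
        resp = requests.post(
            f"{base_url}/_synapse/admin/v1/federation/destinations/"
            f"{destination}/reset_connection",
            headers={"Authorization": f"Bearer {admin_token}"},
            json={},
        )
        resp.raise_for_status()  # 400 if there are no retry timings to reset
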
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 2e714ac87b..5b706efbcf 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -729,7 +729,7 @@ class RoomEventContextServlet(RestServlet):
         else:
             event_filter = None
 
-        results = await self.room_context_handler.get_event_context(
+        event_context = await self.room_context_handler.get_event_context(
             requester,
             room_id,
             event_id,
@@ -738,30 +738,34 @@ class RoomEventContextServlet(RestServlet):
             use_admin_priviledge=True,
         )
 
-        if not results:
+        if not event_context:
             raise SynapseError(
                 HTTPStatus.NOT_FOUND, "Event not found.", errcode=Codes.NOT_FOUND
             )
 
         time_now = self.clock.time_msec()
-        results["events_before"] = self._event_serializer.serialize_events(
-            results["events_before"],
-            time_now,
-            bundle_aggregations=results["aggregations"],
-        )
-        results["event"] = self._event_serializer.serialize_event(
-            results["event"],
-            time_now,
-            bundle_aggregations=results["aggregations"],
-        )
-        results["events_after"] = self._event_serializer.serialize_events(
-            results["events_after"],
-            time_now,
-            bundle_aggregations=results["aggregations"],
-        )
-        results["state"] = self._event_serializer.serialize_events(
-            results["state"], time_now
-        )
+        results = {
+            "events_before": self._event_serializer.serialize_events(
+                event_context.events_before,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "event": self._event_serializer.serialize_event(
+                event_context.event,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "events_after": self._event_serializer.serialize_events(
+                event_context.events_after,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "state": self._event_serializer.serialize_events(
+                event_context.state, time_now
+            ),
+            "start": event_context.start,
+            "end": event_context.end,
+        }
 
         return HTTPStatus.OK, results
 
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 78e795c347..c2617ee30c 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -173,12 +173,11 @@ class UserRestServletV2(RestServlet):
         if not self.hs.is_mine(target_user):
             raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users")
 
-        ret = await self.admin_handler.get_user(target_user)
-
-        if not ret:
+        user_info_dict = await self.admin_handler.get_user(target_user)
+        if not user_info_dict:
             raise NotFoundError("User not found")
 
-        return HTTPStatus.OK, ret
+        return HTTPStatus.OK, user_info_dict
 
     async def on_PUT(
         self, request: SynapseRequest, user_id: str
@@ -399,10 +398,10 @@ class UserRestServletV2(RestServlet):
                     target_user, requester, body["avatar_url"], True
                 )
 
-            user = await self.admin_handler.get_user(target_user)
-            assert user is not None
+            user_info_dict = await self.admin_handler.get_user(target_user)
+            assert user_info_dict is not None
 
-            return 201, user
+            return HTTPStatus.CREATED, user_info_dict
 
 
 class UserRegisterServlet(RestServlet):
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index d1badbdf3b..58b8adbd32 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -66,7 +66,7 @@ class AccountDataServlet(RestServlet):
             raise AuthError(403, "Cannot get account data for other users.")
 
         event = await self.store.get_global_account_data_by_type_for_user(
-            account_data_type, user_id
+            user_id, account_data_type
         )
 
         if event is None:
diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py
index 6f796d5e50..8fe75bd750 100644
--- a/synapse/rest/client/push_rule.py
+++ b/synapse/rest/client/push_rule.py
@@ -29,7 +29,7 @@ from synapse.http.servlet import (
     parse_string,
 )
 from synapse.http.site import SynapseRequest
-from synapse.push.baserules import BASE_RULE_IDS, NEW_RULE_IDS
+from synapse.push.baserules import BASE_RULE_IDS
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.push.rulekinds import PRIORITY_CLASS_MAP
 from synapse.rest.client._base import client_patterns
@@ -61,10 +61,6 @@ class PushRuleRestServlet(RestServlet):
         self.notifier = hs.get_notifier()
         self._is_worker = hs.config.worker.worker_app is not None
 
-        self._users_new_default_push_rules = (
-            hs.config.server.users_new_default_push_rules
-        )
-
     async def on_PUT(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDict]:
         if self._is_worker:
             raise Exception("Cannot handle PUT /push_rules on worker")
@@ -217,12 +213,7 @@ class PushRuleRestServlet(RestServlet):
             rule_id = spec.rule_id
             is_default_rule = rule_id.startswith(".")
             if is_default_rule:
-                if user_id in self._users_new_default_push_rules:
-                    rule_ids = NEW_RULE_IDS
-                else:
-                    rule_ids = BASE_RULE_IDS
-
-                if namespaced_rule_id not in rule_ids:
+                if namespaced_rule_id not in BASE_RULE_IDS:
                     raise SynapseError(404, "Unknown rule %r" % (namespaced_rule_id,))
             await self.store.set_push_rule_actions(
                 user_id, namespaced_rule_id, actions, is_default_rule
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 8b56c76aed..c283313e8d 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -339,12 +339,19 @@ class UsernameAvailabilityRestServlet(RestServlet):
             ),
         )
 
+        self.inhibit_user_in_use_error = (
+            hs.config.registration.inhibit_user_in_use_error
+        )
+
     async def on_GET(self, request: Request) -> Tuple[int, JsonDict]:
         if not self.hs.config.registration.enable_registration:
             raise SynapseError(
                 403, "Registration has been disabled", errcode=Codes.FORBIDDEN
             )
 
+        if self.inhibit_user_in_use_error:
+            return 200, {"available": True}
+
         ip = request.getClientIP()
         with self.ratelimiter.ratelimit(ip) as wait_deferred:
             await wait_deferred
@@ -361,7 +368,7 @@ class RegistrationTokenValidityRestServlet(RestServlet):
 
     Example:
 
-        GET /_matrix/client/unstable/org.matrix.msc3231/register/org.matrix.msc3231.login.registration_token/validity?token=abcd
+        GET /_matrix/client/v1/register/m.login.registration_token/validity?token=abcd
 
         200 OK
 
@@ -371,9 +378,8 @@ class RegistrationTokenValidityRestServlet(RestServlet):
     """
 
     PATTERNS = client_patterns(
-        f"/org.matrix.msc3231/register/{LoginType.REGISTRATION_TOKEN}/validity",
-        releases=(),
-        unstable=True,
+        f"/register/{LoginType.REGISTRATION_TOKEN}/validity",
+        releases=("v1",),
     )
 
     def __init__(self, hs: "HomeServer"):
@@ -418,10 +424,14 @@ class RegisterRestServlet(RestServlet):
         self.ratelimiter = hs.get_registration_ratelimiter()
         self.password_policy_handler = hs.get_password_policy_handler()
         self.clock = hs.get_clock()
+        self.password_auth_provider = hs.get_password_auth_provider()
         self._registration_enabled = self.hs.config.registration.enable_registration
         self._refresh_tokens_enabled = (
             hs.config.registration.refreshable_access_token_lifetime is not None
         )
+        self._inhibit_user_in_use_error = (
+            hs.config.registration.inhibit_user_in_use_error
+        )
 
         self._registration_flows = _calculate_registration_flows(
             hs.config, self.auth_handler
@@ -564,6 +574,7 @@ class RegisterRestServlet(RestServlet):
                 desired_username,
                 guest_access_token=guest_access_token,
                 assigned_user_id=registered_user_id,
+                inhibit_user_in_use_error=self._inhibit_user_in_use_error,
             )
 
         # Check if the user-interactive authentication flows are complete, if
@@ -627,7 +638,16 @@ class RegisterRestServlet(RestServlet):
             if not password_hash:
                 raise SynapseError(400, "Missing params: password", Codes.MISSING_PARAM)
 
-            desired_username = params.get("username", None)
+            desired_username = await (
+                self.password_auth_provider.get_username_for_registration(
+                    auth_result,
+                    params,
+                )
+            )
+
+            if desired_username is None:
+                desired_username = params.get("username", None)
+
             guest_access_token = params.get("guest_access_token", None)
 
             if desired_username is not None:
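
For context, a hedged sketch of a module supplying the get_username_for_registration callback consulted above. The register_password_auth_provider_callbacks hook and the email-stage lookup are assumptions for illustration, not part of this diff:

    from typing import Any, Dict, Optional

    class UsernamePickerModule:
        def __init__(self, config: Dict[str, Any], api):
            # Assumed module-API hook; the callback name matches this diff.
            api.register_password_auth_provider_callbacks(
                get_username_for_registration=self.get_username_for_registration,
            )

        async def get_username_for_registration(
            self, uia_results: Dict[str, Any], params: Dict[str, Any]
        ) -> Optional[str]:
            # Derive a localpart from a completed email UIA stage, if present;
            # returning None falls back to the client-supplied "username".
            threepid = uia_results.get("m.login.email.identity")
            if threepid:
                return str(threepid["address"]).split("@")[0]
            return None
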
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index 37d949a71e..8cf5ebaa07 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -118,7 +118,9 @@ class RelationPaginationServlet(RestServlet):
         )
         # The relations returned for the requested event do include their
         # bundled aggregations.
-        aggregations = await self.store.get_bundled_aggregations(events)
+        aggregations = await self.store.get_bundled_aggregations(
+            events, requester.user.to_string()
+        )
         serialized_events = self._event_serializer.serialize_events(
             events, now, bundle_aggregations=aggregations
         )
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index da6014900a..90355e44b2 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -663,7 +663,9 @@ class RoomEventServlet(RestServlet):
 
         if event:
             # Ensure there are bundled aggregations available.
-            aggregations = await self._store.get_bundled_aggregations([event])
+            aggregations = await self._store.get_bundled_aggregations(
+                [event], requester.user.to_string()
+            )
 
             time_now = self.clock.time_msec()
             event_dict = self._event_serializer.serialize_event(
@@ -704,30 +706,36 @@ class RoomEventContextServlet(RestServlet):
         else:
             event_filter = None
 
-        results = await self.room_context_handler.get_event_context(
+        event_context = await self.room_context_handler.get_event_context(
             requester, room_id, event_id, limit, event_filter
         )
 
-        if not results:
+        if not event_context:
             raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND)
 
         time_now = self.clock.time_msec()
-        results["events_before"] = self._event_serializer.serialize_events(
-            results["events_before"],
-            time_now,
-            bundle_aggregations=results["aggregations"],
-        )
-        results["event"] = self._event_serializer.serialize_event(
-            results["event"], time_now, bundle_aggregations=results["aggregations"]
-        )
-        results["events_after"] = self._event_serializer.serialize_events(
-            results["events_after"],
-            time_now,
-            bundle_aggregations=results["aggregations"],
-        )
-        results["state"] = self._event_serializer.serialize_events(
-            results["state"], time_now
-        )
+        results = {
+            "events_before": self._event_serializer.serialize_events(
+                event_context.events_before,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "event": self._event_serializer.serialize_event(
+                event_context.event,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "events_after": self._event_serializer.serialize_events(
+                event_context.events_after,
+                time_now,
+                bundle_aggregations=event_context.aggregations,
+            ),
+            "state": self._event_serializer.serialize_events(
+                event_context.state, time_now
+            ),
+            "start": event_context.start,
+            "end": event_context.end,
+        }
 
         return 200, results
 
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index a3e57e4b20..f9615da525 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -48,6 +48,7 @@ from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import trace
+from synapse.storage.databases.main.relations import BundledAggregations
 from synapse.types import JsonDict, StreamToken
 from synapse.util import json_decoder
 
@@ -526,7 +527,7 @@ class SyncRestServlet(RestServlet):
 
         def serialize(
             events: Iterable[EventBase],
-            aggregations: Optional[Dict[str, Dict[str, Any]]] = None,
+            aggregations: Optional[Dict[str, BundledAggregations]] = None,
         ) -> List[JsonDict]:
             return self._event_serializer.serialize_events(
                 events,
@@ -554,20 +555,9 @@ class SyncRestServlet(RestServlet):
                 )
 
         serialized_state = serialize(state_events)
-        # Don't bother to bundle aggregations if the timeline is unlimited,
-        # as clients will have all the necessary information.
-        # bundle_aggregations=room.timeline.limited,
-        #
-        # richvdh 2021-12-15: disable this temporarily as it has too high an
-        # overhead for initialsyncs. We need to figure out a way that the
-        # bundling can be done *before* the events are stored in the
-        # SyncResponseCache so that this part can be synchronous.
-        #
-        # Ensure to re-enable the test at tests/rest/client/test_relations.py::RelationsTestCase.test_bundled_aggregations.
-        # if room.timeline.limited:
-        #    aggregations = await self.store.get_bundled_aggregations(timeline_events)
-        aggregations = None
-        serialized_timeline = serialize(timeline_events, aggregations)
+        serialized_timeline = serialize(
+            timeline_events, room.timeline.bundled_aggregations
+        )
 
         account_data = room.account_data
 
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index fca239d8c7..9f6c251caf 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -343,7 +343,7 @@ class SpamMediaException(NotFoundError):
     """
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class ReadableFileWrapper:
     """Wrapper that allows reading a file in chunks, yielding to the reactor,
     and writing to a callback.
@@ -354,8 +354,8 @@ class ReadableFileWrapper:
 
     CHUNK_SIZE = 2 ** 14
 
-    clock = attr.ib(type=Clock)
-    path = attr.ib(type=str)
+    clock: Clock
+    path: str
 
     async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None:
         """Reads the file in chunks and calls the callback with each chunk."""
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index cce1527ed9..2177b46c9e 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)
 class OEmbedResult:
     # The Open Graph result (converted from the oEmbed result).
     open_graph_result: JsonDict
+    # The author_name of the oEmbed result.
+    author_name: Optional[str]
     # Number of milliseconds to cache the content, according to the oEmbed response.
     #
     # This will be None if no cache-age is provided in the oEmbed response (or
@@ -154,11 +156,12 @@ class OEmbedProvider:
                 "og:url": url,
             }
 
-            # Use either title or author's name as the title.
-            title = oembed.get("title") or oembed.get("author_name")
+            title = oembed.get("title")
             if title:
                 open_graph_response["og:title"] = title
 
+            author_name = oembed.get("author_name")
+
             # Use the provider name as the site name.
             provider_name = oembed.get("provider_name")
             if provider_name:
@@ -193,9 +196,10 @@ class OEmbedProvider:
             # Trap any exception and let the code follow as usual.
             logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
             open_graph_response = {}
+            author_name = None
             cache_age = None
 
-        return OEmbedResult(open_graph_response, cache_age)
+        return OEmbedResult(open_graph_response, author_name, cache_age)
 
 
 def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index 30b067dd42..872a9e72e8 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -321,14 +321,33 @@ def _iterate_over_text(
 
 
 def rebase_url(url: str, base: str) -> str:
-    base_parts = list(urlparse.urlparse(base))
+    """
+    Resolves a potentially relative `url` against an absolute `base` URL.
+
+    For example:
+
+        >>> rebase_url("subpage", "https://example.com/foo/")
+        'https://example.com/foo/subpage'
+        >>> rebase_url("sibling", "https://example.com/foo")
+        'https://example.com/sibling'
+        >>> rebase_url("/bar", "https://example.com/foo/")
+        'https://example.com/bar'
+        >>> rebase_url("https://alice.com/a/", "https://example.com/foo/")
+        'https://alice.com/a/'
+    """
+    base_parts = urlparse.urlparse(base)
+    # Convert the parsed URL to a list for (potential) modification.
     url_parts = list(urlparse.urlparse(url))
-    if not url_parts[0]:  # fix up schema
-        url_parts[0] = base_parts[0] or "http"
-    if not url_parts[1]:  # fix up hostname
-        url_parts[1] = base_parts[1]
+    # Add a scheme, if one does not exist.
+    if not url_parts[0]:
+        url_parts[0] = base_parts.scheme or "http"
+    # Fix up the hostname, if this is not a data URL.
+    if url_parts[0] != "data" and not url_parts[1]:
+        url_parts[1] = base_parts.netloc
+        # If the path does not start with a /, nest it under the base path's last
+        # directory.
         if not url_parts[2].startswith("/"):
-            url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts[2]) + url_parts[2]
+            url_parts[2] = re.sub(r"/[^/]+$", "/", base_parts.path) + url_parts[2]
     return urlparse.urlunparse(url_parts)
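
A short sketch of what the new "data" guard preserves: a data: URL parses with an empty netloc, so without the check it would wrongly inherit the base's hostname. In the docstring's doctest style:

    >>> rebase_url("data:,Hello%2C%20World%21", "https://example.com/foo/")
    'data:,Hello%2C%20World%21'
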
 
 
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index a3829d943b..efd84ced8f 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -21,8 +21,9 @@ import re
 import shutil
 import sys
 import traceback
-from typing import TYPE_CHECKING, Iterable, Optional, Tuple
+from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
 from urllib import parse as urlparse
+from urllib.request import urlopen
 
 import attr
 
@@ -71,6 +72,17 @@ IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
+class DownloadResult:
+    length: int
+    uri: str
+    response_code: int
+    media_type: str
+    download_name: Optional[str]
+    expires: int
+    etag: Optional[str]
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class MediaInfo:
     """
     Information parsed from downloading media being previewed.
@@ -256,12 +268,13 @@ class PreviewUrlResource(DirectServeJsonResource):
         if oembed_url:
             url_to_download = oembed_url
 
-        media_info = await self._download_url(url_to_download, user)
+        media_info = await self._handle_url(url_to_download, user)
 
         logger.debug("got media_info of '%s'", media_info)
 
         # The number of milliseconds that the response should be considered valid.
         expiration_ms = media_info.expires
+        author_name: Optional[str] = None
 
         if _is_media(media_info.media_type):
             file_id = media_info.filesystem_id
@@ -294,17 +307,27 @@ class PreviewUrlResource(DirectServeJsonResource):
                 # Check if this HTML document points to oEmbed information and
                 # defer to that.
                 oembed_url = self._oembed.autodiscover_from_html(tree)
-                og = {}
+                og_from_oembed: JsonDict = {}
                 if oembed_url:
-                    oembed_info = await self._download_url(oembed_url, user)
-                    og, expiration_ms = await self._handle_oembed_response(
+                    oembed_info = await self._handle_url(
+                        oembed_url, user, allow_data_urls=True
+                    )
+                    (
+                        og_from_oembed,
+                        author_name,
+                        expiration_ms,
+                    ) = await self._handle_oembed_response(
                         url, oembed_info, expiration_ms
                     )
 
-                # If there was no oEmbed URL (or oEmbed parsing failed), attempt
-                # to generate the Open Graph information from the HTML.
-                if not oembed_url or not og:
-                    og = parse_html_to_open_graph(tree, media_info.uri)
+                # Parse Open Graph information from the HTML in case the oEmbed
+                # response failed or is incomplete.
+                og_from_html = parse_html_to_open_graph(tree, media_info.uri)
+
+                # Compile the Open Graph response by using the scraped
+                # information from the HTML and overlaying any information
+                # from the oEmbed response.
+                og = {**og_from_html, **og_from_oembed}
 
                 await self._precache_image_url(user, media_info, og)
             else:
@@ -312,7 +335,7 @@ class PreviewUrlResource(DirectServeJsonResource):
 
         elif oembed_url:
             # Handle the oEmbed information.
-            og, expiration_ms = await self._handle_oembed_response(
+            og, author_name, expiration_ms = await self._handle_oembed_response(
                 url, media_info, expiration_ms
             )
             await self._precache_image_url(user, media_info, og)
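
The merge in the HTML branch above relies on dict-unpacking precedence: later entries win, so fields from the oEmbed response override the HTML-scraped values. For illustration:

    og_from_html = {"og:title": "Page title", "og:description": "From the HTML"}
    og_from_oembed = {"og:title": "Video title"}
    og = {**og_from_html, **og_from_oembed}
    # -> {"og:title": "Video title", "og:description": "From the HTML"}
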
@@ -321,6 +344,11 @@ class PreviewUrlResource(DirectServeJsonResource):
             logger.warning("Failed to find any OG data in %s", url)
             og = {}
 
+        # If the page has no title but we found an author name, use it as
+        # the title.
+        if not og.get("og:title") and author_name:
+            og["og:title"] = author_name
+
         # filter out any stupidly long values
         keys_to_remove = []
         for k, v in og.items():
@@ -353,7 +381,135 @@ class PreviewUrlResource(DirectServeJsonResource):
 
         return jsonog.encode("utf8")
 
-    async def _download_url(self, url: str, user: UserID) -> MediaInfo:
+    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
+        """
+        Fetches a remote URL and parses the headers.
+
+        Args:
+             url: The URL to fetch.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult containing the media length, the URL downloaded,
+            the HTTP response code, the media type, the downloaded file name,
+            the number of milliseconds the result is valid for and the ETag
+            header.
+        """
+
+        try:
+            logger.debug("Trying to get preview for url '%s'", url)
+            length, headers, uri, code = await self.client.get_file(
+                url,
+                output_stream=output_stream,
+                max_size=self.max_spider_size,
+                headers={"Accept-Language": self.url_preview_accept_language},
+            )
+        except SynapseError:
+            # Pass SynapseErrors through directly, so that the servlet
+            # handler will return a SynapseError to the client instead of
+            # blank data or a 500.
+            raise
+        except DNSLookupError:
+            # DNS lookup returned no results
+            # Note: This will also be the case if one of the resolved IP
+            # addresses is blacklisted
+            raise SynapseError(
+                502,
+                "DNS resolution failure during URL preview generation",
+                Codes.UNKNOWN,
+            )
+        except Exception as e:
+            # FIXME: pass through 404s and other error messages nicely
+            logger.warning("Error downloading %s: %r", url, e)
+
+            raise SynapseError(
+                500,
+                "Failed to download content: %s"
+                % (traceback.format_exception_only(sys.exc_info()[0], e),),
+                Codes.UNKNOWN,
+            )
+
+        if b"Content-Type" in headers:
+            media_type = headers[b"Content-Type"][0].decode("ascii")
+        else:
+            media_type = "application/octet-stream"
+
+        download_name = get_filename_from_headers(headers)
+
+        # FIXME: we should calculate a proper expiration based on the
+        # Cache-Control and Expire headers.  But for now, assume 1 hour.
+        expires = ONE_HOUR
+        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None
+
+        return DownloadResult(
+            length, uri, code, media_type, download_name, expires, etag
+        )
+
+    async def _parse_data_url(
+        self, url: str, output_stream: BinaryIO
+    ) -> DownloadResult:
+        """
+        Parses a data: URL.
+
+        Args:
+             url: The URL to parse.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult containing the media length, the URL parsed,
+            the HTTP response code (always 200 here), the media type, the
+            downloaded file name (None for data URLs), the number of
+            milliseconds the result is valid for and the ETag header (None).
+        """
+
+        try:
+            logger.debug("Trying to parse data url '%s'", url)
+            with urlopen(url) as url_info:
+                # TODO: Can this be made more efficient?
+                output_stream.write(url_info.read())
+        except Exception as e:
+            logger.warning("Error parsing data: URL %s: %r", url, e)
+
+            raise SynapseError(
+                500,
+                "Failed to parse data URL: %s"
+                % (traceback.format_exception_only(sys.exc_info()[0], e),),
+                Codes.UNKNOWN,
+            )
+
+        return DownloadResult(
+            # Read back the length that has been written.
+            length=output_stream.tell(),
+            uri=url,
+            # If it was parsed, consider this a 200 OK.
+            response_code=200,
+            # urlopen shoves the media-type from the data URL into the content type
+            # header object.
+            media_type=url_info.headers.get_content_type(),
+            # Some features are not supported by data: URLs.
+            download_name=None,
+            expires=ONE_HOUR,
+            etag=None,
+        )
+
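
A standalone sketch of the mechanism _parse_data_url leans on: urllib's default opener resolves data: URLs locally and exposes the embedded media type through the response headers (no Synapse machinery assumed):

    from urllib.request import urlopen

    with urlopen("data:text/plain;base64,aGVsbG8=") as url_info:
        body = url_info.read()                            # b"hello"
        media_type = url_info.headers.get_content_type()  # "text/plain"
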
+    async def _handle_url(
+        self, url: str, user: UserID, allow_data_urls: bool = False
+    ) -> MediaInfo:
+        """
+        Fetches content from a URL and parses the result to generate a MediaInfo.
+
+        It uses the media storage provider to persist the fetched content and
+        stores the mapping into the database.
+
+        Args:
+             url: The URL to fetch.
+             user: The user who has requested this URL.
+             allow_data_urls: True if data URLs should be allowed.
+
+        Returns:
+            A MediaInfo object describing the fetched content.
+        """
+
         # TODO: we should probably honour robots.txt... except in practice
         # we're most likely being explicitly triggered by a human rather than a
         # bot, so are we really a robot?
@@ -363,61 +519,27 @@ class PreviewUrlResource(DirectServeJsonResource):
         file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
 
         with self.media_storage.store_into_file(file_info) as (f, fname, finish):
-            try:
-                logger.debug("Trying to get preview for url '%s'", url)
-                length, headers, uri, code = await self.client.get_file(
-                    url,
-                    output_stream=f,
-                    max_size=self.max_spider_size,
-                    headers={"Accept-Language": self.url_preview_accept_language},
-                )
-            except SynapseError:
-                # Pass SynapseErrors through directly, so that the servlet
-                # handler will return a SynapseError to the client instead of
-                # blank data or a 500.
-                raise
-            except DNSLookupError:
-                # DNS lookup returned no results
-                # Note: This will also be the case if one of the resolved IP
-                # addresses is blacklisted
-                raise SynapseError(
-                    502,
-                    "DNS resolution failure during URL preview generation",
-                    Codes.UNKNOWN,
-                )
-            except Exception as e:
-                # FIXME: pass through 404s and other error messages nicely
-                logger.warning("Error downloading %s: %r", url, e)
-
-                raise SynapseError(
-                    500,
-                    "Failed to download content: %s"
-                    % (traceback.format_exception_only(sys.exc_info()[0], e),),
-                    Codes.UNKNOWN,
-                )
-            await finish()
+            if url.startswith("data:"):
+                if not allow_data_urls:
+                    raise SynapseError(
+                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
+                    )
 
-            if b"Content-Type" in headers:
-                media_type = headers[b"Content-Type"][0].decode("ascii")
+                download_result = await self._parse_data_url(url, f)
             else:
-                media_type = "application/octet-stream"
+                download_result = await self._download_url(url, f)
 
-            download_name = get_filename_from_headers(headers)
-
-            # FIXME: we should calculate a proper expiration based on the
-            # Cache-Control and Expire headers.  But for now, assume 1 hour.
-            expires = ONE_HOUR
-            etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None
+            await finish()
 
         try:
             time_now_ms = self.clock.time_msec()
 
             await self.store.store_local_media(
                 media_id=file_id,
-                media_type=media_type,
+                media_type=download_result.media_type,
                 time_now_ms=time_now_ms,
-                upload_name=download_name,
-                media_length=length,
+                upload_name=download_result.download_name,
+                media_length=download_result.length,
                 user_id=user,
                 url_cache=url,
             )
@@ -430,16 +552,16 @@ class PreviewUrlResource(DirectServeJsonResource):
             raise
 
         return MediaInfo(
-            media_type=media_type,
-            media_length=length,
-            download_name=download_name,
+            media_type=download_result.media_type,
+            media_length=download_result.length,
+            download_name=download_result.download_name,
             created_ts_ms=time_now_ms,
             filesystem_id=file_id,
             filename=fname,
-            uri=uri,
-            response_code=code,
-            expires=expires,
-            etag=etag,
+            uri=download_result.uri,
+            response_code=download_result.response_code,
+            expires=download_result.expires,
+            etag=download_result.etag,
         )
 
     async def _precache_image_url(
@@ -460,8 +582,8 @@ class PreviewUrlResource(DirectServeJsonResource):
         # FIXME: it might be cleaner to use the same flow as the main /preview_url
         # request itself and benefit from the same caching etc.  But for now we
         # just rely on the caching on the master request to speed things up.
-        image_info = await self._download_url(
-            rebase_url(og["og:image"], media_info.uri), user
+        image_info = await self._handle_url(
+            rebase_url(og["og:image"], media_info.uri), user, allow_data_urls=True
         )
 
         if _is_media(image_info.media_type):
@@ -484,7 +606,7 @@ class PreviewUrlResource(DirectServeJsonResource):
 
     async def _handle_oembed_response(
         self, url: str, media_info: MediaInfo, expiration_ms: int
-    ) -> Tuple[JsonDict, int]:
+    ) -> Tuple[JsonDict, Optional[str], int]:
         """
         Parse the downloaded oEmbed info.
 
@@ -497,11 +619,12 @@ class PreviewUrlResource(DirectServeJsonResource):
         Returns:
             A tuple of:
                 The Open Graph dictionary, if the oEmbed info can be parsed.
+                The author name if it could be retrieved from oEmbed.
                 The (possibly updated) length of time, in milliseconds, the media is valid for.
         """
         # If JSON was not returned, there's nothing to do.
         if not _is_json(media_info.media_type):
-            return {}, expiration_ms
+            return {}, None, expiration_ms
 
         with open(media_info.filename, "rb") as file:
             body = file.read()
@@ -513,7 +636,7 @@ class PreviewUrlResource(DirectServeJsonResource):
         if open_graph_result and oembed_response.cache_age is not None:
             expiration_ms = oembed_response.cache_age
 
-        return open_graph_result, expiration_ms
+        return open_graph_result, oembed_response.author_name, expiration_ms
 
     def _start_expire_url_cache_data(self) -> Deferred:
         return run_as_background_process(
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 69ac8c3423..67e8bc6ec2 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -45,7 +45,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersio
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.logging.context import ContextResourceUsage
-from synapse.logging.utils import log_function
 from synapse.state import v1, v2
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.roommember import ProfileInfo
@@ -450,19 +449,19 @@ class StateHandler:
         return {key: state_map[ev_id] for key, ev_id in new_state.items()}
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _StateResMetrics:
     """Keeps track of some usage metrics about state res."""
 
     # System and User CPU time, in seconds
-    cpu_time = attr.ib(type=float, default=0.0)
+    cpu_time: float = 0.0
 
     # time spent on database transactions (excluding scheduling time). This roughly
     # corresponds to the amount of work done on the db server, excluding event fetches.
-    db_time = attr.ib(type=float, default=0.0)
+    db_time: float = 0.0
 
     # number of events fetched from the db.
-    db_events = attr.ib(type=int, default=0)
+    db_events: int = 0
 
 
 _biggest_room_by_cpu_counter = Counter(
@@ -512,7 +511,6 @@ class StateResolutionHandler:
 
         self.clock.looping_call(self._report_metrics, 120 * 1000)
 
-    @log_function
     async def resolve_state_groups(
         self,
         room_id: str,
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 7967011afd..8df80664a2 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -57,7 +57,7 @@ class SQLBaseStore(metaclass=ABCMeta):
         pass
 
     def _invalidate_state_caches(
-        self, room_id: str, members_changed: Iterable[str]
+        self, room_id: str, members_changed: Collection[str]
     ) -> None:
         """Invalidates caches that are based on the current state, but does
         not stream invalidations down replication.
@@ -66,11 +66,16 @@ class SQLBaseStore(metaclass=ABCMeta):
             room_id: Room where state changed
             members_changed: The user_ids of members that have changed
         """
+        # If there were any membership changes, purge the appropriate caches.
         for host in {get_domain_from_id(u) for u in members_changed}:
             self._attempt_to_invalidate_cache("is_host_joined", (room_id, host))
+        if members_changed:
+            self._attempt_to_invalidate_cache("get_users_in_room", (room_id,))
+            self._attempt_to_invalidate_cache(
+                "get_users_in_room_with_profiles", (room_id,)
+            )
 
-        self._attempt_to_invalidate_cache("get_users_in_room", (room_id,))
-        self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
+        # Purge other caches based on room state.
         self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
         self._attempt_to_invalidate_cache("get_current_state_ids", (room_id,))
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 2cacc7dd6c..99802228c9 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -143,7 +143,7 @@ def make_conn(
     return db_conn
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class LoggingDatabaseConnection:
     """A wrapper around a database connection that returns `LoggingTransaction`
     as its cursor class.
@@ -151,9 +151,9 @@ class LoggingDatabaseConnection:
     This is mainly used on startup to ensure that queries get logged correctly
     """
 
-    conn = attr.ib(type=Connection)
-    engine = attr.ib(type=BaseDatabaseEngine)
-    default_txn_name = attr.ib(type=str)
+    conn: Connection
+    engine: BaseDatabaseEngine
+    default_txn_name: str
 
     def cursor(
         self, *, txn_name=None, after_callbacks=None, exception_callbacks=None
@@ -702,6 +702,7 @@ class DatabasePool:
         func: Callable[..., R],
         *args: Any,
         db_autocommit: bool = False,
+        isolation_level: Optional[int] = None,
         **kwargs: Any,
     ) -> R:
         """Starts a transaction on the database and runs a given function
@@ -724,6 +725,7 @@ class DatabasePool:
                 called multiple times if the transaction is retried, so must
                 correctly handle that case.
 
+            isolation_level: Set the server isolation level for this transaction.
             args: positional args to pass to `func`
             kwargs: named args to pass to `func`
 
@@ -746,6 +748,7 @@ class DatabasePool:
                     func,
                     *args,
                     db_autocommit=db_autocommit,
+                    isolation_level=isolation_level,
                     **kwargs,
                 )
 
@@ -763,6 +766,7 @@ class DatabasePool:
         func: Callable[..., R],
         *args: Any,
         db_autocommit: bool = False,
+        isolation_level: Optional[int] = None,
         **kwargs: Any,
     ) -> R:
         """Wraps the .runWithConnection() method on the underlying db_pool.
@@ -775,6 +779,7 @@ class DatabasePool:
             db_autocommit: Whether to run the function in "autocommit" mode,
                 i.e. outside of a transaction. This is useful for transaction
                 that are only a single query. Currently only affects postgres.
+            isolation_level: Set the server isolation level for this transaction.
             kwargs: named args to pass to `func`
 
         Returns:
@@ -834,6 +839,10 @@ class DatabasePool:
                     try:
                         if db_autocommit:
                             self.engine.attempt_to_set_autocommit(conn, True)
+                        if isolation_level is not None:
+                            self.engine.attempt_to_set_isolation_level(
+                                conn, isolation_level
+                            )
 
                         db_conn = LoggingDatabaseConnection(
                             conn, self.engine, "runWithConnection"
@@ -842,6 +851,8 @@ class DatabasePool:
                     finally:
                         if db_autocommit:
                             self.engine.attempt_to_set_autocommit(conn, False)
+                        if isolation_level is not None:
+                            self.engine.attempt_to_set_isolation_level(conn, None)
 
         return await make_deferred_yieldable(
             self._db_pool.runWithConnection(inner_func, *args, **kwargs)
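
A hedged usage sketch for the new isolation_level parameter, assuming the IsolationLevel enum that accompanies this change elsewhere in the tree (the interaction name and transaction function are illustrative):

    from synapse.storage.engines import IsolationLevel

    result = await self.db_pool.runInteraction(
        "update_thing",        # illustrative
        _update_thing_txn,     # illustrative
        isolation_level=IsolationLevel.READ_COMMITTED,
    )
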
@@ -934,56 +945,6 @@ class DatabasePool:
         txn.execute(sql, vals)
 
     async def simple_insert_many(
-        self, table: str, values: List[Dict[str, Any]], desc: str
-    ) -> None:
-        """Executes an INSERT query on the named table.
-
-        The input is given as a list of dicts, with one dict per row.
-        Generally simple_insert_many_values should be preferred for new code.
-
-        Args:
-            table: string giving the table name
-            values: dict of new column names and values for them
-            desc: description of the transaction, for logging and metrics
-        """
-        await self.runInteraction(desc, self.simple_insert_many_txn, table, values)
-
-    @staticmethod
-    def simple_insert_many_txn(
-        txn: LoggingTransaction, table: str, values: List[Dict[str, Any]]
-    ) -> None:
-        """Executes an INSERT query on the named table.
-
-        The input is given as a list of dicts, with one dict per row.
-        Generally simple_insert_many_values_txn should be preferred for new code.
-
-        Args:
-            txn: The transaction to use.
-            table: string giving the table name
-            values: dict of new column names and values for them
-        """
-        if not values:
-            return
-
-        # This is a *slight* abomination to get a list of tuples of key names
-        # and a list of tuples of value names.
-        #
-        # i.e. [{"a": 1, "b": 2}, {"c": 3, "d": 4}]
-        #         => [("a", "b",), ("c", "d",)] and [(1, 2,), (3, 4,)]
-        #
-        # The sort is to ensure that we don't rely on dictionary iteration
-        # order.
-        keys, vals = zip(
-            *(zip(*(sorted(i.items(), key=lambda kv: kv[0]))) for i in values if i)
-        )
-
-        for k in keys:
-            if k != keys[0]:
-                raise RuntimeError("All items must have the same keys")
-
-        return DatabasePool.simple_insert_many_values_txn(txn, table, keys[0], vals)
-
-    async def simple_insert_many_values(
         self,
         table: str,
         keys: Collection[str],
@@ -1002,11 +963,11 @@ class DatabasePool:
             desc: description of the transaction, for logging and metrics
         """
         await self.runInteraction(
-            desc, self.simple_insert_many_values_txn, table, keys, values
+            desc, self.simple_insert_many_txn, table, keys, values
         )
 
     @staticmethod
-    def simple_insert_many_values_txn(
+    def simple_insert_many_txn(
         txn: LoggingTransaction,
         table: str,
         keys: Collection[str],
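
After the rename, the async helper keeps the keys-plus-row-tuples convention of the old *_values variant. A sketch mirroring the ignored_users call later in this diff (the user IDs are illustrative):

    await self.db_pool.simple_insert_many(
        table="ignored_users",
        keys=("ignorer_user_id", "ignored_user_id"),
        values=[("@alice:example.com", "@spammer:example.com")],
        desc="add_ignored_users",
    )
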
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 93db71d1b4..52146aacc8 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -26,6 +26,7 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
+from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
@@ -44,7 +45,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class AccountDataWorkerStore(CacheInvalidationWorkerStore):
+class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore):
     def __init__(
         self,
         database: DatabasePool,
@@ -105,6 +106,11 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
             "AccountDataAndTagsChangeCache", account_max
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "delete_account_data_for_deactivated_users",
+            self._delete_account_data_for_deactivated_users,
+        )
+
     def get_max_account_data_stream_id(self) -> int:
         """Get the current max stream ID for account data stream
 
@@ -158,9 +164,9 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
             "get_account_data_for_user", get_account_data_for_user_txn
         )
 
-    @cached(num_args=2, max_entries=5000)
+    @cached(num_args=2, max_entries=5000, tree=True)
     async def get_global_account_data_by_type_for_user(
-        self, data_type: str, user_id: str
+        self, user_id: str, data_type: str
     ) -> Optional[JsonDict]:
         """
         Returns:
@@ -179,7 +185,7 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
         else:
             return None
 
-    @cached(num_args=2)
+    @cached(num_args=2, tree=True)
     async def get_account_data_for_room(
         self, user_id: str, room_id: str
     ) -> Dict[str, JsonDict]:
@@ -210,7 +216,7 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
             "get_account_data_for_room", get_account_data_for_room_txn
         )
 
-    @cached(num_args=3, max_entries=5000)
+    @cached(num_args=3, max_entries=5000, tree=True)
     async def get_account_data_for_room_and_type(
         self, user_id: str, room_id: str, account_data_type: str
     ) -> Optional[JsonDict]:
@@ -392,7 +398,7 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
             for row in rows:
                 if not row.room_id:
                     self.get_global_account_data_by_type_for_user.invalidate(
-                        (row.data_type, row.user_id)
+                        (row.user_id, row.data_type)
                     )
                 self.get_account_data_for_user.invalidate((row.user_id,))
                 self.get_account_data_for_room.invalidate((row.user_id, row.room_id))
@@ -476,7 +482,7 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
             self.get_account_data_for_user.invalidate((user_id,))
             self.get_global_account_data_by_type_for_user.invalidate(
-                (account_data_type, user_id)
+                (user_id, account_data_type)
             )
 
         return self._account_data_id_gen.get_current_token()
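
The key reordering above (user_id first) pairs with the new tree=True on the cache: a tree cache can be invalidated by key prefix, so a single call clears every account-data type cached for one user, as the purge path later in this diff does:

    # Prefix invalidation on the tree cache: clears all data types for the user.
    self.get_global_account_data_by_type_for_user.invalidate((user_id,))
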
@@ -536,9 +542,9 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="ignored_users",
+            keys=("ignorer_user_id", "ignored_user_id"),
             values=[
-                {"ignorer_user_id": user_id, "ignored_user_id": u}
-                for u in currently_ignored_users - previously_ignored_users
+                (user_id, u) for u in currently_ignored_users - previously_ignored_users
             ],
         )
 
@@ -546,6 +552,123 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore):
         for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
             self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
 
+    async def purge_account_data_for_user(self, user_id: str) -> None:
+        """
+        Removes ALL the account data for a user.
+        Intended to be used upon user deactivation.
+
+        Also purges the user from the ignored_users cache table
+        and the push_rules cache tables.
+        """
+
+        await self.db_pool.runInteraction(
+            "purge_account_data_for_user_txn",
+            self._purge_account_data_for_user_txn,
+            user_id,
+        )
+
+    def _purge_account_data_for_user_txn(
+        self, txn: LoggingTransaction, user_id: str
+    ) -> None:
+        """
+        See `purge_account_data_for_user`.
+        """
+        # Purge from the primary account_data tables.
+        self.db_pool.simple_delete_txn(
+            txn, table="account_data", keyvalues={"user_id": user_id}
+        )
+
+        self.db_pool.simple_delete_txn(
+            txn, table="room_account_data", keyvalues={"user_id": user_id}
+        )
+
+        # Purge from ignored_users where this user is the ignorer.
+        # N.B. We don't purge where this user is the ignoree, because that
+        #      interferes with other users' account data.
+        #      It's also not this user's data to delete!
+        self.db_pool.simple_delete_txn(
+            txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
+        )
+
+        # Remove the push rules
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_enable", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_stream", keyvalues={"user_id": user_id}
+        )
+
+        # Invalidate caches as appropriate
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room_and_type, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_global_account_data_by_type_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room, (user_id,)
+        )
+        self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_push_rules_enabled_for_user, (user_id,)
+        )
+        # This user might be contained in the ignored_by cache for other users,
+        # so we have to invalidate it all.
+        self._invalidate_all_cache_and_stream(txn, self.ignored_by)
+
+    async def _delete_account_data_for_deactivated_users(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """
+        Retroactively purges account data for users that have already been deactivated.
+        Gets run as a background update caused by a schema delta.
+        """
+
+        last_user: str = progress.get("last_user", "")
+
+        def _delete_account_data_for_deactivated_users_txn(
+            txn: LoggingTransaction,
+        ) -> int:
+            sql = """
+                SELECT name FROM users
+                WHERE deactivated = ? AND name > ?
+                ORDER BY name ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (1, last_user, batch_size))
+            users = [row[0] for row in txn]
+
+            for user in users:
+                self._purge_account_data_for_user_txn(txn, user_id=user)
+
+            if users:
+                self.db_pool.updates._background_update_progress_txn(
+                    txn,
+                    "delete_account_data_for_deactivated_users",
+                    {"last_user": users[-1]},
+                )
+
+            return len(users)
+
+        number_deleted = await self.db_pool.runInteraction(
+            "_delete_account_data_for_deactivated_users",
+            _delete_account_data_for_deactivated_users_txn,
+        )
+
+        if number_deleted < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "delete_account_data_for_deactivated_users"
+            )
+
+        return number_deleted
+
 
 class AccountDataStore(AccountDataWorkerStore):
     pass
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 92c95a41d7..304814af5d 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -198,6 +198,7 @@ class ApplicationServiceTransactionWorkerStore(
         service: ApplicationService,
         events: List[EventBase],
         ephemeral: List[JsonDict],
+        to_device_messages: List[JsonDict],
     ) -> AppServiceTransaction:
         """Atomically creates a new transaction for this application service
         with the given list of events. Ephemeral events are NOT persisted to the
@@ -207,6 +208,7 @@ class ApplicationServiceTransactionWorkerStore(
             service: The service who the transaction is for.
             events: A list of persistent events to put in the transaction.
             ephemeral: A list of ephemeral events to put in the transaction.
+            to_device_messages: A list of to-device messages to put in the transaction.
 
         Returns:
             A new transaction.
@@ -237,7 +239,11 @@ class ApplicationServiceTransactionWorkerStore(
                 (service.id, new_txn_id, event_ids),
             )
             return AppServiceTransaction(
-                service=service, id=new_txn_id, events=events, ephemeral=ephemeral
+                service=service,
+                id=new_txn_id,
+                events=events,
+                ephemeral=ephemeral,
+                to_device_messages=to_device_messages,
             )
 
         return await self.db_pool.runInteraction(
@@ -330,7 +336,11 @@ class ApplicationServiceTransactionWorkerStore(
         events = await self.get_events_as_list(event_ids)
 
         return AppServiceTransaction(
-            service=service, id=entry["txn_id"], events=events, ephemeral=[]
+            service=service,
+            id=entry["txn_id"],
+            events=events,
+            ephemeral=[],
+            to_device_messages=[],
         )
 
     def _get_last_txn(self, txn, service_id: Optional[str]) -> int:
@@ -384,14 +394,14 @@ class ApplicationServiceTransactionWorkerStore(
             "get_new_events_for_appservice", get_new_events_for_appservice_txn
         )
 
-        events = await self.get_events_as_list(event_ids)
+        events = await self.get_events_as_list(event_ids, get_prev_content=True)
 
         return upper_bound, events
 
     async def get_type_stream_id_for_appservice(
         self, service: ApplicationService, type: str
     ) -> int:
-        if type not in ("read_receipt", "presence"):
+        if type not in ("read_receipt", "presence", "to_device"):
             raise ValueError(
                 "Expected type to be a valid application stream id type, got %s"
                 % (type,)
@@ -415,16 +425,16 @@ class ApplicationServiceTransactionWorkerStore(
             "get_type_stream_id_for_appservice", get_type_stream_id_for_appservice_txn
         )
 
-    async def set_type_stream_id_for_appservice(
+    async def set_appservice_stream_type_pos(
         self, service: ApplicationService, stream_type: str, pos: Optional[int]
     ) -> None:
-        if stream_type not in ("read_receipt", "presence"):
+        if stream_type not in ("read_receipt", "presence", "to_device"):
             raise ValueError(
                 "Expected type to be a valid application stream id type, got %s"
                 % (stream_type,)
             )
 
-        def set_type_stream_id_for_appservice_txn(txn):
+        def set_appservice_stream_type_pos_txn(txn):
             stream_id_type = "%s_stream_id" % stream_type
             txn.execute(
                 "UPDATE application_services_state SET %s = ? WHERE as_id=?"
@@ -433,7 +443,7 @@ class ApplicationServiceTransactionWorkerStore(
             )
 
         await self.db_pool.runInteraction(
-            "set_type_stream_id_for_appservice", set_type_stream_id_for_appservice_txn
+            "set_appservice_stream_type_pos", set_appservice_stream_type_pos_txn
         )
 
 
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 0024348067..c428dd5596 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -15,7 +15,7 @@
 
 import itertools
 import logging
-from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Tuple
 
 from synapse.api.constants import EventTypes
 from synapse.replication.tcp.streams import BackfillStream, CachesStream
@@ -25,7 +25,11 @@ from synapse.replication.tcp.streams.events import (
     EventsStreamEventRow,
 )
 from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+)
 from synapse.storage.engines import PostgresEngine
 from synapse.util.iterutils import batch_iter
 
@@ -236,7 +240,9 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         txn.call_after(cache_func.invalidate_all)
         self._send_invalidation_to_replication(txn, cache_func.__name__, None)
 
-    def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed):
+    def _invalidate_state_caches_and_stream(
+        self, txn: LoggingTransaction, room_id: str, members_changed: Collection[str]
+    ) -> None:
         """Special case invalidation of caches based on current state.
 
         We special case this so that we can batch the cache invalidations into a
@@ -244,8 +250,8 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
 
         Args:
             txn
-            room_id (str): Room where state changed
-            members_changed (iterable[str]): The user_ids of members that have changed
+            room_id: Room where state changed
+            members_changed: The user_ids of members that have changed
         """
         txn.call_after(self._invalidate_state_caches, room_id, members_changed)
 
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 3682cb6a81..8801b7b2dd 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Set, Tuple, cast
 
 from synapse.logging import issue9533_logger
 from synapse.logging.opentracing import log_kv, set_tag, trace
@@ -24,6 +24,7 @@ from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
     LoggingTransaction,
+    make_in_list_sql_clause,
 )
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import (
@@ -136,63 +137,260 @@ class DeviceInboxWorkerStore(SQLBaseStore):
     def get_to_device_stream_token(self):
         return self._device_inbox_id_gen.get_current_token()
 
-    async def get_new_messages_for_device(
+    async def get_messages_for_user_devices(
+        self,
+        user_ids: Collection[str],
+        from_stream_id: int,
+        to_stream_id: int,
+    ) -> Dict[Tuple[str, str], List[JsonDict]]:
+        """
+        Retrieve to-device messages for a given set of users.
+
+        Only to-device messages with stream ids between the given boundaries
+        (from < X <= to) are returned.
+
+        Args:
+            user_ids: The users to retrieve to-device messages for.
+            from_stream_id: The lower boundary of stream id to filter with (exclusive).
+            to_stream_id: The upper boundary of stream id to filter with (inclusive).
+
+        Returns:
+            A dictionary of (user id, device id) -> list of to-device messages.
+        """
+        # We expect the stream ID returned by _get_device_messages to always
+        # be to_stream_id. So, no need to return it from this function.
+        (
+            user_id_device_id_to_messages,
+            last_processed_stream_id,
+        ) = await self._get_device_messages(
+            user_ids=user_ids,
+            from_stream_id=from_stream_id,
+            to_stream_id=to_stream_id,
+        )
+
+        assert (
+            last_processed_stream_id == to_stream_id
+        ), "Expected _get_device_messages to process all to-device messages up to `to_stream_id`"
+
+        return user_id_device_id_to_messages
+
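
A hedged usage sketch for the new bulk query (the store handle, token names and delivery function are illustrative):

    messages_by_device = await store.get_messages_for_user_devices(
        user_ids={"@alice:example.com", "@bob:example.com"},
        from_stream_id=last_stream_pos,
        to_stream_id=current_stream_pos,
    )
    for (user_id, device_id), messages in messages_by_device.items():
        deliver_to_appservice(user_id, device_id, messages)  # illustrative
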
+    async def get_messages_for_device(
         self,
         user_id: str,
-        device_id: Optional[str],
-        last_stream_id: int,
-        current_stream_id: int,
+        device_id: str,
+        from_stream_id: int,
+        to_stream_id: int,
         limit: int = 100,
-    ) -> Tuple[List[dict], int]:
+    ) -> Tuple[List[JsonDict], int]:
         """
+        Retrieve to-device messages for a single user device.
+
+        Only to-device messages with stream ids between the given boundaries
+        (from < X <= to) are returned.
+
         Args:
-            user_id: The recipient user_id.
-            device_id: The recipient device_id.
-            last_stream_id: The last stream ID checked.
-            current_stream_id: The current position of the to device
-                message stream.
-            limit: The maximum number of messages to retrieve.
+            user_id: The ID of the user to retrieve messages for.
+            device_id: The ID of the device to retrieve to-device messages for.
+            from_stream_id: The lower boundary of stream id to filter with (exclusive).
+            to_stream_id: The upper boundary of stream id to filter with (inclusive).
+            limit: A limit on the number of to-device messages returned.
 
         Returns:
             A tuple containing:
-                * A list of messages for the device.
-                * The max stream token of these messages. There may be more to retrieve
-                  if the given limit was reached.
+                * A list of to-device messages within the given stream id range intended for
+                  the given user / device combo.
+                * The last-processed stream ID. Subsequent calls of this function with the
+                  same device should pass this value as 'from_stream_id'.
         """
-        has_changed = self._device_inbox_stream_cache.has_entity_changed(
-            user_id, last_stream_id
+        (
+            user_id_device_id_to_messages,
+            last_processed_stream_id,
+        ) = await self._get_device_messages(
+            user_ids=[user_id],
+            device_id=device_id,
+            from_stream_id=from_stream_id,
+            to_stream_id=to_stream_id,
+            limit=limit,
         )
-        if not has_changed:
-            return [], current_stream_id
 
-        def get_new_messages_for_device_txn(txn):
-            sql = (
-                "SELECT stream_id, message_json FROM device_inbox"
-                " WHERE user_id = ? AND device_id = ?"
-                " AND ? < stream_id AND stream_id <= ?"
-                " ORDER BY stream_id ASC"
-                " LIMIT ?"
+        if not user_id_device_id_to_messages:
+            # There were no messages!
+            return [], to_stream_id
+
+        # Extract the messages, no need to return the user and device ID again
+        to_device_messages = user_id_device_id_to_messages.get((user_id, device_id), [])
+
+        return to_device_messages, last_processed_stream_id
+
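
And a pagination sketch for the single-device variant: feed the returned stream ID back in as from_stream_id until it catches up with the target (names are illustrative):

    from_id = last_acked_stream_id
    while True:
        messages, from_id = await store.get_messages_for_device(
            user_id, device_id, from_id, to_stream_id, limit=100
        )
        for message in messages:
            handle_to_device_message(message)  # illustrative
        if from_id == to_stream_id:
            break
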
+    async def _get_device_messages(
+        self,
+        user_ids: Collection[str],
+        from_stream_id: int,
+        to_stream_id: int,
+        device_id: Optional[str] = None,
+        limit: Optional[int] = None,
+    ) -> Tuple[Dict[Tuple[str, str], List[JsonDict]], int]:
+        """
+        Retrieve pending to-device messages for a collection of user devices.
+
+        Only to-device messages with stream ids between the given boundaries
+        (from < X <= to) are returned.
+
+        Note that a stream ID can be shared by multiple copies of the same message with
+        different recipient devices. Stream IDs are only unique in the context of a single
+        user ID / device ID pair. Thus, applying a limit (of messages to return) when working
+        with a sliding window of stream IDs is only possible when querying messages of a
+        single user device.
+
+        Finally, note that device IDs are not unique across users.
+
+        Args:
+            user_ids: The user IDs to filter device messages by.
+            from_stream_id: The lower boundary of stream id to filter with (exclusive).
+            to_stream_id: The upper boundary of stream id to filter with (inclusive).
+            device_id: A device ID to query to-device messages for. If not provided, to-device
+                messages from all device IDs for the given user IDs will be queried. May not be
+                provided if `user_ids` contains more than one entry.
+            limit: The maximum number of to-device messages to return. Can only be used when
+                passing a single user ID / device ID tuple.
+
+        Returns:
+            A tuple containing:
+                * A dict of (user_id, device_id) -> list of to-device messages
+                * The last-processed stream ID. If this is less than `to_stream_id`, then
+                    there may be more messages to retrieve. If `limit` is not set, then this
+                    is always equal to 'to_stream_id'.
+        """
+        if not user_ids:
+            logger.warning("No users provided upon querying for device messages")
+            return {}, to_stream_id
+
+        # Prevent a query for one user's device from also retrieving another user's
+        # device with the same device ID (device IDs are not unique across users).
+        if len(user_ids) > 1 and device_id is not None:
+            raise AssertionError(
+                "Programming error: 'device_id' cannot be supplied to "
+                "_get_device_messages when >1 user_id has been provided"
             )
-            txn.execute(
-                sql, (user_id, device_id, last_stream_id, current_stream_id, limit)
+
+        # A limit can only be applied when querying for a single user ID / device ID tuple.
+        # See the docstring of this function for more details.
+        if limit is not None and device_id is None:
+            raise AssertionError(
+                "Programming error: _get_device_messages was passed 'limit' "
+                "without a specific user_id/device_id"
             )
 
-            messages = []
-            stream_pos = current_stream_id
+        user_ids_to_query: Set[str] = set()
+        device_ids_to_query: Set[str] = set()
+
+        # Note that a device ID could be an empty str
+        if device_id is not None:
+            # If a device ID was passed, use it to filter results.
+            # Otherwise, device IDs will be derived from the given collection of user IDs.
+            device_ids_to_query.add(device_id)
+
+        # Determine which users have devices with pending messages
+        for user_id in user_ids:
+            if self._device_inbox_stream_cache.has_entity_changed(
+                user_id, from_stream_id
+            ):
+                # This user has new messages sent to them. Query messages for them
+                user_ids_to_query.add(user_id)
+
+        def get_device_messages_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[Dict[Tuple[str, str], List[JsonDict]], int]:
+            # Build a query to select messages from any of the given devices that
+            # are between the given stream id bounds.
+
+            # If a list of device IDs was not provided, retrieve all device IDs
+            # for the given users. We explicitly do not query hidden devices, as
+            # hidden devices should not receive to-device messages.
+            # Note that this is more efficient than just dropping `device_id` from the query,
+            # since device_inbox has an index on `(user_id, device_id, stream_id)`
+            if not device_ids_to_query:
+                user_device_dicts = self.db_pool.simple_select_many_txn(
+                    txn,
+                    table="devices",
+                    column="user_id",
+                    iterable=user_ids_to_query,
+                    keyvalues={"hidden": False},
+                    retcols=("device_id",),
+                )
 
-            for row in txn:
-                stream_pos = row[0]
-                messages.append(db_to_json(row[1]))
+                device_ids_to_query.update(
+                    {row["device_id"] for row in user_device_dicts}
+                )
 
-            # If the limit was not reached we know that there's no more data for this
-            # user/device pair up to current_stream_id.
-            if len(messages) < limit:
-                stream_pos = current_stream_id
+            if not device_ids_to_query:
+                # We've ended up with no devices to query.
+                return {}, to_stream_id
 
-            return messages, stream_pos
+            # We include both user IDs and device IDs in this query, as we have an index
+            # (device_inbox_user_stream_id) for them.
+            user_id_many_clause_sql, user_id_many_clause_args = make_in_list_sql_clause(
+                self.database_engine, "user_id", user_ids_to_query
+            )
+            (
+                device_id_many_clause_sql,
+                device_id_many_clause_args,
+            ) = make_in_list_sql_clause(
+                self.database_engine, "device_id", device_ids_to_query
+            )
+
+            sql = f"""
+                SELECT stream_id, user_id, device_id, message_json FROM device_inbox
+                WHERE {user_id_many_clause_sql}
+                AND {device_id_many_clause_sql}
+                AND ? < stream_id AND stream_id <= ?
+                ORDER BY stream_id ASC
+            """
+            sql_args = (
+                *user_id_many_clause_args,
+                *device_id_many_clause_args,
+                from_stream_id,
+                to_stream_id,
+            )
+
+            # If a limit was provided, limit the data retrieved from the database
+            if limit is not None:
+                sql += "LIMIT ?"
+                sql_args += (limit,)
+
+            txn.execute(sql, sql_args)
+
+            # Create and fill a dictionary of (user ID, device ID) -> list of messages
+            # intended for each device.
+            last_processed_stream_pos = to_stream_id
+            recipient_device_to_messages: Dict[Tuple[str, str], List[JsonDict]] = {}
+            # Count the rows ourselves: `txn.rowcount` is unreliable for SELECTs
+            # (SQLite reports -1), so it cannot be used to detect a hit limit.
+            rowcount = 0
+            for row in txn:
+                rowcount += 1
+                last_processed_stream_pos = row[0]
+                recipient_user_id = row[1]
+                recipient_device_id = row[2]
+                message_dict = db_to_json(row[3])
+
+                # Store the device details
+                recipient_device_to_messages.setdefault(
+                    (recipient_user_id, recipient_device_id), []
+                ).append(message_dict)
+
+            if limit is not None and rowcount == limit:
+                # We ended up bumping up against the message limit. There may be more messages
+                # to retrieve. Return what we have, as well as the last stream position that
+                # was processed.
+                #
+                # The caller is expected to set this as the lower (exclusive) bound
+                # for the next query of this device.
+                return recipient_device_to_messages, last_processed_stream_pos
+
+            # The limit was not reached, so we know that recipient_device_to_messages
+            # contains all to-device messages for the given device and stream id range.
+            #
+            # We return to_stream_id, which the caller should then provide as the lower
+            # (exclusive) bound on the next query of this device.
+            return recipient_device_to_messages, to_stream_id
 
         return await self.db_pool.runInteraction(
-            "get_new_messages_for_device", get_new_messages_for_device_txn
+            "get_device_messages", get_device_messages_txn
         )
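
For illustration, a minimal pagination sketch against the helper above (hedged:
`store` is assumed to be an instance of this class, and the batch size of 100
is arbitrary):

    async def drain_device_inbox(store, user_id: str, device_id: str, upto: int):
        """Fetch all pending to-device messages in batches of 100."""
        from_id, messages = 0, []
        while True:
            batch, last_id = await store._get_device_messages(
                user_ids=[user_id],
                device_id=device_id,
                from_stream_id=from_id,
                to_stream_id=upto,
                limit=100,
            )
            messages.extend(batch.get((user_id, device_id), []))
            if last_id == upto:
                # Per the docstring, reaching `to_stream_id` means we are done.
                return messages
            from_id = last_id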
 
     @trace
@@ -432,14 +630,21 @@ class DeviceInboxWorkerStore(SQLBaseStore):
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="device_federation_outbox",
+                keys=(
+                    "destination",
+                    "stream_id",
+                    "queued_ts",
+                    "messages_json",
+                    "instance_name",
+                ),
                 values=[
-                    {
-                        "destination": destination,
-                        "stream_id": stream_id,
-                        "queued_ts": now_ms,
-                        "messages_json": json_encoder.encode(edu),
-                        "instance_name": self._instance_name,
-                    }
+                    (
+                        destination,
+                        stream_id,
+                        now_ms,
+                        json_encoder.encode(edu),
+                        self._instance_name,
+                    )
                     for destination, edu in remote_messages_by_destination.items()
                 ],
             )
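
The hunk above shows the new `simple_insert_many_txn` calling convention used
throughout this changeset: column names are given once via `keys`, and each row
becomes a plain tuple in the same order. A minimal before/after sketch
(illustrative column names, not a real table):

    # Before: one dict per row, with the keys repeated for every row.
    values = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]

    # After: keys stated once, rows as positional tuples.
    keys = ("a", "b")
    values = [(1, 2), (3, 4)]

Each tuple must list its fields in exactly the order given by `keys`.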
@@ -571,14 +776,9 @@ class DeviceInboxWorkerStore(SQLBaseStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="device_inbox",
+            keys=("user_id", "device_id", "stream_id", "message_json", "instance_name"),
             values=[
-                {
-                    "user_id": user_id,
-                    "device_id": device_id,
-                    "stream_id": stream_id,
-                    "message_json": message_json,
-                    "instance_name": self._instance_name,
-                }
+                (user_id, device_id, stream_id, message_json, self._instance_name)
                 for user_id, messages_by_device in local_by_user_then_device.items()
                 for device_id, message_json in messages_by_device.items()
             ],
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 52fbf50db6..b2a5cd9a65 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -53,6 +53,7 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
+issue_8631_logger = logging.getLogger("synapse.8631_debug")
 
 DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
     "drop_device_list_streams_non_unique_indexes"
@@ -191,7 +192,7 @@ class DeviceWorkerStore(SQLBaseStore):
     @trace
     async def get_device_updates_by_remote(
         self, destination: str, from_stream_id: int, limit: int
-    ) -> Tuple[int, List[Tuple[str, dict]]]:
+    ) -> Tuple[int, List[Tuple[str, JsonDict]]]:
         """Get a stream of device updates to send to the given remote server.
 
         Args:
@@ -200,9 +201,10 @@ class DeviceWorkerStore(SQLBaseStore):
             limit: Maximum number of device updates to return
 
         Returns:
-            A mapping from the  current stream id (ie, the stream id of the last
-            update included in the response), and the list of updates, where
-            each update is a pair of EDU type and EDU contents.
+            - The current stream id (i.e. the stream id of the last update included
+              in the response); and
+            - The list of updates, where each update is a pair of EDU type and
+              EDU contents.
         """
         now_stream_id = self.get_device_stream_token()
 
@@ -221,10 +223,19 @@ class DeviceWorkerStore(SQLBaseStore):
             limit,
         )
 
+        # We need to ensure `updates` doesn't grow too big.
+        # Currently: `len(updates) <= limit`.
+
         # Return an empty list if there are no updates
         if not updates:
             return now_stream_id, []
 
+        if issue_8631_logger.isEnabledFor(logging.DEBUG):
+            data = {(user, device): stream_id for user, device, stream_id, _ in updates}
+            issue_8631_logger.debug(
+                "device updates need to be sent to %s: %s", destination, data
+            )
+
         # get the cross-signing keys of the users in the list, so that we can
         # determine which of the device changes were cross-signing keys
         users = {r[0] for r in updates}
@@ -270,19 +281,50 @@ class DeviceWorkerStore(SQLBaseStore):
         # The most recent request's opentracing_context is used as the
         # context which created the Edu.
 
+        # This is the stream ID that we will return for the consumer to resume
+        # following this stream later.
+        last_processed_stream_id = from_stream_id
+
         query_map = {}
         cross_signing_keys_by_user = {}
         for user_id, device_id, update_stream_id, update_context in updates:
-            if (
+            # Calculate the remaining length budget.
+            # Note that, for now, each entry in `cross_signing_keys_by_user`
+            # gives rise to two device updates in the result, so those cost twice
+            # as much (and are the whole reason we need to separately calculate
+            # the budget; we know len(updates) <= limit otherwise!)
+            # N.B. len() on dicts is cheap since they store their size.
+            remaining_length_budget = limit - (
+                len(query_map) + 2 * len(cross_signing_keys_by_user)
+            )
+            assert remaining_length_budget >= 0
+
+            is_master_key_update = (
                 user_id in master_key_by_user
                 and device_id == master_key_by_user[user_id]["device_id"]
-            ):
-                result = cross_signing_keys_by_user.setdefault(user_id, {})
-                result["master_key"] = master_key_by_user[user_id]["key_info"]
-            elif (
+            )
+            is_self_signing_key_update = (
                 user_id in self_signing_key_by_user
                 and device_id == self_signing_key_by_user[user_id]["device_id"]
+            )
+
+            is_cross_signing_key_update = (
+                is_master_key_update or is_self_signing_key_update
+            )
+
+            if (
+                is_cross_signing_key_update
+                and user_id not in cross_signing_keys_by_user
             ):
+                # This will give rise to 2 device updates.
+                # If we don't have the budget, stop here!
+                if remaining_length_budget < 2:
+                    break
+
+            if is_master_key_update:
+                result = cross_signing_keys_by_user.setdefault(user_id, {})
+                result["master_key"] = master_key_by_user[user_id]["key_info"]
+            elif is_self_signing_key_update:
                 result = cross_signing_keys_by_user.setdefault(user_id, {})
                 result["self_signing_key"] = self_signing_key_by_user[user_id][
                     "key_info"
@@ -290,24 +332,58 @@ class DeviceWorkerStore(SQLBaseStore):
             else:
                 key = (user_id, device_id)
 
+                if key not in query_map and remaining_length_budget < 1:
+                    # We don't have space for a new entry
+                    break
+
                 previous_update_stream_id, _ = query_map.get(key, (0, None))
 
                 if update_stream_id > previous_update_stream_id:
+                    # FIXME If this overwrites an older update, this discards the
+                    #  previous OpenTracing context.
+                    #  It might make it harder to track down issues using OpenTracing.
+                    #  If there's a good reason why it doesn't matter, a comment here
+                    #  about that would not hurt.
                     query_map[key] = (update_stream_id, update_context)
 
+            # As this update has been added to the response, advance the stream
+            # position.
+            last_processed_stream_id = update_stream_id
+
+        # In the worst case scenario, each update is for a distinct user and is
+        # added either to the query_map or to cross_signing_keys_by_user,
+        # but not both:
+        # len(query_map) + len(cross_signing_keys_by_user) <= len(updates) here,
+        # so len(query_map) + len(cross_signing_keys_by_user) <= limit.
+
         results = await self._get_device_update_edus_by_remote(
             destination, from_stream_id, query_map
         )
 
-        # add the updated cross-signing keys to the results list
+        # len(results) <= len(query_map) here,
+        # so len(results) + len(cross_signing_keys_by_user) <= limit.
+
+        # Add the updated cross-signing keys to the results list
         for user_id, result in cross_signing_keys_by_user.items():
             result["user_id"] = user_id
             results.append(("m.signing_key_update", result))
             # also send the unstable version
             # FIXME: remove this when enough servers have upgraded
+            #        and remove the length budgeting above.
             results.append(("org.matrix.signing_key_update", result))
 
-        return now_stream_id, results
+        if issue_8631_logger.isEnabledFor(logging.DEBUG):
+            for (user_id, edu) in results:
+                issue_8631_logger.debug(
+                    "device update to %s for %s from %s to %s: %s",
+                    destination,
+                    user_id,
+                    from_stream_id,
+                    last_processed_stream_id,
+                    edu,
+                )
+
+        return last_processed_stream_id, results
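
To make the length budgeting above concrete: with limit = 100, 40 entries
already in query_map and 3 users in cross_signing_keys_by_user, the budget is
100 - (40 + 2 * 3) = 54, so the loop can still admit 54 plain device updates or
27 further cross-signing users. A tiny sketch of the invariant:

    limit = 100
    query_map_len, cross_signing_users = 40, 3
    remaining = limit - (query_map_len + 2 * cross_signing_users)
    assert remaining == 54
    # The factor of two exists because each cross-signing user later emits
    # both an m.signing_key_update EDU and its unstable org.matrix variant.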
 
     def _get_device_updates_by_remote_txn(
         self,
@@ -316,7 +392,7 @@ class DeviceWorkerStore(SQLBaseStore):
         from_stream_id: int,
         now_stream_id: int,
         limit: int,
-    ):
+    ) -> List[Tuple[str, str, int, Optional[str]]]:
         """Return device update information for a given remote destination
 
         Args:
@@ -327,7 +403,11 @@ class DeviceWorkerStore(SQLBaseStore):
             limit: Maximum number of device updates to return
 
         Returns:
-            List: List of device updates
+            List: List of device update tuples:
+                - user_id
+                - device_id
+                - stream_id
+                - opentracing_context
         """
         # get the list of device updates that need to be sent
         sql = """
@@ -351,15 +431,21 @@ class DeviceWorkerStore(SQLBaseStore):
         Args:
             destination: The host the device updates are intended for
             from_stream_id: The minimum stream_id to filter updates by, exclusive
-            query_map (Dict[(str, str): (int, str|None)]): Dictionary mapping
-                user_id/device_id to update stream_id and the relevant json-encoded
-                opentracing context
+            query_map: Dictionary mapping (user_id, device_id) to
+                (update stream_id, the relevant json-encoded opentracing context)
 
         Returns:
-            List of objects representing an device update EDU
+            List of objects representing a device update EDU.
+
+        Postconditions:
+            The returned list has a length not exceeding that of the query_map:
+                len(result) <= len(query_map)
         """
         devices = (
             await self.get_e2e_device_keys_and_signatures(
+                # Because these are (user_id, device_id) tuples where no
+                # device_id is None, the returned list's length will not
+                # exceed that of query_map.
                 query_map.keys(),
                 include_all_devices=True,
                 include_deleted_devices=True,
@@ -1386,12 +1472,9 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="device_lists_remote_cache",
+            keys=("user_id", "device_id", "content"),
             values=[
-                {
-                    "user_id": user_id,
-                    "device_id": content["device_id"],
-                    "content": json_encoder.encode(content),
-                }
+                (user_id, content["device_id"], json_encoder.encode(content))
                 for content in devices
             ],
         )
@@ -1479,8 +1562,9 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="device_lists_stream",
+            keys=("stream_id", "user_id", "device_id"),
             values=[
-                {"stream_id": stream_id, "user_id": user_id, "device_id": device_id}
+                (stream_id, user_id, device_id)
                 for stream_id, device_id in zip(stream_ids, device_ids)
             ],
         )
@@ -1507,18 +1591,27 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="device_lists_outbound_pokes",
+            keys=(
+                "destination",
+                "stream_id",
+                "user_id",
+                "device_id",
+                "sent",
+                "ts",
+                "opentracing_context",
+            ),
             values=[
-                {
-                    "destination": destination,
-                    "stream_id": next(next_stream_id),
-                    "user_id": user_id,
-                    "device_id": device_id,
-                    "sent": False,
-                    "ts": now,
-                    "opentracing_context": json_encoder.encode(context)
+                (
+                    destination,
+                    next(next_stream_id),
+                    user_id,
+                    device_id,
+                    False,
+                    now,
+                    json_encoder.encode(context)
                     if whitelisted_homeserver(destination)
                     else "{}",
-                }
+                )
                 for destination in hosts
                 for device_id in device_ids
             ],
diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py
index f76c6121e8..5903fdaf00 100644
--- a/synapse/storage/databases/main/directory.py
+++ b/synapse/storage/databases/main/directory.py
@@ -112,10 +112,8 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore):
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="room_alias_servers",
-                values=[
-                    {"room_alias": room_alias.to_string(), "server": server}
-                    for server in servers
-                ],
+                keys=("room_alias", "server"),
+                values=[(room_alias.to_string(), server) for server in servers],
             )
 
             self._invalidate_cache_and_stream(
diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py
index 0cb48b9dd7..b789a588a5 100644
--- a/synapse/storage/databases/main/e2e_room_keys.py
+++ b/synapse/storage/databases/main/e2e_room_keys.py
@@ -110,16 +110,16 @@ class EndToEndRoomKeyStore(SQLBaseStore):
         values = []
         for (room_id, session_id, room_key) in room_keys:
             values.append(
-                {
-                    "user_id": user_id,
-                    "version": version_int,
-                    "room_id": room_id,
-                    "session_id": session_id,
-                    "first_message_index": room_key["first_message_index"],
-                    "forwarded_count": room_key["forwarded_count"],
-                    "is_verified": room_key["is_verified"],
-                    "session_data": json_encoder.encode(room_key["session_data"]),
-                }
+                (
+                    user_id,
+                    version_int,
+                    room_id,
+                    session_id,
+                    room_key["first_message_index"],
+                    room_key["forwarded_count"],
+                    room_key["is_verified"],
+                    json_encoder.encode(room_key["session_data"]),
+                )
             )
             log_kv(
                 {
@@ -131,7 +131,19 @@ class EndToEndRoomKeyStore(SQLBaseStore):
             )
 
         await self.db_pool.simple_insert_many(
-            table="e2e_room_keys", values=values, desc="add_e2e_room_keys"
+            table="e2e_room_keys",
+            keys=(
+                "user_id",
+                "version",
+                "room_id",
+                "session_id",
+                "first_message_index",
+                "forwarded_count",
+                "is_verified",
+                "session_data",
+            ),
+            values=values,
+            desc="add_e2e_room_keys",
         )
 
     @trace
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 57b5ffbad3..1f8447b507 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -50,16 +50,16 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class DeviceKeyLookupResult:
     """The type returned by get_e2e_device_keys_and_signatures"""
 
-    display_name = attr.ib(type=Optional[str])
+    display_name: Optional[str]
 
     # the key data from e2e_device_keys_json. Typically includes fields like
     # "algorithm", "keys" (including the curve25519 identity key and the ed25519 signing
     # key) and "signatures" (a map from (user id) to (key id/device_id) to signature.)
-    keys = attr.ib(type=Optional[JsonDict])
+    keys: Optional[JsonDict]
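
The attrs classes in this changeset move to `auto_attribs`, where annotated
class attributes replace explicit `attr.ib(type=...)` calls. A minimal sketch
of the equivalence (an illustrative class, not Synapse code):

    from typing import Optional

    import attr

    @attr.s(slots=True)
    class Before:
        display_name = attr.ib(type=Optional[str])

    @attr.s(slots=True, auto_attribs=True)
    class After:
        display_name: Optional[str]

    assert Before("x").display_name == After("x").display_name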
 
 
 class EndToEndKeyBackgroundStore(SQLBaseStore):
@@ -387,15 +387,16 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="e2e_one_time_keys_json",
+                keys=(
+                    "user_id",
+                    "device_id",
+                    "algorithm",
+                    "key_id",
+                    "ts_added_ms",
+                    "key_json",
+                ),
                 values=[
-                    {
-                        "user_id": user_id,
-                        "device_id": device_id,
-                        "algorithm": algorithm,
-                        "key_id": key_id,
-                        "ts_added_ms": time_now,
-                        "key_json": json_bytes,
-                    }
+                    (user_id, device_id, algorithm, key_id, time_now, json_bytes)
                     for algorithm, key_id, json_bytes in new_keys
                 ],
             )
@@ -1186,15 +1187,22 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
         """
         await self.db_pool.simple_insert_many(
             "e2e_cross_signing_signatures",
-            [
-                {
-                    "user_id": user_id,
-                    "key_id": item.signing_key_id,
-                    "target_user_id": item.target_user_id,
-                    "target_device_id": item.target_device_id,
-                    "signature": item.signature,
-                }
+            keys=(
+                "user_id",
+                "key_id",
+                "target_user_id",
+                "target_device_id",
+                "signature",
+            ),
+            values=[
+                (
+                    user_id,
+                    item.signing_key_id,
+                    item.target_user_id,
+                    item.target_device_id,
+                    item.signature,
+                )
                 for item in signatures
             ],
-            "add_e2e_signing_key",
+            desc="add_e2e_signing_key",
         )
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index cace55cabc..22f6474127 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -14,18 +14,9 @@
 import itertools
 import logging
 from queue import Empty, PriorityQueue
-from typing import (
-    TYPE_CHECKING,
-    Collection,
-    Dict,
-    Iterable,
-    List,
-    NamedTuple,
-    Optional,
-    Set,
-    Tuple,
-)
+from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple
 
+import attr
 from prometheus_client import Counter, Gauge
 
 from synapse.api.constants import MAX_DEPTH, EventTypes
@@ -71,7 +62,8 @@ logger = logging.getLogger(__name__)
 
 
 # All the info we need while iterating the DAG while backfilling
-class BackfillQueueNavigationItem(NamedTuple):
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class BackfillQueueNavigationItem:
     depth: int
     stream_ordering: int
     event_id: str
@@ -83,7 +75,7 @@ class _NoChainCoverIndex(Exception):
         super().__init__("Unexpectedly no chain cover for events in %s" % (room_id,))
 
 
-class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBaseStore):
+class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBaseStore):
     def __init__(
         self,
         database: DatabasePool,
@@ -1051,7 +1043,6 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             batch_connection_query,
             (insertion_event_id, limit),
         )
-        batch_start_event_id_results = txn.fetchall()
         return [
             BackfillQueueNavigationItem(
                 depth=row[0],
@@ -1059,7 +1050,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
                 event_id=row[2],
                 type=row[3],
             )
-            for row in batch_start_event_id_results
+            for row in txn
         ]
 
     def _get_connected_prev_event_backfill_results_txn(
@@ -1095,7 +1086,6 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             connected_prev_event_query,
             (event_id, False, limit),
         )
-        prev_event_id_results = txn.fetchall()
         return [
             BackfillQueueNavigationItem(
                 depth=row[0],
@@ -1103,7 +1093,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
                 event_id=row[2],
                 type=row[3],
             )
-            for row in prev_event_id_results
+            for row in txn
         ]
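
Dropping the intermediate `fetchall()` relies on DB-API cursors being directly
iterable, yielding rows one at a time rather than materialising the whole
result set first. A self-contained sketch of the pattern (sqlite3 used purely
for illustration):

    import sqlite3

    cur = sqlite3.connect(":memory:").execute("SELECT 1, 2, 'ev', 'm.room.message'")
    # Iterating the cursor streams rows; no fetchall() list is built first.
    rows = [tuple(row) for row in cur]
    assert rows == [(1, 2, "ev", "m.room.message")]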
 
     async def get_backfill_events(
@@ -1553,7 +1543,10 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
 
             if room_version.event_format == EventFormatVersions.V1:
                 for prev_event_tuple in prev_events:
-                    if not isinstance(prev_event_tuple, list) or len(prev_events) != 2:
+                    if (
+                        not isinstance(prev_event_tuple, list)
+                        or len(prev_event_tuple) != 2
+                    ):
                         logger.info("Invalid prev_events for %s", event_id)
                         break
 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index a98e6b2593..b7c4c62222 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -875,14 +875,21 @@ class EventPushActionsWorkerStore(SQLBaseStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="event_push_summary",
+            keys=(
+                "user_id",
+                "room_id",
+                "notif_count",
+                "unread_count",
+                "stream_ordering",
+            ),
             values=[
-                {
-                    "user_id": user_id,
-                    "room_id": room_id,
-                    "notif_count": summary.notif_count,
-                    "unread_count": summary.unread_count,
-                    "stream_ordering": summary.stream_ordering,
-                }
+                (
+                    user_id,
+                    room_id,
+                    summary.notif_count,
+                    summary.unread_count,
+                    summary.stream_ordering,
+                )
                 for ((user_id, room_id), summary) in summaries.items()
                 if summary.old_user_id is None
             ],
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 6fd2ba00f9..b804185c40 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -39,7 +39,6 @@ from synapse.api.room_versions import RoomVersions
 from synapse.crypto.event_signing import compute_event_reference_hash
 from synapse.events import EventBase  # noqa: F401
 from synapse.events.snapshot import EventContext  # noqa: F401
-from synapse.logging.utils import log_function
 from synapse.storage._base import db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -69,7 +68,7 @@ event_counter = Counter(
 )
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class DeltaState:
     """Deltas to use to update the `current_state_events` table.
 
@@ -80,9 +79,9 @@ class DeltaState:
             should e.g. be removed from `current_state_events` table.
     """
 
-    to_delete = attr.ib(type=List[Tuple[str, str]])
-    to_insert = attr.ib(type=StateMap[str])
-    no_longer_in_room = attr.ib(type=bool, default=False)
+    to_delete: List[Tuple[str, str]]
+    to_insert: StateMap[str]
+    no_longer_in_room: bool = False
 
 
 class PersistEventsStore:
@@ -328,7 +327,6 @@ class PersistEventsStore:
 
         return existing_prevs
 
-    @log_function
     def _persist_events_txn(
         self,
         txn: LoggingTransaction,
@@ -442,12 +440,9 @@ class PersistEventsStore:
         self.db_pool.simple_insert_many_txn(
             txn,
             table="event_auth",
+            keys=("event_id", "room_id", "auth_id"),
             values=[
-                {
-                    "event_id": event.event_id,
-                    "room_id": event.room_id,
-                    "auth_id": auth_id,
-                }
+                (event.event_id, event.room_id, auth_id)
                 for event in events
                 for auth_id in event.auth_event_ids()
                 if event.is_state()
@@ -675,8 +670,9 @@ class PersistEventsStore:
         db_pool.simple_insert_many_txn(
             txn,
             table="event_auth_chains",
+            keys=("event_id", "chain_id", "sequence_number"),
             values=[
-                {"event_id": event_id, "chain_id": c_id, "sequence_number": seq}
+                (event_id, c_id, seq)
                 for event_id, (c_id, seq) in new_chain_tuples.items()
             ],
         )
@@ -782,13 +778,14 @@ class PersistEventsStore:
         db_pool.simple_insert_many_txn(
             txn,
             table="event_auth_chain_links",
+            keys=(
+                "origin_chain_id",
+                "origin_sequence_number",
+                "target_chain_id",
+                "target_sequence_number",
+            ),
             values=[
-                {
-                    "origin_chain_id": source_id,
-                    "origin_sequence_number": source_seq,
-                    "target_chain_id": target_id,
-                    "target_sequence_number": target_seq,
-                }
+                (source_id, source_seq, target_id, target_seq)
                 for (
                     source_id,
                     source_seq,
@@ -943,20 +940,28 @@ class PersistEventsStore:
             txn_id = getattr(event.internal_metadata, "txn_id", None)
             if token_id and txn_id:
                 to_insert.append(
-                    {
-                        "event_id": event.event_id,
-                        "room_id": event.room_id,
-                        "user_id": event.sender,
-                        "token_id": token_id,
-                        "txn_id": txn_id,
-                        "inserted_ts": self._clock.time_msec(),
-                    }
+                    (
+                        event.event_id,
+                        event.room_id,
+                        event.sender,
+                        token_id,
+                        txn_id,
+                        self._clock.time_msec(),
+                    )
                 )
 
         if to_insert:
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="event_txn_id",
+                keys=(
+                    "event_id",
+                    "room_id",
+                    "user_id",
+                    "token_id",
+                    "txn_id",
+                    "inserted_ts",
+                ),
                 values=to_insert,
             )
 
@@ -1161,8 +1166,9 @@ class PersistEventsStore:
         self.db_pool.simple_insert_many_txn(
             txn,
             table="event_forward_extremities",
+            keys=("event_id", "room_id"),
             values=[
-                {"event_id": ev_id, "room_id": room_id}
+                (ev_id, room_id)
                 for room_id, new_extrem in new_forward_extremities.items()
                 for ev_id in new_extrem
             ],
@@ -1174,12 +1180,9 @@ class PersistEventsStore:
         self.db_pool.simple_insert_many_txn(
             txn,
             table="stream_ordering_to_exterm",
+            keys=("room_id", "event_id", "stream_ordering"),
             values=[
-                {
-                    "room_id": room_id,
-                    "event_id": event_id,
-                    "stream_ordering": max_stream_order,
-                }
+                (room_id, event_id, max_stream_order)
                 for room_id, new_extrem in new_forward_extremities.items()
                 for event_id in new_extrem
             ],
@@ -1251,20 +1254,22 @@ class PersistEventsStore:
         for room_id, depth in depth_updates.items():
             self._update_min_depth_for_room_txn(txn, room_id, depth)
 
-    def _update_outliers_txn(self, txn, events_and_contexts):
+    def _update_outliers_txn(
+        self,
+        txn: LoggingTransaction,
+        events_and_contexts: List[Tuple[EventBase, EventContext]],
+    ) -> List[Tuple[EventBase, EventContext]]:
         """Update any outliers with new event info.
 
-        This turns outliers into ex-outliers (unless the new event was
-        rejected).
+        This turns outliers into ex-outliers (unless the new event was rejected), and
+        also removes any other events we have already seen from the list.
 
         Args:
-            txn (twisted.enterprise.adbapi.Connection): db connection
-            events_and_contexts (list[(EventBase, EventContext)]): events
-                we are persisting
+            txn: db connection
+            events_and_contexts: events we are persisting
 
         Returns:
-            list[(EventBase, EventContext)] new list, without events which
-            are already in the events table.
+            new list, without events which are already in the events table.
         """
         txn.execute(
             "SELECT event_id, outlier FROM events WHERE event_id in (%s)"
@@ -1272,7 +1277,9 @@ class PersistEventsStore:
             [event.event_id for event, _ in events_and_contexts],
         )
 
-        have_persisted = {event_id: outlier for event_id, outlier in txn}
+        have_persisted: Dict[str, bool] = {
+            event_id: outlier for event_id, outlier in txn
+        }
 
         to_remove = set()
         for event, context in events_and_contexts:
@@ -1282,15 +1289,22 @@ class PersistEventsStore:
             to_remove.add(event)
 
             if context.rejected:
-                # If the event is rejected then we don't care if the event
-                # was an outlier or not.
+                # If the incoming event is rejected then we don't care if the event
+                # was an outlier or not - what we have is at least as good.
                 continue
 
             outlier_persisted = have_persisted[event.event_id]
             if not event.internal_metadata.is_outlier() and outlier_persisted:
                 # We received a copy of an event that we had already stored as
-                # an outlier in the database. We now have some state at that
+                # an outlier in the database. We now have some state at that event
                 # so we need to update the state_groups table with that state.
+                #
+                # Note that we do not update the stream_ordering of the event in this
+                # scenario. XXX: does this cause bugs? It will mean we won't send such
+                # events down /sync. In general they will be historical events, so that
+                # doesn't matter too much, but that is not always the case.
+
+                logger.info("Updating state for ex-outlier event %s", event.event_id)
 
                 # insert into event_to_state_groups.
                 try:
@@ -1342,7 +1356,7 @@ class PersistEventsStore:
             d.pop("redacted_because", None)
             return d
 
-        self.db_pool.simple_insert_many_values_txn(
+        self.db_pool.simple_insert_many_txn(
             txn,
             table="event_json",
             keys=("event_id", "room_id", "internal_metadata", "json", "format_version"),
@@ -1358,7 +1372,7 @@ class PersistEventsStore:
             ),
         )
 
-        self.db_pool.simple_insert_many_values_txn(
+        self.db_pool.simple_insert_many_txn(
             txn,
             table="events",
             keys=(
@@ -1375,6 +1389,8 @@ class PersistEventsStore:
                 "received_ts",
                 "sender",
                 "contains_url",
+                "state_key",
+                "rejection_reason",
             ),
             values=(
                 (
@@ -1391,8 +1407,10 @@ class PersistEventsStore:
                     self._clock.time_msec(),
                     event.sender,
                     "url" in event.content and isinstance(event.content["url"], str),
+                    event.get_state_key(),
+                    context.rejected or None,
                 )
-                for event, _ in events_and_contexts
+                for event, context in events_and_contexts
             ),
         )
 
@@ -1412,7 +1430,7 @@ class PersistEventsStore:
         )
         txn.execute(sql + clause, [False] + args)
 
-        self.db_pool.simple_insert_many_values_txn(
+        self.db_pool.simple_insert_many_txn(
             txn,
             table="state_events",
             keys=("event_id", "room_id", "type", "state_key"),
@@ -1442,6 +1460,7 @@ class PersistEventsStore:
         for event, context in events_and_contexts:
             if context.rejected:
                 # Insert the event_id into the rejections table
+                # (events.rejection_reason has already been done)
                 self._store_rejections_txn(txn, event.event_id, context.rejected)
                 to_remove.add(event)
 
@@ -1622,14 +1641,9 @@ class PersistEventsStore:
         return self.db_pool.simple_insert_many_txn(
             txn=txn,
             table="event_labels",
+            keys=("event_id", "label", "room_id", "topological_ordering"),
             values=[
-                {
-                    "event_id": event_id,
-                    "label": label,
-                    "room_id": room_id,
-                    "topological_ordering": topological_ordering,
-                }
-                for label in labels
+                (event_id, label, room_id, topological_ordering) for label in labels
             ],
         )
 
@@ -1657,16 +1671,13 @@ class PersistEventsStore:
         vals = []
         for event in events:
             ref_alg, ref_hash_bytes = compute_event_reference_hash(event)
-            vals.append(
-                {
-                    "event_id": event.event_id,
-                    "algorithm": ref_alg,
-                    "hash": memoryview(ref_hash_bytes),
-                }
-            )
+            vals.append((event.event_id, ref_alg, memoryview(ref_hash_bytes)))
 
         self.db_pool.simple_insert_many_txn(
-            txn, table="event_reference_hashes", values=vals
+            txn,
+            table="event_reference_hashes",
+            keys=("event_id", "algorithm", "hash"),
+            values=vals,
         )
 
     def _store_room_members_txn(
@@ -1689,18 +1700,25 @@ class PersistEventsStore:
         self.db_pool.simple_insert_many_txn(
             txn,
             table="room_memberships",
+            keys=(
+                "event_id",
+                "user_id",
+                "sender",
+                "room_id",
+                "membership",
+                "display_name",
+                "avatar_url",
+            ),
             values=[
-                {
-                    "event_id": event.event_id,
-                    "user_id": event.state_key,
-                    "sender": event.user_id,
-                    "room_id": event.room_id,
-                    "membership": event.membership,
-                    "display_name": non_null_str_or_none(
-                        event.content.get("displayname")
-                    ),
-                    "avatar_url": non_null_str_or_none(event.content.get("avatar_url")),
-                }
+                (
+                    event.event_id,
+                    event.state_key,
+                    event.user_id,
+                    event.room_id,
+                    event.membership,
+                    non_null_str_or_none(event.content.get("displayname")),
+                    non_null_str_or_none(event.content.get("avatar_url")),
+                )
                 for event in events
             ],
         )
@@ -1791,6 +1809,13 @@ class PersistEventsStore:
             txn.call_after(
                 self.store.get_thread_summary.invalidate, (parent_id, event.room_id)
             )
+            # It should be safe to only invalidate the cache if the user has not
+            # previously participated in the thread, but that's difficult (and
+            # potentially error-prone) so it is always invalidated.
+            txn.call_after(
+                self.store.get_thread_participated.invalidate,
+                (parent_id, event.room_id, event.sender),
+            )
 
     def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase):
         """Handles keeping track of insertion events and edges/connections.
@@ -2163,13 +2188,9 @@ class PersistEventsStore:
         self.db_pool.simple_insert_many_txn(
             txn,
             table="event_edges",
+            keys=("event_id", "prev_event_id", "room_id", "is_state"),
             values=[
-                {
-                    "event_id": ev.event_id,
-                    "prev_event_id": e_id,
-                    "room_id": ev.room_id,
-                    "is_state": False,
-                }
+                (ev.event_id, e_id, ev.room_id, False)
                 for ev in events
                 for e_id in ev.prev_event_ids()
             ],
@@ -2235,17 +2256,17 @@ class PersistEventsStore:
         )
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _LinkMap:
     """A helper type for tracking links between chains."""
 
     # Stores the set of links as nested maps: source chain ID -> target chain ID
     # -> source sequence number -> target sequence number.
-    maps = attr.ib(type=Dict[int, Dict[int, Dict[int, int]]], factory=dict)
+    maps: Dict[int, Dict[int, Dict[int, int]]] = attr.Factory(dict)
 
     # Stores the links that have been added (with new set to true), as tuples of
     # `(source chain ID, source sequence no, target chain ID, target sequence no.)`
-    additions = attr.ib(type=Set[Tuple[int, int, int, int]], factory=set)
+    additions: Set[Tuple[int, int, int, int]] = attr.Factory(set)
 
     def add_link(
         self,
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index a68f14ba48..d5f0059665 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -65,22 +65,22 @@ class _BackgroundUpdates:
     REPLACE_STREAM_ORDERING_COLUMN = "replace_stream_ordering_column"
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class _CalculateChainCover:
     """Return value for _calculate_chain_cover_txn."""
 
     # The last room_id/depth/stream processed.
-    room_id = attr.ib(type=str)
-    depth = attr.ib(type=int)
-    stream = attr.ib(type=int)
+    room_id: str
+    depth: int
+    stream: int
 
     # Number of rows processed
-    processed_count = attr.ib(type=int)
+    processed_count: int
 
     # Map from room_id to last depth/stream processed for each room that we have
     # processed all events for (i.e. the rooms we can flip the
     # `has_auth_chain_index` for)
-    finished_room_map = attr.ib(type=Dict[str, Tuple[int, int]])
+    finished_room_map: Dict[str, Tuple[int, int]]
 
 
 class EventsBackgroundUpdatesStore(SQLBaseStore):
@@ -684,13 +684,14 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
                     self.db_pool.simple_insert_many_txn(
                         txn=txn,
                         table="event_labels",
+                        keys=("event_id", "label", "room_id", "topological_ordering"),
                         values=[
-                            {
-                                "event_id": event_id,
-                                "label": label,
-                                "room_id": event_json["room_id"],
-                                "topological_ordering": event_json["depth"],
-                            }
+                            (
+                                event_id,
+                                label,
+                                event_json["room_id"],
+                                event_json["depth"],
+                            )
                             for label in event_json["content"].get(
                                 EventContentFields.LABELS, []
                             )
@@ -803,29 +804,19 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
 
             if not has_state:
                 state_events.append(
-                    {
-                        "event_id": event.event_id,
-                        "room_id": event.room_id,
-                        "type": event.type,
-                        "state_key": event.state_key,
-                    }
+                    (event.event_id, event.room_id, event.type, event.state_key)
                 )
 
             if not has_event_auth:
                 # Old, dodgy, events may have duplicate auth events, which we
                 # need to deduplicate as we have a unique constraint.
                 for auth_id in set(event.auth_event_ids()):
-                    auth_events.append(
-                        {
-                            "room_id": event.room_id,
-                            "event_id": event.event_id,
-                            "auth_id": auth_id,
-                        }
-                    )
+                    auth_events.append((event.event_id, event.room_id, auth_id))
 
         if state_events:
             await self.db_pool.simple_insert_many(
                 table="state_events",
+                keys=("event_id", "room_id", "type", "state_key"),
                 values=state_events,
                 desc="_rejected_events_metadata_state_events",
             )
@@ -833,6 +824,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
         if auth_events:
             await self.db_pool.simple_insert_many(
                 table="event_auth",
+                keys=("event_id", "room_id", "auth_id"),
                 values=auth_events,
                 desc="_rejected_events_metadata_event_auth",
             )
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
index cbf9ec38f7..4f05811a77 100644
--- a/synapse/storage/databases/main/presence.py
+++ b/synapse/storage/databases/main/presence.py
@@ -129,18 +129,29 @@ class PresenceStore(PresenceBackgroundUpdateStore):
         self.db_pool.simple_insert_many_txn(
             txn,
             table="presence_stream",
+            keys=(
+                "stream_id",
+                "user_id",
+                "state",
+                "last_active_ts",
+                "last_federation_update_ts",
+                "last_user_sync_ts",
+                "status_msg",
+                "currently_active",
+                "instance_name",
+            ),
             values=[
-                {
-                    "stream_id": stream_id,
-                    "user_id": state.user_id,
-                    "state": state.state,
-                    "last_active_ts": state.last_active_ts,
-                    "last_federation_update_ts": state.last_federation_update_ts,
-                    "last_user_sync_ts": state.last_user_sync_ts,
-                    "status_msg": state.status_msg,
-                    "currently_active": state.currently_active,
-                    "instance_name": self._instance_name,
-                }
+                (
+                    stream_id,
+                    state.user_id,
+                    state.state,
+                    state.last_active_ts,
+                    state.last_federation_update_ts,
+                    state.last_user_sync_ts,
+                    state.status_msg,
+                    state.currently_active,
+                    self._instance_name,
+                )
                 for stream_id, state in zip(stream_orderings, presence_states)
             ],
         )
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 91b0576b85..e87a8fb85d 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -390,7 +390,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "event_search",
             "events",
             "group_rooms",
-            "public_room_list_stream",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index e01c94930a..92539f5d41 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -42,7 +42,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-def _load_rules(rawrules, enabled_map, use_new_defaults=False):
+def _load_rules(rawrules, enabled_map):
     ruleslist = []
     for rawrule in rawrules:
         rule = dict(rawrule)
@@ -52,7 +52,7 @@ def _load_rules(rawrules, enabled_map, use_new_defaults=False):
         ruleslist.append(rule)
 
     # We're going to be mutating this a lot, so do a deep copy
-    rules = list(list_with_base_rules(ruleslist, use_new_defaults))
+    rules = list(list_with_base_rules(ruleslist))
 
     for i, rule in enumerate(rules):
         rule_id = rule["rule_id"]
@@ -112,10 +112,6 @@ class PushRulesWorkerStore(
             prefilled_cache=push_rules_prefill,
         )
 
-        self._users_new_default_push_rules = (
-            hs.config.server.users_new_default_push_rules
-        )
-
     @abc.abstractmethod
     def get_max_push_rules_stream_id(self):
         """Get the position of the push rules stream.
@@ -145,9 +141,7 @@ class PushRulesWorkerStore(
 
         enabled_map = await self.get_push_rules_enabled_for_user(user_id)
 
-        use_new_defaults = user_id in self._users_new_default_push_rules
-
-        return _load_rules(rows, enabled_map, use_new_defaults)
+        return _load_rules(rows, enabled_map)
 
     @cached(max_entries=5000)
     async def get_push_rules_enabled_for_user(self, user_id) -> Dict[str, bool]:
@@ -206,13 +200,7 @@ class PushRulesWorkerStore(
         enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids)
 
         for user_id, rules in results.items():
-            use_new_defaults = user_id in self._users_new_default_push_rules
-
-            results[user_id] = _load_rules(
-                rules,
-                enabled_map_by_user.get(user_id, {}),
-                use_new_defaults,
-            )
+            results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
 
         return results
 
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 747b4f31df..cf64cd63a4 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -561,13 +561,9 @@ class PusherStore(PusherWorkerStore):
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="deleted_pushers",
+                keys=("stream_id", "app_id", "pushkey", "user_id"),
                 values=[
-                    {
-                        "stream_id": stream_id,
-                        "app_id": pusher.app_id,
-                        "pushkey": pusher.pushkey,
-                        "user_id": user_id,
-                    }
+                    (stream_id, pusher.app_id, pusher.pushkey, user_id)
                     for stream_id, pusher in zip(stream_ids, pushers)
                 ],
             )
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 4175c82a25..aac94fa464 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -51,7 +51,7 @@ class ExternalIDReuseException(Exception):
     pass
 
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class TokenLookupResult:
     """Result of looking up an access token.
 
@@ -69,14 +69,14 @@ class TokenLookupResult:
             cached.
     """
 
-    user_id = attr.ib(type=str)
-    is_guest = attr.ib(type=bool, default=False)
-    shadow_banned = attr.ib(type=bool, default=False)
-    token_id = attr.ib(type=Optional[int], default=None)
-    device_id = attr.ib(type=Optional[str], default=None)
-    valid_until_ms = attr.ib(type=Optional[int], default=None)
-    token_owner = attr.ib(type=str)
-    token_used = attr.ib(type=bool, default=False)
+    user_id: str
+    is_guest: bool = False
+    shadow_banned: bool = False
+    token_id: Optional[int] = None
+    device_id: Optional[str] = None
+    valid_until_ms: Optional[int] = None
+    token_owner: str = attr.ib()
+    token_used: bool = False
 
     # Make the token owner default to the user ID, which is the common case.
     @token_owner.default
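The `TokenLookupResult` conversion shows the pattern this commit uses for fields whose default is computed by a decorated method: under `auto_attribs=True` the field keeps an explicit `attr.ib()` so the `@token_owner.default` decorator still applies. A standalone sketch of that pattern (class and field names here are illustrative):

    import attr

    @attr.s(frozen=True, slots=True, auto_attribs=True)
    class Example:
        user_id: str
        # Needs attr.ib() rather than a plain annotation so the
        # @owner.default decorator can attach to it.
        owner: str = attr.ib()

        @owner.default
        def _default_owner(self) -> str:
            # Default the owner to the user ID, as TokenLookupResult does.
            return self.user_id

    assert Example(user_id="@alice:example.org").owner == "@alice:example.org"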
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index c6c4bd18da..37468a5183 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -13,17 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Iterable,
-    List,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union, cast
 
 import attr
 from frozendict import frozendict
@@ -43,6 +33,7 @@ from synapse.storage.relations import (
     PaginationChunk,
     RelationPaginationToken,
 )
+from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
 
 if TYPE_CHECKING:
@@ -51,6 +42,30 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class _ThreadAggregation:
+    latest_event: EventBase
+    count: int
+    current_user_participated: bool
+
+
+@attr.s(slots=True, auto_attribs=True)
+class BundledAggregations:
+    """
+    The bundled aggregations for an event.
+
+    Some values require additional processing during serialization.
+    """
+
+    annotations: Optional[JsonDict] = None
+    references: Optional[JsonDict] = None
+    replace: Optional[EventBase] = None
+    thread: Optional[_ThreadAggregation] = None
+
+    def __bool__(self) -> bool:
+        return bool(self.annotations or self.references or self.replace or self.thread)
+
+
 class RelationsWorkerStore(SQLBaseStore):
     def __init__(
         self,
@@ -60,7 +75,6 @@ class RelationsWorkerStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        self._msc1849_enabled = hs.config.experimental.msc1849_enabled
         self._msc3440_enabled = hs.config.experimental.msc3440_enabled
 
     @cached(tree=True)
@@ -384,8 +398,7 @@ class RelationsWorkerStore(SQLBaseStore):
     async def get_thread_summary(
         self, event_id: str, room_id: str
     ) -> Tuple[int, Optional[EventBase]]:
-        """Get the number of threaded replies, the senders of those replies, and
-        the latest reply (if any) for the given event.
+        """Get the number of threaded replies and the latest reply (if any) for the given event.
 
         Args:
             event_id: Summarize the thread related to this event ID.
@@ -398,7 +411,7 @@ class RelationsWorkerStore(SQLBaseStore):
         def _get_thread_summary_txn(
             txn: LoggingTransaction,
         ) -> Tuple[int, Optional[str]]:
-            # Fetch the count of threaded events and the latest event ID.
+            # Fetch the latest event ID in the thread.
             # TODO Should this only allow m.room.message events.
             sql = """
                 SELECT event_id
@@ -419,6 +432,7 @@ class RelationsWorkerStore(SQLBaseStore):
 
             latest_event_id = row[0]
 
+            # Fetch the number of threaded replies.
             sql = """
                 SELECT COUNT(event_id)
                 FROM event_relations
@@ -443,6 +457,44 @@ class RelationsWorkerStore(SQLBaseStore):
 
         return count, latest_event
 
+    @cached()
+    async def get_thread_participated(
+        self, event_id: str, room_id: str, user_id: str
+    ) -> bool:
+        """Get whether the requesting user participated in a thread.
+
+        This is separate from get_thread_summary since that can be cached across
+        all users while this value is specific to the requester.
+
+        Args:
+            event_id: The thread related to this event ID.
+            room_id: The room the event belongs to.
+            user_id: The user requesting the summary.
+
+        Returns:
+            True if the requesting user participated in the thread, otherwise false.
+        """
+
+        def _get_thread_participated_txn(txn: LoggingTransaction) -> bool:
+            # Fetch whether the requester has participated or not.
+            sql = """
+                SELECT 1
+                FROM event_relations
+                INNER JOIN events USING (event_id)
+                WHERE
+                    relates_to_id = ?
+                    AND room_id = ?
+                    AND relation_type = ?
+                    AND sender = ?
+            """
+
+            txn.execute(sql, (event_id, room_id, RelationTypes.THREAD, user_id))
+            return bool(txn.fetchone())
+
+        return await self.db_pool.runInteraction(
+            "get_thread_summary", _get_thread_summary_txn
+        )
+
     async def events_have_relations(
         self,
         parent_ids: List[str],
@@ -546,14 +598,15 @@ class RelationsWorkerStore(SQLBaseStore):
         )
 
     async def _get_bundled_aggregation_for_event(
-        self, event: EventBase
-    ) -> Optional[Dict[str, Any]]:
+        self, event: EventBase, user_id: str
+    ) -> Optional[BundledAggregations]:
         """Generate bundled aggregations for an event.
 
         Note that this does not use a cache, but depends on cached methods.
 
         Args:
             event: The event to calculate bundled aggregations for.
+            user_id: The user requesting the bundled aggregations.
 
         Returns:
             The bundled aggregations for an event, if bundled aggregations are
@@ -577,62 +630,64 @@ class RelationsWorkerStore(SQLBaseStore):
         # The bundled aggregations to include, a mapping of relation type to a
         # type-specific value. Some types include the direct return type here
         # while others need more processing during serialization.
-        aggregations: Dict[str, Any] = {}
+        aggregations = BundledAggregations()
 
         annotations = await self.get_aggregation_groups_for_event(event_id, room_id)
         if annotations.chunk:
-            aggregations[RelationTypes.ANNOTATION] = annotations.to_dict()
+            aggregations.annotations = annotations.to_dict()
 
         references = await self.get_relations_for_event(
             event_id, room_id, RelationTypes.REFERENCE, direction="f"
         )
         if references.chunk:
-            aggregations[RelationTypes.REFERENCE] = references.to_dict()
+            aggregations.references = references.to_dict()
 
         edit = None
         if event.type == EventTypes.Message:
             edit = await self.get_applicable_edit(event_id, room_id)
 
         if edit:
-            aggregations[RelationTypes.REPLACE] = edit
+            aggregations.replace = edit
 
         # If this event is the start of a thread, include a summary of the replies.
         if self._msc3440_enabled:
-            (
-                thread_count,
-                latest_thread_event,
-            ) = await self.get_thread_summary(event_id, room_id)
+            thread_count, latest_thread_event = await self.get_thread_summary(
+                event_id, room_id
+            )
+            participated = await self.get_thread_participated(
+                event_id, room_id, user_id
+            )
             if latest_thread_event:
-                aggregations[RelationTypes.THREAD] = {
-                    # Don't bundle aggregations as this could recurse forever.
-                    "latest_event": latest_thread_event,
-                    "count": thread_count,
-                }
+                aggregations.thread = _ThreadAggregation(
+                    latest_event=latest_thread_event,
+                    count=thread_count,
+                    current_user_participated=participated,
+                )
 
         # Store the bundled aggregations in the event metadata for later use.
         return aggregations
 
     async def get_bundled_aggregations(
-        self, events: Iterable[EventBase]
-    ) -> Dict[str, Dict[str, Any]]:
+        self,
+        events: Iterable[EventBase],
+        user_id: str,
+    ) -> Dict[str, BundledAggregations]:
         """Generate bundled aggregations for events.
 
         Args:
             events: The iterable of events to calculate bundled aggregations for.
+            user_id: The user requesting the bundled aggregations.
 
         Returns:
             A map of event ID to the bundled aggregation for the event. Not all
             events may have bundled aggregations in the results.
         """
-        # If bundled aggregations are disabled, nothing to do.
-        if not self._msc1849_enabled:
-            return {}
 
         # TODO Parallelize.
         results = {}
         for event in events:
-            event_result = await self._get_bundled_aggregation_for_event(event)
-            if event_result is not None:
+            event_result = await self._get_bundled_aggregation_for_event(event, user_id)
+            if event_result:
                 results[event.event_id] = event_result
 
         return results
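Note that the loop above switched from `if event_result is not None` to a plain truthiness check: `BundledAggregations.__bool__` makes an instance with no populated fields falsy, so events that picked up no aggregations are dropped from the results. A small sketch of that behaviour (the reaction payload is illustrative):

    aggs = BundledAggregations()
    assert not aggs  # nothing bundled, so the event is omitted from results

    aggs.annotations = {"chunk": [{"type": "m.reaction", "key": "+1", "count": 2}]}
    assert aggs      # any populated field makes the instance truthy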
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index c0e837854a..95167116c9 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -551,24 +551,24 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             FROM room_stats_state state
             INNER JOIN room_stats_current curr USING (room_id)
             INNER JOIN rooms USING (room_id)
-            %s
-            ORDER BY %s %s
+            {where}
+            ORDER BY {order_by} {direction}, state.room_id {direction}
             LIMIT ?
             OFFSET ?
-        """ % (
-            where_statement,
-            order_by_column,
-            "ASC" if order_by_asc else "DESC",
+        """.format(
+            where=where_statement,
+            order_by=order_by_column,
+            direction="ASC" if order_by_asc else "DESC",
         )
 
         # Use a nested SELECT statement as SQL can't count(*) with an OFFSET
         count_sql = """
             SELECT count(*) FROM (
               SELECT room_id FROM room_stats_state state
-              %s
+              {where}
             ) AS get_room_ids
-        """ % (
-            where_statement,
+        """.format(
+            where=where_statement,
         )
 
         def _get_rooms_paginate_txn(
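Beyond swapping %-interpolation for `str.format`, the rewritten query adds `state.room_id` as a tiebreaker so paginated results stay stable when the primary sort column has duplicate values. Named placeholders make it easy to apply the same direction to both columns, e.g. (a sketch with illustrative values):

    sql_template = "ORDER BY {order_by} {direction}, state.room_id {direction}"
    sql = sql_template.format(order_by="curr.joined_members", direction="DESC")
    # -> "ORDER BY curr.joined_members DESC, state.room_id DESC"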
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index cda80d6511..4489732fda 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -1177,18 +1177,18 @@ class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore):
         await self.db_pool.runInteraction("forget_membership", f)
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _JoinedHostsCache:
     """The cached data used by the `_get_joined_hosts_cache`."""
 
     # Dict of host to the set of their users in the room at the state group.
-    hosts_to_joined_users = attr.ib(type=Dict[str, Set[str]], factory=dict)
+    hosts_to_joined_users: Dict[str, Set[str]] = attr.Factory(dict)
 
     # The state group `hosts_to_joined_users` is derived from. Will be an object
     # if the instance is newly created or if the state is not based on a state
     # group. (An object is used as a sentinel value to ensure that it never is
     # equal to anything else).
-    state_group = attr.ib(type=Union[object, int], factory=object)
+    state_group: Union[object, int] = attr.Factory(object)
 
     def __len__(self):
         return sum(len(v) for v in self.hosts_to_joined_users.values())
diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py
index 3201623fe4..0518b8b910 100644
--- a/synapse/storage/databases/main/signatures.py
+++ b/synapse/storage/databases/main/signatures.py
@@ -12,16 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable, List, Tuple
+from typing import Collection, Dict, List, Tuple
 
 from unpaddedbase64 import encode_base64
 
-from synapse.storage._base import SQLBaseStore
-from synapse.storage.types import Cursor
+from synapse.crypto.event_signing import compute_event_reference_hash
+from synapse.storage.databases.main.events_worker import (
+    EventRedactBehaviour,
+    EventsWorkerStore,
+)
 from synapse.util.caches.descriptors import cached, cachedList
 
 
-class SignatureWorkerStore(SQLBaseStore):
+class SignatureWorkerStore(EventsWorkerStore):
     @cached()
     def get_event_reference_hash(self, event_id):
         # This is a dummy function to allow get_event_reference_hashes
@@ -32,7 +35,7 @@ class SignatureWorkerStore(SQLBaseStore):
         cached_method_name="get_event_reference_hash", list_name="event_ids", num_args=1
     )
     async def get_event_reference_hashes(
-        self, event_ids: Iterable[str]
+        self, event_ids: Collection[str]
     ) -> Dict[str, Dict[str, bytes]]:
         """Get all hashes for given events.
 
@@ -41,18 +44,27 @@ class SignatureWorkerStore(SQLBaseStore):
 
         Returns:
              A mapping of event ID to a mapping of algorithm to hash.
+             Returns an empty dict for a given event id if that event is unknown.
         """
+        events = await self.get_events(
+            event_ids,
+            redact_behaviour=EventRedactBehaviour.AS_IS,
+            allow_rejected=True,
+        )
 
-        def f(txn):
-            return {
-                event_id: self._get_event_reference_hashes_txn(txn, event_id)
-                for event_id in event_ids
-            }
+        hashes: Dict[str, Dict[str, bytes]] = {}
+        for event_id in event_ids:
+            event = events.get(event_id)
+            if event is None:
+                hashes[event_id] = {}
+            else:
+                ref_alg, ref_hash_bytes = compute_event_reference_hash(event)
+                hashes[event_id] = {ref_alg: ref_hash_bytes}
 
-        return await self.db_pool.runInteraction("get_event_reference_hashes", f)
+        return hashes
 
     async def add_event_hashes(
-        self, event_ids: Iterable[str]
+        self, event_ids: Collection[str]
     ) -> List[Tuple[str, Dict[str, str]]]:
         """
 
@@ -70,24 +82,6 @@ class SignatureWorkerStore(SQLBaseStore):
 
         return list(encoded_hashes.items())
 
-    def _get_event_reference_hashes_txn(
-        self, txn: Cursor, event_id: str
-    ) -> Dict[str, bytes]:
-        """Get all the hashes for a given PDU.
-        Args:
-            txn:
-            event_id: Id for the Event.
-        Returns:
-            A mapping of algorithm -> hash.
-        """
-        query = (
-            "SELECT algorithm, hash"
-            " FROM event_reference_hashes"
-            " WHERE event_id = ?"
-        )
-        txn.execute(query, (event_id,))
-        return {k: v for k, v in txn}
-
 
 class SignatureStore(SignatureWorkerStore):
     """Persistence for event signatures and hashes"""
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 319464b1fa..a898f847e7 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -81,6 +81,14 @@ class _EventDictReturn:
     stream_ordering: int
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class _EventsAround:
+    events_before: List[EventBase]
+    events_after: List[EventBase]
+    start: RoomStreamToken
+    end: RoomStreamToken
+
+
 def generate_pagination_where_clause(
     direction: str,
     column_names: Tuple[str, str],
@@ -846,7 +854,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         before_limit: int,
         after_limit: int,
         event_filter: Optional[Filter] = None,
-    ) -> dict:
+    ) -> _EventsAround:
         """Retrieve events and pagination tokens around a given event in a
         room.
         """
@@ -869,12 +877,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             list(results["after"]["event_ids"]), get_prev_content=True
         )
 
-        return {
-            "events_before": events_before,
-            "events_after": events_after,
-            "start": results["before"]["token"],
-            "end": results["after"]["token"],
-        }
+        return _EventsAround(
+            events_before=events_before,
+            events_after=events_after,
+            start=results["before"]["token"],
+            end=results["after"]["token"],
+        )
 
     def _get_events_around_txn(
         self,
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 4b78b4d098..ba79e19f7f 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -561,6 +561,54 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
             "get_destinations_paginate_txn", get_destinations_paginate_txn
         )
 
+    async def get_destination_rooms_paginate(
+        self, destination: str, start: int, limit: int, direction: str = "f"
+    ) -> Tuple[List[JsonDict], int]:
+        """Function to retrieve a paginated list of destination's rooms.
+        This will return a json list of rooms and the
+        total number of rooms.
+
+        Args:
+            destination: the destination to query
+            start: start number to begin the query from
+            limit: number of rows to retrieve
+            direction: sort ascending or descending by room_id
+        Returns:
+            A tuple of a list of room dicts and the total room count.
+        """
+
+        def get_destination_rooms_paginate_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[JsonDict], int]:
+
+            if direction == "b":
+                order = "DESC"
+            else:
+                order = "ASC"
+
+            sql = """
+                SELECT COUNT(*) as total_rooms
+                FROM destination_rooms
+                WHERE destination = ?
+                """
+            txn.execute(sql, [destination])
+            count = cast(Tuple[int], txn.fetchone())[0]
+
+            rooms = self.db_pool.simple_select_list_paginate_txn(
+                txn=txn,
+                table="destination_rooms",
+                orderby="room_id",
+                start=start,
+                limit=limit,
+                retcols=("room_id", "stream_ordering"),
+                order_direction=order,
+            )
+            return rooms, count
+
+        return await self.db_pool.runInteraction(
+            "get_destination_rooms_paginate_txn", get_destination_rooms_paginate_txn
+        )
+
     async def is_destination_known(self, destination: str) -> bool:
         """Check if a destination is known to the server."""
         result = await self.db_pool.simple_select_one_onecol(
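A hypothetical caller of the new pagination helper, e.g. from an admin API handler (the destination name and the `store` handle are stand-ins):

    rooms, total = await store.get_destination_rooms_paginate(
        "remote.example.com",
        start=10,       # skip the first page of 10
        limit=10,
        direction="b",  # room_id descending
    )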
diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py
index a1a1a6a14a..2d339b6008 100644
--- a/synapse/storage/databases/main/ui_auth.py
+++ b/synapse/storage/databases/main/ui_auth.py
@@ -23,19 +23,19 @@ from synapse.types import JsonDict
 from synapse.util import json_encoder, stringutils
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class UIAuthSessionData:
-    session_id = attr.ib(type=str)
+    session_id: str
     # The dictionary from the client root level, not the 'auth' key.
-    clientdict = attr.ib(type=JsonDict)
+    clientdict: JsonDict
     # The URI and method the session was initiated with. These are checked at
     # each stage of the authentication to ensure that the asked for operation
     # has not changed.
-    uri = attr.ib(type=str)
-    method = attr.ib(type=str)
+    uri: str
+    method: str
     # A string description of the operation that the current authentication is
     # authorising.
-    description = attr.ib(type=str)
+    description: str
 
 
 class UIAuthWorkerStore(SQLBaseStore):
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 0f9b8575d3..f7c778bdf2 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -105,8 +105,10 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 GROUP BY room_id
             """
             txn.execute(sql)
-            rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
-            self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
+            rooms = list(txn.fetchall())
+            self.db_pool.simple_insert_many_txn(
+                txn, TEMP_TABLE + "_rooms", keys=("room_id", "events"), values=rooms
+            )
             del rooms
 
             sql = (
@@ -117,9 +119,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             txn.execute(sql)
 
             txn.execute("SELECT name FROM users")
-            users = [{"user_id": x[0]} for x in txn.fetchall()]
+            users = list(txn.fetchall())
 
-            self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users)
+            self.db_pool.simple_insert_many_txn(
+                txn, TEMP_TABLE + "_users", keys=("user_id",), values=users
+            )
 
         new_pos = await self.get_max_stream_id_in_current_state_deltas()
         await self.db_pool.runInteraction(
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index eb1118d2cb..5de70f31d2 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -327,14 +327,15 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
                         self.db_pool.simple_insert_many_txn(
                             txn,
                             table="state_groups_state",
+                            keys=(
+                                "state_group",
+                                "room_id",
+                                "type",
+                                "state_key",
+                                "event_id",
+                            ),
                             values=[
-                                {
-                                    "state_group": state_group,
-                                    "room_id": room_id,
-                                    "type": key[0],
-                                    "state_key": key[1],
-                                    "event_id": state_id,
-                                }
+                                (state_group, room_id, key[0], key[1], state_id)
                                 for key, state_id in delta_state.items()
                             ],
                         )
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index c4c8c0021b..7614d76ac6 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -460,14 +460,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
                 self.db_pool.simple_insert_many_txn(
                     txn,
                     table="state_groups_state",
+                    keys=("state_group", "room_id", "type", "state_key", "event_id"),
                     values=[
-                        {
-                            "state_group": state_group,
-                            "room_id": room_id,
-                            "type": key[0],
-                            "state_key": key[1],
-                            "event_id": state_id,
-                        }
+                        (state_group, room_id, key[0], key[1], state_id)
                         for key, state_id in delta_ids.items()
                     ],
                 )
@@ -475,14 +470,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
                 self.db_pool.simple_insert_many_txn(
                     txn,
                     table="state_groups_state",
+                    keys=("state_group", "room_id", "type", "state_key", "event_id"),
                     values=[
-                        {
-                            "state_group": state_group,
-                            "room_id": room_id,
-                            "type": key[0],
-                            "state_key": key[1],
-                            "event_id": state_id,
-                        }
+                        (state_group, room_id, key[0], key[1], state_id)
                         for key, state_id in current_state_ids.items()
                     ],
                 )
@@ -589,14 +579,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="state_groups_state",
+                keys=("state_group", "room_id", "type", "state_key", "event_id"),
                 values=[
-                    {
-                        "state_group": sg,
-                        "room_id": room_id,
-                        "type": key[0],
-                        "state_key": key[1],
-                        "event_id": state_id,
-                    }
+                    (sg, room_id, key[0], key[1], state_id)
                     for key, state_id in curr_state.items()
                 ],
             )
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 20cd63c330..143cd98ca2 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -12,11 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import abc
-from typing import Generic, TypeVar
+from enum import IntEnum
+from typing import Generic, Optional, TypeVar
 
 from synapse.storage.types import Connection
 
 
+class IsolationLevel(IntEnum):
+    READ_COMMITTED: int = 1
+    REPEATABLE_READ: int = 2
+    SERIALIZABLE: int = 3
+
+
 class IncorrectDatabaseSetup(RuntimeError):
     pass
 
@@ -109,3 +116,13 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
         commit/rollback the connections.
         """
         ...
+
+    @abc.abstractmethod
+    def attempt_to_set_isolation_level(
+        self, conn: Connection, isolation_level: Optional[int]
+    ):
+        """Attempt to set the connections isolation level.
+
+        Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
+        """
+        ...
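Callers can now request a per-connection isolation level through the engine rather than hard-coding psycopg2 constants. A sketch of the intended use (`engine` and `conn` stand in for a configured database engine and one of its connections):

    from synapse.storage.engines._base import IsolationLevel

    # Weaken isolation for a specific transaction...
    engine.attempt_to_set_isolation_level(conn, IsolationLevel.READ_COMMITTED)

    # ...and pass None to restore the engine default (REPEATABLE READ on
    # PostgreSQL; a no-op on SQLite, where transactions are SERIALIZABLE).
    engine.attempt_to_set_isolation_level(conn, None)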
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 30f948a0f7..808342fafb 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -13,8 +13,13 @@
 # limitations under the License.
 
 import logging
+from typing import Mapping, Optional
 
-from synapse.storage.engines._base import BaseDatabaseEngine, IncorrectDatabaseSetup
+from synapse.storage.engines._base import (
+    BaseDatabaseEngine,
+    IncorrectDatabaseSetup,
+    IsolationLevel,
+)
 from synapse.storage.types import Connection
 
 logger = logging.getLogger(__name__)
@@ -34,6 +39,15 @@ class PostgresEngine(BaseDatabaseEngine):
         self.synchronous_commit = database_config.get("synchronous_commit", True)
         self._version = None  # unknown as yet
 
+        self.isolation_level_map: Mapping[int, int] = {
+            IsolationLevel.READ_COMMITTED: self.module.extensions.ISOLATION_LEVEL_READ_COMMITTED,
+            IsolationLevel.REPEATABLE_READ: self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ,
+            IsolationLevel.SERIALIZABLE: self.module.extensions.ISOLATION_LEVEL_SERIALIZABLE,
+        }
+        self.default_isolation_level = (
+            self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ
+        )
+
     @property
     def single_threaded(self) -> bool:
         return False
@@ -46,8 +60,8 @@ class PostgresEngine(BaseDatabaseEngine):
         self._version = db_conn.server_version
 
         # Are we on a supported PostgreSQL version?
-        if not allow_outdated_version and self._version < 90600:
-            raise RuntimeError("Synapse requires PostgreSQL 9.6 or above.")
+        if not allow_outdated_version and self._version < 100000:
+            raise RuntimeError("Synapse requires PostgreSQL 10 or above.")
 
         with db_conn.cursor() as txn:
             txn.execute("SHOW SERVER_ENCODING")
@@ -104,9 +118,7 @@ class PostgresEngine(BaseDatabaseEngine):
         return sql.replace("?", "%s")
 
     def on_new_connection(self, db_conn):
-        db_conn.set_isolation_level(
-            self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ
-        )
+        db_conn.set_isolation_level(self.default_isolation_level)
 
         # Set the bytea output to escape, vs the default of hex
         cursor = db_conn.cursor()
@@ -175,3 +187,12 @@ class PostgresEngine(BaseDatabaseEngine):
 
     def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool):
         return conn.set_session(autocommit=autocommit)  # type: ignore
+
+    def attempt_to_set_isolation_level(
+        self, conn: Connection, isolation_level: Optional[int]
+    ):
+        if isolation_level is None:
+            isolation_level = self.default_isolation_level
+        else:
+            isolation_level = self.isolation_level_map[isolation_level]
+        return conn.set_isolation_level(isolation_level)  # type: ignore
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 70d17d4f2c..6c19e55999 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -15,6 +15,7 @@ import platform
 import struct
 import threading
 import typing
+from typing import Optional
 
 from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.types import Connection
@@ -122,6 +123,12 @@ class Sqlite3Engine(BaseDatabaseEngine["sqlite3.Connection"]):
         # set the connection to autocommit mode.
         pass
 
+    def attempt_to_set_isolation_level(
+        self, conn: Connection, isolation_level: Optional[int]
+    ):
+        # All transactions are SERIALIZABLE by default in SQLite.
+        pass
+
 
 # Following functions taken from: https://github.com/coleifer/peewee
 
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index 540adb8781..71584f3f74 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -21,7 +21,7 @@ from signedjson.types import VerifyKey
 logger = logging.getLogger(__name__)
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class FetchKeyResult:
-    verify_key = attr.ib(type=VerifyKey)  # the key itself
-    valid_until_ts = attr.ib(type=int)  # how long we can use this key for
+    verify_key: VerifyKey  # the key itself
+    valid_until_ts: int  # how long we can use this key for
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index e45adfcb55..e3153d1a4a 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -499,9 +499,12 @@ def _upgrade_existing_database(
                 module = importlib.util.module_from_spec(spec)
                 spec.loader.exec_module(module)  # type: ignore
 
-                logger.info("Running script %s", relative_path)
-                module.run_create(cur, database_engine)  # type: ignore
-                if not is_empty:
+                if hasattr(module, "run_create"):
+                    logger.info("Running %s:run_create", relative_path)
+                    module.run_create(cur, database_engine)  # type: ignore
+
+                if not is_empty and hasattr(module, "run_upgrade"):
+                    logger.info("Running %s:run_upgrade", relative_path)
                     module.run_upgrade(cur, database_engine, config=config)  # type: ignore
             elif ext == ".pyc" or file_name == "__pycache__":
                 # Sometimes .pyc files turn up anyway even though we've
@@ -696,7 +699,7 @@ def _get_or_create_schema_state(
     )
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _DirectoryListing:
     """Helper class to store schema file name and the
     absolute path to it.
@@ -705,5 +708,5 @@ class _DirectoryListing:
     `file_name` attr is kept first.
     """
 
-    file_name = attr.ib(type=str)
-    absolute_path = attr.ib(type=str)
+    file_name: str
+    absolute_path: str
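With the `hasattr` checks above, a schema delta module may define either hook or both: `run_create` runs on every database, while `run_upgrade` runs only on non-empty (existing) ones. A minimal sketch of such a module (the table name is illustrative):

    def run_create(cur, database_engine):
        # Runs for fresh and existing databases alike.
        cur.execute("CREATE TABLE example_delta (id BIGINT)")

    def run_upgrade(cur, database_engine, config):
        # Runs only when upgrading an existing (non-empty) database.
        cur.execute("INSERT INTO example_delta (id) VALUES (1)")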
diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py
index 10a46b5e82..b1536c1ca4 100644
--- a/synapse/storage/relations.py
+++ b/synapse/storage/relations.py
@@ -23,7 +23,7 @@ from synapse.types import JsonDict
 logger = logging.getLogger(__name__)
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class PaginationChunk:
     """Returned by relation pagination APIs.
 
@@ -35,9 +35,9 @@ class PaginationChunk:
             None then there are no previous results.
     """
 
-    chunk = attr.ib(type=List[JsonDict])
-    next_batch = attr.ib(type=Optional[Any], default=None)
-    prev_batch = attr.ib(type=Optional[Any], default=None)
+    chunk: List[JsonDict]
+    next_batch: Optional[Any] = None
+    prev_batch: Optional[Any] = None
 
     def to_dict(self) -> Dict[str, Any]:
         d = {"chunk": self.chunk}
@@ -51,7 +51,7 @@ class PaginationChunk:
         return d
 
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class RelationPaginationToken:
     """Pagination token for relation pagination API.
 
@@ -64,8 +64,8 @@ class RelationPaginationToken:
         stream: The stream ordering of the boundary event.
     """
 
-    topological = attr.ib(type=int)
-    stream = attr.ib(type=int)
+    topological: int
+    stream: int
 
     @staticmethod
     def from_string(string: str) -> "RelationPaginationToken":
@@ -82,7 +82,7 @@ class RelationPaginationToken:
         return attr.astuple(self)
 
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class AggregationPaginationToken:
     """Pagination token for relation aggregation pagination API.
 
@@ -94,8 +94,8 @@ class AggregationPaginationToken:
         stream: The MAX stream ordering in the boundary group.
     """
 
-    count = attr.ib(type=int)
-    stream = attr.ib(type=int)
+    count: int
+    stream: int
 
     @staticmethod
     def from_string(string: str) -> "AggregationPaginationToken":
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 2a3d47185a..7b21c1b96d 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 67  # remember to update the list below when updating
+SCHEMA_VERSION = 68  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -53,11 +53,18 @@ Changes in SCHEMA_VERSION = 66:
 
 Changes in SCHEMA_VERSION = 67:
     - state_events.prev_state is no longer written to.
+
+Changes in SCHEMA_VERSION = 68:
+    - event_reference_hashes is no longer read.
+    - `events` has `state_key` and `rejection_reason` columns, which are populated for
+      new events.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    61  # 61: Remove unused tables `user_stats_historical` and `room_stats_historical`
+    # we now have `state_key` columns in both `events` and `state_events`, so
+    # now incompatible with synapses with SCHEMA_VERSION < 66.
+    66
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/67/01drop_public_room_list_stream.sql b/synapse/storage/schema/main/delta/67/01drop_public_room_list_stream.sql
new file mode 100644
index 0000000000..1eb8de9907
--- /dev/null
+++ b/synapse/storage/schema/main/delta/67/01drop_public_room_list_stream.sql
@@ -0,0 +1,18 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this table is unused as of Synapse 1.41
+DROP TABLE public_room_list_stream;
+
diff --git a/synapse/storage/schema/main/delta/68/01event_columns.sql b/synapse/storage/schema/main/delta/68/01event_columns.sql
new file mode 100644
index 0000000000..7c072f972e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/01event_columns.sql
@@ -0,0 +1,26 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add new columns to the `events` table which will (one day) make the `state_events`
+-- and `rejections` tables redundant.
+
+ALTER TABLE events
+  -- if this event is a state event, its state key
+  ADD COLUMN state_key TEXT DEFAULT NULL;
+
+
+ALTER TABLE events
+  -- if this event was rejected, the reason it was rejected.
+  ADD COLUMN rejection_reason TEXT DEFAULT NULL;
diff --git a/synapse/storage/schema/main/delta/68/02_msc2409_add_device_id_appservice_stream_type.sql b/synapse/storage/schema/main/delta/68/02_msc2409_add_device_id_appservice_stream_type.sql
new file mode 100644
index 0000000000..bbf0af5311
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/02_msc2409_add_device_id_appservice_stream_type.sql
@@ -0,0 +1,21 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a column to track the to_device stream id that this application
+-- service has been caught up to.
+
+-- NULL indicates that this appservice has never received any to_device messages. This
+-- can be used, for example, to avoid sending a huge dump of messages at startup.
+ALTER TABLE application_services_state ADD COLUMN to_device_stream_id BIGINT;
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
new file mode 100644
index 0000000000..e124933843
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
@@ -0,0 +1,20 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We want to retroactively delete account data for users that were already
+-- deactivated.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (6803, 'delete_account_data_for_deactivated_users', '{}');
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index b5ba1560d1..913448f0f9 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -45,7 +45,7 @@ logger = logging.getLogger(__name__)
 T = TypeVar("T")
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class StateFilter:
     """A filter used when querying for state.
 
@@ -58,8 +58,8 @@ class StateFilter:
             appear in `types`.
     """
 
-    types = attr.ib(type="frozendict[str, Optional[FrozenSet[str]]]")
-    include_others = attr.ib(default=False, type=bool)
+    types: "frozendict[str, Optional[FrozenSet[str]]]"
+    include_others: bool = False
 
     def __attrs_post_init__(self):
         # If `include_others` is set we canonicalise the filter by removing
@@ -74,21 +74,21 @@ class StateFilter:
 
     @staticmethod
     def all() -> "StateFilter":
-        """Creates a filter that fetches everything.
+        """Returns a filter that fetches everything.
 
         Returns:
-            The new state filter.
+            The state filter.
         """
-        return StateFilter(types=frozendict(), include_others=True)
+        return _ALL_STATE_FILTER
 
     @staticmethod
     def none() -> "StateFilter":
-        """Creates a filter that fetches nothing.
+        """Returns a filter that fetches nothing.
 
         Returns:
             The new state filter.
         """
-        return StateFilter(types=frozendict(), include_others=False)
+        return _NONE_STATE_FILTER
 
     @staticmethod
     def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter":
@@ -527,6 +527,10 @@ class StateFilter:
         )
 
 
+_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True)
+_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False)
+
+
 class StateGroupStorage:
     """High level interface to fetching state for event."""
 
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index b8112e1c05..3c13859faa 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -762,13 +762,13 @@ class _AsyncCtxManagerWrapper(Generic[T]):
         return self.inner.__exit__(exc_type, exc, tb)
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _MultiWriterCtxManager:
     """Async context manager returned by MultiWriterIdGenerator"""
 
-    id_gen = attr.ib(type=MultiWriterIdGenerator)
-    multiple_ids = attr.ib(type=Optional[int], default=None)
-    stream_ids = attr.ib(type=List[int], factory=list)
+    id_gen: MultiWriterIdGenerator
+    multiple_ids: Optional[int] = None
+    stream_ids: List[int] = attr.Factory(list)
 
     async def __aenter__(self) -> Union[int, List[int]]:
         # It's safe to run this in autocommit mode as fetching values from a
diff --git a/synapse/streams/config.py b/synapse/streams/config.py
index c08d591f29..b52723e2b8 100644
--- a/synapse/streams/config.py
+++ b/synapse/streams/config.py
@@ -28,14 +28,14 @@ logger = logging.getLogger(__name__)
 MAX_LIMIT = 1000
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class PaginationConfig:
     """A configuration object which stores pagination parameters."""
 
-    from_token = attr.ib(type=Optional[StreamToken])
-    to_token = attr.ib(type=Optional[StreamToken])
-    direction = attr.ib(type=str)
-    limit = attr.ib(type=Optional[int])
+    from_token: Optional[StreamToken]
+    to_token: Optional[StreamToken]
+    direction: str
+    limit: Optional[int]
 
     @classmethod
     async def from_request(
diff --git a/synapse/types.py b/synapse/types.py
index 74a2c51857..f89fb216a6 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -20,6 +20,7 @@ from typing import (
     Any,
     ClassVar,
     Dict,
+    List,
     Mapping,
     Match,
     MutableMapping,
@@ -80,7 +81,7 @@ class ISynapseReactor(
     """The interfaces necessary for Synapse to function."""
 
 
-@attr.s(frozen=True, slots=True)
+@attr.s(frozen=True, slots=True, auto_attribs=True)
 class Requester:
     """
     Represents the user making a request
@@ -98,13 +99,13 @@ class Requester:
             "puppeting" the user.
     """
 
-    user = attr.ib(type="UserID")
-    access_token_id = attr.ib(type=Optional[int])
-    is_guest = attr.ib(type=bool)
-    shadow_banned = attr.ib(type=bool)
-    device_id = attr.ib(type=Optional[str])
-    app_service = attr.ib(type=Optional["ApplicationService"])
-    authenticated_entity = attr.ib(type=str)
+    user: "UserID"
+    access_token_id: Optional[int]
+    is_guest: bool
+    shadow_banned: bool
+    device_id: Optional[str]
+    app_service: Optional["ApplicationService"]
+    authenticated_entity: str
 
     def serialize(self):
         """Converts self to a type that can be serialized as JSON, and then
@@ -211,7 +212,7 @@ def get_localpart_from_id(string: str) -> str:
 DS = TypeVar("DS", bound="DomainSpecificString")
 
 
-@attr.s(slots=True, frozen=True, repr=False)
+@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True)
 class DomainSpecificString(metaclass=abc.ABCMeta):
     """Common base class among ID/name strings that have a local part and a
     domain name, prefixed with a sigil.
@@ -224,8 +225,8 @@ class DomainSpecificString(metaclass=abc.ABCMeta):
 
     SIGIL: ClassVar[str] = abc.abstractproperty()  # type: ignore
 
-    localpart = attr.ib(type=str)
-    domain = attr.ib(type=str)
+    localpart: str
+    domain: str
 
     # Because this is a frozen class, it is deeply immutable.
     def __copy__(self):
@@ -461,14 +462,12 @@ class RoomStreamToken:
     attributes, must be hashable.
     """
 
-    topological = attr.ib(
-        type=Optional[int],
+    topological: Optional[int] = attr.ib(
         validator=attr.validators.optional(attr.validators.instance_of(int)),
     )
-    stream = attr.ib(type=int, validator=attr.validators.instance_of(int))
+    stream: int = attr.ib(validator=attr.validators.instance_of(int))
 
-    instance_map = attr.ib(
-        type="frozendict[str, int]",
+    instance_map: "frozendict[str, int]" = attr.ib(
         factory=frozendict,
         validator=attr.validators.deep_mapping(
             key_validator=attr.validators.instance_of(str),
@@ -477,7 +476,7 @@ class RoomStreamToken:
         ),
     )
 
-    def __attrs_post_init__(self):
+    def __attrs_post_init__(self) -> None:
         """Validates that both `topological` and `instance_map` aren't set."""
 
         if self.instance_map and self.topological:
@@ -593,7 +592,7 @@ class RoomStreamToken:
             return "s%d" % (self.stream,)
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class StreamToken:
     """A collection of positions within multiple streams.
 
@@ -601,20 +600,20 @@ class StreamToken:
     must be hashable.
     """
 
-    room_key = attr.ib(
-        type=RoomStreamToken, validator=attr.validators.instance_of(RoomStreamToken)
+    room_key: RoomStreamToken = attr.ib(
+        validator=attr.validators.instance_of(RoomStreamToken)
     )
-    presence_key = attr.ib(type=int)
-    typing_key = attr.ib(type=int)
-    receipt_key = attr.ib(type=int)
-    account_data_key = attr.ib(type=int)
-    push_rules_key = attr.ib(type=int)
-    to_device_key = attr.ib(type=int)
-    device_list_key = attr.ib(type=int)
-    groups_key = attr.ib(type=int)
+    presence_key: int
+    typing_key: int
+    receipt_key: int
+    account_data_key: int
+    push_rules_key: int
+    to_device_key: int
+    device_list_key: int
+    groups_key: int
 
     _SEPARATOR = "_"
-    START: "StreamToken"
+    START: ClassVar["StreamToken"]
 
     @classmethod
     async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
@@ -674,7 +673,7 @@ class StreamToken:
 StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0)
 
 
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class PersistedEventPosition:
     """Position of a newly persisted event with instance that persisted it.
 
@@ -682,8 +681,8 @@ class PersistedEventPosition:
     RoomStreamToken.
     """
 
-    instance_name = attr.ib(type=str)
-    stream = attr.ib(type=int)
+    instance_name: str
+    stream: int
 
     def persisted_after(self, token: RoomStreamToken) -> bool:
         return token.get_stream_pos_for_instance(self.instance_name) < self.stream
@@ -733,15 +732,15 @@ class ThirdPartyInstanceID:
     __str__ = to_string
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class ReadReceipt:
     """Information about a read-receipt"""
 
-    room_id = attr.ib()
-    receipt_type = attr.ib()
-    user_id = attr.ib()
-    event_ids = attr.ib()
-    data = attr.ib()
+    room_id: str
+    receipt_type: str
+    user_id: str
+    event_ids: List[str]
+    data: JsonDict
 
 
 def get_verify_key_from_cross_signing_key(key_info):
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 150a04b53e..3f7299aff7 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -309,12 +309,12 @@ def gather_results(  # type: ignore[misc]
     return deferred.addCallback(tuple)
 
 
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class _LinearizerEntry:
     # The number of things executing.
-    count = attr.ib(type=int)
+    count: int
     # Deferreds for the things blocked from executing.
-    deferreds = attr.ib(type=collections.OrderedDict)
+    deferreds: collections.OrderedDict
 
 
 class Linearizer:
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index 377c9a282a..1d6ec22191 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -81,13 +81,14 @@ class DeferredCache(Generic[KT, VT]):
         Args:
             name: The name of the cache
             max_entries: Maximum amount of entries that the cache will hold
-            keylen: The length of the tuple used as the cache key. Ignored unless
-               `tree` is True.
             tree: Use a TreeCache instead of a dict as the underlying cache type
             iterable: If True, count each item in the cached object as an entry,
                 rather than each cached object
             apply_cache_factor_from_config: Whether cache factors specified in the
                 config file affect `max_entries`
+            prune_unread_entries: If True, cache entries that haven't been read recently
+                will be evicted from the cache in the background. Set to False to
+                opt-out of this behaviour.
         """
         cache_type = TreeCache if tree else dict
 
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 375cd443f1..df4fb156c2 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -254,9 +254,17 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
             return r1 + r2
 
     Args:
+        orig:
+        max_entries:
         num_args: number of positional arguments (excluding ``self`` and
             ``cache_context``) to use as cache keys. Defaults to all named
             args of the function.
+        tree:
+        cache_context:
+        iterable:
+        prune_unread_entries: If True, cache entries that haven't been read recently
+            will be evicted from the cache in the background. Set to False to opt-out
+            of this behaviour.
     """
 
     def __init__(
diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index 485ddb1893..d267703df0 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -33,7 +33,7 @@ DV = TypeVar("DV")
 
 # This class can't be generic because it uses slots with attrs.
 # See: https://github.com/python-attrs/attrs/issues/313
-@attr.s(slots=True)
+@attr.s(slots=True, auto_attribs=True)
 class DictionaryEntry:  # should be: Generic[DKT, DV].
     """Returned when getting an entry from the cache
 
@@ -41,14 +41,13 @@ class DictionaryEntry:  # should be: Generic[DKT, DV].
         full: Whether the cache has the full dict or just some keys.
             If not full then not all requested keys will necessarily be present
             in `value`
-        known_absent: Keys that were looked up in the dict and were not
-            there.
+        known_absent: Keys that were looked up in the dict and were not there.
         value: The full or partial dict value
     """
 
-    full = attr.ib(type=bool)
-    known_absent = attr.ib(type=Set[Any])  # should be: Set[DKT]
-    value = attr.ib(type=Dict[Any, Any])  # should be: Dict[DKT, DV]
+    full: bool
+    known_absent: Set[Any]  # should be: Set[DKT]
+    value: Dict[Any, Any]  # should be: Dict[DKT, DV]
 
     def __len__(self) -> int:
         return len(self.value)
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 3f11a2f9dd..7548b38548 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -340,6 +340,12 @@ class LruCache(Generic[KT, VT]):
 
             apply_cache_factor_from_config (bool): If true, `max_size` will be
                 multiplied by a cache factor derived from the homeserver config
+
+            clock:
+
+            prune_unread_entries: If True, cache entries that haven't been read recently
+                will be evicted from the cache in the background. Set to False to
+                opt-out of this behaviour.
         """
         # Default `clock` to something sensible. Note that we rename it to
         # `real_clock` so that mypy doesn't think its still `Optional`.
diff --git a/synapse/visibility.py b/synapse/visibility.py
index 17532059e9..1b970ce479 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -87,7 +87,7 @@ async def filter_events_for_client(
     )
 
     ignore_dict_content = await storage.main.get_global_account_data_by_type_for_user(
-        AccountDataTypes.IGNORED_USER_LIST, user_id
+        user_id, AccountDataTypes.IGNORED_USER_LIST
     )
 
     ignore_list: FrozenSet[str] = frozenset()