author     Will Hunt <willh@matrix.org>  2021-07-20 09:35:03 +0100
committer  Will Hunt <willh@matrix.org>  2021-07-20 09:35:03 +0100
commit     f19795355bf31af3842e607b3f1cc34e5d7727dc (patch)
tree       85834d47d9f4610fcf70f176114aa17a7b61b8e4
parent     Merge remote-tracking branch 'origin/develop' into hs/hacked-together-event-c... (diff)
parent     Factorise `get_datastore` calls in phone_stats_home. (#10427) (diff)
download   synapse-f19795355bf31af3842e607b3f1cc34e5d7727dc.tar.xz
Merge remote-tracking branch 'origin/develop' into hs/hacked-together-event-cache
-rw-r--r--  .github/workflows/docs.yaml | 57
-rw-r--r--  .github/workflows/release-artifacts.yml | 95
-rw-r--r--  .github/workflows/tests.yml | 14
-rw-r--r--  CHANGES.md | 107
-rw-r--r--  INSTALL.md | 594
-rw-r--r--  README.rst | 12
-rw-r--r--  UPGRADE.rst | 2
-rw-r--r--  changelog.d/10114.misc | 1
-rw-r--r--  changelog.d/10166.doc | 1
-rw-r--r--  changelog.d/10213.misc | 1
-rw-r--r--  changelog.d/10214.feature | 1
-rw-r--r--  changelog.d/10223.bugfix | 1
-rw-r--r--  changelog.d/10225.feature | 1
-rw-r--r--  changelog.d/10237.misc | 1
-rw-r--r--  changelog.d/10239.misc | 1
-rw-r--r--  changelog.d/10242.doc | 1
-rw-r--r--  changelog.d/10243.feature | 1
-rw-r--r--  changelog.d/10250.bugfix | 1
-rw-r--r--  changelog.d/10256.misc | 1
-rw-r--r--  changelog.d/10258.doc | 1
-rw-r--r--  changelog.d/10263.feature | 1
-rw-r--r--  changelog.d/10264.bugfix | 1
-rw-r--r--  changelog.d/10267.bugfix | 1
-rw-r--r--  changelog.d/10276.bugfix | 1
-rw-r--r--  changelog.d/10289.misc | 1
-rw-r--r--  changelog.d/10298.feature | 1
-rw-r--r--  changelog.d/10305.feature | 1
-rw-r--r--  changelog.d/10313.doc | 1
-rw-r--r--  changelog.d/10315.misc | 1
-rw-r--r--  changelog.d/10316.misc | 1
-rw-r--r--  changelog.d/10317.bugfix | 1
-rw-r--r--  changelog.d/10322.doc | 1
-rw-r--r--  changelog.d/10324.misc | 1
-rw-r--r--  changelog.d/10332.feature | 1
-rw-r--r--  changelog.d/10337.doc | 1
-rw-r--r--  changelog.d/10343.bugfix | 1
-rw-r--r--  changelog.d/10344.bugfix | 1
-rw-r--r--  changelog.d/10345.misc | 1
-rw-r--r--  changelog.d/10347.misc | 1
-rw-r--r--  changelog.d/10348.misc | 1
-rw-r--r--  changelog.d/10349.misc | 1
-rw-r--r--  changelog.d/10350.misc | 1
-rw-r--r--  changelog.d/10353.doc | 1
-rw-r--r--  changelog.d/10355.bugfix | 1
-rw-r--r--  changelog.d/10357.misc | 1
-rw-r--r--  changelog.d/10359.bugfix | 1
-rw-r--r--  changelog.d/10360.feature | 1
-rw-r--r--  changelog.d/10367.bugfix | 1
-rw-r--r--  changelog.d/10368.doc | 1
-rw-r--r--  changelog.d/10370.doc | 1
-rw-r--r--  changelog.d/10380.misc | 1
-rw-r--r--  changelog.d/10381.misc | 1
-rw-r--r--  changelog.d/10382.misc | 1
-rw-r--r--  changelog.d/10383.misc | 1
-rw-r--r--  changelog.d/10385.misc | 1
-rw-r--r--  changelog.d/10391.misc | 1
-rw-r--r--  changelog.d/10393.misc | 1
-rw-r--r--  changelog.d/10395.doc | 1
-rw-r--r--  changelog.d/10396.doc | 1
-rw-r--r--  changelog.d/10398.misc | 1
-rw-r--r--  changelog.d/10399.doc | 1
-rw-r--r--  changelog.d/10400.bugfix | 1
-rw-r--r--  changelog.d/10404.bugfix | 1
-rw-r--r--  changelog.d/10418.misc | 1
-rw-r--r--  changelog.d/10421.misc | 1
-rw-r--r--  changelog.d/10427.feature | 1
-rw-r--r--  changelog.d/9450.feature | 1
-rw-r--r--  changelog.d/9721.removal | 1
-rw-r--r--  changelog.d/9884.feature | 1
-rw-r--r--  changelog.d/9971.doc | 1
-rw-r--r--  contrib/docker/docker-compose.yml | 2
-rw-r--r--  contrib/grafana/README.md | 4
-rw-r--r--  contrib/prometheus/README.md | 2
-rw-r--r--  contrib/purge_api/README.md | 10
-rw-r--r--  contrib/purge_api/purge_history.sh | 2
-rw-r--r--  contrib/systemd-with-workers/README.md | 3
-rw-r--r--  contrib/systemd/README.md | 3
-rw-r--r--  debian/changelog | 22
-rw-r--r--  debian/hash_password.1 | 42
-rw-r--r--  debian/hash_password.ronn | 2
-rw-r--r--  debian/manpages | 1
-rw-r--r--  debian/matrix-synapse-py3.links | 1
-rw-r--r--  debian/register_new_matrix_user.1 | 37
-rw-r--r--  debian/register_new_matrix_user.ronn | 2
-rw-r--r--  debian/synapse_port_db.1 | 59
-rw-r--r--  debian/synapse_port_db.ronn | 8
-rw-r--r--  debian/synapse_review_recent_signups.1 | 26
-rw-r--r--  debian/synapse_review_recent_signups.ronn | 37
-rw-r--r--  debian/synctl.1 | 42
-rw-r--r--  debian/synctl.ronn | 2
-rw-r--r--  docker/README.md | 4
-rw-r--r--  docker/build_debian.sh | 14
-rw-r--r--  docs/.sample_config_header.yaml | 3
-rw-r--r--  docs/MSC1711_certificates_FAQ.md | 6
-rw-r--r--  docs/admin_api/media_admin_api.md | 4
-rw-r--r--  docs/admin_api/rooms.md | 69
-rw-r--r--  docs/admin_api/server_notices.md | 2
-rw-r--r--  docs/admin_api/user_admin_api.md | 12
-rw-r--r--  docs/consent_tracking.md | 2
-rw-r--r--  docs/delegate.md | 2
-rw-r--r--  docs/federate.md | 8
-rw-r--r--  docs/log_contexts.md | 331
-rw-r--r--  docs/modules.md | 49
-rw-r--r--  docs/postgres.md | 4
-rw-r--r--  docs/presence_router_module.md | 4
-rw-r--r--  docs/replication.md | 2
-rw-r--r--  docs/reverse_proxy.md | 29
-rw-r--r--  docs/room_and_user_statistics.md | 50
-rw-r--r--  docs/sample_config.yaml | 199
-rw-r--r--  docs/sample_log_config.yaml | 2
-rw-r--r--  docs/server_notices.md | 4
-rw-r--r--  docs/setup/installation.md | 607
-rw-r--r--  docs/spam_checker.md | 6
-rw-r--r--  docs/systemd-with-workers/README.md | 16
-rw-r--r--  docs/upgrade.md | 42
-rw-r--r--  docs/usage/configuration/logging_sample_config.md | 2
-rw-r--r--  docs/workers.md | 6
-rw-r--r--  mypy.ini | 2
-rwxr-xr-x  scripts-dev/build_debian_packages | 17
-rwxr-xr-x  scripts/synapse_review_recent_signups | 19
-rw-r--r--  synapse/__init__.py | 2
-rw-r--r--  synapse/_scripts/review_recent_signups.py | 175
-rw-r--r--  synapse/api/auth.py | 132
-rw-r--r--  synapse/api/errors.py | 4
-rw-r--r--  synapse/api/filtering.py | 2
-rw-r--r--  synapse/api/ratelimiting.py | 4
-rw-r--r--  synapse/api/room_versions.py | 4
-rw-r--r--  synapse/app/_base.py | 11
-rw-r--r--  synapse/app/generic_worker.py | 8
-rw-r--r--  synapse/app/homeserver.py | 6
-rw-r--r--  synapse/app/phone_stats_home.py | 36
-rw-r--r--  synapse/appservice/api.py | 4
-rw-r--r--  synapse/config/_base.pyi | 2
-rw-r--r--  synapse/config/account_validity.py | 102
-rw-r--r--  synapse/config/appservice.py | 6
-rw-r--r--  synapse/config/cache.py | 76
-rw-r--r--  synapse/config/consent.py | 2
-rw-r--r--  synapse/config/database.py | 3
-rw-r--r--  synapse/config/emailconfig.py | 4
-rw-r--r--  synapse/config/experimental.py | 6
-rw-r--r--  synapse/config/federation.py | 2
-rw-r--r--  synapse/config/jwt.py | 2
-rw-r--r--  synapse/config/logger.py | 2
-rw-r--r--  synapse/config/modules.py | 2
-rw-r--r--  synapse/config/oidc.py | 6
-rw-r--r--  synapse/config/password_auth_providers.py | 4
-rw-r--r--  synapse/config/repository.py | 6
-rw-r--r--  synapse/config/server.py | 39
-rw-r--r--  synapse/config/spam_checker.py | 4
-rw-r--r--  synapse/config/sso.py | 2
-rw-r--r--  synapse/config/stats.py | 11
-rw-r--r--  synapse/config/tls.py | 12
-rw-r--r--  synapse/config/tracer.py | 2
-rw-r--r--  synapse/config/user_directory.py | 2
-rw-r--r--  synapse/crypto/keyring.py | 20
-rw-r--r--  synapse/event_auth.py | 16
-rw-r--r--  synapse/events/__init__.py | 40
-rw-r--r--  synapse/events/builder.py | 93
-rw-r--r--  synapse/events/spamcheck.py | 4
-rw-r--r--  synapse/federation/federation_client.py | 10
-rw-r--r--  synapse/federation/federation_server.py | 154
-rw-r--r--  synapse/federation/send_queue.py | 26
-rw-r--r--  synapse/federation/sender/__init__.py | 110
-rw-r--r--  synapse/federation/sender/per_destination_queue.py | 34
-rw-r--r--  synapse/federation/transport/client.py | 8
-rw-r--r--  synapse/federation/transport/server.py | 120
-rw-r--r--  synapse/groups/groups_server.py | 12
-rw-r--r--  synapse/handlers/_base.py | 8
-rw-r--r--  synapse/handlers/account_validity.py | 128
-rw-r--r--  synapse/handlers/admin.py | 11
-rw-r--r--  synapse/handlers/appservice.py | 6
-rw-r--r--  synapse/handlers/auth.py | 16
-rw-r--r--  synapse/handlers/cas.py | 4
-rw-r--r--  synapse/handlers/device.py | 14
-rw-r--r--  synapse/handlers/devicemessage.py | 2
-rw-r--r--  synapse/handlers/directory.py | 11
-rw-r--r--  synapse/handlers/e2e_keys.py | 40
-rw-r--r--  synapse/handlers/event_auth.py | 62
-rw-r--r--  synapse/handlers/events.py | 6
-rw-r--r--  synapse/handlers/federation.py | 85
-rw-r--r--  synapse/handlers/groups_local.py | 4
-rw-r--r--  synapse/handlers/identity.py | 4
-rw-r--r--  synapse/handlers/initial_sync.py | 14
-rw-r--r--  synapse/handlers/message.py | 52
-rw-r--r--  synapse/handlers/oidc.py | 56
-rw-r--r--  synapse/handlers/pagination.py | 4
-rw-r--r--  synapse/handlers/presence.py | 28
-rw-r--r--  synapse/handlers/profile.py | 4
-rw-r--r--  synapse/handlers/receipts.py | 19
-rw-r--r--  synapse/handlers/register.py | 20
-rw-r--r--  synapse/handlers/room.py | 19
-rw-r--r--  synapse/handlers/room_list.py | 44
-rw-r--r--  synapse/handlers/saml.py | 8
-rw-r--r--  synapse/handlers/search.py | 8
-rw-r--r--  synapse/handlers/space_summary.py | 88
-rw-r--r--  synapse/handlers/sso.py | 12
-rw-r--r--  synapse/handlers/stats.py | 37
-rw-r--r--  synapse/handlers/sync.py | 34
-rw-r--r--  synapse/handlers/typing.py | 28
-rw-r--r--  synapse/handlers/user_directory.py | 2
-rw-r--r--  synapse/http/__init__.py | 2
-rw-r--r--  synapse/http/client.py | 18
-rw-r--r--  synapse/http/federation/well_known_resolver.py | 13
-rw-r--r--  synapse/http/matrixfederationclient.py | 40
-rw-r--r--  synapse/http/proxyagent.py | 14
-rw-r--r--  synapse/http/server.py | 8
-rw-r--r--  synapse/http/servlet.py | 2
-rw-r--r--  synapse/http/site.py | 16
-rw-r--r--  synapse/logging/_remote.py | 14
-rw-r--r--  synapse/logging/_structured.py | 2
-rw-r--r--  synapse/logging/context.py | 16
-rw-r--r--  synapse/logging/opentracing.py | 12
-rw-r--r--  synapse/metrics/__init__.py | 6
-rw-r--r--  synapse/metrics/_exposition.py | 28
-rw-r--r--  synapse/metrics/background_process_metrics.py | 7
-rw-r--r--  synapse/module_api/__init__.py | 221
-rw-r--r--  synapse/module_api/errors.py | 6
-rw-r--r--  synapse/notifier.py | 18
-rw-r--r--  synapse/push/bulk_push_rule_evaluator.py | 8
-rw-r--r--  synapse/push/clientformat.py | 4
-rw-r--r--  synapse/push/emailpusher.py | 6
-rw-r--r--  synapse/push/httppusher.py | 2
-rw-r--r--  synapse/push/mailer.py | 12
-rw-r--r--  synapse/push/presentable_names.py | 2
-rw-r--r--  synapse/push/push_rule_evaluator.py | 4
-rw-r--r--  synapse/push/pusher.py | 6
-rw-r--r--  synapse/push/pusherpool.py | 26
-rw-r--r--  synapse/python_dependencies.py | 4
-rw-r--r--  synapse/replication/http/_base.py | 10
-rw-r--r--  synapse/replication/slave/storage/_base.py | 6
-rw-r--r--  synapse/replication/slave/storage/client_ips.py | 4
-rw-r--r--  synapse/replication/tcp/client.py | 10
-rw-r--r--  synapse/replication/tcp/commands.py | 6
-rw-r--r--  synapse/replication/tcp/handler.py | 16
-rw-r--r--  synapse/replication/tcp/protocol.py | 14
-rw-r--r--  synapse/replication/tcp/redis.py | 8
-rw-r--r--  synapse/replication/tcp/streams/_base.py | 14
-rw-r--r--  synapse/replication/tcp/streams/events.py | 28
-rw-r--r--  synapse/replication/tcp/streams/federation.py | 6
-rw-r--r--  synapse/rest/admin/rooms.py | 17
-rw-r--r--  synapse/rest/admin/users.py | 26
-rw-r--r--  synapse/rest/client/v1/login.py | 33
-rw-r--r--  synapse/rest/client/v1/room.py | 167
-rw-r--r--  synapse/rest/client/v2_alpha/account_validity.py | 7
-rw-r--r--  synapse/rest/client/v2_alpha/sendtodevice.py | 2
-rw-r--r--  synapse/rest/consent/consent_resource.py | 4
-rw-r--r--  synapse/rest/key/v2/remote_key_resource.py | 4
-rw-r--r--  synapse/rest/media/v1/__init__.py | 4
-rw-r--r--  synapse/rest/media/v1/_base.py | 2
-rw-r--r--  synapse/rest/media/v1/media_repository.py | 10
-rw-r--r--  synapse/rest/media/v1/media_storage.py | 4
-rw-r--r--  synapse/rest/media/v1/preview_url_resource.py | 8
-rw-r--r--  synapse/rest/media/v1/upload_resource.py | 6
-rw-r--r--  synapse/rest/synapse/client/pick_username.py | 4
-rw-r--r--  synapse/server.py | 6
-rw-r--r--  synapse/server_notices/consent_server_notices.py | 2
-rw-r--r--  synapse/server_notices/resource_limits_server_notices.py | 2
-rw-r--r--  synapse/server_notices/server_notices_sender.py | 6
-rw-r--r--  synapse/state/__init__.py | 20
-rw-r--r--  synapse/state/v1.py | 2
-rw-r--r--  synapse/state/v2.py | 18
-rw-r--r--  synapse/storage/background_updates.py | 16
-rw-r--r--  synapse/storage/database.py | 18
-rw-r--r--  synapse/storage/databases/main/appservice.py | 4
-rw-r--r--  synapse/storage/databases/main/deviceinbox.py | 4
-rw-r--r--  synapse/storage/databases/main/end_to_end_keys.py | 2
-rw-r--r--  synapse/storage/databases/main/event_federation.py | 142
-rw-r--r--  synapse/storage/databases/main/event_push_actions.py | 2
-rw-r--r--  synapse/storage/databases/main/events.py | 44
-rw-r--r--  synapse/storage/databases/main/events_bg_updates.py | 68
-rw-r--r--  synapse/storage/databases/main/events_worker.py | 6
-rw-r--r--  synapse/storage/databases/main/group_server.py | 6
-rw-r--r--  synapse/storage/databases/main/lock.py | 15
-rw-r--r--  synapse/storage/databases/main/metrics.py | 133
-rw-r--r--  synapse/storage/databases/main/profile.py | 8
-rw-r--r--  synapse/storage/databases/main/purge_events.py | 4
-rw-r--r--  synapse/storage/databases/main/push_rule.py | 6
-rw-r--r--  synapse/storage/databases/main/registration.py | 2
-rw-r--r--  synapse/storage/databases/main/room.py | 104
-rw-r--r--  synapse/storage/databases/main/roommember.py | 15
-rw-r--r--  synapse/storage/databases/main/stats.py | 299
-rw-r--r--  synapse/storage/databases/main/stream.py | 6
-rw-r--r--  synapse/storage/databases/main/tags.py | 2
-rw-r--r--  synapse/storage/databases/main/ui_auth.py | 4
-rw-r--r--  synapse/storage/engines/_base.py | 6
-rw-r--r--  synapse/storage/engines/postgres.py | 5
-rw-r--r--  synapse/storage/engines/sqlite.py | 5
-rw-r--r--  synapse/storage/persist_events.py | 16
-rw-r--r--  synapse/storage/prepare_database.py | 8
-rw-r--r--  synapse/storage/schema/__init__.py | 6
-rw-r--r--  synapse/storage/schema/main/delta/60/01recreate_stream_ordering.sql.postgres | 11
-rw-r--r--  synapse/storage/schema/main/delta/60/02change_stream_ordering_columns.sql.postgres | 30
-rw-r--r--  synapse/storage/schema/main/delta/61/01change_appservices_txns.sql.postgres | 23
-rw-r--r--  synapse/storage/schema/main/delta/61/02drop_redundant_room_depth_index.sql | 18
-rw-r--r--  synapse/storage/schema/main/delta/61/03recreate_min_depth.py | 70
-rw-r--r--  synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres | 34
-rw-r--r--  synapse/storage/state.py | 4
-rw-r--r--  synapse/storage/util/id_generators.py | 12
-rw-r--r--  synapse/storage/util/sequence.py | 6
-rw-r--r--  synapse/streams/events.py | 4
-rw-r--r--  synapse/types.py | 14
-rw-r--r--  synapse/util/async_helpers.py | 8
-rw-r--r--  synapse/util/batching_queue.py | 8
-rw-r--r--  synapse/util/caches/__init__.py | 4
-rw-r--r--  synapse/util/caches/cached_call.py | 6
-rw-r--r--  synapse/util/caches/deferred_cache.py | 12
-rw-r--r--  synapse/util/caches/descriptors.py | 36
-rw-r--r--  synapse/util/caches/dictionary_cache.py | 6
-rw-r--r--  synapse/util/caches/expiringcache.py | 4
-rw-r--r--  synapse/util/caches/lrucache.py | 248
-rw-r--r--  synapse/util/caches/response_cache.py | 2
-rw-r--r--  synapse/util/caches/stream_change_cache.py | 6
-rw-r--r--  synapse/util/caches/treecache.py | 3
-rw-r--r--  synapse/util/caches/ttlcache.py | 6
-rw-r--r--  synapse/util/daemonize.py | 8
-rw-r--r--  synapse/util/iterutils.py | 2
-rw-r--r--  synapse/util/linked_list.py | 150
-rw-r--r--  synapse/util/macaroons.py | 2
-rw-r--r--  synapse/util/metrics.py | 2
-rw-r--r--  synapse/util/patch_inline_callbacks.py | 4
-rw-r--r--  synapse/visibility.py | 6
-rw-r--r--  sytest-blacklist | 6
-rw-r--r--  tests/app/test_phone_stats_home.py | 395
-rw-r--r--  tests/config/test_load.py | 4
-rw-r--r--  tests/events/test_presence_router.py | 14
-rw-r--r--  tests/handlers/test_presence.py | 4
-rw-r--r--  tests/handlers/test_profile.py | 2
-rw-r--r--  tests/handlers/test_space_summary.py | 391
-rw-r--r--  tests/handlers/test_stats.py | 203
-rw-r--r--  tests/handlers/test_typing.py | 37
-rw-r--r--  tests/http/federation/test_matrix_federation_agent.py | 2
-rw-r--r--  tests/http/test_fedclient.py | 8
-rw-r--r--  tests/http/test_proxyagent.py | 65
-rw-r--r--  tests/module_api/test_api.py | 16
-rw-r--r--  tests/replication/_base.py | 18
-rw-r--r--  tests/replication/tcp/streams/test_events.py | 14
-rw-r--r--  tests/replication/tcp/streams/test_receipts.py | 4
-rw-r--r--  tests/replication/tcp/streams/test_typing.py | 4
-rw-r--r--  tests/replication/test_multi_media_repo.py | 6
-rw-r--r--  tests/replication/test_sharded_event_persister.py | 6
-rw-r--r--  tests/rest/admin/test_admin.py | 6
-rw-r--r--  tests/rest/admin/test_room.py | 21
-rw-r--r--  tests/rest/admin/test_user.py | 310
-rw-r--r--  tests/rest/client/test_third_party_rules.py | 4
-rw-r--r--  tests/rest/client/v1/test_login.py | 14
-rw-r--r--  tests/rest/client/v1/test_rooms.py | 14
-rw-r--r--  tests/rest/client/v1/utils.py | 30
-rw-r--r--  tests/rest/client/v2_alpha/test_relations.py | 2
-rw-r--r--  tests/rest/client/v2_alpha/test_report_event.py | 2
-rw-r--r--  tests/rest/media/v1/test_media_storage.py | 2
-rw-r--r--  tests/server.py | 8
-rw-r--r--  tests/storage/test_background_update.py | 4
-rw-r--r--  tests/storage/test_directory.py | 2
-rw-r--r--  tests/storage/test_id_generators.py | 6
-rw-r--r--  tests/storage/test_profile.py | 12
-rw-r--r--  tests/storage/test_purge.py | 2
-rw-r--r--  tests/storage/test_room.py | 2
-rw-r--r--  tests/test_event_auth.py | 23
-rw-r--r--  tests/test_state.py | 3
-rw-r--r--  tests/test_types.py | 4
-rw-r--r--  tests/test_utils/html_parsers.py | 6
-rw-r--r--  tests/unittest.py | 19
-rw-r--r--  tests/util/caches/test_descriptors.py | 2
-rw-r--r--  tests/util/test_itertools.py | 18
-rw-r--r--  tests/util/test_lrucache.py | 46
365 files changed, 5931 insertions, 3506 deletions
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index c239130c57..808f825331 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -7,6 +7,8 @@ on:
       - develop
       # For documentation specific to a release
       - 'release-v*'
+      # stable docs
+      - master
 
   workflow_dispatch:
 
@@ -30,40 +32,35 @@ jobs:
           mdbook build
           cp book/welcome_and_overview.html book/index.html
 
-      # Deploy to the latest documentation directories
-      - name: Deploy latest documentation
-        uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          keep_files: true
-          publish_dir: ./book
-          destination_dir: ./develop
-
-      - name: Get the current Synapse version
+      # Figure out the target directory.
+      #
+      # The target directory depends on the name of the branch
+      #
+      - name: Get the target directory name
         id: vars
-        # The $GITHUB_REF value for a branch looks like `refs/heads/release-v1.2`. We do some
-        # shell magic to remove the "refs/heads/release-v" bit from this, to end up with "1.2",
-        # our major/minor version number, and set this to a var called `branch-version`.
-        #
-        # We then use some python to get Synapse's full version string, which may look
-        # like "1.2.3rc4". We set this to a var called `synapse-version`. We use this
-        # to determine if this release is still an RC, and if so block deployment.
         run: |
-          echo ::set-output name=branch-version::${GITHUB_REF#refs/heads/release-v}
-          echo ::set-output name=synapse-version::`python3 -c 'import synapse; print(synapse.__version__)'`
+          # first strip the 'refs/heads/' prefix with some shell foo
+          branch="${GITHUB_REF#refs/heads/}"
 
-      # Deploy to the version-specific directory
-      - name: Deploy release-specific documentation
-        # We only carry out this step if we're running on a release branch,
-        # and the current Synapse version does not have "rc" in the name.
-        #
-        # The result is that only full releases are deployed, but can be
-        # updated if the release branch gets retroactive fixes.
-        if: ${{ startsWith( github.ref, 'refs/heads/release-v' ) && !contains( steps.vars.outputs.synapse-version, 'rc') }}
-        uses: peaceiris/actions-gh-pages@v3
+          case $branch in
+              release-*)
+                  # strip 'release-' from the name for release branches.
+                  branch="${branch#release-}"
+                  ;;
+              master)
+                  # deploy to "latest" for the master branch.
+                  branch="latest"
+                  ;;
+          esac
+
+          # finally, set the 'branch-version' var.
+          echo "::set-output name=branch-version::$branch"
+          
+      # Deploy to the target directory.
+      - name: Deploy to gh pages
+        uses: peaceiris/actions-gh-pages@068dc23d9710f1ba62e86896f84735d869951305 # v3.8.0
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           keep_files: true
           publish_dir: ./book
-          # The resulting documentation will end up in a directory named `vX.Y`.
-          destination_dir: ./v${{ steps.vars.outputs.branch-version }}
+          destination_dir: ./${{ steps.vars.outputs.branch-version }}
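
The replacement step in this hunk collapses the two deploy jobs into one by deriving the gh-pages destination directory from the branch name. A minimal sketch of that mapping, run outside of Actions with some assumed example refs:

```sh
#!/usr/bin/env bash
# Sketch of the branch -> destination-directory mapping used by the
# "Get the target directory name" step; the example refs are assumptions.
for ref in refs/heads/develop refs/heads/release-v1.38 refs/heads/master; do
    branch="${ref#refs/heads/}"            # strip the 'refs/heads/' prefix
    case $branch in
        release-*) branch="${branch#release-}" ;;  # release-v1.38 -> v1.38
        master)    branch="latest" ;;              # stable docs live under ./latest
    esac
    echo "$ref -> ./$branch"
done
# prints:
#   refs/heads/develop -> ./develop
#   refs/heads/release-v1.38 -> ./v1.38
#   refs/heads/master -> ./latest
```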
diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml
new file mode 100644
index 0000000000..325c1f7d39
--- /dev/null
+++ b/.github/workflows/release-artifacts.yml
@@ -0,0 +1,95 @@
+# GitHub actions workflow which builds the release artifacts.
+
+name: Build release artifacts
+
+on:
+  # we build on PRs and develop to (hopefully) get early warning
+  # of things breaking (but only build one set of debs)
+  pull_request:
+  push:
+    branches: ["develop"]
+
+    # we do the full build on tags.
+    tags: ["v*"]
+
+permissions:
+  contents: write
+
+jobs:
+  get-distros:
+    name: "Calculate list of debian distros"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - id: set-distros
+        run: |
+          # if we're running from a tag, get the full list of distros; otherwise just use debian:sid
+          dists='["debian:sid"]'
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+              dists=$(scripts-dev/build_debian_packages --show-dists-json)
+          fi
+          echo "::set-output name=distros::$dists"
+    # map the step outputs to job outputs
+    outputs:
+      distros: ${{ steps.set-distros.outputs.distros }}
+
+  # now build the packages with a matrix build.
+  build-debs:
+    needs: get-distros
+    name: "Build .deb packages"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        distro: ${{ fromJson(needs.get-distros.outputs.distros) }}
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          path: src
+      - uses: actions/setup-python@v2
+      - run: ./src/scripts-dev/build_debian_packages "${{ matrix.distro }}"
+      - uses: actions/upload-artifact@v2
+        with:
+          name: debs
+          path: debs/*
+
+  build-sdist:
+    name: "Build pypi distribution files"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - run: pip install wheel
+      - run: |
+          python setup.py sdist bdist_wheel
+      - uses: actions/upload-artifact@v2
+        with:
+          name: python-dist
+          path: dist/*
+
+  # if it's a tag, create a release and attach the artifacts to it
+  attach-assets:
+    name: "Attach assets to release"
+    if: ${{ !failure() && !cancelled() && startsWith(github.ref, 'refs/tags/') }}
+    needs:
+      - build-debs
+      - build-sdist
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download all workflow run artifacts
+        uses: actions/download-artifact@v2
+      - name: Build a tarball for the debs
+        run: tar -cvJf debs.tar.xz debs
+      - name: Attach to release
+        uses: softprops/action-gh-release@a929a66f232c1b11af63782948aa2210f981808a  # PR#109
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          files: |
+            python-dist/*
+            debs.tar.xz
+          # if it's not already published, keep the release as a draft.
+          draft: true
+          # mark it as a prerelease if the tag contains 'rc'.
+          prerelease: ${{ contains(github.ref, 'rc') }}
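
The `get-distros` job in this new workflow decides which Debian distributions to build for: pull requests and pushes to develop build only `debian:sid`, while tag builds ask `scripts-dev/build_debian_packages` for the full list. A rough stand-alone sketch of that selection logic (the example `GITHUB_REF` value is an assumption):

```sh
# Sketch of the distro-selection logic from the get-distros job.
# In CI, GITHUB_REF is provided by the runner; it is hard-coded here only
# to illustrate the tag case.
GITHUB_REF="refs/tags/v1.38.0"

dists='["debian:sid"]'
if [[ $GITHUB_REF == refs/tags/* ]]; then
    # on a release tag, the build script reports the full distro list as JSON
    dists=$(scripts-dev/build_debian_packages --show-dists-json)
fi
echo "building debs for: $dists"
```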
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index bf36ee1cdf..505bac1308 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -65,14 +65,14 @@ jobs:
 
   # Dummy step to gate other tests on without repeating the whole list
   linting-done:
-    if: ${{ always() }} # Run this even if prior jobs were skipped
+    if: ${{ !cancelled() }} # Run this even if prior jobs were skipped
     needs: [lint, lint-crlf, lint-newsfile, lint-sdist]
     runs-on: ubuntu-latest
     steps:
       - run: "true"
 
   trial:
-    if: ${{ !failure() }} # Allow previous steps to be skipped, but not fail
+    if: ${{ !cancelled() && !failure() }} # Allow previous steps to be skipped, but not fail
     needs: linting-done
     runs-on: ubuntu-latest
     strategy:
@@ -131,7 +131,7 @@ jobs:
           || true
 
   trial-olddeps:
-    if: ${{ !failure() }} # Allow previous steps to be skipped, but not fail
+    if: ${{ !cancelled() && !failure() }} # Allow previous steps to be skipped, but not fail
     needs: linting-done
     runs-on: ubuntu-latest
     steps:
@@ -156,7 +156,7 @@ jobs:
 
   trial-pypy:
     # Very slow; only run if the branch name includes 'pypy'
-    if: ${{ contains(github.ref, 'pypy') && !failure() }}
+    if: ${{ contains(github.ref, 'pypy') && !failure() && !cancelled() }}
     needs: linting-done
     runs-on: ubuntu-latest
     strategy:
@@ -185,7 +185,7 @@ jobs:
           || true
 
   sytest:
-    if: ${{ !failure() }}
+    if: ${{ !failure() && !cancelled() }}
     needs: linting-done
     runs-on: ubuntu-latest
     container:
@@ -245,7 +245,7 @@ jobs:
             /logs/**/*.log*
 
   portdb:
-    if: ${{ !failure() }} # Allow previous steps to be skipped, but not fail
+    if: ${{ !failure() && !cancelled() }} # Allow previous steps to be skipped, but not fail
     needs: linting-done
     runs-on: ubuntu-latest
     strategy:
@@ -286,7 +286,7 @@ jobs:
       - run: .buildkite/scripts/test_synapse_port_db.sh
 
   complement:
-    if: ${{ !failure() }}
+    if: ${{ !failure() && !cancelled() }}
     needs: linting-done
     runs-on: ubuntu-latest
     container:
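
The tests.yml changes tighten the job gating: `always()` and a bare `!failure()` can still fire after a cancelled run, whereas adding `!cancelled()` skips the job in that case while still allowing skipped prerequisites. A small illustration of the two conditions (this mirrors the documented behaviour of the Actions status functions, not the expression engine itself):

```sh
# Compare the old and new gating conditions for each prerequisite outcome.
for prior in success skipped failure cancelled; do
    failed=false; cancelled=false
    [ "$prior" = failure ]   && failed=true
    [ "$prior" = cancelled ] && cancelled=true

    old=no; new=no
    [ "$failed" = false ] && old=yes                              # old: !failure()
    [ "$failed" = false ] && [ "$cancelled" = false ] && new=yes  # new: !cancelled() && !failure()

    printf '%-9s old=%-3s new=%s\n' "$prior" "$old" "$new"
done
# Only the 'cancelled' row differs: the old condition would still start the
# job after a cancelled run; the new one does not.
```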
diff --git a/CHANGES.md b/CHANGES.md
index f5ac026df5..82baaa2d1f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,93 @@
+Synapse 1.38.0 (2021-07-13)
+===========================
+
+This release includes a database schema update which could result in elevated disk usage. See the [upgrade notes](https://matrix-org.github.io/synapse/develop/upgrade#upgrading-to-v1380) for more information.
+
+No significant changes since 1.38.0rc3.
+
+
+Synapse 1.38.0rc3 (2021-07-13)
+==============================
+
+Internal Changes
+----------------
+
+- Build the Debian packages in CI. ([\#10247](https://github.com/matrix-org/synapse/issues/10247), [\#10379](https://github.com/matrix-org/synapse/issues/10379))
+
+
+Synapse 1.38.0rc2 (2021-07-09)
+==============================
+
+Bugfixes
+--------
+
+- Fix bug where inbound federation in a room could be delayed due to not correctly dropping a lock. Introduced in v1.37.1. ([\#10336](https://github.com/matrix-org/synapse/issues/10336))
+
+
+Improved Documentation
+----------------------
+
+- Update links to documentation in the sample config. Contributed by @dklimpel. ([\#10287](https://github.com/matrix-org/synapse/issues/10287))
+- Fix broken links in [INSTALL.md](INSTALL.md). Contributed by @dklimpel. ([\#10331](https://github.com/matrix-org/synapse/issues/10331))
+
+
+Synapse 1.38.0rc1 (2021-07-06)
+==============================
+
+Features
+--------
+
+- Implement refresh tokens as specified by [MSC2918](https://github.com/matrix-org/matrix-doc/pull/2918). ([\#9450](https://github.com/matrix-org/synapse/issues/9450))
+- Add support for evicting cache entries based on last access time. ([\#10205](https://github.com/matrix-org/synapse/issues/10205))
+- Omit empty fields from the `/sync` response. Contributed by @deepbluev7. ([\#10214](https://github.com/matrix-org/synapse/issues/10214))
+- Improve validation on federation `send_{join,leave,knock}` endpoints. ([\#10225](https://github.com/matrix-org/synapse/issues/10225), [\#10243](https://github.com/matrix-org/synapse/issues/10243))
+- Add SSO `external_ids` to the Query User Account admin API. ([\#10261](https://github.com/matrix-org/synapse/issues/10261))
+- Mark events received over federation which fail a spam check as "soft-failed". ([\#10263](https://github.com/matrix-org/synapse/issues/10263))
+- Add metrics for new inbound federation staging area. ([\#10284](https://github.com/matrix-org/synapse/issues/10284))
+- Add script to print information about recently registered users. ([\#10290](https://github.com/matrix-org/synapse/issues/10290))
+
+
+Bugfixes
+--------
+
+- Fix a long-standing bug which meant that invite rejections and knocks were not sent out over federation in a timely manner. ([\#10223](https://github.com/matrix-org/synapse/issues/10223))
+- Fix a bug introduced in v1.26.0 where only users who have set profile information could be deactivated with erasure enabled. ([\#10252](https://github.com/matrix-org/synapse/issues/10252))
+- Fix a long-standing bug where Synapse would return errors after 2<sup>31</sup> events were handled by the server. ([\#10264](https://github.com/matrix-org/synapse/issues/10264), [\#10267](https://github.com/matrix-org/synapse/issues/10267), [\#10282](https://github.com/matrix-org/synapse/issues/10282), [\#10286](https://github.com/matrix-org/synapse/issues/10286), [\#10291](https://github.com/matrix-org/synapse/issues/10291), [\#10314](https://github.com/matrix-org/synapse/issues/10314), [\#10326](https://github.com/matrix-org/synapse/issues/10326))
+- Fix the prometheus `synapse_federation_server_pdu_process_time` metric. Broke in v1.37.1. ([\#10279](https://github.com/matrix-org/synapse/issues/10279))
+- Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up. ([\#10303](https://github.com/matrix-org/synapse/issues/10303))
+
+
+Improved Documentation
+----------------------
+
+- Move the upgrade notes to [docs/upgrade.md](https://github.com/matrix-org/synapse/blob/develop/docs/upgrade.md) and convert them to markdown. ([\#10166](https://github.com/matrix-org/synapse/issues/10166))
+- Choose Welcome & Overview as the default page for synapse documentation website. ([\#10242](https://github.com/matrix-org/synapse/issues/10242))
+- Adjust the URL in the README.rst file to point to irc.libera.chat. ([\#10258](https://github.com/matrix-org/synapse/issues/10258))
+- Fix homeserver config option name in presence router documentation. ([\#10288](https://github.com/matrix-org/synapse/issues/10288))
+- Fix link pointing at the wrong section in the modules documentation page. ([\#10302](https://github.com/matrix-org/synapse/issues/10302))
+
+
+Internal Changes
+----------------
+
+- Drop `Origin` and `Accept` from the value of the `Access-Control-Allow-Headers` response header. ([\#10114](https://github.com/matrix-org/synapse/issues/10114))
+- Add type hints to the federation servlets. ([\#10213](https://github.com/matrix-org/synapse/issues/10213))
+- Improve the reliability of auto-joining remote rooms. ([\#10237](https://github.com/matrix-org/synapse/issues/10237))
+- Update the release script to use the semver terminology and determine the release branch based on the next version. ([\#10239](https://github.com/matrix-org/synapse/issues/10239))
+- Fix type hints for computing auth events. ([\#10253](https://github.com/matrix-org/synapse/issues/10253))
+- Improve the performance of the spaces summary endpoint by only recursing into spaces (and not rooms in general). ([\#10256](https://github.com/matrix-org/synapse/issues/10256))
+- Move event authentication methods from `Auth` to `EventAuthHandler`. ([\#10268](https://github.com/matrix-org/synapse/issues/10268))
+- Re-enable a SyTest after it has been fixed. ([\#10292](https://github.com/matrix-org/synapse/issues/10292))
+
+
+Synapse 1.37.1 (2021-06-30)
+===========================
+
+This release resolves issues (such as [#9490](https://github.com/matrix-org/synapse/issues/9490)) where one busy room could cause head-of-line blocking, starving Synapse from processing events in other rooms, and causing all federated traffic to fall behind. Synapse 1.37.1 processes inbound federation traffic asynchronously, ensuring that one busy room won't impact others. Please upgrade to Synapse 1.37.1 as soon as possible, in order to increase resilience to other traffic spikes.
+
+No significant changes since v1.37.1rc1.
+
+
 Synapse 1.37.1rc1 (2021-06-29)
 ==============================
 
@@ -1167,7 +1257,10 @@ Crucially, this means __we will not produce .deb packages for Debian 9 (Stretch)
 
 The website https://endoflife.date/ has convenient summaries of the support schedules for projects like [Python](https://endoflife.date/python) and [PostgreSQL](https://endoflife.date/postgresql).
 
-If you are unable to upgrade your environment to a supported version of Python or Postgres, we encourage you to consider using the [Synapse Docker images](./INSTALL.md#docker-images-and-ansible-playbooks) instead.
+If you are unable to upgrade your environment to a supported version of Python or
+Postgres, we encourage you to consider using the
+[Synapse Docker images](https://matrix-org.github.io/synapse/latest/setup/installation.html#docker-images-and-ansible-playbooks)
+instead.
 
 ### Transition Period
 
@@ -1310,11 +1403,11 @@ To upgrade Synapse along with the cryptography package:
 * Administrators using the [`matrix.org` Docker
   image](https://hub.docker.com/r/matrixdotorg/synapse/) or the [Debian/Ubuntu
   packages from
-  `matrix.org`](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#matrixorg-packages)
+  `matrix.org`](https://matrix-org.github.io/synapse/latest/setup/installation.html#matrixorg-packages)
   should ensure that they have version 1.24.0 or 1.23.1 installed: these images include
   the updated packages.
 * Administrators who have [installed Synapse from
-  source](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#installing-from-source)
+  source](https://matrix-org.github.io/synapse/latest/setup/installation.html#installing-from-source)
   should upgrade the cryptography package within their virtualenv by running:
   ```sh
   <path_to_virtualenv>/bin/pip install 'cryptography>=3.3'
@@ -1356,11 +1449,11 @@ To upgrade Synapse along with the cryptography package:
 * Administrators using the [`matrix.org` Docker
   image](https://hub.docker.com/r/matrixdotorg/synapse/) or the [Debian/Ubuntu
   packages from
-  `matrix.org`](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#matrixorg-packages)
+  `matrix.org`](https://matrix-org.github.io/synapse/latest/setup/installation.html#matrixorg-packages)
   should ensure that they have version 1.24.0 or 1.23.1 installed: these images include
   the updated packages.
 * Administrators who have [installed Synapse from
-  source](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#installing-from-source)
+  source](https://matrix-org.github.io/synapse/latest/setup/installation.html#installing-from-source)
   should upgrade the cryptography package within their virtualenv by running:
   ```sh
   <path_to_virtualenv>/bin/pip install 'cryptography>=3.3'
@@ -2939,11 +3032,11 @@ installation remains secure.
 * Administrators using the [`matrix.org` Docker
   image](https://hub.docker.com/r/matrixdotorg/synapse/) or the [Debian/Ubuntu
   packages from
-  `matrix.org`](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#matrixorg-packages)
+  `matrix.org`](https://matrix-org.github.io/synapse/latest/setup/installation.html#matrixorg-packages)
   should ensure that they have version 1.12.0 installed: these images include
   Twisted 20.3.0.
 * Administrators who have [installed Synapse from
-  source](https://github.com/matrix-org/synapse/blob/master/INSTALL.md#installing-from-source)
+  source](https://matrix-org.github.io/synapse/latest/setup/installation.html#installing-from-source)
   should upgrade Twisted within their virtualenv by running:
   ```sh
   <path_to_virtualenv>/bin/pip install 'Twisted>=20.3.0'
diff --git a/INSTALL.md b/INSTALL.md
index b0697052c1..f199b233b9 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,593 +1,7 @@
 # Installation Instructions
 
-There are 3 steps to follow under **Installation Instructions**.
+This document has moved to the
+[Synapse documentation website](https://matrix-org.github.io/synapse/latest/setup/installation.html).
+Please update your links.
 
-- [Installation Instructions](#installation-instructions)
-  - [Choosing your server name](#choosing-your-server-name)
-  - [Installing Synapse](#installing-synapse)
-    - [Installing from source](#installing-from-source)
-      - [Platform-specific prerequisites](#platform-specific-prerequisites)
-        - [Debian/Ubuntu/Raspbian](#debianubunturaspbian)
-        - [ArchLinux](#archlinux)
-        - [CentOS/Fedora](#centosfedora)
-        - [macOS](#macos)
-        - [OpenSUSE](#opensuse)
-        - [OpenBSD](#openbsd)
-        - [Windows](#windows)
-    - [Prebuilt packages](#prebuilt-packages)
-      - [Docker images and Ansible playbooks](#docker-images-and-ansible-playbooks)
-      - [Debian/Ubuntu](#debianubuntu)
-        - [Matrix.org packages](#matrixorg-packages)
-        - [Downstream Debian packages](#downstream-debian-packages)
-        - [Downstream Ubuntu packages](#downstream-ubuntu-packages)
-      - [Fedora](#fedora)
-      - [OpenSUSE](#opensuse-1)
-      - [SUSE Linux Enterprise Server](#suse-linux-enterprise-server)
-      - [ArchLinux](#archlinux-1)
-      - [Void Linux](#void-linux)
-      - [FreeBSD](#freebsd)
-      - [OpenBSD](#openbsd-1)
-      - [NixOS](#nixos)
-  - [Setting up Synapse](#setting-up-synapse)
-    - [Using PostgreSQL](#using-postgresql)
-    - [TLS certificates](#tls-certificates)
-    - [Client Well-Known URI](#client-well-known-uri)
-    - [Email](#email)
-    - [Registering a user](#registering-a-user)
-    - [Setting up a TURN server](#setting-up-a-turn-server)
-    - [URL previews](#url-previews)
-    - [Troubleshooting Installation](#troubleshooting-installation)
-
-
-## Choosing your server name
-
-It is important to choose the name for your server before you install Synapse,
-because it cannot be changed later.
-
-The server name determines the "domain" part of user-ids for users on your
-server: these will all be of the format `@user:my.domain.name`. It also
-determines how other matrix servers will reach yours for federation.
-
-For a test configuration, set this to the hostname of your server. For a more
-production-ready setup, you will probably want to specify your domain
-(`example.com`) rather than a matrix-specific hostname here (in the same way
-that your email address is probably `user@example.com` rather than
-`user@email.example.com`) - but doing so may require more advanced setup: see
-[Setting up Federation](docs/federate.md).
-
-## Installing Synapse
-
-### Installing from source
-
-(Prebuilt packages are available for some platforms - see [Prebuilt packages](#prebuilt-packages).)
-
-When installing from source please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed.
-
-System requirements:
-
-- POSIX-compliant system (tested on Linux & OS X)
-- Python 3.5.2 or later, up to Python 3.9.
-- At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org
-
-
-To install the Synapse homeserver run:
-
-```sh
-mkdir -p ~/synapse
-virtualenv -p python3 ~/synapse/env
-source ~/synapse/env/bin/activate
-pip install --upgrade pip
-pip install --upgrade setuptools
-pip install matrix-synapse
-```
-
-This will download Synapse from [PyPI](https://pypi.org/project/matrix-synapse)
-and install it, along with the python libraries it uses, into a virtual environment
-under `~/synapse/env`.  Feel free to pick a different directory if you
-prefer.
-
-This Synapse installation can then be later upgraded by using pip again with the
-update flag:
-
-```sh
-source ~/synapse/env/bin/activate
-pip install -U matrix-synapse
-```
-
-Before you can start Synapse, you will need to generate a configuration
-file. To do this, run (in your virtualenv, as before):
-
-```sh
-cd ~/synapse
-python -m synapse.app.homeserver \
-    --server-name my.domain.name \
-    --config-path homeserver.yaml \
-    --generate-config \
-    --report-stats=[yes|no]
-```
-
-... substituting an appropriate value for `--server-name`.
-
-This command will generate you a config file that you can then customise, but it will
-also generate a set of keys for you. These keys will allow your homeserver to
-identify itself to other homeserver, so don't lose or delete them. It would be
-wise to back them up somewhere safe. (If, for whatever reason, you do need to
-change your homeserver's keys, you may find that other homeserver have the
-old key cached. If you update the signing key, you should change the name of the
-key in the `<server name>.signing.key` file (the second word) to something
-different. See the [spec](https://matrix.org/docs/spec/server_server/latest.html#retrieving-server-keys) for more information on key management).
-
-To actually run your new homeserver, pick a working directory for Synapse to
-run (e.g. `~/synapse`), and:
-
-```sh
-cd ~/synapse
-source env/bin/activate
-synctl start
-```
-
-#### Platform-specific prerequisites
-
-Synapse is written in Python but some of the libraries it uses are written in
-C. So before we can install Synapse itself we need a working C compiler and the
-header files for Python C extensions.
-
-##### Debian/Ubuntu/Raspbian
-
-Installing prerequisites on Ubuntu or Debian:
-
-```sh
-sudo apt install build-essential python3-dev libffi-dev \
-                     python3-pip python3-setuptools sqlite3 \
-                     libssl-dev virtualenv libjpeg-dev libxslt1-dev
-```
-
-##### ArchLinux
-
-Installing prerequisites on ArchLinux:
-
-```sh
-sudo pacman -S base-devel python python-pip \
-               python-setuptools python-virtualenv sqlite3
-```
-
-##### CentOS/Fedora
-
-Installing prerequisites on CentOS or Fedora Linux:
-
-```sh
-sudo dnf install libtiff-devel libjpeg-devel libzip-devel freetype-devel \
-                 libwebp-devel libxml2-devel libxslt-devel libpq-devel \
-                 python3-virtualenv libffi-devel openssl-devel python3-devel
-sudo dnf groupinstall "Development Tools"
-```
-
-##### macOS
-
-Installing prerequisites on macOS:
-
-```sh
-xcode-select --install
-sudo easy_install pip
-sudo pip install virtualenv
-brew install pkg-config libffi
-```
-
-On macOS Catalina (10.15) you may need to explicitly install OpenSSL
-via brew and inform `pip` about it so that `psycopg2` builds:
-
-```sh
-brew install openssl@1.1
-export LDFLAGS="-L/usr/local/opt/openssl/lib"
-export CPPFLAGS="-I/usr/local/opt/openssl/include"
-```
-
-##### OpenSUSE
-
-Installing prerequisites on openSUSE:
-
-```sh
-sudo zypper in -t pattern devel_basis
-sudo zypper in python-pip python-setuptools sqlite3 python-virtualenv \
-               python-devel libffi-devel libopenssl-devel libjpeg62-devel
-```
-
-##### OpenBSD
-
-A port of Synapse is available under `net/synapse`. The filesystem
-underlying the homeserver directory (defaults to `/var/synapse`) has to be
-mounted with `wxallowed` (cf. `mount(8)`), so creating a separate filesystem
-and mounting it to `/var/synapse` should be taken into consideration.
-
-To be able to build Synapse's dependency on python the `WRKOBJDIR`
-(cf. `bsd.port.mk(5)`) for building python, too, needs to be on a filesystem
-mounted with `wxallowed` (cf. `mount(8)`).
-
-Creating a `WRKOBJDIR` for building python under `/usr/local` (which on a
-default OpenBSD installation is mounted with `wxallowed`):
-
-```sh
-doas mkdir /usr/local/pobj_wxallowed
-```
-
-Assuming `PORTS_PRIVSEP=Yes` (cf. `bsd.port.mk(5)`) and `SUDO=doas` are
-configured in `/etc/mk.conf`:
-
-```sh
-doas chown _pbuild:_pbuild /usr/local/pobj_wxallowed
-```
-
-Setting the `WRKOBJDIR` for building python:
-
-```sh
-echo WRKOBJDIR_lang/python/3.7=/usr/local/pobj_wxallowed  \\nWRKOBJDIR_lang/python/2.7=/usr/local/pobj_wxallowed >> /etc/mk.conf
-```
-
-Building Synapse:
-
-```sh
-cd /usr/ports/net/synapse
-make install
-```
-
-##### Windows
-
-If you wish to run or develop Synapse on Windows, the Windows Subsystem For
-Linux provides a Linux environment on Windows 10 which is capable of using the
-Debian, Fedora, or source installation methods. More information about WSL can
-be found at <https://docs.microsoft.com/en-us/windows/wsl/install-win10> for
-Windows 10 and <https://docs.microsoft.com/en-us/windows/wsl/install-on-server>
-for Windows Server.
-
-### Prebuilt packages
-
-As an alternative to installing from source, prebuilt packages are available
-for a number of platforms.
-
-#### Docker images and Ansible playbooks
-
-There is an official synapse image available at
-<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with
-the docker-compose file available at [contrib/docker](contrib/docker). Further
-information on this including configuration options is available in the README
-on hub.docker.com.
-
-Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a
-Dockerfile to automate a synapse server in a single Docker image, at
-<https://hub.docker.com/r/avhost/docker-matrix/tags/>
-
-Slavi Pantaleev has created an Ansible playbook,
-which installs the offical Docker image of Matrix Synapse
-along with many other Matrix-related services (Postgres database, Element, coturn,
-ma1sd, SSL support, etc.).
-For more details, see
-<https://github.com/spantaleev/matrix-docker-ansible-deploy>
-
-#### Debian/Ubuntu
-
-##### Matrix.org packages
-
-Matrix.org provides Debian/Ubuntu packages of the latest stable version of
-Synapse via <https://packages.matrix.org/debian/>. They are available for Debian
-9 (Stretch), Ubuntu 16.04 (Xenial), and later. To use them:
-
-```sh
-sudo apt install -y lsb-release wget apt-transport-https
-sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg
-echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" |
-    sudo tee /etc/apt/sources.list.d/matrix-org.list
-sudo apt update
-sudo apt install matrix-synapse-py3
-```
-
-**Note**: if you followed a previous version of these instructions which
-recommended using `apt-key add` to add an old key from
-`https://matrix.org/packages/debian/`, you should note that this key has been
-revoked. You should remove the old key with `sudo apt-key remove
-C35EB17E1EAE708E6603A9B3AD0592FE47F0DF61`, and follow the above instructions to
-update your configuration.
-
-The fingerprint of the repository signing key (as shown by `gpg
-/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is
-`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`.
-
-##### Downstream Debian packages
-
-We do not recommend using the packages from the default Debian `buster`
-repository at this time, as they are old and suffer from known security
-vulnerabilities. You can install the latest version of Synapse from
-[our repository](#matrixorg-packages) or from `buster-backports`. Please
-see the [Debian documentation](https://backports.debian.org/Instructions/)
-for information on how to use backports.
-
-If you are using Debian `sid` or testing, Synapse is available in the default
-repositories and it should be possible to install it simply with:
-
-```sh
-sudo apt install matrix-synapse
-```
-
-##### Downstream Ubuntu packages
-
-We do not recommend using the packages in the default Ubuntu repository
-at this time, as they are old and suffer from known security vulnerabilities.
-The latest version of Synapse can be installed from [our repository](#matrixorg-packages).
-
-#### Fedora
-
-Synapse is in the Fedora repositories as `matrix-synapse`:
-
-```sh
-sudo dnf install matrix-synapse
-```
-
-Oleg Girko provides Fedora RPMs at
-<https://obs.infoserver.lv/project/monitor/matrix-synapse>
-
-#### OpenSUSE
-
-Synapse is in the OpenSUSE repositories as `matrix-synapse`:
-
-```sh
-sudo zypper install matrix-synapse
-```
-
-#### SUSE Linux Enterprise Server
-
-Unofficial package are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at
-<https://download.opensuse.org/repositories/openSUSE:/Backports:/SLE-15/standard/>
-
-#### ArchLinux
-
-The quickest way to get up and running with ArchLinux is probably with the community package
-<https://www.archlinux.org/packages/community/any/matrix-synapse/>, which should pull in most of
-the necessary dependencies.
-
-pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1 ):
-
-```sh
-sudo pip install --upgrade pip
-```
-
-If you encounter an error with lib bcrypt causing an Wrong ELF Class:
-ELFCLASS32 (x64 Systems), you may need to reinstall py-bcrypt to correctly
-compile it under the right architecture. (This should not be needed if
-installing under virtualenv):
-
-```sh
-sudo pip uninstall py-bcrypt
-sudo pip install py-bcrypt
-```
-
-#### Void Linux
-
-Synapse can be found in the void repositories as 'synapse':
-
-```sh
-xbps-install -Su
-xbps-install -S synapse
-```
-
-#### FreeBSD
-
-Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from:
-
-- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean`
-- Packages: `pkg install py37-matrix-synapse`
-
-#### OpenBSD
-
-As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem
-underlying the homeserver directory (defaults to `/var/synapse`) has to be
-mounted with `wxallowed` (cf. `mount(8)`), so creating a separate filesystem
-and mounting it to `/var/synapse` should be taken into consideration.
-
-Installing Synapse:
-
-```sh
-doas pkg_add synapse
-```
-
-#### NixOS
-
-Robin Lambertz has packaged Synapse for NixOS at:
-<https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/matrix-synapse.nix>
-
-## Setting up Synapse
-
-Once you have installed synapse as above, you will need to configure it.
-
-### Using PostgreSQL
-
-By default Synapse uses an [SQLite](https://sqlite.org/) database and in doing so trades
-performance for convenience. Almost all installations should opt to use [PostgreSQL](https://www.postgresql.org)
-instead. Advantages include:
-
-- significant performance improvements due to the superior threading and
-  caching model, smarter query optimiser
-- allowing the DB to be run on separate hardware
-
-For information on how to install and use PostgreSQL in Synapse, please see
-[docs/postgres.md](docs/postgres.md)
-
-SQLite is only acceptable for testing purposes. SQLite should not be used in
-a production server. Synapse will perform poorly when using
-SQLite, especially when participating in large rooms.
-
-### TLS certificates
-
-The default configuration exposes a single HTTP port on the local
-interface: `http://localhost:8008`. It is suitable for local testing,
-but for any practical use, you will need Synapse's APIs to be served
-over HTTPS.
-
-The recommended way to do so is to set up a reverse proxy on port
-`8448`. You can find documentation on doing so in
-[docs/reverse_proxy.md](docs/reverse_proxy.md).
-
-Alternatively, you can configure Synapse to expose an HTTPS port. To do
-so, you will need to edit `homeserver.yaml`, as follows:
-
-- First, under the `listeners` section, uncomment the configuration for the
-  TLS-enabled listener. (Remove the hash sign (`#`) at the start of
-  each line). The relevant lines are like this:
-
-```yaml
-  - port: 8448
-    type: http
-    tls: true
-    resources:
-      - names: [client, federation]
-  ```
-
-- You will also need to uncomment the `tls_certificate_path` and
-  `tls_private_key_path` lines under the `TLS` section. You will need to manage
-  provisioning of these certificates yourself.
-
-  If you are using your own certificate, be sure to use a `.pem` file that
-  includes the full certificate chain including any intermediate certificates
-  (for instance, if using certbot, use `fullchain.pem` as your certificate, not
-  `cert.pem`).
-
-For a more detailed guide to configuring your server for federation, see
-[federate.md](docs/federate.md).
-
-### Client Well-Known URI
-
-Setting up the client Well-Known URI is optional but if you set it up, it will
-allow users to enter their full username (e.g. `@user:<server_name>`) into clients
-which support well-known lookup to automatically configure the homeserver and
-identity server URLs. This is useful so that users don't have to memorize or think
-about the actual homeserver URL you are using.
-
-The URL `https://<server_name>/.well-known/matrix/client` should return JSON in
-the following format.
-
-```json
-{
-  "m.homeserver": {
-    "base_url": "https://<matrix.example.com>"
-  }
-}
-```
-
-It can optionally contain identity server information as well.
-
-```json
-{
-  "m.homeserver": {
-    "base_url": "https://<matrix.example.com>"
-  },
-  "m.identity_server": {
-    "base_url": "https://<identity.example.com>"
-  }
-}
-```
-
-To work in browser based clients, the file must be served with the appropriate
-Cross-Origin Resource Sharing (CORS) headers. A recommended value would be
-`Access-Control-Allow-Origin: *` which would allow all browser based clients to
-view it.
-
-In nginx this would be something like:
-
-```nginx
-location /.well-known/matrix/client {
-    return 200 '{"m.homeserver": {"base_url": "https://<matrix.example.com>"}}';
-    default_type application/json;
-    add_header Access-Control-Allow-Origin *;
-}
-```
-
-You should also ensure the `public_baseurl` option in `homeserver.yaml` is set
-correctly. `public_baseurl` should be set to the URL that clients will use to
-connect to your server. This is the same URL you put for the `m.homeserver`
-`base_url` above.
-
-```yaml
-public_baseurl: "https://<matrix.example.com>"
-```
-
-### Email
-
-It is desirable for Synapse to have the capability to send email. This allows
-Synapse to send password reset emails, send verifications when an email address
-is added to a user's account, and send email notifications to users when they
-receive new messages.
-
-To configure an SMTP server for Synapse, modify the configuration section
-headed `email`, and be sure to have at least the `smtp_host`, `smtp_port`
-and `notif_from` fields filled out.  You may also need to set `smtp_user`,
-`smtp_pass`, and `require_transport_security`.
-
-If email is not configured, password reset, registration and notifications via
-email will be disabled.
-
-### Registering a user
-
-The easiest way to create a new user is to do so from a client like [Element](https://element.io/).
-
-Alternatively, you can do so from the command line. This can be done as follows:
-
- 1. If synapse was installed via pip, activate the virtualenv as follows (if Synapse was
-    installed via a prebuilt package, `register_new_matrix_user` should already be
-    on the search path):
-    ```sh
-    cd ~/synapse
-    source env/bin/activate
-    synctl start # if not already running
-    ```
- 2. Run the following command:
-    ```sh
-    register_new_matrix_user -c homeserver.yaml http://localhost:8008
-    ```
-
-This will prompt you to add details for the new user, and will then connect to
-the running Synapse to create the new user. For example:
-```
-New user localpart: erikj
-Password:
-Confirm password:
-Make admin [no]:
-Success!
-```
-
-This process uses a setting `registration_shared_secret` in
-`homeserver.yaml`, which is shared between Synapse itself and the
-`register_new_matrix_user` script. It doesn't matter what it is (a random
-value is generated by `--generate-config`), but it should be kept secret, as
-anyone with knowledge of it can register users, including admin accounts,
-on your server even if `enable_registration` is `false`.
-
-### Setting up a TURN server
-
-For reliable VoIP calls to be routed via this homeserver, you MUST configure
-a TURN server. See [docs/turn-howto.md](docs/turn-howto.md) for details.
-
-### URL previews
-
-Synapse includes support for previewing URLs, which is disabled by default.  To
-turn it on you must enable the `url_preview_enabled: True` config parameter
-and explicitly specify the IP ranges that Synapse is not allowed to spider for
-previewing in the `url_preview_ip_range_blacklist` configuration parameter.
-This is critical from a security perspective to stop arbitrary Matrix users
-spidering 'internal' URLs on your network. At the very least we recommend that
-your loopback and RFC1918 IP addresses are blacklisted.
-
-This also requires the optional `lxml` python dependency to be  installed. This
-in turn requires the `libxml2` library to be available - on  Debian/Ubuntu this
-means `apt-get install libxml2-dev`, or equivalent for your OS.
-
-### Troubleshooting Installation
-
-`pip` seems to leak *lots* of memory during installation. For instance, a Linux
-host with 512MB of RAM may run out of memory whilst installing Twisted. If this
-happens, you will have to individually install the dependencies which are
-failing, e.g.:
-
-```sh
-pip install twisted
-```
-
-If you have any other problems, feel free to ask in
-[#synapse:matrix.org](https://matrix.to/#/#synapse:matrix.org).
+The markdown source is available in [docs/setup/installation.md](docs/setup/installation.md).
diff --git a/README.rst b/README.rst
index 6d3cf6c1a5..0ae05616e7 100644
--- a/README.rst
+++ b/README.rst
@@ -94,7 +94,8 @@ Synapse Installation
 
 .. _federation:
 
-* For details on how to install synapse, see `<INSTALL.md>`_.
+* For details on how to install synapse, see
+  `Installation Instructions <https://matrix-org.github.io/synapse/latest/setup/installation.html>`_.
 * For specific details on how to configure Synapse for federation see `docs/federate.md <docs/federate.md>`_
 
 
@@ -106,7 +107,8 @@ from a web client.
 
 Unless you are running a test instance of Synapse on your local machine, in
 general, you will need to enable TLS support before you can successfully
-connect from a client: see `<INSTALL.md#tls-certificates>`_.
+connect from a client: see
+`TLS certificates <https://matrix-org.github.io/synapse/latest/setup/installation.html#tls-certificates>`_.
 
 An easy way to get started is to login or register via Element at
 https://app.element.io/#/login or https://app.element.io/#/register respectively.
@@ -265,7 +267,7 @@ Join our developer community on Matrix: `#synapse-dev:matrix.org <https://matrix
 
 Before setting up a development environment for synapse, make sure you have the
 system dependencies (such as the python header files) installed - see
-`Installing from source <INSTALL.md#installing-from-source>`_.
+`Installing from source <https://matrix-org.github.io/synapse/latest/setup/installation.html#installing-from-source>`_.
 
 To check out a synapse for development, clone the git repo into a working
 directory of your choice::
@@ -333,8 +335,8 @@ access the API as a Matrix client would. It is able to run Synapse directly from
 the source tree, so installation of the server is not required.
 
 Testing with SyTest is recommended for verifying that changes related to the
-Client-Server API are functioning correctly. See the `installation instructions
-<https://github.com/matrix-org/sytest#installing>`_ for details.
+Client-Server API are functioning correctly. See the `SyTest installation
+instructions <https://github.com/matrix-org/sytest#installing>`_ for details.
 
 
 Platform dependencies
diff --git a/UPGRADE.rst b/UPGRADE.rst
index 82548ac850..17ecd935fd 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -1,7 +1,7 @@
 Upgrading Synapse
 =================
 
-This document has moved to the `Synapse documentation website <https://matrix-org.github.io/synapse/develop/upgrading>`_.
+This document has moved to the `Synapse documentation website <https://matrix-org.github.io/synapse/latest/upgrading>`_.
 Please update your links.
 
 The markdown source is available in `docs/upgrade.md <docs/upgrade.md>`_.
diff --git a/changelog.d/10114.misc b/changelog.d/10114.misc
deleted file mode 100644
index 808548f7c7..0000000000
--- a/changelog.d/10114.misc
+++ /dev/null
@@ -1 +0,0 @@
-Drop Origin and Accept from the value of the Access-Control-Allow-Headers response header.
diff --git a/changelog.d/10166.doc b/changelog.d/10166.doc
deleted file mode 100644
index 8d1710c132..0000000000
--- a/changelog.d/10166.doc
+++ /dev/null
@@ -1 +0,0 @@
-Move the upgrade notes to [docs/upgrade.md](https://github.com/matrix-org/synapse/blob/develop/docs/upgrade.md) and convert them to markdown.
diff --git a/changelog.d/10213.misc b/changelog.d/10213.misc
deleted file mode 100644
index 9adb0fbd02..0000000000
--- a/changelog.d/10213.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add type hints to the federation servlets.
diff --git a/changelog.d/10214.feature b/changelog.d/10214.feature
deleted file mode 100644
index a3818c9d25..0000000000
--- a/changelog.d/10214.feature
+++ /dev/null
@@ -1 +0,0 @@
-Omit empty fields from the `/sync` response. Contributed by @deepbluev7.
\ No newline at end of file
diff --git a/changelog.d/10223.bugfix b/changelog.d/10223.bugfix
deleted file mode 100644
index 4e42f6b608..0000000000
--- a/changelog.d/10223.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug which meant that invite rejections and knocks were not sent out over federation in a timely manner.
diff --git a/changelog.d/10225.feature b/changelog.d/10225.feature
deleted file mode 100644
index d16f66ffe9..0000000000
--- a/changelog.d/10225.feature
+++ /dev/null
@@ -1 +0,0 @@
-Improve validation on federation `send_{join,leave,knock}` endpoints.
diff --git a/changelog.d/10237.misc b/changelog.d/10237.misc
deleted file mode 100644
index d76c119a41..0000000000
--- a/changelog.d/10237.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve the reliability of auto-joining remote rooms.
diff --git a/changelog.d/10239.misc b/changelog.d/10239.misc
deleted file mode 100644
index d05f1c4411..0000000000
--- a/changelog.d/10239.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update the release script to use the semver terminology and determine the release branch based on the next version.
diff --git a/changelog.d/10242.doc b/changelog.d/10242.doc
deleted file mode 100644
index 2241b28547..0000000000
--- a/changelog.d/10242.doc
+++ /dev/null
@@ -1 +0,0 @@
-Choose Welcome & Overview as the default page for synapse documentation website.
diff --git a/changelog.d/10243.feature b/changelog.d/10243.feature
deleted file mode 100644
index d16f66ffe9..0000000000
--- a/changelog.d/10243.feature
+++ /dev/null
@@ -1 +0,0 @@
-Improve validation on federation `send_{join,leave,knock}` endpoints.
diff --git a/changelog.d/10250.bugfix b/changelog.d/10250.bugfix
new file mode 100644
index 0000000000..a8107dafb2
--- /dev/null
+++ b/changelog.d/10250.bugfix
@@ -0,0 +1 @@
+Add base starting insertion event when no chunk ID is specified in the historical batch send API.
diff --git a/changelog.d/10256.misc b/changelog.d/10256.misc
deleted file mode 100644
index adef12fcb9..0000000000
--- a/changelog.d/10256.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve the performance of the spaces summary endpoint by only recursing into spaces (and not rooms in general).
diff --git a/changelog.d/10258.doc b/changelog.d/10258.doc
deleted file mode 100644
index 1549786c0c..0000000000
--- a/changelog.d/10258.doc
+++ /dev/null
@@ -1 +0,0 @@
-Adjust the URL in the README.rst file to point to irc.libera.chat.
diff --git a/changelog.d/10263.feature b/changelog.d/10263.feature
deleted file mode 100644
index 7b1d2fe60f..0000000000
--- a/changelog.d/10263.feature
+++ /dev/null
@@ -1 +0,0 @@
-Mark events received over federation which fail a spam check as "soft-failed".
diff --git a/changelog.d/10264.bugfix b/changelog.d/10264.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10264.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2<sup>31</sup> events were handled by the server.
diff --git a/changelog.d/10267.bugfix b/changelog.d/10267.bugfix
deleted file mode 100644
index 7ebda7cdc2..0000000000
--- a/changelog.d/10267.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would return errors after 2<sup>31</sup> events were handled by the server.
diff --git a/changelog.d/10276.bugfix b/changelog.d/10276.bugfix
new file mode 100644
index 0000000000..42adc57ad1
--- /dev/null
+++ b/changelog.d/10276.bugfix
@@ -0,0 +1 @@
+Fix historical batch send endpoint (MSC2716) rejecting batches with messages from multiple senders.
diff --git a/changelog.d/10289.misc b/changelog.d/10289.misc
new file mode 100644
index 0000000000..2df30e7a7a
--- /dev/null
+++ b/changelog.d/10289.misc
@@ -0,0 +1 @@
+Convert `room_depth.min_depth` column to a `BIGINT`.
diff --git a/changelog.d/10298.feature b/changelog.d/10298.feature
new file mode 100644
index 0000000000..7059db5075
--- /dev/null
+++ b/changelog.d/10298.feature
@@ -0,0 +1 @@
+The spaces summary API now returns any joinable rooms, not only rooms which are world-readable.
diff --git a/changelog.d/10305.feature b/changelog.d/10305.feature
new file mode 100644
index 0000000000..7059db5075
--- /dev/null
+++ b/changelog.d/10305.feature
@@ -0,0 +1 @@
+The spaces summary API now returns any joinable rooms, not only rooms which are world-readable.
diff --git a/changelog.d/10313.doc b/changelog.d/10313.doc
new file mode 100644
index 0000000000..44086e3d9d
--- /dev/null
+++ b/changelog.d/10313.doc
@@ -0,0 +1 @@
+Simplify structure of room admin API.
\ No newline at end of file
diff --git a/changelog.d/10315.misc b/changelog.d/10315.misc
new file mode 100644
index 0000000000..2c78644e20
--- /dev/null
+++ b/changelog.d/10315.misc
@@ -0,0 +1 @@
+Add tests to characterise the current behaviour of R30 phone-home metrics.
diff --git a/changelog.d/10316.misc b/changelog.d/10316.misc
new file mode 100644
index 0000000000..1fd0810fde
--- /dev/null
+++ b/changelog.d/10316.misc
@@ -0,0 +1 @@
+Rebuild event context and auth when processing specific results from `ThirdPartyEventRules` modules.
diff --git a/changelog.d/10317.bugfix b/changelog.d/10317.bugfix
new file mode 100644
index 0000000000..826c269eff
--- /dev/null
+++ b/changelog.d/10317.bugfix
@@ -0,0 +1 @@
+Fix purging rooms that other homeservers are still sending events for. Contributed by @ilmari.
diff --git a/changelog.d/10322.doc b/changelog.d/10322.doc
new file mode 100644
index 0000000000..db604cf2aa
--- /dev/null
+++ b/changelog.d/10322.doc
@@ -0,0 +1 @@
+Fix a broken link in the admin api docs.
diff --git a/changelog.d/10324.misc b/changelog.d/10324.misc
new file mode 100644
index 0000000000..3c3ee6d6fc
--- /dev/null
+++ b/changelog.d/10324.misc
@@ -0,0 +1 @@
+Minor change to the code that populates `user_daily_visits`.
diff --git a/changelog.d/10332.feature b/changelog.d/10332.feature
new file mode 100644
index 0000000000..091947ff22
--- /dev/null
+++ b/changelog.d/10332.feature
@@ -0,0 +1 @@
+Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric.
diff --git a/changelog.d/10337.doc b/changelog.d/10337.doc
new file mode 100644
index 0000000000..f305bdb3ba
--- /dev/null
+++ b/changelog.d/10337.doc
@@ -0,0 +1 @@
+Fix formatting in the logcontext documentation.
diff --git a/changelog.d/10343.bugfix b/changelog.d/10343.bugfix
new file mode 100644
index 0000000000..53ccf79a81
--- /dev/null
+++ b/changelog.d/10343.bugfix
@@ -0,0 +1 @@
+Fix errors during backfill caused by previously purged redaction events. Contributed by Andreas Rammhold (@andir).
diff --git a/changelog.d/10344.bugfix b/changelog.d/10344.bugfix
new file mode 100644
index 0000000000..ab6eb4999f
--- /dev/null
+++ b/changelog.d/10344.bugfix
@@ -0,0 +1 @@
+Fix the user directory becoming broken (and noisy errors being logged) when knocking and room statistics are in use.
diff --git a/changelog.d/10345.misc b/changelog.d/10345.misc
new file mode 100644
index 0000000000..7424486e87
--- /dev/null
+++ b/changelog.d/10345.misc
@@ -0,0 +1 @@
+Re-enable Sytests that were disabled for the 1.37.1 release.
diff --git a/changelog.d/10347.misc b/changelog.d/10347.misc
new file mode 100644
index 0000000000..b2275a1350
--- /dev/null
+++ b/changelog.d/10347.misc
@@ -0,0 +1 @@
+Run `pyupgrade` on the codebase.
\ No newline at end of file
diff --git a/changelog.d/10348.misc b/changelog.d/10348.misc
new file mode 100644
index 0000000000..b2275a1350
--- /dev/null
+++ b/changelog.d/10348.misc
@@ -0,0 +1 @@
+Run `pyupgrade` on the codebase.
\ No newline at end of file
diff --git a/changelog.d/10349.misc b/changelog.d/10349.misc
new file mode 100644
index 0000000000..5b014e7416
--- /dev/null
+++ b/changelog.d/10349.misc
@@ -0,0 +1 @@
+Switch `application_services_txns.txn_id` database column to `BIGINT`.
diff --git a/changelog.d/10350.misc b/changelog.d/10350.misc
new file mode 100644
index 0000000000..eed2d8552a
--- /dev/null
+++ b/changelog.d/10350.misc
@@ -0,0 +1 @@
+Convert internal type variable syntax to reflect wider ecosystem use.
\ No newline at end of file
diff --git a/changelog.d/10353.doc b/changelog.d/10353.doc
new file mode 100644
index 0000000000..274ac83549
--- /dev/null
+++ b/changelog.d/10353.doc
@@ -0,0 +1 @@
+Refresh the logcontext dev documentation.
diff --git a/changelog.d/10355.bugfix b/changelog.d/10355.bugfix
new file mode 100644
index 0000000000..92df612011
--- /dev/null
+++ b/changelog.d/10355.bugfix
@@ -0,0 +1 @@
+Fix newly added `synapse_federation_server_oldest_inbound_pdu_in_staging` prometheus metric to measure age rather than timestamp.
diff --git a/changelog.d/10357.misc b/changelog.d/10357.misc
new file mode 100644
index 0000000000..7424486e87
--- /dev/null
+++ b/changelog.d/10357.misc
@@ -0,0 +1 @@
+Re-enable Sytests that were disabled for the 1.37.1 release.
diff --git a/changelog.d/10359.bugfix b/changelog.d/10359.bugfix
new file mode 100644
index 0000000000..d318f8fa08
--- /dev/null
+++ b/changelog.d/10359.bugfix
@@ -0,0 +1 @@
+Fix PostgreSQL sometimes using table scans for queries against `state_groups_state` table, taking a long time and a large amount of IO.
diff --git a/changelog.d/10360.feature b/changelog.d/10360.feature
new file mode 100644
index 0000000000..904221cb6d
--- /dev/null
+++ b/changelog.d/10360.feature
@@ -0,0 +1 @@
+Allow providing credentials to `http_proxy`.
\ No newline at end of file
diff --git a/changelog.d/10367.bugfix b/changelog.d/10367.bugfix
new file mode 100644
index 0000000000..b445556084
--- /dev/null
+++ b/changelog.d/10367.bugfix
@@ -0,0 +1 @@
+Fix `make_room_admin` failing for users that have left a private room.
\ No newline at end of file
diff --git a/changelog.d/10368.doc b/changelog.d/10368.doc
new file mode 100644
index 0000000000..10297aa424
--- /dev/null
+++ b/changelog.d/10368.doc
@@ -0,0 +1 @@
+Add delegation example for caddy in the reverse proxy documentation. Contributed by @moritzdietz.
diff --git a/changelog.d/10370.doc b/changelog.d/10370.doc
new file mode 100644
index 0000000000..8c59d98ee8
--- /dev/null
+++ b/changelog.d/10370.doc
@@ -0,0 +1 @@
+Fix some links in `docs` and `contrib`.
\ No newline at end of file
diff --git a/changelog.d/10380.misc b/changelog.d/10380.misc
new file mode 100644
index 0000000000..eed2d8552a
--- /dev/null
+++ b/changelog.d/10380.misc
@@ -0,0 +1 @@
+Convert internal type variable syntax to reflect wider ecosystem use.
\ No newline at end of file
diff --git a/changelog.d/10381.misc b/changelog.d/10381.misc
new file mode 100644
index 0000000000..eed2d8552a
--- /dev/null
+++ b/changelog.d/10381.misc
@@ -0,0 +1 @@
+Convert internal type variable syntax to reflect wider ecosystem use.
\ No newline at end of file
diff --git a/changelog.d/10382.misc b/changelog.d/10382.misc
new file mode 100644
index 0000000000..eed2d8552a
--- /dev/null
+++ b/changelog.d/10382.misc
@@ -0,0 +1 @@
+Convert internal type variable syntax to reflect wider ecosystem use.
\ No newline at end of file
diff --git a/changelog.d/10383.misc b/changelog.d/10383.misc
new file mode 100644
index 0000000000..952c1e77a8
--- /dev/null
+++ b/changelog.d/10383.misc
@@ -0,0 +1 @@
+Make the Github Actions workflow configuration more efficient.
diff --git a/changelog.d/10385.misc b/changelog.d/10385.misc
new file mode 100644
index 0000000000..e515ac09fd
--- /dev/null
+++ b/changelog.d/10385.misc
@@ -0,0 +1 @@
+Add type hints to `get_{domain,localpart}_from_id`.
diff --git a/changelog.d/10391.misc b/changelog.d/10391.misc
new file mode 100644
index 0000000000..3f191b520a
--- /dev/null
+++ b/changelog.d/10391.misc
@@ -0,0 +1 @@
+When building Debian packages for prerelease versions, set the Section accordingly.
diff --git a/changelog.d/10393.misc b/changelog.d/10393.misc
new file mode 100644
index 0000000000..e80f16d607
--- /dev/null
+++ b/changelog.d/10393.misc
@@ -0,0 +1 @@
+Add type hints and comments to event auth code.
diff --git a/changelog.d/10395.doc b/changelog.d/10395.doc
new file mode 100644
index 0000000000..4bdaea76c5
--- /dev/null
+++ b/changelog.d/10395.doc
@@ -0,0 +1 @@
+Make deprecation notice of the spam checker doc more obvious.
diff --git a/changelog.d/10396.doc b/changelog.d/10396.doc
new file mode 100644
index 0000000000..b521ad9cbf
--- /dev/null
+++ b/changelog.d/10396.doc
@@ -0,0 +1 @@
+Add instructions on installing Debian packages for release candidates.
diff --git a/changelog.d/10398.misc b/changelog.d/10398.misc
new file mode 100644
index 0000000000..326e54655a
--- /dev/null
+++ b/changelog.d/10398.misc
@@ -0,0 +1 @@
+Stagger sending of presence update to remote servers, reducing CPU spikes caused by starting many connections to remote servers at once.
diff --git a/changelog.d/10399.doc b/changelog.d/10399.doc
new file mode 100644
index 0000000000..b596ac5627
--- /dev/null
+++ b/changelog.d/10399.doc
@@ -0,0 +1 @@
+Rewrite the text of links to be clearer in the documentation.
diff --git a/changelog.d/10400.bugfix b/changelog.d/10400.bugfix
new file mode 100644
index 0000000000..bfebed8d29
--- /dev/null
+++ b/changelog.d/10400.bugfix
@@ -0,0 +1 @@
+Fix a number of logged errors caused by remote servers being down.
diff --git a/changelog.d/10404.bugfix b/changelog.d/10404.bugfix
new file mode 100644
index 0000000000..2e095b6402
--- /dev/null
+++ b/changelog.d/10404.bugfix
@@ -0,0 +1 @@
+Responses from `/make_{join,leave,knock}` no longer include signatures, which will turn out to be invalid after events are returned to `/send_{join,leave,knock}`.
diff --git a/changelog.d/10418.misc b/changelog.d/10418.misc
new file mode 100644
index 0000000000..eed2d8552a
--- /dev/null
+++ b/changelog.d/10418.misc
@@ -0,0 +1 @@
+Convert internal type variable syntax to reflect wider ecosystem use.
\ No newline at end of file
diff --git a/changelog.d/10421.misc b/changelog.d/10421.misc
new file mode 100644
index 0000000000..385cbe07af
--- /dev/null
+++ b/changelog.d/10421.misc
@@ -0,0 +1 @@
+Remove unused `events_by_room` code (tech debt).
diff --git a/changelog.d/10427.feature b/changelog.d/10427.feature
new file mode 100644
index 0000000000..091947ff22
--- /dev/null
+++ b/changelog.d/10427.feature
@@ -0,0 +1 @@
+Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric.
diff --git a/changelog.d/9450.feature b/changelog.d/9450.feature
deleted file mode 100644
index 455936a41d..0000000000
--- a/changelog.d/9450.feature
+++ /dev/null
@@ -1 +0,0 @@
-Implement refresh tokens as specified by [MSC2918](https://github.com/matrix-org/matrix-doc/pull/2918).
diff --git a/changelog.d/9721.removal b/changelog.d/9721.removal
new file mode 100644
index 0000000000..da2ba48c84
--- /dev/null
+++ b/changelog.d/9721.removal
@@ -0,0 +1 @@
+Remove functionality associated with the unused `room_stats_historical` and `user_stats_historical` tables. Contributed by @xmunoz.
diff --git a/changelog.d/9884.feature b/changelog.d/9884.feature
new file mode 100644
index 0000000000..525fd2f93c
--- /dev/null
+++ b/changelog.d/9884.feature
@@ -0,0 +1 @@
+Add a module type for the account validity feature.
diff --git a/changelog.d/9971.doc b/changelog.d/9971.doc
new file mode 100644
index 0000000000..ada68f70ca
--- /dev/null
+++ b/changelog.d/9971.doc
@@ -0,0 +1 @@
+Updated installation dependencies for newer macOS versions and ARM Macs. Contributed by Luke Walsh.
diff --git a/contrib/docker/docker-compose.yml b/contrib/docker/docker-compose.yml
index d1ecd453db..26d640c448 100644
--- a/contrib/docker/docker-compose.yml
+++ b/contrib/docker/docker-compose.yml
@@ -56,7 +56,7 @@ services:
       - POSTGRES_USER=synapse
       - POSTGRES_PASSWORD=changeme
       # ensure the database gets created correctly
-      # https://github.com/matrix-org/synapse/blob/master/docs/postgres.md#set-up-database
+      # https://matrix-org.github.io/synapse/latest/postgres.html#set-up-database
       - POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=C --lc-ctype=C
     volumes:
       # You may store the database tables in a local folder..
diff --git a/contrib/grafana/README.md b/contrib/grafana/README.md
index 4608793394..0d4e1b59b2 100644
--- a/contrib/grafana/README.md
+++ b/contrib/grafana/README.md
@@ -1,6 +1,6 @@
 # Using the Synapse Grafana dashboard
 
 0. Set up Prometheus and Grafana. Out of scope for this readme. Useful documentation about using Grafana with Prometheus: http://docs.grafana.org/features/datasources/prometheus/
-1. Have your Prometheus scrape your Synapse. https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md
+1. Have your Prometheus scrape your Synapse. https://matrix-org.github.io/synapse/latest/metrics-howto.html
 2. Import dashboard into Grafana. Download `synapse.json`. Import it to Grafana and select the correct Prometheus datasource. http://docs.grafana.org/reference/export_import/
-3. Set up required recording rules. https://github.com/matrix-org/synapse/tree/master/contrib/prometheus
+3. Set up required recording rules. [contrib/prometheus](../prometheus)
diff --git a/contrib/prometheus/README.md b/contrib/prometheus/README.md
index b3f23bcc80..4dbf648df8 100644
--- a/contrib/prometheus/README.md
+++ b/contrib/prometheus/README.md
@@ -34,7 +34,7 @@ Add a new job to the main prometheus.yml file:
 ```
 
 An example of a Prometheus configuration with workers can be found in
-[metrics-howto.md](https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md).
+[metrics-howto.md](https://matrix-org.github.io/synapse/latest/metrics-howto.html).
 
 To use `synapse.rules` add
 
diff --git a/contrib/purge_api/README.md b/contrib/purge_api/README.md
index 06b4cdb9f7..2f2e5c58cd 100644
--- a/contrib/purge_api/README.md
+++ b/contrib/purge_api/README.md
@@ -3,8 +3,9 @@ Purge history API examples
 
 # `purge_history.sh`
 
-A bash file, that uses the [purge history API](/docs/admin_api/purge_history_api.rst) to 
-purge all messages in a list of rooms up to a certain event. You can select a 
+A bash file that uses the
+[purge history API](https://matrix-org.github.io/synapse/latest/admin_api/purge_history_api.html)
+to purge all messages in a list of rooms up to a certain event. You can select a 
 timeframe or a number of messages that you want to keep in the room.
 
 Just configure the variables DOMAIN, ADMIN, ROOMS_ARRAY and TIME at the top of
@@ -12,5 +13,6 @@ the script.
 
 # `purge_remote_media.sh`
 
-A bash file, that uses the [purge history API](/docs/admin_api/purge_history_api.rst) to 
-purge all old cached remote media.
+A bash file that uses the
+[purge history API](https://matrix-org.github.io/synapse/latest/admin_api/purge_history_api.html)
+to purge all old cached remote media.
diff --git a/contrib/purge_api/purge_history.sh b/contrib/purge_api/purge_history.sh
index c45136ff53..9d5324ea1c 100644
--- a/contrib/purge_api/purge_history.sh
+++ b/contrib/purge_api/purge_history.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 # this script will use the api:
-#    https://github.com/matrix-org/synapse/blob/master/docs/admin_api/purge_history_api.rst
+#    https://matrix-org.github.io/synapse/latest/admin_api/purge_history_api.html
 # 
 # It will purge all messages in a list of rooms up to a certain event
 
diff --git a/contrib/systemd-with-workers/README.md b/contrib/systemd-with-workers/README.md
index 8d21d532bd..9b19b042e9 100644
--- a/contrib/systemd-with-workers/README.md
+++ b/contrib/systemd-with-workers/README.md
@@ -1,2 +1,3 @@
 The documentation for using systemd to manage synapse workers is now part of
-the main synapse distribution. See [docs/systemd-with-workers](../../docs/systemd-with-workers).
+the main synapse distribution. See
+[docs/systemd-with-workers](https://matrix-org.github.io/synapse/latest/systemd-with-workers/index.html).
diff --git a/contrib/systemd/README.md b/contrib/systemd/README.md
index 5d42b3464f..2844cbc8e0 100644
--- a/contrib/systemd/README.md
+++ b/contrib/systemd/README.md
@@ -2,7 +2,8 @@
 This is a setup for managing synapse with a user contributed systemd unit 
 file. It provides a `matrix-synapse` systemd unit file that should be tailored 
 to accommodate your installation in accordance with the installation 
-instructions provided in [installation instructions](../../INSTALL.md).
+instructions provided in
+[installation instructions](https://matrix-org.github.io/synapse/latest/setup/installation.html).
 
 ## Setup
 1. Under the service section, ensure the `User` variable matches which user
diff --git a/debian/changelog b/debian/changelog
index cf190b7dba..43d26fc133 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,25 @@
+matrix-synapse-py3 (1.38.0) stable; urgency=medium
+
+  * New synapse release 1.38.0.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 13 Jul 2021 13:20:56 +0100
+
+matrix-synapse-py3 (1.38.0rc3) prerelease; urgency=medium
+
+  [ Erik Johnston ]
+  * Add synapse_review_recent_signups script
+
+  [ Synapse Packaging team ]
+  * New synapse release 1.38.0rc3.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 13 Jul 2021 11:53:56 +0100
+
+matrix-synapse-py3 (1.37.1) stable; urgency=medium
+
+  * New synapse release 1.37.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Wed, 30 Jun 2021 12:24:06 +0100
+
 matrix-synapse-py3 (1.37.0) stable; urgency=medium
 
   * New synapse release 1.37.0.
diff --git a/debian/hash_password.1 b/debian/hash_password.1
index 383f452991..d64b91e7c8 100644
--- a/debian/hash_password.1
+++ b/debian/hash_password.1
@@ -1,90 +1,58 @@
-.\" generated with Ronn/v0.7.3
-.\" http://github.com/rtomayko/ronn/tree/0.7.3
-.
-.TH "HASH_PASSWORD" "1" "February 2017" "" ""
-.
+.\" generated with Ronn-NG/v0.8.0
+.\" http://github.com/apjanke/ronn-ng/tree/0.8.0
+.TH "HASH_PASSWORD" "1" "July 2021" "" ""
 .SH "NAME"
 \fBhash_password\fR \- Calculate the hash of a new password, so that passwords can be reset
-.
 .SH "SYNOPSIS"
 \fBhash_password\fR [\fB\-p\fR|\fB\-\-password\fR [password]] [\fB\-c\fR|\fB\-\-config\fR \fIfile\fR]
-.
 .SH "DESCRIPTION"
 \fBhash_password\fR calculates the hash of a supplied password using bcrypt\.
-.
 .P
 \fBhash_password\fR takes a password as a parameter either on the command line or from \fBSTDIN\fR if not supplied\.
-.
 .P
 It accepts a YAML file which can be used to specify parameters such as the number of rounds for bcrypt and a password_config section containing the pepper value used for the hashing\. By default \fBbcrypt_rounds\fR is set to \fB10\fR\.
-.
 .P
 The hashed password is written on the \fBSTDOUT\fR\.
-.
 .SH "FILES"
 A sample YAML file accepted by \fBhash_password\fR is described below:
-.
 .P
 bcrypt_rounds: 17 password_config: pepper: "random hashing pepper"
-.
 .SH "OPTIONS"
-.
 .TP
 \fB\-p\fR, \fB\-\-password\fR
 Read the password from the command line if [password] is supplied\. If not, prompt the user and read the password from \fBSTDIN\fR\. It is not recommended to type the password on the command line directly\. Use STDIN instead\.
-.
 .TP
 \fB\-c\fR, \fB\-\-config\fR
 Read the supplied YAML \fIfile\fR containing the options \fBbcrypt_rounds\fR and the \fBpassword_config\fR section containing the \fBpepper\fR value\.
-.
 .SH "EXAMPLES"
 Hash from the command line:
-.
 .IP "" 4
-.
 .nf
-
 $ hash_password \-p "p@ssw0rd"
 $2b$12$VJNqWQYfsWTEwcELfoSi4Oa8eA17movHqqi8\.X8fWFpum7SxZ9MFe
-.
 .fi
-.
 .IP "" 0
-.
 .P
 Hash from the STDIN:
-.
 .IP "" 4
-.
 .nf
-
 $ hash_password
 Password:
 Confirm password:
 $2b$12$AszlvfmJl2esnyhmn8m/kuR2tdXgROWtWxnX\.rcuAbM8ErLoUhybG
-.
 .fi
-.
 .IP "" 0
-.
 .P
 Using a config file:
-.
 .IP "" 4
-.
 .nf
-
 $ hash_password \-c config\.yml
 Password:
 Confirm password:
 $2b$12$CwI\.wBNr\.w3kmiUlV3T5s\.GT2wH7uebDCovDrCOh18dFedlANK99O
-.
 .fi
-.
 .IP "" 0
-.
 .SH "COPYRIGHT"
-This man page was written by Rahul De <\fIrahulde@swecha\.net\fR> for Debian GNU/Linux distribution\.
-.
+This man page was written by Rahul De <\fI\%mailto:rahulde@swecha\.net\fR> for Debian GNU/Linux distribution\.
 .SH "SEE ALSO"
-synctl(1), synapse_port_db(1), register_new_matrix_user(1)
+synctl(1), synapse_port_db(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/debian/hash_password.ronn b/debian/hash_password.ronn
index 0b2afa7374..eeb354602d 100644
--- a/debian/hash_password.ronn
+++ b/debian/hash_password.ronn
@@ -66,4 +66,4 @@ for Debian GNU/Linux distribution.
 
 ## SEE ALSO
 
-synctl(1), synapse_port_db(1), register_new_matrix_user(1)
+synctl(1), synapse_port_db(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/debian/manpages b/debian/manpages
index 2c30583530..4b13f52853 100644
--- a/debian/manpages
+++ b/debian/manpages
@@ -1,4 +1,5 @@
 debian/hash_password.1
 debian/register_new_matrix_user.1
 debian/synapse_port_db.1
+debian/synapse_review_recent_signups.1
 debian/synctl.1
diff --git a/debian/matrix-synapse-py3.links b/debian/matrix-synapse-py3.links
index bf19efa562..53e2965418 100644
--- a/debian/matrix-synapse-py3.links
+++ b/debian/matrix-synapse-py3.links
@@ -1,4 +1,5 @@
 opt/venvs/matrix-synapse/bin/hash_password usr/bin/hash_password
 opt/venvs/matrix-synapse/bin/register_new_matrix_user usr/bin/register_new_matrix_user
 opt/venvs/matrix-synapse/bin/synapse_port_db usr/bin/synapse_port_db
+opt/venvs/matrix-synapse/bin/synapse_review_recent_signups usr/bin/synapse_review_recent_signups
 opt/venvs/matrix-synapse/bin/synctl usr/bin/synctl
diff --git a/debian/register_new_matrix_user.1 b/debian/register_new_matrix_user.1
index 99156a7354..57bfc4e024 100644
--- a/debian/register_new_matrix_user.1
+++ b/debian/register_new_matrix_user.1
@@ -1,72 +1,47 @@
-.\" generated with Ronn/v0.7.3
-.\" http://github.com/rtomayko/ronn/tree/0.7.3
-.
-.TH "REGISTER_NEW_MATRIX_USER" "1" "February 2017" "" ""
-.
+.\" generated with Ronn-NG/v0.8.0
+.\" http://github.com/apjanke/ronn-ng/tree/0.8.0
+.TH "REGISTER_NEW_MATRIX_USER" "1" "July 2021" "" ""
 .SH "NAME"
 \fBregister_new_matrix_user\fR \- Used to register new users with a given home server when registration has been disabled
-.
 .SH "SYNOPSIS"
-\fBregister_new_matrix_user\fR options\.\.\.
-.
+\fBregister_new_matrix_user\fR options\|\.\|\.\|\.
 .SH "DESCRIPTION"
 \fBregister_new_matrix_user\fR registers new users with a given home server when registration has been disabled\. For this to work, the home server must be configured with the \'registration_shared_secret\' option set\.
-.
 .P
 This accepts user credentials such as the username, the password and whether the user is an admin, and registers the user in the homeserver database\. Also, a YAML file containing the shared secret can be provided\. If not, the shared secret can be provided via the command line\.
-.
 .P
 By default it assumes the home server URL to be \fBhttps://localhost:8448\fR\. This can be changed via the \fBserver_url\fR command line option\.
-.
 .SH "FILES"
 A sample YAML file accepted by \fBregister_new_matrix_user\fR is described below:
-.
 .IP "" 4
-.
 .nf
-
 registration_shared_secret: "s3cr3t"
-.
 .fi
-.
 .IP "" 0
-.
 .SH "OPTIONS"
-.
 .TP
 \fB\-u\fR, \fB\-\-user\fR
 Local part of the new user\. Will prompt if omitted\.
-.
 .TP
 \fB\-p\fR, \fB\-\-password\fR
 New password for user\. Will prompt if omitted\. Supplying the password on the command line is not recommended\. Use the STDIN instead\.
-.
 .TP
 \fB\-a\fR, \fB\-\-admin\fR
 Register new user as an admin\. Will prompt if omitted\.
-.
 .TP
 \fB\-c\fR, \fB\-\-config\fR
 Path to server config file containing the shared secret\.
-.
 .TP
 \fB\-k\fR, \fB\-\-shared\-secret\fR
 Shared secret as defined in server config file\. This is an optional parameter as it can be also supplied via the YAML file\.
-.
 .TP
 \fBserver_url\fR
 URL of the home server\. Defaults to \'https://localhost:8448\'\.
-.
 .SH "EXAMPLES"
-.
 .nf
-
 $ register_new_matrix_user \-u user1 \-p p@ssword \-a \-c config\.yaml
-.
 .fi
-.
 .SH "COPYRIGHT"
-This man page was written by Rahul De <\fIrahulde@swecha\.net\fR> for Debian GNU/Linux distribution\.
-.
+This man page was written by Rahul De <\fI\%mailto:rahulde@swecha\.net\fR> for Debian GNU/Linux distribution\.
 .SH "SEE ALSO"
-synctl(1), synapse_port_db(1), hash_password(1)
+synctl(1), synapse_port_db(1), hash_password(1), synapse_review_recent_signups(1)
diff --git a/debian/register_new_matrix_user.ronn b/debian/register_new_matrix_user.ronn
index 4c22e74dde..0410b1f4cd 100644
--- a/debian/register_new_matrix_user.ronn
+++ b/debian/register_new_matrix_user.ronn
@@ -58,4 +58,4 @@ for Debian GNU/Linux distribution.
 
 ## SEE ALSO
 
-synctl(1), synapse_port_db(1), hash_password(1)
+synctl(1), synapse_port_db(1), hash_password(1), synapse_review_recent_signups(1)
diff --git a/debian/synapse_port_db.1 b/debian/synapse_port_db.1
index 4e6bc04827..0e7e20001c 100644
--- a/debian/synapse_port_db.1
+++ b/debian/synapse_port_db.1
@@ -1,83 +1,56 @@
-.\" generated with Ronn/v0.7.3
-.\" http://github.com/rtomayko/ronn/tree/0.7.3
-.
-.TH "SYNAPSE_PORT_DB" "1" "February 2017" "" ""
-.
+.\" generated with Ronn-NG/v0.8.0
+.\" http://github.com/apjanke/ronn-ng/tree/0.8.0
+.TH "SYNAPSE_PORT_DB" "1" "July 2021" "" ""
 .SH "NAME"
 \fBsynapse_port_db\fR \- A script to port an existing synapse SQLite database to a new PostgreSQL database\.
-.
 .SH "SYNOPSIS"
 \fBsynapse_port_db\fR [\-v] \-\-sqlite\-database=\fIdbfile\fR \-\-postgres\-config=\fIyamlconfig\fR [\-\-curses] [\-\-batch\-size=\fIbatch\-size\fR]
-.
 .SH "DESCRIPTION"
 \fBsynapse_port_db\fR ports an existing synapse SQLite database to a new PostgreSQL database\.
-.
 .P
 SQLite database is specified with \fB\-\-sqlite\-database\fR option and PostgreSQL configuration required to connect to PostgreSQL database is provided using \fB\-\-postgres\-config\fR configuration\. The configuration is specified in YAML format\.
-.
 .SH "OPTIONS"
-.
 .TP
 \fB\-v\fR
 Print log messages in \fBdebug\fR level instead of \fBinfo\fR level\.
-.
 .TP
 \fB\-\-sqlite\-database\fR
 The snapshot of the SQLite database file\. This must not be currently used by a running synapse server\.
-.
 .TP
 \fB\-\-postgres\-config\fR
 The database config file for the PostgreSQL database\.
-.
 .TP
 \fB\-\-curses\fR
 Display a curses based progress UI\.
-.
 .SH "CONFIG FILE"
 The postgres configuration file must be a valid YAML file with the following options\.
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBdatabase\fR: Database configuration section\. This section header can be ignored and the options below may be specified as top level keys\.
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBname\fR: Connector to use when connecting to the database\. This value must be \fBpsycopg2\fR\.
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBargs\fR: DB API 2\.0 compatible arguments to send to the \fBpsycopg2\fR module\.
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBdbname\fR \- the database name
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBuser\fR \- user name used to authenticate
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBpassword\fR \- password used to authenticate
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBhost\fR \- database host address (defaults to UNIX socket if not provided)
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBport\fR \- connection port number (defaults to 5432 if not provided)
-.
 .IP "" 0
 
-.
-.IP "\(bu" 4
+.IP "\[ci]" 4
 \fBsynchronous_commit\fR: Optional\. Default is True\. If the value is \fBFalse\fR, enable asynchronous commit and don\'t wait for the server to call fsync before ending the transaction\. See: https://www\.postgresql\.org/docs/current/static/wal\-async\-commit\.html
-.
 .IP "" 0
 
-.
 .IP "" 0
-.
 .P
 Following example illustrates the configuration file format\.
-.
 .IP "" 4
-.
 .nf
-
 database:
   name: psycopg2
   args:
@@ -86,13 +59,9 @@ database:
     password: ORohmi9Eet=ohphi
     host: localhost
   synchronous_commit: false
-.
 .fi
-.
 .IP "" 0
-.
 .SH "COPYRIGHT"
-This man page was written by Sunil Mohan Adapa <\fIsunil@medhas\.org\fR> for Debian GNU/Linux distribution\.
-.
+This man page was written by Sunil Mohan Adapa <\fI\%mailto:sunil@medhas\.org\fR> for Debian GNU/Linux distribution\.
 .SH "SEE ALSO"
-synctl(1), hash_password(1), register_new_matrix_user(1)
+synctl(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/debian/synapse_port_db.ronn b/debian/synapse_port_db.ronn
index fcb32ebd0d..e167af2ba4 100644
--- a/debian/synapse_port_db.ronn
+++ b/debian/synapse_port_db.ronn
@@ -47,7 +47,7 @@ following options.
     * `args`:
       DB API 2.0 compatible arguments to send to the `psycopg2` module.
 
-      * `dbname` - the database name 
+      * `dbname` - the database name
 
       * `user` - user name used to authenticate
 
@@ -58,7 +58,7 @@ following options.
 
       * `port` - connection port number (defaults to 5432 if not
         provided)
-      
+
 
     * `synchronous_commit`:
       Optional.  Default is True.  If the value is `False`, enable
@@ -76,7 +76,7 @@ Following example illustrates the configuration file format.
         password: ORohmi9Eet=ohphi
         host: localhost
       synchronous_commit: false
-  
+
 ## COPYRIGHT
 
 This man page was written by Sunil Mohan Adapa <<sunil@medhas.org>> for
@@ -84,4 +84,4 @@ Debian GNU/Linux distribution.
 
 ## SEE ALSO
 
-synctl(1), hash_password(1), register_new_matrix_user(1)
+synctl(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/debian/synapse_review_recent_signups.1 b/debian/synapse_review_recent_signups.1
new file mode 100644
index 0000000000..2976c085f9
--- /dev/null
+++ b/debian/synapse_review_recent_signups.1
@@ -0,0 +1,26 @@
+.\" generated with Ronn-NG/v0.8.0
+.\" http://github.com/apjanke/ronn-ng/tree/0.8.0
+.TH "SYNAPSE_REVIEW_RECENT_SIGNUPS" "1" "July 2021" "" ""
+.SH "NAME"
+\fBsynapse_review_recent_signups\fR \- Print users that have recently registered on Synapse
+.SH "SYNOPSIS"
+\fBsynapse_review_recent_signups\fR \fB\-c\fR|\fB\-\-config\fR \fIfile\fR [\fB\-s\fR|\fB\-\-since\fR \fIperiod\fR] [\fB\-e\fR|\fB\-\-exclude\-emails\fR] [\fB\-u\fR|\fB\-\-only\-users\fR]
+.SH "DESCRIPTION"
+\fBsynapse_review_recent_signups\fR prints out recently registered users on a Synapse server, as well as some basic information about the user\.
+.P
+\fBsynapse_review_recent_signups\fR must be supplied with the config of the Synapse server, so that it can fetch the database config and connect to the database\.
+.SH "OPTIONS"
+.TP
+\fB\-c\fR, \fB\-\-config\fR
+The config file(s) used by the Synapse server\.
+.TP
+\fB\-s\fR, \fB\-\-since\fR
+How far back to search for newly registered users\. Defaults to 7d, i\.e\. up to seven days in the past\. Valid units are \'s\', \'m\', \'h\', \'d\', \'w\', or \'y\'\.
+.TP
+\fB\-e\fR, \fB\-\-exclude\-emails\fR
+Do not print out users that have validated emails associated with their account\.
+.TP
+\fB\-u\fR, \fB\-\-only\-users\fR
+Only print out the user IDs of recently registered users, without any additional information\.
+.SH "SEE ALSO"
+synctl(1), synapse_port_db(1), register_new_matrix_user(1), hash_password(1)
diff --git a/debian/synapse_review_recent_signups.ronn b/debian/synapse_review_recent_signups.ronn
new file mode 100644
index 0000000000..77f2b040b9
--- /dev/null
+++ b/debian/synapse_review_recent_signups.ronn
@@ -0,0 +1,37 @@
+synapse_review_recent_signups(1) -- Print users that have recently registered on Synapse
+========================================================================================
+
+## SYNOPSIS
+
+`synapse_review_recent_signups` `-c`|`--config` <file> [`-s`|`--since` <period>] [`-e`|`--exclude-emails`] [`-u`|`--only-users`]
+
+## DESCRIPTION
+
+**synapse_review_recent_signups** prints out recently registered users on a
+Synapse server, as well as some basic information about the user.
+
+`synapse_review_recent_signups` must be supplied with the config of the Synapse
+server, so that it can fetch the database config and connect to the database.
+
+
+## OPTIONS
+
+  * `-c`, `--config`:
+    The config file(s) used by the Synapse server.
+
+  * `-s`, `--since`:
+    How far back to search for newly registered users. Defaults to 7d, i.e. up
+    to seven days in the past. Valid units are 's', 'm', 'h', 'd', 'w', or 'y'.
+
+  * `-e`, `--exclude-emails`:
+    Do not print out users that have validated emails associated with their
+    account.
+
+  * `-u`, `--only-users`:
+    Only print out the user IDs of recently registered users, without any
+    additional information.
+
+
+## SEE ALSO
+
+synctl(1), synapse_port_db(1), register_new_matrix_user(1), hash_password(1)
diff --git a/debian/synctl.1 b/debian/synctl.1
index af58c8d224..2fdd770f09 100644
--- a/debian/synctl.1
+++ b/debian/synctl.1
@@ -1,63 +1,41 @@
-.\" generated with Ronn/v0.7.3
-.\" http://github.com/rtomayko/ronn/tree/0.7.3
-.
-.TH "SYNCTL" "1" "February 2017" "" ""
-.
+.\" generated with Ronn-NG/v0.8.0
+.\" http://github.com/apjanke/ronn-ng/tree/0.8.0
+.TH "SYNCTL" "1" "July 2021" "" ""
 .SH "NAME"
 \fBsynctl\fR \- Synapse server control interface
-.
 .SH "SYNOPSIS"
 Start, stop or restart synapse server\.
-.
 .P
 \fBsynctl\fR {start|stop|restart} [configfile] [\-w|\-\-worker=\fIWORKERCONFIG\fR] [\-a|\-\-all\-processes=\fIWORKERCONFIGDIR\fR]
-.
 .SH "DESCRIPTION"
 \fBsynctl\fR can be used to start, stop or restart Synapse server\. The control operation can be done on all processes or a single worker process\.
-.
 .SH "OPTIONS"
-.
 .TP
 \fBaction\fR
 The value of action should be one of \fBstart\fR, \fBstop\fR or \fBrestart\fR\.
-.
 .TP
 \fBconfigfile\fR
 Optional path of the configuration file to use\. Default value is \fBhomeserver\.yaml\fR\. The configuration file must exist for the operation to succeed\.
-.
 .TP
 \fB\-w\fR, \fB\-\-worker\fR:
-.
-.IP
-Perform start, stop or restart operations on a single worker\. Incompatible with \fB\-a\fR|\fB\-\-all\-processes\fR\. Value passed must be a valid worker\'s configuration file\.
-.
+
 .TP
 \fB\-a\fR, \fB\-\-all\-processes\fR:
-.
-.IP
-Perform start, stop or restart operations on all the workers in the given directory and the main synapse process\. Incompatible with \fB\-w\fR|\fB\-\-worker\fR\. Value passed must be a directory containing valid work configuration files\. All files ending with \fB\.yaml\fR extension shall be considered as configuration files and all other files in the directory are ignored\.
-.
+
 .SH "CONFIGURATION FILE"
 Configuration file may be generated as follows:
-.
 .IP "" 4
-.
 .nf
-
 $ python \-m synapse\.app\.homeserver \-c config\.yaml \-\-generate\-config \-\-server\-name=<server name>
-.
 .fi
-.
 .IP "" 0
-.
 .SH "ENVIRONMENT"
-.
 .TP
 \fBSYNAPSE_CACHE_FACTOR\fR
-Synapse\'s architecture is quite RAM hungry currently \- a lot of recent room data and metadata is deliberately cached in RAM in order to speed up common requests\. This will be improved in future, but for now the easiest way to either reduce the RAM usage (at the risk of slowing things down) is to set the SYNAPSE_CACHE_FACTOR environment variable\. Roughly speaking, a SYNAPSE_CACHE_FACTOR of 1\.0 will max out at around 3\-4GB of resident memory \- this is what we currently run the matrix\.org on\. The default setting is currently 0\.1, which is probably around a ~700MB footprint\. You can dial it down further to 0\.02 if desired, which targets roughly ~512MB\. Conversely you can dial it up if you need performance for lots of users and have a box with a lot of RAM\.
-.
+Synapse\'s architecture is quite RAM hungry currently \- we deliberately cache a lot of recent room data and metadata in RAM in order to speed up common requests\. We\'ll improve this in the future, but for now the easiest way to reduce the RAM usage (at the risk of slowing things down) is to set the almost\-undocumented \fBSYNAPSE_CACHE_FACTOR\fR environment variable\. The default is 0\.5, which can be decreased to reduce RAM usage in memory\-constrained environments, or increased if performance starts to degrade\.
+.IP
+However, degraded performance due to a low cache factor, common on machines with slow disks, often leads to explosions in memory use due to backlogged requests\. In this case, reducing the cache factor will make things worse\. Instead, try increasing it drastically\. 2\.0 is a good starting value\.
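
For illustration only (this is not part of the man page source), raising the cache factor before restarting Synapse might look like the following:

```sh
# Hypothetical example: give the caches more headroom on a machine with slow disks
export SYNAPSE_CACHE_FACTOR=2.0
synctl restart
```
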
 .SH "COPYRIGHT"
-This man page was written by Sunil Mohan Adapa <\fIsunil@medhas\.org\fR> for Debian GNU/Linux distribution\.
-.
+This man page was written by Sunil Mohan Adapa <\fI\%mailto:sunil@medhas\.org\fR> for Debian GNU/Linux distribution\.
 .SH "SEE ALSO"
-synapse_port_db(1), hash_password(1), register_new_matrix_user(1)
+synapse_port_db(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/debian/synctl.ronn b/debian/synctl.ronn
index 10cbda988f..eca6a16815 100644
--- a/debian/synctl.ronn
+++ b/debian/synctl.ronn
@@ -68,4 +68,4 @@ Debian GNU/Linux distribution.
 
 ## SEE ALSO
 
-synapse_port_db(1), hash_password(1), register_new_matrix_user(1)
+synapse_port_db(1), hash_password(1), register_new_matrix_user(1), synapse_review_recent_signups(1)
diff --git a/docker/README.md b/docker/README.md
index 3f28cdada3..edf917bb11 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -45,7 +45,7 @@ docker run -it --rm \
 ```
 
 For information on picking a suitable server name, see
-https://github.com/matrix-org/synapse/blob/master/INSTALL.md.
+https://matrix-org.github.io/synapse/latest/setup/installation.html.
 
 The above command will generate a `homeserver.yaml` in (typically)
 `/var/lib/docker/volumes/synapse-data/_data`. You should check this file, and
@@ -139,7 +139,7 @@ For documentation on using a reverse proxy, see
 https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
 
 For more information on enabling TLS support in synapse itself, see
-https://github.com/matrix-org/synapse/blob/master/INSTALL.md#tls-certificates. Of
+https://matrix-org.github.io/synapse/latest/setup/installation.html#tls-certificates. Of
 course, you will need to expose the TLS port from the container with a `-p`
 argument to `docker run`.
 
diff --git a/docker/build_debian.sh b/docker/build_debian.sh
index f426d2b77b..f572ed9aa0 100644
--- a/docker/build_debian.sh
+++ b/docker/build_debian.sh
@@ -15,6 +15,20 @@ cd /synapse/build
 dch -M -l "+$DIST" "build for $DIST"
 dch -M -r "" --force-distribution --distribution "$DIST"
 
+# if this is a prerelease, set the Section accordingly.
+#
+# When the package is later added to the package repo, reprepro will use the
+# Section to determine which "component" it should go into (see
+# https://manpages.debian.org/stretch/reprepro/reprepro.1.en.html#GUESSING)
+
+DEB_VERSION=`dpkg-parsechangelog -SVersion`
+case $DEB_VERSION in
+    *rc*|*a*|*b*|*c*)
+        sed -ie '/^Section:/c\Section: prerelease' debian/control
+        ;;
+esac
+
+
 dpkg-buildpackage -us -uc
 
 ls -l ..
diff --git a/docs/.sample_config_header.yaml b/docs/.sample_config_header.yaml
index 8c9b31acdb..09e86ca0ca 100644
--- a/docs/.sample_config_header.yaml
+++ b/docs/.sample_config_header.yaml
@@ -8,7 +8,8 @@
 #
 # It is *not* intended to be copied and used as the basis for a real
 # homeserver.yaml. Instead, if you are starting from scratch, please generate
-# a fresh config using Synapse by following the instructions in INSTALL.md.
+# a fresh config using Synapse by following the instructions in
+# https://matrix-org.github.io/synapse/latest/setup/installation.html.
 
 # Configuration options that take a time period can be set using a number
 # followed by a letter. Letters have the following meanings:
diff --git a/docs/MSC1711_certificates_FAQ.md b/docs/MSC1711_certificates_FAQ.md
index ce8189d4ed..7d71c190ab 100644
--- a/docs/MSC1711_certificates_FAQ.md
+++ b/docs/MSC1711_certificates_FAQ.md
@@ -14,7 +14,7 @@ upgraded, however it may be of use to those with old installs returning to the
 project.
 
 If you are setting up a server from scratch you almost certainly should look at
-the [installation guide](../INSTALL.md) instead.
+the [installation guide](setup/installation.md) instead.
 
 ## Introduction
 The goal of Synapse 0.99.0 is to act as a stepping stone to Synapse 1.0.0. It
@@ -132,7 +132,7 @@ your domain, you can simply route all traffic through the reverse proxy by
 updating the SRV record appropriately (or removing it, if the proxy listens on
 8448).
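
For illustration only (a hypothetical DNS entry, not taken from the FAQ itself), an SRV record directing federation traffic at a reverse proxy listening on port 443 might look like:

```
_matrix._tcp.example.com. 3600 IN SRV 10 5 443 proxy.example.com.
```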
 
-See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
+See [the reverse proxy documentation](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 #### Option 3: add a .well-known file to delegate your matrix traffic
@@ -303,7 +303,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will
 find it easier to direct federation traffic to a reverse proxy and manage their
 own TLS certificates, and this is a supported configuration.
 
-See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
+See [the reverse proxy documentation](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 ### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy?
diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md
index b033fc03ef..61bed1e0d5 100644
--- a/docs/admin_api/media_admin_api.md
+++ b/docs/admin_api/media_admin_api.md
@@ -47,7 +47,7 @@ The API returns a JSON body like the following:
 ## List all media uploaded by a user
 
 Listing all media that has been uploaded by a local user can be achieved through
-the use of the [List media of a user](user_admin_api.rst#list-media-of-a-user)
+the use of the [List media of a user](user_admin_api.md#list-media-of-a-user)
 Admin API.
 
 # Quarantine media
@@ -257,7 +257,7 @@ URL Parameters
 * `server_name`: string - The name of your local server (e.g `matrix.org`).
 * `before_ts`: string representing a positive integer - Unix timestamp in ms.
 Files that were last used before this timestamp will be deleted. It is the timestamp of
-last access and not the timestamp creation. 
+last access and not the timestamp creation.
 * `size_gt`: Optional - string representing a positive integer - Size of the media in bytes.
 Files that are larger will be deleted. Defaults to `0`.
 * `keep_profiles`: Optional - string representing a boolean - Switch to also delete files
diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md
index bb7828a525..48777dd231 100644
--- a/docs/admin_api/rooms.md
+++ b/docs/admin_api/rooms.md
@@ -1,13 +1,9 @@
 # Contents
 - [List Room API](#list-room-api)
-  * [Parameters](#parameters)
-  * [Usage](#usage)
 - [Room Details API](#room-details-api)
 - [Room Members API](#room-members-api)
 - [Room State API](#room-state-api)
 - [Delete Room API](#delete-room-api)
-  * [Parameters](#parameters-1)
-  * [Response](#response)
   * [Undoing room shutdowns](#undoing-room-shutdowns)
 - [Make Room Admin API](#make-room-admin-api)
 - [Forward Extremities Admin API](#forward-extremities-admin-api)
@@ -19,7 +15,7 @@ The List Room admin API allows server admins to get a list of rooms on their
 server. There are various parameters available that allow for filtering and
 sorting the returned list. This API supports pagination.
 
-## Parameters
+**Parameters**
 
 The following query parameters are available:
 
@@ -46,6 +42,8 @@ The following query parameters are available:
 * `search_term` - Filter rooms by their room name. Search term can be contained in any
                   part of the room name. Defaults to no filtering.
 
+**Response**
+
 The following fields are possible in the JSON response body:
 
 * `rooms` - An array of objects, each containing information about a room.
@@ -79,17 +77,15 @@ The following fields are possible in the JSON response body:
                  Use `prev_batch` for the `from` value in the next request to
                  get the "previous page" of results.
 
-## Usage
+The API is:
 
 A standard request with no filtering:
 
 ```
 GET /_synapse/admin/v1/rooms
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```jsonc
 {
@@ -137,11 +133,9 @@ Filtering by room name:
 
 ```
 GET /_synapse/admin/v1/rooms?search_term=TWIM
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```json
 {
@@ -172,11 +166,9 @@ Paginating through a list of rooms:
 
 ```
 GET /_synapse/admin/v1/rooms?order_by=size
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```jsonc
 {
@@ -228,11 +220,9 @@ parameter to the value of `next_token`.
 
 ```
 GET /_synapse/admin/v1/rooms?order_by=size&from=100
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```jsonc
 {
@@ -304,17 +294,13 @@ The following fields are possible in the JSON response body:
 * `history_visibility` - Who can see the room history. One of: ["invited", "joined", "shared", "world_readable"].
 * `state_events` - Total number of state_events of a room. Complexity of the room.
 
-## Usage
-
-A standard request:
+The API is:
 
 ```
 GET /_synapse/admin/v1/rooms/<room_id>
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```json
 {
@@ -347,17 +333,13 @@ The response includes the following fields:
 * `members` - A list of all the members that are present in the room, represented by their ids.
 * `total` - Total number of members in the room.
 
-## Usage
-
-A standard request:
+The API is:
 
 ```
 GET /_synapse/admin/v1/rooms/<room_id>/members
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```json
 {
@@ -378,17 +360,13 @@ The response includes the following fields:
 
 * `state` - The current state of the room at the time of request.
 
-## Usage
-
-A standard request:
+The API is:
 
 ```
 GET /_synapse/admin/v1/rooms/<room_id>/state
-
-{}
 ```
 
-Response:
+A response body like the following is returned:
 
 ```json
 {
@@ -432,6 +410,7 @@ DELETE /_synapse/admin/v1/rooms/<room_id>
 ```
 
 with a body of:
+
 ```json
 {
     "new_room_user_id": "@someuser:example.com",
@@ -461,7 +440,7 @@ A response body like the following is returned:
 }
 ```
 
-## Parameters
+**Parameters**
 
 The following parameters should be set in the URL:
 
@@ -491,7 +470,7 @@ The following JSON body parameters are available:
 
 The JSON body must not be empty. The body must be at least `{}`.
 
-## Response
+**Response**
 
 The following fields are returned in the JSON response body:
 
@@ -548,10 +527,10 @@ By default the server admin (the caller) is granted power, but another user can
 optionally be specified, e.g.:
 
 ```
-    POST /_synapse/admin/v1/rooms/<room_id_or_alias>/make_room_admin
-    {
-        "user_id": "@foo:example.com"
-    }
+POST /_synapse/admin/v1/rooms/<room_id_or_alias>/make_room_admin
+{
+    "user_id": "@foo:example.com"
+}
 ```
 
 # Forward Extremities Admin API
@@ -565,7 +544,7 @@ extremities accumulate in a room, performance can become degraded. For details,
 To check the status of forward extremities for a room:
 
 ```
-    GET /_synapse/admin/v1/rooms/<room_id_or_alias>/forward_extremities
+GET /_synapse/admin/v1/rooms/<room_id_or_alias>/forward_extremities
 ```
 
 A response as follows will be returned:
@@ -581,7 +560,7 @@ A response as follows will be returned:
       "received_ts": 1611263016761
     }
   ]
-}    
+}
 ```
 
 ## Deleting forward extremities
@@ -594,7 +573,7 @@ If a room has lots of forward extremities, the extra can be
 deleted as follows:
 
 ```
-    DELETE /_synapse/admin/v1/rooms/<room_id_or_alias>/forward_extremities
+DELETE /_synapse/admin/v1/rooms/<room_id_or_alias>/forward_extremities
 ```
 
 A response as follows will be returned, indicating the amount of forward extremities
diff --git a/docs/admin_api/server_notices.md b/docs/admin_api/server_notices.md
index 858b052b84..323138491a 100644
--- a/docs/admin_api/server_notices.md
+++ b/docs/admin_api/server_notices.md
@@ -45,4 +45,4 @@ Once the notice has been sent, the API will return the following response:
 ```
 
 Note that server notices must be enabled in `homeserver.yaml` before this API
-can be used. See [server_notices.md](../server_notices.md) for more information.
+can be used. See [the server notices documentation](../server_notices.md) for more information.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index ef1e735e33..4a65d0c3bc 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -36,7 +36,17 @@ It returns a JSON body like the following:
     "creation_ts": 1560432506,
     "appservice_id": null,
     "consent_server_notice_sent": null,
-    "consent_version": null
+    "consent_version": null,
+    "external_ids": [
+        {
+            "auth_provider": "<provider1>",
+            "external_id": "<user_id_provider_1>"
+        },
+        {
+            "auth_provider": "<provider2>",
+            "external_id": "<user_id_provider_2>"
+        }
+    ]
 }
 ```
 
diff --git a/docs/consent_tracking.md b/docs/consent_tracking.md
index 3f997e5903..911a1f95db 100644
--- a/docs/consent_tracking.md
+++ b/docs/consent_tracking.md
@@ -152,7 +152,7 @@ version of the policy. To do so:
 
  * ensure that the consent resource is configured, as in the previous section
 
- * ensure that server notices are configured, as in [server_notices.md](server_notices.md).
+ * ensure that server notices are configured, as in [the server notice documentation](server_notices.md).
 
  * Add `server_notice_content` under `user_consent` in `homeserver.yaml`. For
    example:
diff --git a/docs/delegate.md b/docs/delegate.md
index 208ddb6277..05cb635047 100644
--- a/docs/delegate.md
+++ b/docs/delegate.md
@@ -74,7 +74,7 @@ We no longer actively recommend against using a reverse proxy. Many admins will
 find it easier to direct federation traffic to a reverse proxy and manage their
 own TLS certificates, and this is a supported configuration.
 
-See [reverse_proxy.md](reverse_proxy.md) for information on setting up a
+See [the reverse proxy documentation](reverse_proxy.md) for information on setting up a
 reverse proxy.
 
 ### Do I still need to give my TLS certificates to Synapse if I am using a reverse proxy?
diff --git a/docs/federate.md b/docs/federate.md
index 89c2b19638..5107f995be 100644
--- a/docs/federate.md
+++ b/docs/federate.md
@@ -14,7 +14,7 @@ you set the `server_name` to match your machine's public DNS hostname.
 
 For this default configuration to work, you will need to listen for TLS
 connections on port 8448. The preferred way to do that is by using a
-reverse proxy: see [reverse_proxy.md](reverse_proxy.md) for instructions
+reverse proxy: see [the reverse proxy documentation](reverse_proxy.md) for instructions
 on how to correctly set one up.
 
 In some cases you might not want to run Synapse on the machine that has
@@ -23,7 +23,7 @@ traffic to use a different port than 8448. For example, you might want to
 have your user names look like `@user:example.com`, but you want to run
 Synapse on `synapse.example.com` on port 443. This can be done using
 delegation, which allows an admin to control where federation traffic should
-be sent. See [delegate.md](delegate.md) for instructions on how to set this up.
+be sent. See [the delegation documentation](delegate.md) for instructions on how to set this up.
 
 Once federation has been configured, you should be able to join a room over
 federation. A good place to start is `#synapse:matrix.org` - a room for
@@ -44,8 +44,8 @@ a complicated dance which requires connections in both directions).
 
 Another common problem is that people on other servers can't join rooms that
 you invite them to. This can be caused by an incorrectly-configured reverse
-proxy: see [reverse_proxy.md](reverse_proxy.md) for instructions on how to correctly
-configure a reverse proxy.
+proxy: see [the reverse proxy documentation](reverse_proxy.md) for instructions on how
+to correctly configure a reverse proxy.
 
 ### Known issues
 
diff --git a/docs/log_contexts.md b/docs/log_contexts.md
index fe30ca2791..d49dce8830 100644
--- a/docs/log_contexts.md
+++ b/docs/log_contexts.md
 The `synapse.logging.context` module provides facilities for managing
 the current log context (as well as providing the `LoggingContextFilter`
 class).
 
-Deferreds make the whole thing complicated, so this document describes
+Asynchronous functions make the whole thing complicated, so this document describes
 how it all works, and how to write code which follows the rules.
 
-##Logcontexts without Deferreds
+In this document, "awaitable" refers to any object which can be `await`ed. In the context of
+Synapse, that normally means either a coroutine or a Twisted 
+[`Deferred`](https://twistedmatrix.com/documents/current/api/twisted.internet.defer.Deferred.html).
 
-In the absence of any Deferred voodoo, things are simple enough. As with
+## Logcontexts without asynchronous code
+
+In the absence of any asynchronous voodoo, things are simple enough. As with
 any code of this nature, the rule is that our function should leave
 things as it found them:
 
@@ -55,126 +59,109 @@ def do_request_handling():
     logger.debug("phew")
 ```
 
-## Using logcontexts with Deferreds
+## Using logcontexts with awaitables
 
-Deferreds --- and in particular, `defer.inlineCallbacks` --- break the
-linear flow of code so that there is no longer a single entry point
-where we should set the logcontext and a single exit point where we
-should remove it.
+Awaitables break the linear flow of code so that there is no longer a single entry point
+where we should set the logcontext and a single exit point where we should remove it.
 
 Consider the example above, where `do_request_handling` needs to do some
-blocking operation, and returns a deferred:
+blocking operation, and returns an awaitable:
 
 ```python
-@defer.inlineCallbacks
-def handle_request(request_id):
+async def handle_request(request_id):
     with context.LoggingContext() as request_context:
         request_context.request = request_id
-        yield do_request_handling()
+        await do_request_handling()
         logger.debug("finished")
 ```
 
 In the above flow:
 
 -   The logcontext is set
--   `do_request_handling` is called, and returns a deferred
--   `handle_request` yields the deferred
--   The `inlineCallbacks` wrapper of `handle_request` returns a deferred
+-   `do_request_handling` is called, and returns an awaitable
+-   `handle_request` awaits the awaitable
+-   Execution of `handle_request` is suspended
 
 So we have stopped processing the request (and will probably go on to
 start processing the next), without clearing the logcontext.
 
 To circumvent this problem, synapse code assumes that, wherever you have
-a deferred, you will want to yield on it. To that end, whereever
-functions return a deferred, we adopt the following conventions:
+an awaitable, you will want to `await` it. To that end, wherever
+functions return awaitables, we adopt the following conventions:
 
-**Rules for functions returning deferreds:**
+**Rules for functions returning awaitables:**
 
-> -   If the deferred is already complete, the function returns with the
+> -   If the awaitable is already complete, the function returns with the
 >     same logcontext it started with.
-> -   If the deferred is incomplete, the function clears the logcontext
->     before returning; when the deferred completes, it restores the
+> -   If the awaitable is incomplete, the function clears the logcontext
+>     before returning; when the awaitable completes, it restores the
 >     logcontext before running any callbacks.
 
 That sounds complicated, but actually it means a lot of code (including
 the example above) "just works". There are two cases:
 
--   If `do_request_handling` returns a completed deferred, then the
+-   If `do_request_handling` returns a completed awaitable, then the
     logcontext will still be in place. In this case, execution will
-    continue immediately after the `yield`; the "finished" line will
+    continue immediately after the `await`; the "finished" line will
     be logged against the right context, and the `with` block restores
     the original context before we return to the caller.
--   If the returned deferred is incomplete, `do_request_handling` clears
+-   If the returned awaitable is incomplete, `do_request_handling` clears
     the logcontext before returning. The logcontext is therefore clear
-    when `handle_request` yields the deferred. At that point, the
-    `inlineCallbacks` wrapper adds a callback to the deferred, and
-    returns another (incomplete) deferred to the caller, and it is safe
-    to begin processing the next request.
-
-    Once `do_request_handling`'s deferred completes, it will reinstate
-    the logcontext, before running the callback added by the
-    `inlineCallbacks` wrapper. That callback runs the second half of
-    `handle_request`, so again the "finished" line will be logged
-    against the right context, and the `with` block restores the
-    original context.
+    when `handle_request` `await`s the awaitable.
+
+    Once `do_request_handling`'s awaitable completes, it will reinstate
+    the logcontext, before running the second half of `handle_request`,
+    so again the "finished" line will be logged against the right context,
+    and the `with` block restores the original context.
 
 As an aside, it's worth noting that `handle_request` follows our rules
--though that only matters if the caller has its own logcontext which it
+- though that only matters if the caller has its own logcontext which it
 cares about.
 
 The following sections describe pitfalls and helpful patterns when
 implementing these rules.
 
-Always yield your deferreds
----------------------------
+Always await your awaitables
+----------------------------
 
-Whenever you get a deferred back from a function, you should `yield` on
-it as soon as possible. (Returning it directly to your caller is ok too,
-if you're not doing `inlineCallbacks`.) Do not pass go; do not do any
-logging; do not call any other functions.
+Whenever you get an awaitable back from a function, you should `await` on
+it as soon as possible. Do not pass go; do not do any logging; do not
+call any other functions.
 
 ```python
-@defer.inlineCallbacks
-def fun():
+async def fun():
     logger.debug("starting")
-    yield do_some_stuff()       # just like this
+    await do_some_stuff()       # just like this
 
-    d = more_stuff()
-    result = yield d            # also fine, of course
+    coro = more_stuff()
+    result = await coro         # also fine, of course
 
     return result
-
-def nonInlineCallbacksFun():
-    logger.debug("just a wrapper really")
-    return do_some_stuff()      # this is ok too - the caller will yield on
-                                # it anyway.
 ```
 
 Provided this pattern is followed all the way back up the callchain
 to where the logcontext was set, this will make things work out ok:
 provided `do_some_stuff` and `more_stuff` follow the rules above, then
-so will `fun` (as wrapped by `inlineCallbacks`) and
-`nonInlineCallbacksFun`.
+so will `fun`.
 
-It's all too easy to forget to `yield`: for instance if we forgot that
-`do_some_stuff` returned a deferred, we might plough on regardless. This
+It's all too easy to forget to `await`: for instance if we forgot that
+`do_some_stuff` returned an awaitable, we might plough on regardless. This
 leads to a mess; it will probably work itself out eventually, but not
 before a load of stuff has been logged against the wrong context.
 (Normally, other things will break, more obviously, if you forget to
-`yield`, so this tends not to be a major problem in practice.)
+`await`, so this tends not to be a major problem in practice.)
 
 Of course sometimes you need to do something a bit fancier with your
-Deferreds - not all code follows the linear A-then-B-then-C pattern.
+awaitables - not all code follows the linear A-then-B-then-C pattern.
 Notes on implementing more complex patterns are in later sections.
 
-## Where you create a new Deferred, make it follow the rules
+## Where you create a new awaitable, make it follow the rules
 
-Most of the time, a Deferred comes from another synapse function.
-Sometimes, though, we need to make up a new Deferred, or we get a
-Deferred back from external code. We need to make it follow our rules.
+Most of the time, an awaitable comes from another synapse function.
+Sometimes, though, we need to make up a new awaitable, or we get an awaitable
+back from external code. We need to make it follow our rules.
 
-The easy way to do it is with a combination of `defer.inlineCallbacks`,
-and `context.PreserveLoggingContext`. Suppose we want to implement
+The easy way to do it is by using `context.make_deferred_yieldable`. Suppose we want to implement
 `sleep`, which returns a deferred which will run its callbacks after a
 given number of seconds. That might look like:
 
@@ -186,25 +173,12 @@ def get_sleep_deferred(seconds):
     return d
 ```
 
-That doesn't follow the rules, but we can fix it by wrapping it with
-`PreserveLoggingContext` and `yield` ing on it:
+That doesn't follow the rules, but we can fix it by calling it through
+`context.make_deferred_yieldable`:
 
 ```python
-@defer.inlineCallbacks
-def sleep(seconds):
-    with PreserveLoggingContext():
-        yield get_sleep_deferred(seconds)
-```
-
-This technique works equally for external functions which return
-deferreds, or deferreds we have made ourselves.
-
-You can also use `context.make_deferred_yieldable`, which just does the
-boilerplate for you, so the above could be written:
-
-```python
-def sleep(seconds):
-    return context.make_deferred_yieldable(get_sleep_deferred(seconds))
+async def sleep(seconds):
+    return await context.make_deferred_yieldable(get_sleep_deferred(seconds))
 ```
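
To see how the rewritten `sleep` slots into the earlier request-handling pattern, here is a small usage sketch. It is an illustration only, reusing the `context` and `logger` names and the `handle_request` shape from the examples above.

```python
async def handle_request(request_id):
    with context.LoggingContext() as request_context:
        request_context.request = request_id
        # `sleep` follows the logcontext rules: the logcontext is cleared while
        # we are suspended, and restored before the next line runs.
        await sleep(1)
        logger.debug("finished")  # logged against the correct request context
```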
 
 ## Fire-and-forget
@@ -213,20 +187,18 @@ Sometimes you want to fire off a chain of execution, but not wait for
 its result. That might look a bit like this:
 
 ```python
-@defer.inlineCallbacks
-def do_request_handling():
-    yield foreground_operation()
+async def do_request_handling():
+    await foreground_operation()
 
     # *don't* do this
     background_operation()
 
     logger.debug("Request handling complete")
 
-@defer.inlineCallbacks
-def background_operation():
-    yield first_background_step()
+async def background_operation():
+    await first_background_step()
     logger.debug("Completed first step")
-    yield second_background_step()
+    await second_background_step()
     logger.debug("Completed second step")
 ```
 
@@ -235,13 +207,13 @@ The above code does a couple of steps in the background after
 against the `request_context` logcontext, which may or may not be
 desirable. There are two big problems with the above, however. The first
 problem is that, if `background_operation` returns an incomplete
-Deferred, it will expect its caller to `yield` immediately, so will have
+awaitable, it will expect its caller to `await` immediately, so will have
 cleared the logcontext. In this example, that means that 'Request
 handling complete' will be logged without any context.
 
 The second problem, which is potentially even worse, is that when the
-Deferred returned by `background_operation` completes, it will restore
-the original logcontext. There is nothing waiting on that Deferred, so
+awaitable returned by `background_operation` completes, it will restore
+the original logcontext. There is nothing waiting on that awaitable, so
 the logcontext will leak into the reactor and possibly get attached to
 some arbitrary future operation.
 
@@ -254,9 +226,8 @@ deferred completes will be the empty logcontext), and will restore the
 current logcontext before continuing the foreground process:
 
 ```python
-@defer.inlineCallbacks
-def do_request_handling():
-    yield foreground_operation()
+async def do_request_handling():
+    await foreground_operation()
 
     # start background_operation off in the empty logcontext, to
     # avoid leaking the current context into the reactor.
@@ -274,16 +245,15 @@ Obviously that option means that the operations done in
 
 The second option is to use `context.run_in_background`, which wraps a
 function so that it doesn't reset the logcontext even when it returns
-an incomplete deferred, and adds a callback to the returned deferred to
+an incomplete awaitable, and adds a callback to the returned awaitable to
 reset the logcontext. In other words, it turns a function that follows
-the Synapse rules about logcontexts and Deferreds into one which behaves
+the Synapse rules about logcontexts and awaitables into one which behaves
 more like an external function --- the opposite operation to that
 described in the previous section. It can be used like this:
 
 ```python
-@defer.inlineCallbacks
-def do_request_handling():
-    yield foreground_operation()
+async def do_request_handling():
+    await foreground_operation()
 
     context.run_in_background(background_operation)
 
@@ -294,152 +264,53 @@ def do_request_handling():
 ## Passing synapse deferreds into third-party functions
 
 A typical example of this is where we want to collect together two or
-more deferred via `defer.gatherResults`:
+more awaitables via `defer.gatherResults`:
 
 ```python
-d1 = operation1()
-d2 = operation2()
-d3 = defer.gatherResults([d1, d2])
+a1 = operation1()
+a2 = operation2()
+a3 = defer.gatherResults([a1, a2])
 ```
 
 This is really a variation of the fire-and-forget problem above, in that
-we are firing off `d1` and `d2` without yielding on them. The difference
+we are firing off `a1` and `a2` without awaiting them. The difference
 is that we now have third-party code attached to their callbacks. Anyway
 either technique given in the [Fire-and-forget](#fire-and-forget)
 section will work.
 
-Of course, the new Deferred returned by `gatherResults` needs to be
+Of course, the new awaitable returned by `gatherResults` needs to be
 wrapped in order to make it follow the logcontext rules before we can
-yield it, as described in [Where you create a new Deferred, make it
+yield it, as described in [Where you create a new awaitable, make it
 follow the
-rules](#where-you-create-a-new-deferred-make-it-follow-the-rules).
+rules](#where-you-create-a-new-awaitable-make-it-follow-the-rules).
 
 So, option one: reset the logcontext before starting the operations to
 be gathered:
 
 ```python
-@defer.inlineCallbacks
-def do_request_handling():
+async def do_request_handling():
     with PreserveLoggingContext():
-        d1 = operation1()
-        d2 = operation2()
-        result = yield defer.gatherResults([d1, d2])
+        a1 = operation1()
+        a2 = operation2()
+        result = await defer.gatherResults([a1, a2])
 ```
 
 In this case particularly, though, option two, of using
-`context.preserve_fn` almost certainly makes more sense, so that
+`context.run_in_background` almost certainly makes more sense, so that
 `operation1` and `operation2` are both logged against the original
 logcontext. This looks like:
 
 ```python
-@defer.inlineCallbacks
-def do_request_handling():
-    d1 = context.preserve_fn(operation1)()
-    d2 = context.preserve_fn(operation2)()
+async def do_request_handling():
+    a1 = context.run_in_background(operation1)
+    a2 = context.run_in_background(operation2)
 
-    with PreserveLoggingContext():
-        result = yield defer.gatherResults([d1, d2])
+    result = await make_deferred_yieldable(defer.gatherResults([a1, a2]))
 ```
 
-## Was all this really necessary?
-
-The conventions used work fine for a linear flow where everything
-happens in series via `defer.inlineCallbacks` and `yield`, but are
-certainly tricky to follow for any more exotic flows. It's hard not to
-wonder if we could have done something else.
-
-We're not going to rewrite Synapse now, so the following is entirely of
-academic interest, but I'd like to record some thoughts on an
-alternative approach.
-
-I briefly prototyped some code following an alternative set of rules. I
-think it would work, but I certainly didn't get as far as thinking how
-it would interact with concepts as complicated as the cache descriptors.
-
-My alternative rules were:
-
--   functions always preserve the logcontext of their caller, whether or
-    not they are returning a Deferred.
--   Deferreds returned by synapse functions run their callbacks in the
-    same context as the function was orignally called in.
-
-The main point of this scheme is that everywhere that sets the
-logcontext is responsible for clearing it before returning control to
-the reactor.
-
-So, for example, if you were the function which started a
-`with LoggingContext` block, you wouldn't `yield` within it --- instead
-you'd start off the background process, and then leave the `with` block
-to wait for it:
-
-```python
-def handle_request(request_id):
-    with context.LoggingContext() as request_context:
-        request_context.request = request_id
-        d = do_request_handling()
-
-    def cb(r):
-        logger.debug("finished")
-
-    d.addCallback(cb)
-    return d
-```
-
-(in general, mixing `with LoggingContext` blocks and
-`defer.inlineCallbacks` in the same function leads to slighly
-counter-intuitive code, under this scheme).
-
-Because we leave the original `with` block as soon as the Deferred is
-returned (as opposed to waiting for it to be resolved, as we do today),
-the logcontext is cleared before control passes back to the reactor; so
-if there is some code within `do_request_handling` which needs to wait
-for a Deferred to complete, there is no need for it to worry about
-clearing the logcontext before doing so:
-
-```python
-def handle_request():
-    r = do_some_stuff()
-    r.addCallback(do_some_more_stuff)
-    return r
-```
-
---- and provided `do_some_stuff` follows the rules of returning a
-Deferred which runs its callbacks in the original logcontext, all is
-happy.
-
-The business of a Deferred which runs its callbacks in the original
-logcontext isn't hard to achieve --- we have it today, in the shape of
-`context._PreservingContextDeferred`:
-
-```python
-def do_some_stuff():
-    deferred = do_some_io()
-    pcd = _PreservingContextDeferred(LoggingContext.current_context())
-    deferred.chainDeferred(pcd)
-    return pcd
-```
-
-It turns out that, thanks to the way that Deferreds chain together, we
-automatically get the property of a context-preserving deferred with
-`defer.inlineCallbacks`, provided the final Defered the function
-`yields` on has that property. So we can just write:
-
-```python
-@defer.inlineCallbacks
-def handle_request():
-    yield do_some_stuff()
-    yield do_some_more_stuff()
-```
-
-To conclude: I think this scheme would have worked equally well, with
-less danger of messing it up, and probably made some more esoteric code
-easier to write. But again --- changing the conventions of the entire
-Synapse codebase is not a sensible option for the marginal improvement
-offered.
-
-## A note on garbage-collection of Deferred chains
+## A note on garbage-collection of awaitable chains
 
-It turns out that our logcontext rules do not play nicely with Deferred
+It turns out that our logcontext rules do not play nicely with awaitable
 chains which get orphaned and garbage-collected.
 
 Imagine we have some code that looks like this:
@@ -451,13 +322,12 @@ def on_something_interesting():
     for d in listener_queue:
         d.callback("foo")
 
-@defer.inlineCallbacks
-def await_something_interesting():
-    new_deferred = defer.Deferred()
-    listener_queue.append(new_deferred)
+async def await_something_interesting():
+    new_awaitable = defer.Deferred()
+    listener_queue.append(new_awaitable)
 
     with PreserveLoggingContext():
-        yield new_deferred
+        await new_awaitable
 ```
 
 Obviously, the idea here is that we have a bunch of things which are
@@ -476,18 +346,19 @@ def reset_listener_queue():
     listener_queue.clear()
 ```
 
-So, both ends of the deferred chain have now dropped their references,
-and the deferred chain is now orphaned, and will be garbage-collected at
-some point. Note that `await_something_interesting` is a generator
-function, and when Python garbage-collects generator functions, it gives
-them a chance to clean up by making the `yield` raise a `GeneratorExit`
+So, both ends of the awaitable chain have now dropped their references,
+and the awaitable chain is now orphaned, and will be garbage-collected at
+some point. Note that `await_something_interesting` is a coroutine,
+which Python implements as a generator function. When Python
+garbage-collects generator functions, it gives them a chance to
+clean up by making the `await` (or `yield`) raise a `GeneratorExit`
 exception. In our case, that means that the `__exit__` handler of
 `PreserveLoggingContext` will carefully restore the request context, but
 there is now nothing waiting for its return, so the request context is
 never cleared.
 
-To reiterate, this problem only arises when *both* ends of a deferred
-chain are dropped. Dropping the the reference to a deferred you're
-supposed to be calling is probably bad practice, so this doesn't
+To reiterate, this problem only arises when *both* ends of an awaitable
+chain are dropped. Dropping the reference to an awaitable you're
+supposed to be awaiting is bad practice, so this doesn't
 actually happen too much. Unfortunately, when it does happen, it will
 lead to leaked logcontexts which are incredibly hard to track down.
diff --git a/docs/modules.md b/docs/modules.md
index 3a9fab61b8..c4cb7018f7 100644
--- a/docs/modules.md
+++ b/docs/modules.md
@@ -63,7 +63,7 @@ Modules can register web resources onto Synapse's web server using the following
 API method:
 
 ```python
-def ModuleApi.register_web_resource(path: str, resource: IResource)
+def ModuleApi.register_web_resource(path: str, resource: IResource) -> None
 ```
 
 The path is the full absolute path to register the resource at. For example, if you
@@ -91,12 +91,17 @@ are split in categories. A single module may implement callbacks from multiple c
 and is under no obligation to implement all callbacks from the categories it registers
 callbacks for.
 
+Modules can register callbacks using one of the module API's `register_[...]_callbacks`
+methods. The callback functions are passed to these methods as keyword arguments, with
+the callback name as the argument name and the function as its value. This is demonstrated
+in the example below. A `register_[...]_callbacks` method exists for each module type
+documented in this section.
+
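For example, a minimal module sketch registering a spam checker callback might look like the following. The class name and callback body are hypothetical; only the registration method and the `user_may_invite` signature come from this documentation, and the module API object is assumed to be passed to the module's constructor.

```python
class ExampleSpamChecker:
    def __init__(self, config: dict, api):
        self._api = api
        # Callbacks are passed as keyword arguments whose names match the
        # callback names documented in the sections below.
        api.register_spam_checker_callbacks(
            user_may_invite=self.user_may_invite,
        )

    async def user_may_invite(self, inviter: str, invitee: str, room_id: str) -> bool:
        # Toy policy: allow every invite.
        return True
```
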
 #### Spam checker callbacks
 
-To register one of the callbacks described in this section, a module needs to use the
-module API's `register_spam_checker_callbacks` method. The callback functions are passed
-to `register_spam_checker_callbacks` as keyword arguments, with the callback name as the
-argument name and the function as its value. This is demonstrated in the example below.
+Spam checker callbacks allow module developers to implement spam mitigation actions for
+Synapse instances. Spam checker callbacks can be registered using the module API's
+`register_spam_checker_callbacks` method.
 
 The available spam checker callbacks are:
 
@@ -115,7 +120,7 @@ async def user_may_invite(inviter: str, invitee: str, room_id: str) -> bool
 
 Called when processing an invitation. The module must return a `bool` indicating whether
 the inviter can invite the invitee to the given room. Both inviter and invitee are
-represented by their Matrix user ID (i.e. `@alice:example.com`).
+represented by their Matrix user ID (e.g. `@alice:example.com`).
 
 ```python
 async def user_may_create_room(user: str) -> bool
@@ -188,13 +193,43 @@ async def check_media_file_for_spam(
 Called when storing a local or remote file. The module must return a boolean indicating
 whether the given file can be stored in the homeserver's media store.
 
+#### Account validity callbacks
+
+Account validity callbacks allow module developers to add extra steps to verify the
+validity of an account, i.e. whether a user can be granted access to their account on the
+Synapse instance. Account validity callbacks can be registered using the module API's
+`register_account_validity_callbacks` method.
+
+The available account validity callbacks are:
+
+```python
+async def is_user_expired(user: str) -> Optional[bool]
+```
+
+Called when processing any authenticated request (except for logout requests). The module
+can return a `bool` to indicate whether the user has expired and should be locked out of
+their account, or `None` if the module wasn't able to figure it out. The user is
+represented by their Matrix user ID (e.g. `@alice:example.com`).
+
+If the module returns `True`, the current request will be denied with the error code
+`ORG_MATRIX_EXPIRED_ACCOUNT` and the HTTP status code 403. Note that this doesn't
+invalidate the user's access token.
+
+```python
+async def on_user_registration(user: str) -> None
+```
+
+Called after successfully registering a user, in case the module needs to perform extra
+operations to keep track of them (e.g. add them to a database table). The user is
+represented by their Matrix user ID.
+
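Putting these two callbacks together, a minimal account-validity module might look like the following sketch. The class name and the in-memory expiry store are hypothetical; only the registration method and callback signatures come from this section.

```python
from typing import Optional

class ExampleAccountValidity:
    def __init__(self, config: dict, api):
        self._api = api
        self._expired_users = set()  # hypothetical in-memory expiry store
        api.register_account_validity_callbacks(
            is_user_expired=self.is_user_expired,
            on_user_registration=self.on_user_registration,
        )

    async def is_user_expired(self, user: str) -> Optional[bool]:
        # Return True if we know the user has expired, False otherwise;
        # a real module could return None when it cannot tell.
        return user in self._expired_users

    async def on_user_registration(self, user: str) -> None:
        # Track newly registered users, e.g. start their validity period.
        self._expired_users.discard(user)
```
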
 ### Porting an existing module that uses the old interface
 
 In order to port a module that uses Synapse's old module interface, its author needs to:
 
 * ensure the module's callbacks are all asynchronous.
 * register their callbacks using one or more of the `register_[...]_callbacks` methods
-  from the `ModuleApi` class in the module's `__init__` method (see [this section](#registering-a-web-resource)
+  from the `ModuleApi` class in the module's `__init__` method (see [this section](#registering-a-callback)
   for more info).
 
 Additionally, if the module is packaged with an additional web resource, the module
diff --git a/docs/postgres.md b/docs/postgres.md
index f83155e52a..2c0a5b803a 100644
--- a/docs/postgres.md
+++ b/docs/postgres.md
@@ -8,14 +8,14 @@ Synapse will require the python postgres client library in order to
 connect to a postgres database.
 
 -   If you are using the [matrix.org debian/ubuntu
-    packages](../INSTALL.md#matrixorg-packages), the necessary python
+    packages](setup/installation.md#matrixorg-packages), the necessary python
     library will already be installed, but you will need to ensure the
     low-level postgres library is installed, which you can do with
     `apt install libpq5`.
 -   For other pre-built packages, please consult the documentation from
     the relevant package.
 -   If you installed synapse [in a
-    virtualenv](../INSTALL.md#installing-from-source), you can install
+    virtualenv](setup/installation.md#installing-from-source), you can install
     the library with:
 
         ~/synapse/env/bin/pip install "matrix-synapse[postgres]"
diff --git a/docs/presence_router_module.md b/docs/presence_router_module.md
index bf859e4254..4a3e720240 100644
--- a/docs/presence_router_module.md
+++ b/docs/presence_router_module.md
@@ -222,7 +222,9 @@ Synapse, amend your homeserver config file with the following.
 
 ```yaml
 presence:
-  routing_module:
+  enabled: true
+
+  presence_router:
     module: my_module.ExamplePresenceRouter
     config:
       # Any configuration options for your module. The below is an example.
diff --git a/docs/replication.md b/docs/replication.md
index ed88233157..e82df0de8a 100644
--- a/docs/replication.md
+++ b/docs/replication.md
@@ -28,7 +28,7 @@ minimal.
 
 ### The Replication Protocol
 
-See [tcp_replication.md](tcp_replication.md)
+See [the TCP replication documentation](tcp_replication.md).
 
 ### The Slaved DataStore
 
diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
index 01db466f96..76bb45aff2 100644
--- a/docs/reverse_proxy.md
+++ b/docs/reverse_proxy.md
@@ -21,7 +21,7 @@ port 8448. Where these are different, we refer to the 'client port' and the
 'federation port'. See [the Matrix
 specification](https://matrix.org/docs/spec/server_server/latest#resolving-server-names)
 for more details of the algorithm used for federation connections, and
-[delegate.md](delegate.md) for instructions on setting up delegation.
+[Delegation](delegate.md) for instructions on setting up delegation.
 
 **NOTE**: Your reverse proxy must not `canonicalise` or `normalise`
 the requested URI in any way (for example, by decoding `%xx` escapes).
@@ -98,6 +98,33 @@ example.com:8448 {
   reverse_proxy http://localhost:8008
 }
 ```
+[Delegation](delegate.md) example:
+```
+(matrix-well-known-header) {
+    # Headers
+    header Access-Control-Allow-Origin "*"
+    header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS"
+    header Access-Control-Allow-Headers "Origin, X-Requested-With, Content-Type, Accept, Authorization"
+    header Content-Type "application/json"
+}
+
+example.com {
+    handle /.well-known/matrix/server {
+        import matrix-well-known-header
+        respond `{"m.server":"matrix.example.com:443"}`
+    }
+
+    handle /.well-known/matrix/client {
+        import matrix-well-known-header
+        respond `{"m.homeserver":{"base_url":"https://matrix.example.com"},"m.identity_server":{"base_url":"https://identity.example.com"}}`
+    }
+}
+
+matrix.example.com {
+    reverse_proxy /_matrix/* http://localhost:8008
+    reverse_proxy /_synapse/client/* http://localhost:8008
+}
+```
 
 ### Apache
 
diff --git a/docs/room_and_user_statistics.md b/docs/room_and_user_statistics.md
index e1facb38d4..cc38c890bb 100644
--- a/docs/room_and_user_statistics.md
+++ b/docs/room_and_user_statistics.md
@@ -1,9 +1,9 @@
 Room and User Statistics
 ========================
 
-Synapse maintains room and user statistics (as well as a cache of room state),
-in various tables. These can be used for administrative purposes but are also
-used when generating the public room directory.
+Synapse maintains room and user statistics in various tables. These can be used
+for administrative purposes but are also used when generating the public room
+directory.
 
 
 # Synapse Developer Documentation
@@ -15,48 +15,8 @@ used when generating the public room directory.
 * **subject**: Something we are tracking stats about – currently a room or user.
 * **current row**: An entry for a subject in the appropriate current statistics
     table. Each subject can have only one.
-* **historical row**: An entry for a subject in the appropriate historical
-    statistics table. Each subject can have any number of these.
 
 ### Overview
 
-Stats are maintained as time series. There are two kinds of column:
-
-* absolute columns – where the value is correct for the time given by `end_ts`
-    in the stats row. (Imagine a line graph for these values)
-    * They can also be thought of as 'gauges' in Prometheus, if you are familiar.
-* per-slice columns – where the value corresponds to how many of the occurrences
-    occurred within the time slice given by `(end_ts − bucket_size)…end_ts`
-    or `start_ts…end_ts`. (Imagine a histogram for these values)
-
-Stats are maintained in two tables (for each type): current and historical.
-
-Current stats correspond to the present values. Each subject can only have one
-entry.
-
-Historical stats correspond to values in the past. Subjects may have multiple
-entries.
-
-## Concepts around the management of stats
-
-### Current rows
-
-Current rows contain the most up-to-date statistics for a room.
-They only contain absolute columns
-
-### Historical rows
-
-Historical rows can always be considered to be valid for the time slice and
-end time specified.
-
-* historical rows will not exist for every time slice – they will be omitted
-    if there were no changes. In this case, the following assumptions can be
-    made to interpolate/recreate missing rows:
-    - absolute fields have the same values as in the preceding row
-    - per-slice fields are zero (`0`)
-* historical rows will not be retained forever – rows older than a configurable
-    time will be purged.
-
-#### Purge
-
-The purging of historical rows is not yet implemented.
+Stats correspond to the present values. Current rows contain the most up-to-date
+statistics for a room. Each subject can only have one entry.
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 28d495be23..19b2cc6868 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -8,7 +8,8 @@
 #
 # It is *not* intended to be copied and used as the basis for a real
 # homeserver.yaml. Instead, if you are starting from scratch, please generate
-# a fresh config using Synapse by following the instructions in INSTALL.md.
+# a fresh config using Synapse by following the instructions in
+# https://matrix-org.github.io/synapse/latest/setup/installation.html.
 
 # Configuration options that take a time period can be set using a number
 # followed by a letter. Letters have the following meanings:
@@ -36,7 +37,7 @@
 
 # Server admins can expand Synapse's functionality with external modules.
 #
-# See https://matrix-org.github.io/synapse/develop/modules.html for more
+# See https://matrix-org.github.io/synapse/latest/modules.html for more
 # documentation on how to configure or create custom modules for Synapse.
 #
 modules:
@@ -58,7 +59,7 @@ modules:
 # In most cases you should avoid using a matrix specific subdomain such as
 # matrix.example.com or synapse.example.com as the server_name for the same
 # reasons you wouldn't use user@email.example.com as your email address.
-# See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md
+# See https://matrix-org.github.io/synapse/latest/delegate.html
 # for information on how to host Synapse on a subdomain while preserving
 # a clean server_name.
 #
@@ -253,9 +254,9 @@ presence:
 #       'all local interfaces'.
 #
 #   type: the type of listener. Normally 'http', but other valid options are:
-#       'manhole' (see docs/manhole.md),
-#       'metrics' (see docs/metrics-howto.md),
-#       'replication' (see docs/workers.md).
+#       'manhole' (see https://matrix-org.github.io/synapse/latest/manhole.html),
+#       'metrics' (see https://matrix-org.github.io/synapse/latest/metrics-howto.html),
+#       'replication' (see https://matrix-org.github.io/synapse/latest/workers.html).
 #
 #   tls: set to true to enable TLS for this listener. Will use the TLS
 #       key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -280,8 +281,8 @@ presence:
 #   client: the client-server API (/_matrix/client), and the synapse admin
 #       API (/_synapse/admin). Also implies 'media' and 'static'.
 #
-#   consent: user consent forms (/_matrix/consent). See
-#       docs/consent_tracking.md.
+#   consent: user consent forms (/_matrix/consent).
+#       See https://matrix-org.github.io/synapse/latest/consent_tracking.html.
 #
 #   federation: the server-server API (/_matrix/federation). Also implies
 #       'media', 'keys', 'openid'
@@ -290,12 +291,13 @@ presence:
 #
 #   media: the media API (/_matrix/media).
 #
-#   metrics: the metrics interface. See docs/metrics-howto.md.
+#   metrics: the metrics interface.
+#       See https://matrix-org.github.io/synapse/latest/metrics-howto.html.
 #
 #   openid: OpenID authentication.
 #
-#   replication: the HTTP replication API (/_synapse/replication). See
-#       docs/workers.md.
+#   replication: the HTTP replication API (/_synapse/replication).
+#       See https://matrix-org.github.io/synapse/latest/workers.html.
 #
 #   static: static resources under synapse/static (/_matrix/static). (Mostly
 #       useful for 'fallback authentication'.)
@@ -319,7 +321,7 @@ listeners:
   # that unwraps TLS.
   #
   # If you plan to use a reverse proxy, please see
-  # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
+  # https://matrix-org.github.io/synapse/latest/reverse_proxy.html.
   #
   - port: 8008
     tls: false
@@ -680,35 +682,41 @@ retention:
 #external_event_cache_expiry_ms: 1800000
 
 caches:
-   # Controls the global cache factor, which is the default cache factor
-   # for all caches if a specific factor for that cache is not otherwise
-   # set.
-   #
-   # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
-   # variable. Setting by environment variable takes priority over
-   # setting through the config file.
-   #
-   # Defaults to 0.5, which will half the size of all caches.
-   #
-   #global_factor: 1.0
+  # Controls the global cache factor, which is the default cache factor
+  # for all caches if a specific factor for that cache is not otherwise
+  # set.
+  #
+  # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+  # variable. Setting by environment variable takes priority over
+  # setting through the config file.
+  #
+  # Defaults to 0.5, which will half the size of all caches.
+  #
+  #global_factor: 1.0
 
-   # A dictionary of cache name to cache factor for that individual
-   # cache. Overrides the global cache factor for a given cache.
-   #
-   # These can also be set through environment variables comprised
-   # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
-   # letters and underscores. Setting by environment variable
-   # takes priority over setting through the config file.
-   # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
-   #
-   # Some caches have '*' and other characters that are not
-   # alphanumeric or underscores. These caches can be named with or
-   # without the special characters stripped. For example, to specify
-   # the cache factor for `*stateGroupCache*` via an environment
-   # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
-   #
-   per_cache_factors:
-     #get_users_who_share_room_with_user: 2.0
+  # A dictionary of cache name to cache factor for that individual
+  # cache. Overrides the global cache factor for a given cache.
+  #
+  # These can also be set through environment variables comprised
+  # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+  # letters and underscores. Setting by environment variable
+  # takes priority over setting through the config file.
+  # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+  #
+  # Some caches have '*' and other characters that are not
+  # alphanumeric or underscores. These caches can be named with or
+  # without the special characters stripped. For example, to specify
+  # the cache factor for `*stateGroupCache*` via an environment
+  # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
+  #
+  per_cache_factors:
+    #get_users_who_share_room_with_user: 2.0
+
+  # Controls how long an entry can be in a cache without having been
+  # accessed before being evicted. Defaults to None, which means
+  # entries are never evicted based on time.
+  #
+  #expiry_time: 30m
 
 
 ## Database ##
@@ -748,7 +756,8 @@ caches:
 #    cp_min: 5
 #    cp_max: 10
 #
-# For more information on using Synapse with Postgres, see `docs/postgres.md`.
+# For more information on using Synapse with Postgres,
+# see https://matrix-org.github.io/synapse/latest/postgres.html.
 #
 database:
   name: sqlite3
@@ -901,7 +910,7 @@ media_store_path: "DATADIR/media_store"
 #
 # If you are using a reverse proxy you may also need to set this value in
 # your reverse proxy's config. Notably Nginx has a small max body size by default.
-# See https://matrix-org.github.io/synapse/develop/reverse_proxy.html.
+# See https://matrix-org.github.io/synapse/latest/reverse_proxy.html.
 #
 #max_upload_size: 50M
 
@@ -1308,91 +1317,6 @@ account_threepid_delegates:
 #auto_join_rooms_for_guests: false
 
 
-## Account Validity ##
-
-# Optional account validity configuration. This allows for accounts to be denied
-# any request after a given period.
-#
-# Once this feature is enabled, Synapse will look for registered users without an
-# expiration date at startup and will add one to every account it found using the
-# current settings at that time.
-# This means that, if a validity period is set, and Synapse is restarted (it will
-# then derive an expiration date from the current validity period), and some time
-# after that the validity period changes and Synapse is restarted, the users'
-# expiration dates won't be updated unless their account is manually renewed. This
-# date will be randomly selected within a range [now + period - d ; now + period],
-# where d is equal to 10% of the validity period.
-#
-account_validity:
-  # The account validity feature is disabled by default. Uncomment the
-  # following line to enable it.
-  #
-  #enabled: true
-
-  # The period after which an account is valid after its registration. When
-  # renewing the account, its validity period will be extended by this amount
-  # of time. This parameter is required when using the account validity
-  # feature.
-  #
-  #period: 6w
-
-  # The amount of time before an account's expiry date at which Synapse will
-  # send an email to the account's email address with a renewal link. By
-  # default, no such emails are sent.
-  #
-  # If you enable this setting, you will also need to fill out the 'email' and
-  # 'public_baseurl' configuration sections.
-  #
-  #renew_at: 1w
-
-  # The subject of the email sent out with the renewal link. '%(app)s' can be
-  # used as a placeholder for the 'app_name' parameter from the 'email'
-  # section.
-  #
-  # Note that the placeholder must be written '%(app)s', including the
-  # trailing 's'.
-  #
-  # If this is not set, a default value is used.
-  #
-  #renew_email_subject: "Renew your %(app)s account"
-
-  # Directory in which Synapse will try to find templates for the HTML files to
-  # serve to the user when trying to renew an account. If not set, default
-  # templates from within the Synapse package will be used.
-  #
-  # The currently available templates are:
-  #
-  # * account_renewed.html: Displayed to the user after they have successfully
-  #       renewed their account.
-  #
-  # * account_previously_renewed.html: Displayed to the user if they attempt to
-  #       renew their account with a token that is valid, but that has already
-  #       been used. In this case the account is not renewed again.
-  #
-  # * invalid_token.html: Displayed to the user when they try to renew an account
-  #       with an unknown or invalid renewal token.
-  #
-  # See https://github.com/matrix-org/synapse/tree/master/synapse/res/templates for
-  # default template contents.
-  #
-  # The file name of some of these templates can be configured below for legacy
-  # reasons.
-  #
-  #template_dir: "res/templates"
-
-  # A custom file name for the 'account_renewed.html' template.
-  #
-  # If not set, the file is assumed to be named "account_renewed.html".
-  #
-  #account_renewed_html_path: "account_renewed.html"
-
-  # A custom file name for the 'invalid_token.html' template.
-  #
-  # If not set, the file is assumed to be named "invalid_token.html".
-  #
-  #invalid_token_html_path: "invalid_token.html"
-
-
 ## Metrics ###
 
 # Enable collection and rendering of performance metrics
@@ -1841,7 +1765,7 @@ saml2_config:
 #
 #       module: The class name of a custom mapping module. Default is
 #           'synapse.handlers.oidc.JinjaOidcMappingProvider'.
-#           See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
+#           See https://matrix-org.github.io/synapse/latest/sso_mapping_providers.html#openid-mapping-providers
 #           for information on implementing a custom mapping provider.
 #
 #       config: Configuration for the mapping provider module. This section will
@@ -1892,7 +1816,7 @@ saml2_config:
 #     - attribute: groups
 #       value: "admin"
 #
-# See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
+# See https://matrix-org.github.io/synapse/latest/openid.html
 # for information on how to configure these options.
 #
 # For backwards compatibility, it is also possible to configure a single OIDC
@@ -2170,7 +2094,7 @@ sso:
 # Note that this is a non-standard login type and client support is
 # expected to be non-existent.
 #
-# See https://github.com/matrix-org/synapse/blob/master/docs/jwt.md.
+# See https://matrix-org.github.io/synapse/latest/jwt.html.
 #
 #jwt_config:
     # Uncomment the following to enable authorization using JSON web
@@ -2470,7 +2394,7 @@ email:
 # ex. LDAP, external tokens, etc.
 #
 # For more information and known implementations, please see
-# https://github.com/matrix-org/synapse/blob/master/docs/password_auth_providers.md
+# https://matrix-org.github.io/synapse/latest/password_auth_providers.html
 #
 # Note: instances wishing to use SAML or CAS authentication should
 # instead use the `saml2_config` or `cas_config` options,
@@ -2572,7 +2496,7 @@ user_directory:
     #
     # If you set it true, you'll have to rebuild the user_directory search
     # indexes, see:
-    # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
+    # https://matrix-org.github.io/synapse/latest/user_directory.html
     #
     # Uncomment to return search results containing all known users, even if that
     # user does not share a room with the requester.
@@ -2592,7 +2516,7 @@ user_directory:
 # User Consent configuration
 #
 # for detailed instructions, see
-# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md
+# https://matrix-org.github.io/synapse/latest/consent_tracking.html
 #
 # Parts of this section are required if enabling the 'consent' resource under
 # 'listeners', in particular 'template_dir' and 'version'.
@@ -2642,7 +2566,7 @@ user_directory:
 
 
 # Settings for local room and user statistics collection. See
-# docs/room_and_user_statistics.md.
+# https://matrix-org.github.io/synapse/latest/room_and_user_statistics.html.
 #
 stats:
   # Uncomment the following to disable room and user statistics. Note that doing
@@ -2651,11 +2575,6 @@ stats:
   #
   #enabled: false
 
-  # The size of each timeslice in the room_stats_historical and
-  # user_stats_historical tables, as a time period. Defaults to "1d".
-  #
-  #bucket_size: 1h
-
 
 # Server Notices room configuration
 #
@@ -2769,7 +2688,7 @@ opentracing:
     #enabled: true
 
     # The list of homeservers we wish to send and receive span contexts and span baggage.
-    # See docs/opentracing.rst.
+    # See https://matrix-org.github.io/synapse/latest/opentracing.html.
     #
     # This is a list of regexes which are matched against the server_name of the
     # homeserver.
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index ff3c747180..669e600081 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -7,7 +7,7 @@
 # be ingested by ELK stacks. See [2] for details.
 #
 # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema
-# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md
+# [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html
 
 version: 1
 
diff --git a/docs/server_notices.md b/docs/server_notices.md
index 950a6608e9..339d10a0ab 100644
--- a/docs/server_notices.md
+++ b/docs/server_notices.md
@@ -3,8 +3,8 @@
 'Server Notices' are a new feature introduced in Synapse 0.30. They provide a
 channel whereby server administrators can send messages to users on the server.
 
-They are used as part of communication of the server polices(see
-[consent_tracking.md](consent_tracking.md)), however the intention is that
+They are used as part of communicating the server policies (see
+[Consent Tracking](consent_tracking.md)), however the intention is that
 they may also find a use for features such as "Message of the day".
 
 This is a feature specific to Synapse, but it uses standard Matrix
diff --git a/docs/setup/installation.md b/docs/setup/installation.md
index 8bb1cffd3d..8540a7b0c1 100644
--- a/docs/setup/installation.md
+++ b/docs/setup/installation.md
@@ -1,7 +1,600 @@
-<!--
-  Include the contents of INSTALL.md from the project root without moving it, which may
-  break links around the internet. Additionally, note that SUMMARY.md is unable to 
-  directly link to content outside of the docs/ directory. So we use this file as a 
-  redirection.
--->
-{{#include ../../INSTALL.md}}
\ No newline at end of file
+# Installation Instructions
+
+There are 3 steps to follow under **Installation Instructions**.
+
+- [Installation Instructions](#installation-instructions)
+  - [Choosing your server name](#choosing-your-server-name)
+  - [Installing Synapse](#installing-synapse)
+    - [Installing from source](#installing-from-source)
+      - [Platform-specific prerequisites](#platform-specific-prerequisites)
+        - [Debian/Ubuntu/Raspbian](#debianubunturaspbian)
+        - [ArchLinux](#archlinux)
+        - [CentOS/Fedora](#centosfedora)
+        - [macOS](#macos)
+        - [OpenSUSE](#opensuse)
+        - [OpenBSD](#openbsd)
+        - [Windows](#windows)
+    - [Prebuilt packages](#prebuilt-packages)
+      - [Docker images and Ansible playbooks](#docker-images-and-ansible-playbooks)
+      - [Debian/Ubuntu](#debianubuntu)
+        - [Matrix.org packages](#matrixorg-packages)
+        - [Downstream Debian packages](#downstream-debian-packages)
+        - [Downstream Ubuntu packages](#downstream-ubuntu-packages)
+      - [Fedora](#fedora)
+      - [OpenSUSE](#opensuse-1)
+      - [SUSE Linux Enterprise Server](#suse-linux-enterprise-server)
+      - [ArchLinux](#archlinux-1)
+      - [Void Linux](#void-linux)
+      - [FreeBSD](#freebsd)
+      - [OpenBSD](#openbsd-1)
+      - [NixOS](#nixos)
+  - [Setting up Synapse](#setting-up-synapse)
+    - [Using PostgreSQL](#using-postgresql)
+    - [TLS certificates](#tls-certificates)
+    - [Client Well-Known URI](#client-well-known-uri)
+    - [Email](#email)
+    - [Registering a user](#registering-a-user)
+    - [Setting up a TURN server](#setting-up-a-turn-server)
+    - [URL previews](#url-previews)
+    - [Troubleshooting Installation](#troubleshooting-installation)
+
+
+## Choosing your server name
+
+It is important to choose the name for your server before you install Synapse,
+because it cannot be changed later.
+
+The server name determines the "domain" part of user-ids for users on your
+server: these will all be of the format `@user:my.domain.name`. It also
+determines how other matrix servers will reach yours for federation.
+
+For a test configuration, set this to the hostname of your server. For a more
+production-ready setup, you will probably want to specify your domain
+(`example.com`) rather than a matrix-specific hostname here (in the same way
+that your email address is probably `user@example.com` rather than
+`user@email.example.com`) - but doing so may require more advanced setup: see
+[Setting up Federation](../federate.md).
+
+## Installing Synapse
+
+### Installing from source
+
+(Prebuilt packages are available for some platforms - see [Prebuilt packages](#prebuilt-packages).)
+
+When installing from source please make sure that the [Platform-specific prerequisites](#platform-specific-prerequisites) are already installed.
+
+System requirements:
+
+- POSIX-compliant system (tested on Linux & OS X)
+- Python 3.5.2 or later, up to Python 3.9.
+- At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org
+
+
+To install the Synapse homeserver run:
+
+```sh
+mkdir -p ~/synapse
+virtualenv -p python3 ~/synapse/env
+source ~/synapse/env/bin/activate
+pip install --upgrade pip
+pip install --upgrade setuptools
+pip install matrix-synapse
+```
+
+This will download Synapse from [PyPI](https://pypi.org/project/matrix-synapse)
+and install it, along with the python libraries it uses, into a virtual environment
+under `~/synapse/env`.  Feel free to pick a different directory if you
+prefer.
+
+This Synapse installation can later be upgraded by using pip again with the
+update flag:
+
+```sh
+source ~/synapse/env/bin/activate
+pip install -U matrix-synapse
+```
+
+Before you can start Synapse, you will need to generate a configuration
+file. To do this, run (in your virtualenv, as before):
+
+```sh
+cd ~/synapse
+python -m synapse.app.homeserver \
+    --server-name my.domain.name \
+    --config-path homeserver.yaml \
+    --generate-config \
+    --report-stats=[yes|no]
+```
+
+... substituting an appropriate value for `--server-name`.
+
+This command will generate a config file for you to customise, and it will
+also generate a set of keys for you. These keys will allow your homeserver to
+identify itself to other homeservers, so don't lose or delete them. It would be
+wise to back them up somewhere safe. (If, for whatever reason, you do need to
+change your homeserver's keys, you may find that other homeservers have the
+old key cached. If you update the signing key, you should change the name of the
+key in the `<server name>.signing.key` file (the second word) to something
+different. See the [spec](https://matrix.org/docs/spec/server_server/latest.html#retrieving-server-keys) for more information on key management.)
+
+To actually run your new homeserver, pick a working directory for Synapse to
+run (e.g. `~/synapse`), and:
+
+```sh
+cd ~/synapse
+source env/bin/activate
+synctl start
+```
+
+#### Platform-specific prerequisites
+
+Synapse is written in Python but some of the libraries it uses are written in
+C. So before we can install Synapse itself we need a working C compiler and the
+header files for Python C extensions.
+
+##### Debian/Ubuntu/Raspbian
+
+Installing prerequisites on Ubuntu or Debian:
+
+```sh
+sudo apt install build-essential python3-dev libffi-dev \
+                     python3-pip python3-setuptools sqlite3 \
+                     libssl-dev virtualenv libjpeg-dev libxslt1-dev
+```
+
+##### ArchLinux
+
+Installing prerequisites on ArchLinux:
+
+```sh
+sudo pacman -S base-devel python python-pip \
+               python-setuptools python-virtualenv sqlite3
+```
+
+##### CentOS/Fedora
+
+Installing prerequisites on CentOS or Fedora Linux:
+
+```sh
+sudo dnf install libtiff-devel libjpeg-devel libzip-devel freetype-devel \
+                 libwebp-devel libxml2-devel libxslt-devel libpq-devel \
+                 python3-virtualenv libffi-devel openssl-devel python3-devel
+sudo dnf groupinstall "Development Tools"
+```
+
+##### macOS
+
+Installing prerequisites on macOS:
+
+You may need to install the latest Xcode developer tools:
+```sh
+xcode-select --install
+```
+
+On ARM-based Macs you may need to explicitly install libjpeg, which is a Pillow dependency. You can use Homebrew (https://brew.sh):
+```sh
+brew install jpeg
+```
+
+On macOS Catalina (10.15) you may need to explicitly install OpenSSL
+via brew and inform `pip` about it so that `psycopg2` builds:
+
+```sh
+brew install openssl@1.1
+export LDFLAGS="-L/usr/local/opt/openssl/lib"
+export CPPFLAGS="-I/usr/local/opt/openssl/include"
+```
+
+##### OpenSUSE
+
+Installing prerequisites on openSUSE:
+
+```sh
+sudo zypper in -t pattern devel_basis
+sudo zypper in python-pip python-setuptools sqlite3 python-virtualenv \
+               python-devel libffi-devel libopenssl-devel libjpeg62-devel
+```
+
+##### OpenBSD
+
+A port of Synapse is available under `net/synapse`. The filesystem
+underlying the homeserver directory (defaults to `/var/synapse`) has to be
+mounted with `wxallowed` (cf. `mount(8)`), so consider creating a separate
+filesystem and mounting it on `/var/synapse`.
+
+To be able to build Synapse's Python dependency, the `WRKOBJDIR`
+(cf. `bsd.port.mk(5)`) used for building Python also needs to be on a
+filesystem mounted with `wxallowed` (cf. `mount(8)`).
+
+Creating a `WRKOBJDIR` for building python under `/usr/local` (which on a
+default OpenBSD installation is mounted with `wxallowed`):
+
+```sh
+doas mkdir /usr/local/pobj_wxallowed
+```
+
+Assuming `PORTS_PRIVSEP=Yes` (cf. `bsd.port.mk(5)`) and `SUDO=doas` are
+configured in `/etc/mk.conf`:
+
+```sh
+doas chown _pbuild:_pbuild /usr/local/pobj_wxallowed
+```
+
+Setting the `WRKOBJDIR` for building python:
+
+```sh
+echo WRKOBJDIR_lang/python/3.7=/usr/local/pobj_wxallowed  \\nWRKOBJDIR_lang/python/2.7=/usr/local/pobj_wxallowed >> /etc/mk.conf
+```
+
+Building Synapse:
+
+```sh
+cd /usr/ports/net/synapse
+make install
+```
+
+##### Windows
+
+If you wish to run or develop Synapse on Windows, the Windows Subsystem For
+Linux provides a Linux environment on Windows 10 which is capable of using the
+Debian, Fedora, or source installation methods. More information about WSL can
+be found at <https://docs.microsoft.com/en-us/windows/wsl/install-win10> for
+Windows 10 and <https://docs.microsoft.com/en-us/windows/wsl/install-on-server>
+for Windows Server.
+
+### Prebuilt packages
+
+As an alternative to installing from source, prebuilt packages are available
+for a number of platforms.
+
+#### Docker images and Ansible playbooks
+
+There is an official synapse image available at
+<https://hub.docker.com/r/matrixdotorg/synapse> which can be used with
+the docker-compose file available at
+[contrib/docker](https://github.com/matrix-org/synapse/tree/develop/contrib/docker).
+Further information on this including configuration options is available in the README
+on hub.docker.com.
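+
+For example, a first run to generate a config with the official image commonly
+looks something like this (paths, server name, and tag are placeholders; the
+image's README is the authoritative reference):
+
+```sh
+docker run -it --rm \
+    -v /path/to/synapse/data:/data \
+    -e SYNAPSE_SERVER_NAME=my.domain.name \
+    -e SYNAPSE_REPORT_STATS=yes \
+    matrixdotorg/synapse:latest generate
+```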
+
+Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a
+Dockerfile to automate a synapse server in a single Docker image, at
+<https://hub.docker.com/r/avhost/docker-matrix/tags/>
+
+Slavi Pantaleev has created an Ansible playbook,
+which installs the official Docker image of Matrix Synapse
+along with many other Matrix-related services (Postgres database, Element, coturn,
+ma1sd, SSL support, etc.).
+For more details, see
+<https://github.com/spantaleev/matrix-docker-ansible-deploy>
+
+#### Debian/Ubuntu
+
+##### Matrix.org packages
+
+Matrix.org provides Debian/Ubuntu packages of Synapse via
+<https://packages.matrix.org/debian/>.  To install the latest release:
+
+```sh
+sudo apt install -y lsb-release wget apt-transport-https
+sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main" |
+    sudo tee /etc/apt/sources.list.d/matrix-org.list
+sudo apt update
+sudo apt install matrix-synapse-py3
+```
+
+Packages are also published for release candidates. To enable the prerelease
+channel, add `prerelease` to the `sources.list` line. For example:
+
+```sh
+sudo wget -O /usr/share/keyrings/matrix-org-archive-keyring.gpg https://packages.matrix.org/debian/matrix-org-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/matrix-org-archive-keyring.gpg] https://packages.matrix.org/debian/ $(lsb_release -cs) main prerelease" |
+    sudo tee /etc/apt/sources.list.d/matrix-org.list
+sudo apt update
+sudo apt install matrix-synapse-py3
+```
+
+The fingerprint of the repository signing key (as shown by `gpg
+/usr/share/keyrings/matrix-org-archive-keyring.gpg`) is
+`AAF9AE843A7584B5A3E4CD2BCF45A512DE2DA058`.
+
+##### Downstream Debian packages
+
+We do not recommend using the packages from the default Debian `buster`
+repository at this time, as they are old and suffer from known security
+vulnerabilities. You can install the latest version of Synapse from
+[our repository](#matrixorg-packages) or from `buster-backports`. Please
+see the [Debian documentation](https://backports.debian.org/Instructions/)
+for information on how to use backports.
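+
+For example, once backports are enabled as described there, the package can be
+pulled from that suite with something like:
+
+```sh
+sudo apt install -t buster-backports matrix-synapse
+```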
+
+If you are using Debian `sid` or testing, Synapse is available in the default
+repositories and it should be possible to install it simply with:
+
+```sh
+sudo apt install matrix-synapse
+```
+
+##### Downstream Ubuntu packages
+
+We do not recommend using the packages in the default Ubuntu repository
+at this time, as they are old and suffer from known security vulnerabilities.
+The latest version of Synapse can be installed from [our repository](#matrixorg-packages).
+
+#### Fedora
+
+Synapse is in the Fedora repositories as `matrix-synapse`:
+
+```sh
+sudo dnf install matrix-synapse
+```
+
+Oleg Girko provides Fedora RPMs at
+<https://obs.infoserver.lv/project/monitor/matrix-synapse>
+
+#### OpenSUSE
+
+Synapse is in the OpenSUSE repositories as `matrix-synapse`:
+
+```sh
+sudo zypper install matrix-synapse
+```
+
+#### SUSE Linux Enterprise Server
+
+Unofficial packages are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at
+<https://download.opensuse.org/repositories/openSUSE:/Backports:/SLE-15/standard/>
+
+#### ArchLinux
+
+The quickest way to get up and running with ArchLinux is probably with the community package
+<https://www.archlinux.org/packages/community/any/matrix-synapse/>, which should pull in most of
+the necessary dependencies.
+
+pip may be outdated (6.0.7-1 and needs to be upgraded to 6.0.8-1):
+
+```sh
+sudo pip install --upgrade pip
+```
+
+If you encounter an error with the bcrypt library causing a "Wrong ELF Class:
+ELFCLASS32" error (on x64 systems), you may need to reinstall py-bcrypt to
+correctly compile it under the right architecture. (This should not be needed
+if installing under virtualenv):
+
+```sh
+sudo pip uninstall py-bcrypt
+sudo pip install py-bcrypt
+```
+
+#### Void Linux
+
+Synapse can be found in the Void Linux repositories as `synapse`:
+
+```sh
+xbps-install -Su
+xbps-install -S synapse
+```
+
+#### FreeBSD
+
+Synapse can be installed via FreeBSD Ports or Packages contributed by Brendan Molloy from:
+
+- Ports: `cd /usr/ports/net-im/py-matrix-synapse && make install clean`
+- Packages: `pkg install py37-matrix-synapse`
+
+#### OpenBSD
+
+As of OpenBSD 6.7 Synapse is available as a pre-compiled binary. The filesystem
+underlying the homeserver directory (defaults to `/var/synapse`) has to be
+mounted with `wxallowed` (cf. `mount(8)`), so consider creating a separate
+filesystem and mounting it on `/var/synapse`.
+
+Installing Synapse:
+
+```sh
+doas pkg_add synapse
+```
+
+#### NixOS
+
+Robin Lambertz has packaged Synapse for NixOS at:
+<https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/matrix-synapse.nix>
+
+## Setting up Synapse
+
+Once you have installed Synapse as above, you will need to configure it.
+
+### Using PostgreSQL
+
+By default Synapse uses an [SQLite](https://sqlite.org/) database and in doing so trades
+performance for convenience. Almost all installations should opt to use [PostgreSQL](https://www.postgresql.org)
+instead. Advantages include:
+
+- significant performance improvements due to the superior threading and
+  caching model, smarter query optimiser
+- allowing the DB to be run on separate hardware
+
+For information on how to install and use PostgreSQL in Synapse, please see
+[Using Postgres](../postgres.md)
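+
+As a rough sketch, a PostgreSQL-backed `database` section in `homeserver.yaml`
+looks something like the following (database name, credentials and host are
+placeholders; see the page above for the full details):
+
+```yaml
+database:
+  name: psycopg2
+  args:
+    user: synapse_user
+    password: secretpassword
+    database: synapse
+    host: localhost
+    cp_min: 5
+    cp_max: 10
+```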
+
+SQLite is only acceptable for testing purposes. It should not be used in a
+production server, as Synapse will perform poorly when using it, especially
+when participating in large rooms.
+
+### TLS certificates
+
+The default configuration exposes a single HTTP port on the local
+interface: `http://localhost:8008`. It is suitable for local testing,
+but for any practical use, you will need Synapse's APIs to be served
+over HTTPS.
+
+The recommended way to do so is to set up a reverse proxy on port
+`8448`. You can find documentation on doing so in
+[the reverse proxy documentation](../reverse_proxy.md).
+
+Alternatively, you can configure Synapse to expose an HTTPS port. To do
+so, you will need to edit `homeserver.yaml`, as follows:
+
+- First, under the `listeners` section, uncomment the configuration for the
+  TLS-enabled listener. (Remove the hash sign (`#`) at the start of
+  each line). The relevant lines are like this:
+
+```yaml
+  - port: 8448
+    type: http
+    tls: true
+    resources:
+      - names: [client, federation]
+```
+
+- You will also need to uncomment the `tls_certificate_path` and
+  `tls_private_key_path` lines under the `TLS` section. You will need to manage
+  provisioning of these certificates yourself.
+
+  If you are using your own certificate, be sure to use a `.pem` file that
+  includes the full certificate chain including any intermediate certificates
+  (for instance, if using certbot, use `fullchain.pem` as your certificate, not
+  `cert.pem`).
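+
+  Once uncommented, those lines look something like this (the paths are
+  placeholders for wherever you keep your certificate and key):
+
+  ```yaml
+  tls_certificate_path: "/path/to/fullchain.pem"
+  tls_private_key_path: "/path/to/privkey.pem"
+  ```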
+
+For a more detailed guide to configuring your server for federation, see
+[Federation](../federate.md).
+
+### Client Well-Known URI
+
+Setting up the client Well-Known URI is optional but if you set it up, it will
+allow users to enter their full username (e.g. `@user:<server_name>`) into clients
+which support well-known lookup to automatically configure the homeserver and
+identity server URLs. This is useful so that users don't have to memorize or think
+about the actual homeserver URL you are using.
+
+The URL `https://<server_name>/.well-known/matrix/client` should return JSON in
+the following format.
+
+```json
+{
+  "m.homeserver": {
+    "base_url": "https://<matrix.example.com>"
+  }
+}
+```
+
+It can optionally contain identity server information as well.
+
+```json
+{
+  "m.homeserver": {
+    "base_url": "https://<matrix.example.com>"
+  },
+  "m.identity_server": {
+    "base_url": "https://<identity.example.com>"
+  }
+}
+```
+
+To work in browser-based clients, the file must be served with the appropriate
+Cross-Origin Resource Sharing (CORS) headers. A recommended value would be
+`Access-Control-Allow-Origin: *`, which would allow all browser-based clients to
+view it.
+
+In nginx this would be something like:
+
+```nginx
+location /.well-known/matrix/client {
+    return 200 '{"m.homeserver": {"base_url": "https://<matrix.example.com>"}}';
+    default_type application/json;
+    add_header Access-Control-Allow-Origin *;
+}
+```
+
+You should also ensure the `public_baseurl` option in `homeserver.yaml` is set
+correctly. `public_baseurl` should be set to the URL that clients will use to
+connect to your server. This is the same URL you put for the `m.homeserver`
+`base_url` above.
+
+```yaml
+public_baseurl: "https://<matrix.example.com>"
+```
+
+### Email
+
+It is desirable for Synapse to have the capability to send email. This allows
+Synapse to send password reset emails, send verifications when an email address
+is added to a user's account, and send email notifications to users when they
+receive new messages.
+
+To configure an SMTP server for Synapse, modify the configuration section
+headed `email`, and be sure to have at least the `smtp_host`, `smtp_port`
+and `notif_from` fields filled out.  You may also need to set `smtp_user`,
+`smtp_pass`, and `require_transport_security`.
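+
+A minimal sketch of such an `email` section, with placeholder values, might
+look like this:
+
+```yaml
+email:
+  smtp_host: mail.example.com
+  smtp_port: 587
+  smtp_user: "synapse@example.com"
+  smtp_pass: "changeme"
+  require_transport_security: true
+  notif_from: "Your Matrix server <noreply@example.com>"
+```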
+
+If email is not configured, password reset, registration and notifications via
+email will be disabled.
+
+### Registering a user
+
+The easiest way to create a new user is to do so from a client like [Element](https://element.io/).
+
+Alternatively, you can do so from the command line. This can be done as follows:
+
+ 1. If Synapse was installed via pip, activate the virtualenv as follows (if Synapse was
+    installed via a prebuilt package, `register_new_matrix_user` should already be
+    on the search path):
+    ```sh
+    cd ~/synapse
+    source env/bin/activate
+    synctl start # if not already running
+    ```
+ 2. Run the following command:
+    ```sh
+    register_new_matrix_user -c homeserver.yaml http://localhost:8008
+    ```
+
+This will prompt you to add details for the new user, and will then connect to
+the running Synapse to create the new user. For example:
+```
+New user localpart: erikj
+Password:
+Confirm password:
+Make admin [no]:
+Success!
+```
+
+This process uses a setting `registration_shared_secret` in
+`homeserver.yaml`, which is shared between Synapse itself and the
+`register_new_matrix_user` script. It doesn't matter what it is (a random
+value is generated by `--generate-config`), but it should be kept secret, as
+anyone with knowledge of it can register users, including admin accounts,
+on your server even if `enable_registration` is `false`.
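+
+In `homeserver.yaml` the setting is a single line (the value below is just a
+placeholder for the random string):
+
+```yaml
+registration_shared_secret: "<PRIVATE, RANDOM STRING>"
+```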
+
+### Setting up a TURN server
+
+For reliable VoIP calls to be routed via this homeserver, you MUST configure
+a TURN server. See [TURN setup](../turn-howto.md) for details.
+
+### URL previews
+
+Synapse includes support for previewing URLs, which is disabled by default.  To
+turn it on you must enable the `url_preview_enabled: True` config parameter
+and explicitly specify the IP ranges that Synapse is not allowed to spider for
+previewing in the `url_preview_ip_range_blacklist` configuration parameter.
+This is critical from a security perspective to stop arbitrary Matrix users
+spidering 'internal' URLs on your network. At the very least we recommend that
+your loopback and RFC1918 IP addresses are blacklisted.
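+
+An illustrative sketch of those two settings (the blacklist below only covers
+loopback and the RFC1918 ranges; extend it to match your own network):
+
+```yaml
+url_preview_enabled: true
+url_preview_ip_range_blacklist:
+  - '127.0.0.0/8'
+  - '10.0.0.0/8'
+  - '172.16.0.0/12'
+  - '192.168.0.0/16'
+```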
+
+This also requires the optional `lxml` Python dependency to be installed. This
+in turn requires the `libxml2` library to be available - on Debian/Ubuntu this
+means `apt-get install libxml2-dev`, or equivalent for your OS.
+
+### Troubleshooting Installation
+
+`pip` seems to leak *lots* of memory during installation. For instance, a Linux
+host with 512MB of RAM may run out of memory whilst installing Twisted. If this
+happens, you will have to individually install the dependencies which are
+failing, e.g.:
+
+```sh
+pip install twisted
+```
+
+If you have any other problems, feel free to ask in
+[#synapse:matrix.org](https://matrix.to/#/#synapse:matrix.org).
diff --git a/docs/spam_checker.md b/docs/spam_checker.md
index c16914e61d..1b6d814937 100644
--- a/docs/spam_checker.md
+++ b/docs/spam_checker.md
@@ -1,6 +1,8 @@
-**Note: this page of the Synapse documentation is now deprecated. For up to date
+<h2 style="color:red">
+This page of the Synapse documentation is now deprecated. For up to date
 documentation on setting up or writing a spam checker module, please see
-[this page](https://matrix-org.github.io/synapse/develop/modules.html).**
+<a href="modules.md">this page</a>.
+</h2>
 
 # Handling spam in Synapse
 
diff --git a/docs/systemd-with-workers/README.md b/docs/systemd-with-workers/README.md
index a7de2de88a..b160d93528 100644
--- a/docs/systemd-with-workers/README.md
+++ b/docs/systemd-with-workers/README.md
@@ -14,10 +14,12 @@ contains an example configuration for the `federation_reader` worker.
 
 ## Synapse configuration files
 
-See [workers.md](../workers.md) for information on how to set up the
-configuration files and reverse-proxy correctly. You can find an example worker
-config in the [workers](https://github.com/matrix-org/synapse/tree/develop/docs/systemd-with-workers/workers/)
-folder.
+See [the worker documentation](../workers.md) for information on how to set up the
+configuration files and reverse-proxy correctly.
+Below is a sample `federation_reader` worker configuration file.
+```yaml
+{{#include workers/federation_reader.yaml}}
+```
 
 Systemd manages daemonization itself, so ensure that none of the configuration
 files set either `daemonize` or `worker_daemonize`.
@@ -72,12 +74,12 @@ systemctl restart matrix-synapse.target
 
 **Optional:** If further hardening is desired, the file
 `override-hardened.conf` may be copied from
-`contrib/systemd/override-hardened.conf` in this repository to the location
+[contrib/systemd/override-hardened.conf](https://github.com/matrix-org/synapse/tree/develop/contrib/systemd/)
+in this repository to the location
 `/etc/systemd/system/matrix-synapse.service.d/override-hardened.conf` (the
 directory may have to be created). It enables certain sandboxing features in
 systemd to further secure the synapse service. You may read the comments to
-understand what the override file is doing. The same file will need to be copied
-to
+understand what the override file is doing. The same file will need to be copied to
 `/etc/systemd/system/matrix-synapse-worker@.service.d/override-hardened-worker.conf`
 (this directory may also have to be created) in order to apply the same
 hardening options to any worker processes.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index a44960c2b8..db0450f563 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -16,7 +16,7 @@ this document.
     summaries.
 
 -   If Synapse was installed using [prebuilt
-    packages](../setup/INSTALL.md#prebuilt-packages), you will need to follow the
+    packages](setup/installation.md#prebuilt-packages), you will need to follow the
     normal process for upgrading those packages.
 
 -   If Synapse was installed from source, then:
@@ -84,7 +84,45 @@ process, for example:
     wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
-    
+
+
+# Upgrading to v1.38.0
+
+## Re-indexing of `events` table on Postgres databases
+
+This release includes a database schema update which requires re-indexing one of
+the larger tables in the database, `events`. This could result in increased
+disk I/O for several hours or days after upgrading while the migration
+completes. Furthermore, because we have to keep the old indexes until the new
+indexes are ready, it could result in a significant, temporary, increase in
+disk space.
+
+To get a rough idea of the disk space required, check the current size of one
+of the indexes. For example, from a `psql` shell, run the following sql:
+
+```sql
+SELECT pg_size_pretty(pg_relation_size('events_order_room'));
+```
+
+We need to rebuild **four** indexes, so you will need to multiply this result
+by four to give an estimate of the disk space required. For example, on one
+particular server:
+
+```
+synapse=# select pg_size_pretty(pg_relation_size('events_order_room'));
+ pg_size_pretty
+----------------
+ 288 MB
+(1 row)
+```
+
+On this server, it would be wise to ensure that at least 1152MB are free.
+
+The additional disk space will be freed once the migration completes.
+
+SQLite databases are unaffected by this change.
+
+
 # Upgrading to v1.37.0
 
 ## Deprecation of the current spam checker interface
diff --git a/docs/usage/configuration/logging_sample_config.md b/docs/usage/configuration/logging_sample_config.md
index 4c4bc6fc16..a673d487b8 100644
--- a/docs/usage/configuration/logging_sample_config.md
+++ b/docs/usage/configuration/logging_sample_config.md
@@ -11,4 +11,4 @@ a fresh config using Synapse by following the instructions in
 
 ```yaml
 {{#include ../../sample_log_config.yaml}}
-``__`
\ No newline at end of file
+```
\ No newline at end of file
diff --git a/docs/workers.md b/docs/workers.md
index 797758ee84..d8672324c3 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -73,7 +73,7 @@ https://hub.docker.com/r/matrixdotorg/synapse/.
 To make effective use of the workers, you will need to configure an HTTP
 reverse-proxy such as nginx or haproxy, which will direct incoming requests to
 the correct worker, or to the main synapse instance. See
-[reverse_proxy.md](reverse_proxy.md) for information on setting up a reverse
+[the reverse proxy documentation](reverse_proxy.md) for information on setting up a reverse
 proxy.
 
 When using workers, each worker process has its own configuration file which
@@ -170,8 +170,8 @@ Finally, you need to start your worker processes. This can be done with either
 `synctl` or your distribution's preferred service manager such as `systemd`. We
 recommend the use of `systemd` where available: for information on setting up
 `systemd` to start synapse workers, see
-[systemd-with-workers](systemd-with-workers). To use `synctl`, see
-[synctl_workers.md](synctl_workers.md).
+[Systemd with Workers](systemd-with-workers). To use `synctl`, see
+[Using synctl with Workers](synctl_workers.md).
 
 
 ## Available worker applications
diff --git a/mypy.ini b/mypy.ini
index c4ff0e6618..8717ae738e 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -75,6 +75,7 @@ files =
   synapse/util/daemonize.py,
   synapse/util/hash.py,
   synapse/util/iterutils.py,
+  synapse/util/linked_list.py,
   synapse/util/metrics.py,
   synapse/util/macaroons.py,
   synapse/util/module_loader.py,
@@ -82,6 +83,7 @@ files =
   synapse/util/stringutils.py,
   synapse/visibility.py,
   tests/replication,
+  tests/test_event_auth.py,
   tests/test_utils,
   tests/handlers/test_password_providers.py,
   tests/rest/client/v1/test_login.py,
diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages
index 546724f89f..e25c5bb265 100755
--- a/scripts-dev/build_debian_packages
+++ b/scripts-dev/build_debian_packages
@@ -10,6 +10,7 @@
 # can be passed on the commandline for debugging.
 
 import argparse
+import json
 import os
 import signal
 import subprocess
@@ -34,6 +35,8 @@ By default, builds for all known distributions, but a list of distributions
 can be passed on the commandline for debugging.
 """
 
+projdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
 
 class Builder(object):
     def __init__(self, redirect_stdout=False):
@@ -57,9 +60,6 @@ class Builder(object):
             raise
 
     def _inner_build(self, dist, skip_tests=False):
-        projdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        os.chdir(projdir)
-
         tag = dist.split(":", 1)[1]
 
         # Make the dir where the debs will live.
@@ -93,6 +93,7 @@ class Builder(object):
             ],
             stdout=stdout,
             stderr=subprocess.STDOUT,
+            cwd=projdir,
         )
 
         container_name = "synapse_build_" + tag
@@ -180,10 +181,18 @@ if __name__ == "__main__":
         help="skip running tests after building",
     )
     parser.add_argument(
+        "--show-dists-json",
+        action="store_true",
+        help="instead of building the packages, just list the dists to build for, as a json array",
+    )
+    parser.add_argument(
         "dist",
         nargs="*",
         default=DISTS,
         help="a list of distributions to build for. Default: %(default)s",
     )
     args = parser.parse_args()
-    run_builds(dists=args.dist, jobs=args.jobs, skip_tests=args.no_check)
+    if args.show_dists_json:
+        print(json.dumps(DISTS))
+    else:
+        run_builds(dists=args.dist, jobs=args.jobs, skip_tests=args.no_check)
diff --git a/scripts/synapse_review_recent_signups b/scripts/synapse_review_recent_signups
new file mode 100755
index 0000000000..a36d46e14c
--- /dev/null
+++ b/scripts/synapse_review_recent_signups
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse._scripts.review_recent_signups import main
+
+if __name__ == "__main__":
+    main()
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 2070724c34..5ecce24eee 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.37.1rc1"
+__version__ = "1.38.0"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/_scripts/review_recent_signups.py b/synapse/_scripts/review_recent_signups.py
new file mode 100644
index 0000000000..01dc0c4237
--- /dev/null
+++ b/synapse/_scripts/review_recent_signups.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import sys
+import time
+from datetime import datetime
+from typing import List
+
+import attr
+
+from synapse.config._base import RootConfig, find_config_files, read_config_files
+from synapse.config.database import DatabaseConfig
+from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn
+from synapse.storage.engines import create_engine
+
+
+class ReviewConfig(RootConfig):
+    "A config class that just pulls out the database config"
+    config_classes = [DatabaseConfig]
+
+
+@attr.s(auto_attribs=True)
+class UserInfo:
+    user_id: str
+    creation_ts: int
+    emails: List[str] = attr.Factory(list)
+    private_rooms: List[str] = attr.Factory(list)
+    public_rooms: List[str] = attr.Factory(list)
+    ips: List[str] = attr.Factory(list)
+
+
+def get_recent_users(txn: LoggingTransaction, since_ms: int) -> List[UserInfo]:
+    """Fetches recently registered users and some info on them."""
+
+    sql = """
+        SELECT name, creation_ts FROM users
+        WHERE
+            ? <= creation_ts
+            AND deactivated = 0
+    """
+
+    txn.execute(sql, (since_ms / 1000,))
+
+    user_infos = [UserInfo(user_id, creation_ts) for user_id, creation_ts in txn]
+
+    for user_info in user_infos:
+        user_info.emails = DatabasePool.simple_select_onecol_txn(
+            txn,
+            table="user_threepids",
+            keyvalues={"user_id": user_info.user_id, "medium": "email"},
+            retcol="address",
+        )
+
+        sql = """
+            SELECT room_id, canonical_alias, name, join_rules
+            FROM local_current_membership
+            INNER JOIN room_stats_state USING (room_id)
+            WHERE user_id = ? AND membership = 'join'
+        """
+
+        txn.execute(sql, (user_info.user_id,))
+        for room_id, canonical_alias, name, join_rules in txn:
+            if join_rules == "public":
+                user_info.public_rooms.append(canonical_alias or name or room_id)
+            else:
+                user_info.private_rooms.append(canonical_alias or name or room_id)
+
+        user_info.ips = DatabasePool.simple_select_onecol_txn(
+            txn,
+            table="user_ips",
+            keyvalues={"user_id": user_info.user_id},
+            retcol="ip",
+        )
+
+    return user_infos
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-c",
+        "--config-path",
+        action="append",
+        metavar="CONFIG_FILE",
+        help="The config files for Synapse.",
+        required=True,
+    )
+    parser.add_argument(
+        "-s",
+        "--since",
+        metavar="duration",
+        help="Specify how far back to review user registrations for, defaults to 7d (i.e. 7 days).",
+        default="7d",
+    )
+    parser.add_argument(
+        "-e",
+        "--exclude-emails",
+        action="store_true",
+        help="Exclude users that have validated email addresses",
+    )
+    parser.add_argument(
+        "-u",
+        "--only-users",
+        action="store_true",
+        help="Only print user IDs that match.",
+    )
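+
+    # Example invocation once Synapse is installed (paths and duration are
+    # illustrative):
+    #   synapse_review_recent_signups -c /etc/synapse/homeserver.yaml -s 3d -e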
+
+    config = ReviewConfig()
+
+    config_args = parser.parse_args(sys.argv[1:])
+    config_files = find_config_files(search_paths=config_args.config_path)
+    config_dict = read_config_files(config_files)
+    config.parse_config_dict(
+        config_dict,
+    )
+
+    since_ms = time.time() * 1000 - config.parse_duration(config_args.since)
+    exclude_users_with_email = config_args.exclude_emails
+    include_context = not config_args.only_users
+
+    for database_config in config.database.databases:
+        if "main" in database_config.databases:
+            break
+
+    engine = create_engine(database_config.config)
+
+    with make_conn(database_config, engine, "review_recent_signups") as db_conn:
+        user_infos = get_recent_users(db_conn.cursor(), since_ms)
+
+    for user_info in user_infos:
+        if exclude_users_with_email and user_info.emails:
+            continue
+
+        if include_context:
+            print_public_rooms = ""
+            if user_info.public_rooms:
+                print_public_rooms = "(" + ", ".join(user_info.public_rooms[:3])
+
+                if len(user_info.public_rooms) > 3:
+                    print_public_rooms += ", ..."
+
+                print_public_rooms += ")"
+
+            print("# Created:", datetime.fromtimestamp(user_info.creation_ts))
+            print("# Email:", ", ".join(user_info.emails) or "None")
+            print("# IPs:", ", ".join(user_info.ips))
+            print(
+                "# Number joined public rooms:",
+                len(user_info.public_rooms),
+                print_public_rooms,
+            )
+            print("# Number joined private rooms:", len(user_info.private_rooms))
+            print("#")
+
+        print(user_info.user_id)
+
+        if include_context:
+            print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 29cf257633..05699714ee 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 import pymacaroons
 from netaddr import IPAddress
@@ -28,7 +28,6 @@ from synapse.api.errors import (
     InvalidClientTokenError,
     MissingClientTokenError,
 )
-from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.appservice import ApplicationService
 from synapse.events import EventBase
 from synapse.http import get_request_user_agent
@@ -38,7 +37,6 @@ from synapse.storage.databases.main.registration import TokenLookupResult
 from synapse.types import Requester, StateMap, UserID, create_requester
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.macaroons import get_value_from_macaroon, satisfy_expiry
-from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -46,15 +44,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-AuthEventTypes = (
-    EventTypes.Create,
-    EventTypes.Member,
-    EventTypes.PowerLevels,
-    EventTypes.JoinRules,
-    EventTypes.RoomHistoryVisibility,
-    EventTypes.ThirdPartyInvite,
-)
-
 # guests always get this device id.
 GUEST_DEVICE_ID = "guest_device"
 
@@ -65,9 +54,7 @@ class _InvalidMacaroonException(Exception):
 
 class Auth:
     """
-    FIXME: This class contains a mix of functions for authenticating users
-    of our client-server API and authenticating events added to room graphs.
-    The latter should be moved to synapse.handlers.event_auth.EventAuthHandler.
+    This class contains functions for authenticating users of our client-server API.
     """
 
     def __init__(self, hs: "HomeServer"):
@@ -75,32 +62,18 @@ class Auth:
         self.clock = hs.get_clock()
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
+        self._account_validity_handler = hs.get_account_validity_handler()
 
-        self.token_cache = LruCache(
+        self.token_cache: LruCache[str, Tuple[str, bool]] = LruCache(
             10000, "token_cache"
-        )  # type: LruCache[str, Tuple[str, bool]]
+        )
 
         self._auth_blocking = AuthBlocking(self.hs)
 
-        self._account_validity_enabled = (
-            hs.config.account_validity.account_validity_enabled
-        )
         self._track_appservice_user_ips = hs.config.track_appservice_user_ips
         self._macaroon_secret_key = hs.config.macaroon_secret_key
         self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users
 
-    async def check_from_context(
-        self, room_version: str, event, context, do_sig_check=True
-    ) -> None:
-        auth_event_ids = event.auth_event_ids()
-        auth_events_by_id = await self.store.get_events(auth_event_ids)
-        auth_events = {(e.type, e.state_key): e for e in auth_events_by_id.values()}
-
-        room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
-        event_auth.check(
-            room_version_obj, event, auth_events=auth_events, do_sig_check=do_sig_check
-        )
-
     async def check_user_in_room(
         self,
         room_id: str,
@@ -151,13 +124,6 @@ class Auth:
 
         raise AuthError(403, "User %s not in room %s" % (user_id, room_id))
 
-    async def check_host_in_room(self, room_id: str, host: str) -> bool:
-        with Measure(self.clock, "check_host_in_room"):
-            return await self.store.is_host_joined(room_id, host)
-
-    def get_public_keys(self, invite_event: EventBase) -> List[Dict[str, Any]]:
-        return event_auth.get_public_keys(invite_event)
-
     async def get_user_by_req(
         self,
         request: SynapseRequest,
@@ -219,12 +185,17 @@ class Auth:
             shadow_banned = user_info.shadow_banned
 
             # Deny the request if the user account has expired.
-            if self._account_validity_enabled and not allow_expired:
-                if await self.store.is_account_expired(
-                    user_info.user_id, self.clock.time_msec()
+            if not allow_expired:
+                if await self._account_validity_handler.is_user_expired(
+                    user_info.user_id
                 ):
+                    # Raise the error if either an account validity module has determined
+                    # the account has expired, or the legacy account validity
+                    # implementation is enabled and determined the account has expired
                     raise AuthError(
-                        403, "User account has expired", errcode=Codes.EXPIRED_ACCOUNT
+                        403,
+                        "User account has expired",
+                        errcode=Codes.EXPIRED_ACCOUNT,
                     )
 
             device_id = user_info.device_id
@@ -272,6 +243,37 @@ class Auth:
         except KeyError:
             raise MissingClientTokenError()
 
+    async def validate_appservice_can_control_user_id(
+        self, app_service: ApplicationService, user_id: str
+    ):
+        """Validates that the app service is allowed to control
+        the given user.
+
+        Args:
+            app_service: The app service that controls the user
+            user_id: The author MXID that the app service is controlling
+
+        Raises:
+            AuthError: If the application service is not allowed to control the user
+                (user namespace regex does not match, wrong homeserver, etc)
+                or if the user has not been registered yet.
+        """
+
+        # It's ok if the app service is trying to use the sender from their registration
+        if app_service.sender == user_id:
+            pass
+        # Check to make sure the app service is allowed to control the user
+        elif not app_service.is_interested_in_user(user_id):
+            raise AuthError(
+                403,
+                "Application service cannot masquerade as this user (%s)." % user_id,
+            )
+        # Check to make sure the user is already registered on the homeserver
+        elif not (await self.store.get_user_by_id(user_id)):
+            raise AuthError(
+                403, "Application service has not registered this user (%s)" % user_id
+            )
+
     async def _get_appservice_user_id(
         self, request: Request
     ) -> Tuple[Optional[str], Optional[ApplicationService]]:
@@ -293,13 +295,11 @@ class Auth:
             return app_service.sender, app_service
 
         user_id = request.args[b"user_id"][0].decode("utf8")
+        await self.validate_appservice_can_control_user_id(app_service, user_id)
+
         if app_service.sender == user_id:
             return app_service.sender, app_service
 
-        if not app_service.is_interested_in_user(user_id):
-            raise AuthError(403, "Application service cannot masquerade as this user.")
-        if not (await self.store.get_user_by_id(user_id)):
-            raise AuthError(403, "Application service has not registered this user")
         return user_id, app_service
 
     async def get_user_by_access_token(
@@ -488,44 +488,6 @@ class Auth:
         """
         return await self.store.is_server_admin(user)
 
-    def compute_auth_events(
-        self,
-        event,
-        current_state_ids: StateMap[str],
-        for_verification: bool = False,
-    ) -> List[str]:
-        """Given an event and current state return the list of event IDs used
-        to auth an event.
-
-        If `for_verification` is False then only return auth events that
-        should be added to the event's `auth_events`.
-
-        Returns:
-            List of event IDs.
-        """
-
-        if event.type == EventTypes.Create:
-            return []
-
-        # Currently we ignore the `for_verification` flag even though there are
-        # some situations where we can drop particular auth events when adding
-        # to the event's `auth_events` (e.g. joins pointing to previous joins
-        # when room is publicly joinable). Dropping event IDs has the
-        # advantage that the auth chain for the room grows slower, but we use
-        # the auth chain in state resolution v2 to order events, which means
-        # care must be taken if dropping events to ensure that it doesn't
-        # introduce undesirable "state reset" behaviour.
-        #
-        # All of which sounds a bit tricky so we don't bother for now.
-
-        auth_ids = []
-        for etype, state_key in event_auth.auth_types_for_event(event):
-            auth_ev_id = current_state_ids.get((etype, state_key))
-            if auth_ev_id:
-                auth_ids.append(auth_ev_id)
-
-        return auth_ids
-
     async def check_can_change_room_list(self, room_id: str, user: UserID) -> bool:
         """Determine whether the user is allowed to edit the room's entry in the
         published room list.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 4cb8bbaf70..054ab14ab6 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -118,7 +118,7 @@ class RedirectException(CodeMessageException):
         super().__init__(code=http_code, msg=msg)
         self.location = location
 
-        self.cookies = []  # type: List[bytes]
+        self.cookies: List[bytes] = []
 
 
 class SynapseError(CodeMessageException):
@@ -160,7 +160,7 @@ class ProxiedRequestError(SynapseError):
     ):
         super().__init__(code, msg, errcode)
         if additional_fields is None:
-            self._additional_fields = {}  # type: Dict
+            self._additional_fields: Dict = {}
         else:
             self._additional_fields = dict(additional_fields)
 
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index ce49a0ad58..ad1ff6a9df 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -289,7 +289,7 @@ class Filter:
             room_id = None
             ev_type = "m.presence"
             contains_url = False
-            labels = []  # type: List[str]
+            labels: List[str] = []
         else:
             sender = event.get("sender", None)
             if not sender:
diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py
index b9a10283f4..3e3d09bbd2 100644
--- a/synapse/api/ratelimiting.py
+++ b/synapse/api/ratelimiting.py
@@ -46,9 +46,7 @@ class Ratelimiter:
         #   * How many times an action has occurred since a point in time
         #   * The point in time
         #   * The rate_hz of this particular entry. This can vary per request
-        self.actions = (
-            OrderedDict()
-        )  # type: OrderedDict[Hashable, Tuple[float, int, float]]
+        self.actions: OrderedDict[Hashable, Tuple[float, int, float]] = OrderedDict()
 
     async def can_do_action(
         self,
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index f6c1c97b40..a20abc5a65 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -195,7 +195,7 @@ class RoomVersions:
     )
 
 
-KNOWN_ROOM_VERSIONS = {
+KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
     v.identifier: v
     for v in (
         RoomVersions.V1,
@@ -209,4 +209,4 @@ KNOWN_ROOM_VERSIONS = {
         RoomVersions.V7,
     )
     # Note that we do not include MSC2043 here unless it is enabled in the config.
-}  # type: Dict[str, RoomVersion]
+}
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 8879136881..b30571fe49 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -21,7 +21,7 @@ import socket
 import sys
 import traceback
 import warnings
-from typing import Awaitable, Callable, Iterable
+from typing import TYPE_CHECKING, Awaitable, Callable, Iterable
 
 from cryptography.utils import CryptographyDeprecationWarning
 from typing_extensions import NoReturn
@@ -41,10 +41,14 @@ from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.logging.context import PreserveLoggingContext
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.metrics.jemalloc import setup_jemalloc_stats
+from synapse.util.caches.lrucache import setup_expire_lru_cache_entries
 from synapse.util.daemonize import daemonize_process
 from synapse.util.rlimit import change_resource_limit
 from synapse.util.versionstring import get_version_string
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
 
 # list of tuples of function, args list, kwargs dict
@@ -312,7 +316,7 @@ def refresh_certificate(hs):
         logger.info("Context factories updated.")
 
 
-async def start(hs: "synapse.server.HomeServer"):
+async def start(hs: "HomeServer"):
     """
     Start a Synapse server or worker.
 
@@ -365,6 +369,9 @@ async def start(hs: "synapse.server.HomeServer"):
 
     load_legacy_spam_checkers(hs)
 
+    # If we've configured an expiry time for caches, start the background job now.
+    setup_expire_lru_cache_entries(hs)
+
     # It is now safe to start your Synapse.
     hs.start_listening()
     hs.get_datastore().db_pool.start_profiling()
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 5b041fcaad..c3d4992518 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -270,7 +270,7 @@ class GenericWorkerServer(HomeServer):
             site_tag = port
 
         # We always include a health resource.
-        resources = {"/health": HealthResource()}  # type: Dict[str, IResource]
+        resources: Dict[str, IResource] = {"/health": HealthResource()}
 
         for res in listener_config.http_options.resources:
             for name in res.names:
@@ -395,10 +395,8 @@ class GenericWorkerServer(HomeServer):
             elif listener.type == "metrics":
                 if not self.config.enable_metrics:
                     logger.warning(
-                        (
-                            "Metrics listener configured, but "
-                            "enable_metrics is not True!"
-                        )
+                        "Metrics listener configured, but "
+                        "enable_metrics is not True!"
                     )
                 else:
                     _base.listen_metrics(listener.bind_addresses, listener.port)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 7af56ac136..920b34d97b 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -305,10 +305,8 @@ class SynapseHomeServer(HomeServer):
             elif listener.type == "metrics":
                 if not self.config.enable_metrics:
                     logger.warning(
-                        (
-                            "Metrics listener configured, but "
-                            "enable_metrics is not True!"
-                        )
+                        "Metrics listener configured, but "
+                        "enable_metrics is not True!"
                     )
                 else:
                     _base.listen_metrics(listener.bind_addresses, listener.port)
diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py
index 8f86cecb76..96defac1d2 100644
--- a/synapse/app/phone_stats_home.py
+++ b/synapse/app/phone_stats_home.py
@@ -71,6 +71,8 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
     # General statistics
     #
 
+    store = hs.get_datastore()
+
     stats["homeserver"] = hs.config.server_name
     stats["server_context"] = hs.config.server_context
     stats["timestamp"] = now
@@ -79,34 +81,38 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
     stats["python_version"] = "{}.{}.{}".format(
         version.major, version.minor, version.micro
     )
-    stats["total_users"] = await hs.get_datastore().count_all_users()
+    stats["total_users"] = await store.count_all_users()
 
-    total_nonbridged_users = await hs.get_datastore().count_nonbridged_users()
+    total_nonbridged_users = await store.count_nonbridged_users()
     stats["total_nonbridged_users"] = total_nonbridged_users
 
-    daily_user_type_results = await hs.get_datastore().count_daily_user_type()
+    daily_user_type_results = await store.count_daily_user_type()
     for name, count in daily_user_type_results.items():
         stats["daily_user_type_" + name] = count
 
-    room_count = await hs.get_datastore().get_room_count()
+    room_count = await store.get_room_count()
     stats["total_room_count"] = room_count
 
-    stats["daily_active_users"] = await hs.get_datastore().count_daily_users()
-    stats["monthly_active_users"] = await hs.get_datastore().count_monthly_users()
-    daily_active_e2ee_rooms = await hs.get_datastore().count_daily_active_e2ee_rooms()
+    stats["daily_active_users"] = await store.count_daily_users()
+    stats["monthly_active_users"] = await store.count_monthly_users()
+    daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms()
     stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms
-    stats["daily_e2ee_messages"] = await hs.get_datastore().count_daily_e2ee_messages()
-    daily_sent_e2ee_messages = await hs.get_datastore().count_daily_sent_e2ee_messages()
+    stats["daily_e2ee_messages"] = await store.count_daily_e2ee_messages()
+    daily_sent_e2ee_messages = await store.count_daily_sent_e2ee_messages()
     stats["daily_sent_e2ee_messages"] = daily_sent_e2ee_messages
-    stats["daily_active_rooms"] = await hs.get_datastore().count_daily_active_rooms()
-    stats["daily_messages"] = await hs.get_datastore().count_daily_messages()
-    daily_sent_messages = await hs.get_datastore().count_daily_sent_messages()
+    stats["daily_active_rooms"] = await store.count_daily_active_rooms()
+    stats["daily_messages"] = await store.count_daily_messages()
+    daily_sent_messages = await store.count_daily_sent_messages()
     stats["daily_sent_messages"] = daily_sent_messages
 
-    r30_results = await hs.get_datastore().count_r30_users()
+    r30_results = await store.count_r30_users()
     for name, count in r30_results.items():
         stats["r30_users_" + name] = count
 
+    r30v2_results = await store.count_r30v2_users()
+    for name, count in r30v2_results.items():
+        stats["r30v2_users_" + name] = count
+
     stats["cache_factor"] = hs.config.caches.global_factor
     stats["event_cache_size"] = hs.config.caches.event_cache_size
 
@@ -115,8 +121,8 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
     #
 
     # This only reports info about the *main* database.
-    stats["database_engine"] = hs.get_datastore().db_pool.engine.module.__name__
-    stats["database_server_version"] = hs.get_datastore().db_pool.engine.server_version
+    stats["database_engine"] = store.db_pool.engine.module.__name__
+    stats["database_server_version"] = store.db_pool.engine.server_version
 
     #
     # Logging configuration
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 61152b2c46..935f24263c 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -88,9 +88,9 @@ class ApplicationServiceApi(SimpleHttpClient):
         super().__init__(hs)
         self.clock = hs.get_clock()
 
-        self.protocol_meta_cache = ResponseCache(
+        self.protocol_meta_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
-        )  # type: ResponseCache[Tuple[str, str]]
+        )
 
     async def query_user(self, service, user_id):
         if service.url is None:
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index 23ca0c83c1..06fbd1166b 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -5,6 +5,7 @@ from synapse.config import (
     api,
     appservice,
     auth,
+    cache,
     captcha,
     cas,
     consent,
@@ -88,6 +89,7 @@ class RootConfig:
     tracer: tracer.TracerConfig
     redis: redis.RedisConfig
     modules: modules.ModulesConfig
+    caches: cache.CacheConfig
     federation: federation.FederationConfig
 
     config_classes: List = ...
diff --git a/synapse/config/account_validity.py b/synapse/config/account_validity.py
index 957de7f3a6..6be4eafe55 100644
--- a/synapse/config/account_validity.py
+++ b/synapse/config/account_validity.py
@@ -18,6 +18,21 @@ class AccountValidityConfig(Config):
     section = "account_validity"
 
     def read_config(self, config, **kwargs):
+        """Parses the old account validity config. The config format looks like this:
+
+        account_validity:
+            enabled: true
+            period: 6w
+            renew_at: 1w
+            renew_email_subject: "Renew your %(app)s account"
+            template_dir: "res/templates"
+            account_renewed_html_path: "account_renewed.html"
+            invalid_token_html_path: "invalid_token.html"
+
+        We expect admins to use modules for this feature (which is why it doesn't appear
+        in the sample config file), but we want to keep support for it around for a bit
+        for backwards compatibility.
+        """
         account_validity_config = config.get("account_validity") or {}
         self.account_validity_enabled = account_validity_config.get("enabled", False)
         self.account_validity_renew_by_email_enabled = (
@@ -75,90 +90,3 @@ class AccountValidityConfig(Config):
             ],
             account_validity_template_dir,
         )
-
-    def generate_config_section(self, **kwargs):
-        return """\
-        ## Account Validity ##
-
-        # Optional account validity configuration. This allows for accounts to be denied
-        # any request after a given period.
-        #
-        # Once this feature is enabled, Synapse will look for registered users without an
-        # expiration date at startup and will add one to every account it found using the
-        # current settings at that time.
-        # This means that, if a validity period is set, and Synapse is restarted (it will
-        # then derive an expiration date from the current validity period), and some time
-        # after that the validity period changes and Synapse is restarted, the users'
-        # expiration dates won't be updated unless their account is manually renewed. This
-        # date will be randomly selected within a range [now + period - d ; now + period],
-        # where d is equal to 10% of the validity period.
-        #
-        account_validity:
-          # The account validity feature is disabled by default. Uncomment the
-          # following line to enable it.
-          #
-          #enabled: true
-
-          # The period after which an account is valid after its registration. When
-          # renewing the account, its validity period will be extended by this amount
-          # of time. This parameter is required when using the account validity
-          # feature.
-          #
-          #period: 6w
-
-          # The amount of time before an account's expiry date at which Synapse will
-          # send an email to the account's email address with a renewal link. By
-          # default, no such emails are sent.
-          #
-          # If you enable this setting, you will also need to fill out the 'email' and
-          # 'public_baseurl' configuration sections.
-          #
-          #renew_at: 1w
-
-          # The subject of the email sent out with the renewal link. '%(app)s' can be
-          # used as a placeholder for the 'app_name' parameter from the 'email'
-          # section.
-          #
-          # Note that the placeholder must be written '%(app)s', including the
-          # trailing 's'.
-          #
-          # If this is not set, a default value is used.
-          #
-          #renew_email_subject: "Renew your %(app)s account"
-
-          # Directory in which Synapse will try to find templates for the HTML files to
-          # serve to the user when trying to renew an account. If not set, default
-          # templates from within the Synapse package will be used.
-          #
-          # The currently available templates are:
-          #
-          # * account_renewed.html: Displayed to the user after they have successfully
-          #       renewed their account.
-          #
-          # * account_previously_renewed.html: Displayed to the user if they attempt to
-          #       renew their account with a token that is valid, but that has already
-          #       been used. In this case the account is not renewed again.
-          #
-          # * invalid_token.html: Displayed to the user when they try to renew an account
-          #       with an unknown or invalid renewal token.
-          #
-          # See https://github.com/matrix-org/synapse/tree/master/synapse/res/templates for
-          # default template contents.
-          #
-          # The file name of some of these templates can be configured below for legacy
-          # reasons.
-          #
-          #template_dir: "res/templates"
-
-          # A custom file name for the 'account_renewed.html' template.
-          #
-          # If not set, the file is assumed to be named "account_renewed.html".
-          #
-          #account_renewed_html_path: "account_renewed.html"
-
-          # A custom file name for the 'invalid_token.html' template.
-          #
-          # If not set, the file is assumed to be named "invalid_token.html".
-          #
-          #invalid_token_html_path: "invalid_token.html"
-        """
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 746fc3cc02..1ebea88db2 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -57,14 +57,14 @@ def load_appservices(hostname, config_files):
         return []
 
     # Dicts of value -> filename
-    seen_as_tokens = {}  # type: Dict[str, str]
-    seen_ids = {}  # type: Dict[str, str]
+    seen_as_tokens: Dict[str, str] = {}
+    seen_ids: Dict[str, str] = {}
 
     appservices = []
 
     for config_file in config_files:
         try:
-            with open(config_file, "r") as f:
+            with open(config_file) as f:
                 appservice = _load_appservice(hostname, yaml.safe_load(f), config_file)
                 if appservice.id in seen_ids:
                     raise ConfigError(
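
A note on the annotation changes that recur throughout this diff: every `x = ...  # type: T`
to `x: T = ...` hunk swaps a type comment for a PEP 526 variable annotation with no change in
behaviour. A minimal, standalone illustration (not Synapse code) of the two spellings:

    from typing import Dict

    # Old style: the type lives in a comment that only type checkers read.
    seen_ids = {}  # type: Dict[str, str]

    # New style: the annotation is part of the syntax and is also available
    # at runtime via __annotations__.
    seen_ids_annotated: Dict[str, str] = {}
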
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index f865a7f1f4..3560c66a53 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -25,7 +25,7 @@ from ._base import Config, ConfigError
 _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR"
 
 # Map from canonicalised cache name to cache.
-_CACHES = {}  # type: Dict[str, Callable[[float], None]]
+_CACHES: Dict[str, Callable[[float], None]] = {}
 
 # a lock on the contents of _CACHES
 _CACHES_LOCK = threading.Lock()
@@ -124,41 +124,47 @@ class CacheConfig(Config):
         #external_event_cache_expiry_ms: 1800000
 
         caches:
-           # Controls the global cache factor, which is the default cache factor
-           # for all caches if a specific factor for that cache is not otherwise
-           # set.
-           #
-           # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
-           # variable. Setting by environment variable takes priority over
-           # setting through the config file.
-           #
-           # Defaults to 0.5, which will half the size of all caches.
-           #
-           #global_factor: 1.0
-
-           # A dictionary of cache name to cache factor for that individual
-           # cache. Overrides the global cache factor for a given cache.
-           #
-           # These can also be set through environment variables comprised
-           # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
-           # letters and underscores. Setting by environment variable
-           # takes priority over setting through the config file.
-           # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
-           #
-           # Some caches have '*' and other characters that are not
-           # alphanumeric or underscores. These caches can be named with or
-           # without the special characters stripped. For example, to specify
-           # the cache factor for `*stateGroupCache*` via an environment
-           # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
-           #
-           per_cache_factors:
-             #get_users_who_share_room_with_user: 2.0
+          # Controls the global cache factor, which is the default cache factor
+          # for all caches if a specific factor for that cache is not otherwise
+          # set.
+          #
+          # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+          # variable. Setting by environment variable takes priority over
+          # setting through the config file.
+          #
+          # Defaults to 0.5, which will halve the size of all caches.
+          #
+          #global_factor: 1.0
+
+          # A dictionary of cache name to cache factor for that individual
+          # cache. Overrides the global cache factor for a given cache.
+          #
+          # These can also be set through environment variables composed
+          # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+          # letters and underscores. Setting by environment variable
+          # takes priority over setting through the config file.
+          # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+          #
+          # Some caches have '*' and other characters that are not
+          # alphanumeric or underscores. These caches can be named with or
+          # without the special characters stripped. For example, to specify
+          # the cache factor for `*stateGroupCache*` via an environment
+          # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
+          #
+          per_cache_factors:
+            #get_users_who_share_room_with_user: 2.0
+
+          # Controls how long an entry can be in a cache without having been
+          # accessed before being evicted. Defaults to None, which means
+          # entries are never evicted based on time.
+          #
+          #expiry_time: 30m
         """
 
     def read_config(self, config, **kwargs):
         self.event_cache_size = self.parse_size(
             config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
         )
 
         self.external_event_cache_expiry_ms = config.get(
             "external_event_cache_expiry_ms", _DEFAULT_EXTERNAL_CACHE_EXPIRY_MS
@@ -167,6 +173,6 @@ class CacheConfig(Config):
             raise ConfigError("external_event_cache_expiry_ms must be a number.")
 
-        self.cache_factors = {}  # type: Dict[str, float]
+        self.cache_factors: Dict[str, float] = {}
 
         cache_config = config.get("caches") or {}
         self.global_factor = cache_config.get(
@@ -215,6 +225,12 @@ class CacheConfig(Config):
                     e.message  # noqa: B306, DependencyException.message is a property
                 )
 
+        expiry_time = cache_config.get("expiry_time")
+        if expiry_time:
+            self.expiry_time_msec = self.parse_duration(expiry_time)
+        else:
+            self.expiry_time_msec = None
+
         # Resize all caches (if necessary) with the new factors we've loaded
         self.resize_all_caches()
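
To make the new cache options above concrete, here is a rough standalone sketch (not
Synapse's actual implementation; the helper names are invented) of how a `caches` section
with `global_factor`, `per_cache_factors` and the new `expiry_time` option could be
interpreted:

    import os
    from typing import Dict, Optional, Tuple

    _UNITS_MS = {"s": 1_000, "m": 60_000, "h": 3_600_000, "d": 86_400_000, "w": 604_800_000}

    def parse_duration_ms(value: str) -> int:
        """Parse a duration such as '30m' or '1d' into milliseconds."""
        return int(value[:-1]) * _UNITS_MS[value[-1]]

    def read_cache_config(section: dict) -> Tuple[float, Dict[str, float], Optional[int]]:
        # The SYNAPSE_CACHE_FACTOR environment variable takes priority over the file.
        global_factor = float(
            os.environ.get("SYNAPSE_CACHE_FACTOR", section.get("global_factor", 0.5))
        )
        per_cache_factors: Dict[str, float] = dict(section.get("per_cache_factors") or {})
        expiry = section.get("expiry_time")
        expiry_time_msec: Optional[int] = parse_duration_ms(expiry) if expiry else None
        return global_factor, per_cache_factors, expiry_time_msec

    # Entries idle for longer than 30 minutes become eligible for eviction.
    print(read_cache_config({"global_factor": 1.0, "expiry_time": "30m"}))
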
 
diff --git a/synapse/config/consent.py b/synapse/config/consent.py
index 30d07cc219..b05a9bd97f 100644
--- a/synapse/config/consent.py
+++ b/synapse/config/consent.py
@@ -22,7 +22,7 @@ DEFAULT_CONFIG = """\
 # User Consent configuration
 #
 # for detailed instructions, see
-# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md
+# https://matrix-org.github.io/synapse/latest/consent_tracking.html
 #
 # Parts of this section are required if enabling the 'consent' resource under
 # 'listeners', in particular 'template_dir' and 'version'.
diff --git a/synapse/config/database.py b/synapse/config/database.py
index c76ef1e1de..3d7d92f615 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -62,7 +62,8 @@ DEFAULT_CONFIG = """\
 #    cp_min: 5
 #    cp_max: 10
 #
-# For more information on using Synapse with Postgres, see `docs/postgres.md`.
+# For more information on using Synapse with Postgres,
+# see https://matrix-org.github.io/synapse/latest/postgres.html.
 #
 database:
   name: sqlite3
diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py
index 5564d7d097..bcecbfec03 100644
--- a/synapse/config/emailconfig.py
+++ b/synapse/config/emailconfig.py
@@ -134,9 +134,9 @@ class EmailConfig(Config):
 
                 # trusted_third_party_id_servers does not contain a scheme whereas
                 # account_threepid_delegate_email is expected to. Presume https
-                self.account_threepid_delegate_email = (
+                self.account_threepid_delegate_email: Optional[str] = (
                     "https://" + first_trusted_identity_server
-                )  # type: Optional[str]
+                )
                 self.using_identity_server_from_trusted_list = True
             else:
                 raise ConfigError(
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 7fb1f7021f..e25ccba9ac 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -25,10 +25,10 @@ class ExperimentalConfig(Config):
         experimental = config.get("experimental_features") or {}
 
         # MSC2858 (multiple SSO identity providers)
-        self.msc2858_enabled = experimental.get("msc2858_enabled", False)  # type: bool
+        self.msc2858_enabled: bool = experimental.get("msc2858_enabled", False)
 
         # MSC3026 (busy presence state)
-        self.msc3026_enabled = experimental.get("msc3026_enabled", False)  # type: bool
+        self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False)
 
         # MSC2716 (backfill existing history)
-        self.msc2716_enabled = experimental.get("msc2716_enabled", False)  # type: bool
+        self.msc2716_enabled: bool = experimental.get("msc2716_enabled", False)
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index cdd7a1ef05..7d64993e22 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -22,7 +22,7 @@ class FederationConfig(Config):
 
     def read_config(self, config, **kwargs):
         # FIXME: federation_domain_whitelist needs sytests
-        self.federation_domain_whitelist = None  # type: Optional[dict]
+        self.federation_domain_whitelist: Optional[dict] = None
         federation_domain_whitelist = config.get("federation_domain_whitelist", None)
 
         if federation_domain_whitelist is not None:
diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py
index 9e07e73008..9d295f5856 100644
--- a/synapse/config/jwt.py
+++ b/synapse/config/jwt.py
@@ -64,7 +64,7 @@ class JWTConfig(Config):
         # Note that this is a non-standard login type and client support is
         # expected to be non-existent.
         #
-        # See https://github.com/matrix-org/synapse/blob/master/docs/jwt.md.
+        # See https://matrix-org.github.io/synapse/latest/jwt.html.
         #
         #jwt_config:
             # Uncomment the following to enable authorization using JSON web
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 91d9bcf32e..ad4e6e61c3 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -49,7 +49,7 @@ DEFAULT_LOG_CONFIG = Template(
 # be ingested by ELK stacks. See [2] for details.
 #
 # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema
-# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md
+# [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html
 
 version: 1
 
diff --git a/synapse/config/modules.py b/synapse/config/modules.py
index 3209e1c492..ae0821e5a5 100644
--- a/synapse/config/modules.py
+++ b/synapse/config/modules.py
@@ -37,7 +37,7 @@ class ModulesConfig(Config):
 
             # Server admins can expand Synapse's functionality with external modules.
             #
-            # See https://matrix-org.github.io/synapse/develop/modules.html for more
+            # See https://matrix-org.github.io/synapse/latest/modules.html for more
             # documentation on how to configure or create custom modules for Synapse.
             #
             modules:
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index ea0abf5aa2..ba89d11cf0 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -166,7 +166,7 @@ class OIDCConfig(Config):
         #
         #       module: The class name of a custom mapping module. Default is
         #           {mapping_provider!r}.
-        #           See https://github.com/matrix-org/synapse/blob/master/docs/sso_mapping_providers.md#openid-mapping-providers
+        #           See https://matrix-org.github.io/synapse/latest/sso_mapping_providers.html#openid-mapping-providers
         #           for information on implementing a custom mapping provider.
         #
         #       config: Configuration for the mapping provider module. This section will
@@ -217,7 +217,7 @@ class OIDCConfig(Config):
         #     - attribute: groups
         #       value: "admin"
         #
-        # See https://github.com/matrix-org/synapse/blob/master/docs/openid.md
+        # See https://matrix-org.github.io/synapse/latest/openid.html
         # for information on how to configure these options.
         #
         # For backwards compatibility, it is also possible to configure a single OIDC
@@ -460,7 +460,7 @@ def _parse_oidc_config_dict(
             ) from e
 
     client_secret_jwt_key_config = oidc_config.get("client_secret_jwt_key")
-    client_secret_jwt_key = None  # type: Optional[OidcProviderClientSecretJwtKey]
+    client_secret_jwt_key: Optional[OidcProviderClientSecretJwtKey] = None
     if client_secret_jwt_key_config is not None:
         keyfile = client_secret_jwt_key_config.get("key_file")
         if keyfile:
diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py
index 1cf69734bb..0f5b2b3977 100644
--- a/synapse/config/password_auth_providers.py
+++ b/synapse/config/password_auth_providers.py
@@ -25,7 +25,7 @@ class PasswordAuthProviderConfig(Config):
     section = "authproviders"
 
     def read_config(self, config, **kwargs):
-        self.password_providers = []  # type: List[Any]
+        self.password_providers: List[Any] = []
         providers = []
 
         # We want to be backwards compatible with the old `ldap_config`
@@ -57,7 +57,7 @@ class PasswordAuthProviderConfig(Config):
         # ex. LDAP, external tokens, etc.
         #
         # For more information and known implementations, please see
-        # https://github.com/matrix-org/synapse/blob/master/docs/password_auth_providers.md
+        # https://matrix-org.github.io/synapse/latest/password_auth_providers.html
         #
         # Note: instances wishing to use SAML or CAS authentication should
         # instead use the `saml2_config` or `cas_config` options,
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 2f77d6703d..0dfb3a227a 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -62,7 +62,7 @@ def parse_thumbnail_requirements(thumbnail_sizes):
         Dictionary mapping from media type string to list of
         ThumbnailRequirement tuples.
     """
-    requirements = {}  # type: Dict[str, List]
+    requirements: Dict[str, List] = {}
     for size in thumbnail_sizes:
         width = size["width"]
         height = size["height"]
@@ -141,7 +141,7 @@ class ContentRepositoryConfig(Config):
         #
         # We don't create the storage providers here as not all workers need
         # them to be started.
-        self.media_storage_providers = []  # type: List[tuple]
+        self.media_storage_providers: List[tuple] = []
 
         for i, provider_config in enumerate(storage_providers):
             # We special case the module "file_system" so as not to need to
@@ -250,7 +250,7 @@ class ContentRepositoryConfig(Config):
         #
         # If you are using a reverse proxy you may also need to set this value in
         # your reverse proxy's config. Notably Nginx has a small max body size by default.
-        # See https://matrix-org.github.io/synapse/develop/reverse_proxy.html.
+        # See https://matrix-org.github.io/synapse/latest/reverse_proxy.html.
         #
         #max_upload_size: 50M
 
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 0833a5f7bc..b9e0c0b300 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -153,7 +153,7 @@ ROOM_COMPLEXITY_TOO_GREAT = (
 METRICS_PORT_WARNING = """\
 The metrics_port configuration option is deprecated in Synapse 0.31 in favour of
 a listener. Please see
-https://github.com/matrix-org/synapse/blob/master/docs/metrics-howto.md
+https://matrix-org.github.io/synapse/latest/metrics-howto.html
 on how to configure the new listener.
 --------------------------------------------------------------------------------"""
 
@@ -505,7 +505,7 @@ class ServerConfig(Config):
                 " greater than 'allowed_lifetime_max'"
             )
 
-        self.retention_purge_jobs = []  # type: List[Dict[str, Optional[int]]]
+        self.retention_purge_jobs: List[Dict[str, Optional[int]]] = []
         for purge_job_config in retention_config.get("purge_jobs", []):
             interval_config = purge_job_config.get("interval")
 
@@ -688,23 +688,21 @@ class ServerConfig(Config):
         # not included in the sample configuration file on purpose as it's a temporary
         # hack, so that some users can trial the new defaults without impacting every
         # user on the homeserver.
-        users_new_default_push_rules = (
+        users_new_default_push_rules: list = (
             config.get("users_new_default_push_rules") or []
-        )  # type: list
+        )
         if not isinstance(users_new_default_push_rules, list):
             raise ConfigError("'users_new_default_push_rules' must be a list")
 
         # Turn the list into a set to improve lookup speed.
-        self.users_new_default_push_rules = set(
-            users_new_default_push_rules
-        )  # type: set
+        self.users_new_default_push_rules: set = set(users_new_default_push_rules)
 
         # Whitelist of domain names that given next_link parameters must have
-        next_link_domain_whitelist = config.get(
+        next_link_domain_whitelist: Optional[List[str]] = config.get(
             "next_link_domain_whitelist"
-        )  # type: Optional[List[str]]
+        )
 
-        self.next_link_domain_whitelist = None  # type: Optional[Set[str]]
+        self.next_link_domain_whitelist: Optional[Set[str]] = None
         if next_link_domain_whitelist is not None:
             if not isinstance(next_link_domain_whitelist, list):
                 raise ConfigError("'next_link_domain_whitelist' must be a list")
@@ -811,7 +809,7 @@ class ServerConfig(Config):
         # In most cases you should avoid using a matrix specific subdomain such as
         # matrix.example.com or synapse.example.com as the server_name for the same
         # reasons you wouldn't use user@email.example.com as your email address.
-        # See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md
+        # See https://matrix-org.github.io/synapse/latest/delegate.html
         # for information on how to host Synapse on a subdomain while preserving
         # a clean server_name.
         #
@@ -988,9 +986,9 @@ class ServerConfig(Config):
         #       'all local interfaces'.
         #
         #   type: the type of listener. Normally 'http', but other valid options are:
-        #       'manhole' (see docs/manhole.md),
-        #       'metrics' (see docs/metrics-howto.md),
-        #       'replication' (see docs/workers.md).
+        #       'manhole' (see https://matrix-org.github.io/synapse/latest/manhole.html),
+        #       'metrics' (see https://matrix-org.github.io/synapse/latest/metrics-howto.html),
+        #       'replication' (see https://matrix-org.github.io/synapse/latest/workers.html).
         #
         #   tls: set to true to enable TLS for this listener. Will use the TLS
         #       key/cert specified in tls_private_key_path / tls_certificate_path.
@@ -1015,8 +1013,8 @@ class ServerConfig(Config):
         #   client: the client-server API (/_matrix/client), and the synapse admin
         #       API (/_synapse/admin). Also implies 'media' and 'static'.
         #
-        #   consent: user consent forms (/_matrix/consent). See
-        #       docs/consent_tracking.md.
+        #   consent: user consent forms (/_matrix/consent).
+        #       See https://matrix-org.github.io/synapse/latest/consent_tracking.html.
         #
         #   federation: the server-server API (/_matrix/federation). Also implies
         #       'media', 'keys', 'openid'
@@ -1025,12 +1023,13 @@ class ServerConfig(Config):
         #
         #   media: the media API (/_matrix/media).
         #
-        #   metrics: the metrics interface. See docs/metrics-howto.md.
+        #   metrics: the metrics interface.
+        #       See https://matrix-org.github.io/synapse/latest/metrics-howto.html.
         #
         #   openid: OpenID authentication.
         #
-        #   replication: the HTTP replication API (/_synapse/replication). See
-        #       docs/workers.md.
+        #   replication: the HTTP replication API (/_synapse/replication).
+        #       See https://matrix-org.github.io/synapse/latest/workers.html.
         #
         #   static: static resources under synapse/static (/_matrix/static). (Mostly
         #       useful for 'fallback authentication'.)
@@ -1050,7 +1049,7 @@ class ServerConfig(Config):
           # that unwraps TLS.
           #
           # If you plan to use a reverse proxy, please see
-          # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.md.
+          # https://matrix-org.github.io/synapse/latest/reverse_proxy.html.
           #
           %(unsecure_http_bindings)s
 
diff --git a/synapse/config/spam_checker.py b/synapse/config/spam_checker.py
index d0311d6468..a233a9ce03 100644
--- a/synapse/config/spam_checker.py
+++ b/synapse/config/spam_checker.py
@@ -26,7 +26,7 @@ LEGACY_SPAM_CHECKER_WARNING = """
 This server is using a spam checker module that is implementing the deprecated spam
 checker interface. Please check with the module's maintainer to see if a new version
 supporting Synapse's generic modules system is available.
-For more information, please see https://matrix-org.github.io/synapse/develop/modules.html
+For more information, please see https://matrix-org.github.io/synapse/latest/modules.html
 ---------------------------------------------------------------------------------------"""
 
 
@@ -34,7 +34,7 @@ class SpamCheckerConfig(Config):
     section = "spamchecker"
 
     def read_config(self, config, **kwargs):
-        self.spam_checkers = []  # type: List[Tuple[Any, Dict]]
+        self.spam_checkers: List[Tuple[Any, Dict]] = []
 
         spam_checkers = config.get("spam_checker") or []
         if isinstance(spam_checkers, dict):
diff --git a/synapse/config/sso.py b/synapse/config/sso.py
index e4346e02aa..d0f04cf8e6 100644
--- a/synapse/config/sso.py
+++ b/synapse/config/sso.py
@@ -39,7 +39,7 @@ class SSOConfig(Config):
     section = "sso"
 
     def read_config(self, config, **kwargs):
-        sso_config = config.get("sso") or {}  # type: Dict[str, Any]
+        sso_config: Dict[str, Any] = config.get("sso") or {}
 
         # The sso-specific template_dir
         self.sso_template_dir = sso_config.get("template_dir")
diff --git a/synapse/config/stats.py b/synapse/config/stats.py
index 3d44b51201..6f253e00c0 100644
--- a/synapse/config/stats.py
+++ b/synapse/config/stats.py
@@ -38,20 +38,16 @@ class StatsConfig(Config):
 
     def read_config(self, config, **kwargs):
         self.stats_enabled = True
-        self.stats_bucket_size = 86400 * 1000
         stats_config = config.get("stats", None)
         if stats_config:
             self.stats_enabled = stats_config.get("enabled", self.stats_enabled)
-            self.stats_bucket_size = self.parse_duration(
-                stats_config.get("bucket_size", "1d")
-            )
         if not self.stats_enabled:
             logger.warning(ROOM_STATS_DISABLED_WARN)
 
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """
         # Settings for local room and user statistics collection. See
-        # docs/room_and_user_statistics.md.
+        # https://matrix-org.github.io/synapse/latest/room_and_user_statistics.html.
         #
         stats:
           # Uncomment the following to disable room and user statistics. Note that doing
@@ -59,9 +55,4 @@ class StatsConfig(Config):
           # correctly.
           #
           #enabled: false
-
-          # The size of each timeslice in the room_stats_historical and
-          # user_stats_historical tables, as a time period. Defaults to "1d".
-          #
-          #bucket_size: 1h
         """
diff --git a/synapse/config/tls.py b/synapse/config/tls.py
index 9a16a8fbae..5679f05e42 100644
--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -66,10 +66,8 @@ class TlsConfig(Config):
         if self.federation_client_minimum_tls_version == "1.3":
             if getattr(SSL, "OP_NO_TLSv1_3", None) is None:
                 raise ConfigError(
-                    (
-                        "federation_client_minimum_tls_version cannot be 1.3, "
-                        "your OpenSSL does not support it"
-                    )
+                    "federation_client_minimum_tls_version cannot be 1.3, "
+                    "your OpenSSL does not support it"
                 )
 
         # Whitelist of domains to not verify certificates for
@@ -80,7 +78,7 @@ class TlsConfig(Config):
             fed_whitelist_entries = []
 
         # Support globs (*) in whitelist values
-        self.federation_certificate_verification_whitelist = []  # type: List[Pattern]
+        self.federation_certificate_verification_whitelist: List[Pattern] = []
         for entry in fed_whitelist_entries:
             try:
                 entry_regex = glob_to_regex(entry.encode("ascii").decode("ascii"))
@@ -132,8 +130,8 @@ class TlsConfig(Config):
             "use_insecure_ssl_client_just_for_testing_do_not_use"
         )
 
-        self.tls_certificate = None  # type: Optional[crypto.X509]
-        self.tls_private_key = None  # type: Optional[crypto.PKey]
+        self.tls_certificate: Optional[crypto.X509] = None
+        self.tls_private_key: Optional[crypto.PKey] = None
 
     def is_disk_cert_valid(self, allow_self_signed=True):
         """
diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py
index d0ea17261f..21b9a88353 100644
--- a/synapse/config/tracer.py
+++ b/synapse/config/tracer.py
@@ -81,7 +81,7 @@ class TracerConfig(Config):
             #enabled: true
 
             # The list of homeservers we wish to send and receive span contexts and span baggage.
-            # See docs/opentracing.rst.
+            # See https://matrix-org.github.io/synapse/latest/opentracing.html.
             #
             # This is a list of regexes which are matched against the server_name of the
             # homeserver.
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index 4cbf79eeed..b10df8a232 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -50,7 +50,7 @@ class UserDirectoryConfig(Config):
             #
             # If you set it true, you'll have to rebuild the user_directory search
             # indexes, see:
-            # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
+            # https://matrix-org.github.io/synapse/latest/user_directory.html
             #
             # Uncomment to return search results containing all known users, even if that
             # user does not share a room with the requester.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index e5a4685ed4..9e9b1c1c86 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -170,11 +170,13 @@ class Keyring:
             )
         self._key_fetchers = key_fetchers
 
-        self._server_queue = BatchingQueue(
+        self._server_queue: BatchingQueue[
+            _FetchKeyRequest, Dict[str, Dict[str, FetchKeyResult]]
+        ] = BatchingQueue(
             "keyring_server",
             clock=hs.get_clock(),
             process_batch_callback=self._inner_fetch_key_requests,
-        )  # type: BatchingQueue[_FetchKeyRequest, Dict[str, Dict[str, FetchKeyResult]]]
+        )
 
     async def verify_json_for_server(
         self,
@@ -330,7 +332,7 @@ class Keyring:
         # First we need to deduplicate requests for the same key. We do this by
         # taking the *maximum* requested `minimum_valid_until_ts` for each pair
         # of server name/key ID.
-        server_to_key_to_ts = {}  # type: Dict[str, Dict[str, int]]
+        server_to_key_to_ts: Dict[str, Dict[str, int]] = {}
         for request in requests:
             by_server = server_to_key_to_ts.setdefault(request.server_name, {})
             for key_id in request.key_ids:
@@ -355,7 +357,7 @@ class Keyring:
 
         # We now convert the returned list of results into a map from server
         # name to key ID to FetchKeyResult, to return.
-        to_return = {}  # type: Dict[str, Dict[str, FetchKeyResult]]
+        to_return: Dict[str, Dict[str, FetchKeyResult]] = {}
         for (request, results) in zip(deduped_requests, results_per_request):
             to_return_by_server = to_return.setdefault(request.server_name, {})
             for key_id, key_result in results.items():
@@ -455,7 +457,7 @@ class StoreKeyFetcher(KeyFetcher):
         )
 
         res = await self.store.get_server_verify_keys(key_ids_to_fetch)
-        keys = {}  # type: Dict[str, Dict[str, FetchKeyResult]]
+        keys: Dict[str, Dict[str, FetchKeyResult]] = {}
         for (server_name, key_id), key in res.items():
             keys.setdefault(server_name, {})[key_id] = key
         return keys
@@ -603,7 +605,7 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
             ).addErrback(unwrapFirstError)
         )
 
-        union_of_keys = {}  # type: Dict[str, Dict[str, FetchKeyResult]]
+        union_of_keys: Dict[str, Dict[str, FetchKeyResult]] = {}
         for result in results:
             for server_name, keys in result.items():
                 union_of_keys.setdefault(server_name, {}).update(keys)
@@ -656,8 +658,8 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
         except HttpResponseException as e:
             raise KeyLookupError("Remote server returned an error: %s" % (e,))
 
-        keys = {}  # type: Dict[str, Dict[str, FetchKeyResult]]
-        added_keys = []  # type: List[Tuple[str, str, FetchKeyResult]]
+        keys: Dict[str, Dict[str, FetchKeyResult]] = {}
+        added_keys: List[Tuple[str, str, FetchKeyResult]] = []
 
         time_now_ms = self.clock.time_msec()
 
@@ -805,7 +807,7 @@ class ServerKeyFetcher(BaseV2KeyFetcher):
         Raises:
             KeyLookupError if there was a problem making the lookup
         """
-        keys = {}  # type: Dict[str, FetchKeyResult]
+        keys: Dict[str, FetchKeyResult] = {}
 
         for requested_key_id in key_ids:
             # we may have found this key as a side-effect of asking for another.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 33d7c60241..137dff2513 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import logging
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
@@ -29,6 +29,7 @@ from synapse.api.room_versions import (
     RoomVersion,
 )
 from synapse.events import EventBase
+from synapse.events.builder import EventBuilder
 from synapse.types import StateMap, UserID, get_domain_from_id
 
 logger = logging.getLogger(__name__)
@@ -47,6 +48,9 @@ def check(
         room_version_obj: the version of the room
         event: the event being checked.
         auth_events: the existing room state.
+        do_sig_check: True if it should be verified that the sending server
+            signed the event.
+        do_size_check: True if the size of the event fields should be verified.
 
     Raises:
         AuthError if the checks fail
@@ -527,7 +531,7 @@ def _check_power_levels(
     user_level = get_user_power_level(event.user_id, auth_events)
 
     # Check other levels:
-    levels_to_check = [
+    levels_to_check: List[Tuple[str, Optional[str]]] = [
         ("users_default", None),
         ("events_default", None),
         ("state_default", None),
@@ -535,7 +539,7 @@ def _check_power_levels(
         ("redact", None),
         ("kick", None),
         ("invite", None),
-    ]  # type: List[Tuple[str, Optional[str]]]
+    ]
 
     old_list = current_state.content.get("users", {})
     for user in set(list(old_list) + list(user_list)):
@@ -565,12 +569,12 @@ def _check_power_levels(
             new_loc = new_loc.get(dir, {})
 
         if level_to_check in old_loc:
-            old_level = int(old_loc[level_to_check])  # type: Optional[int]
+            old_level: Optional[int] = int(old_loc[level_to_check])
         else:
             old_level = None
 
         if level_to_check in new_loc:
-            new_level = int(new_loc[level_to_check])  # type: Optional[int]
+            new_level: Optional[int] = int(new_loc[level_to_check])
         else:
             new_level = None
 
@@ -724,7 +728,7 @@ def get_public_keys(invite_event: EventBase) -> List[Dict[str, Any]]:
     return public_keys
 
 
-def auth_types_for_event(event: EventBase) -> Set[Tuple[str, str]]:
+def auth_types_for_event(event: Union[EventBase, EventBuilder]) -> Set[Tuple[str, str]]:
     """Given an event, return a list of (EventType, StateKey) that may be
     needed to auth the event. The returned list may be a superset of what
     would actually be required depending on the full state of the room.
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 0cb9c1cc1e..0298af4c02 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -105,28 +105,28 @@ class _EventInternalMetadata:
         self._dict = dict(internal_metadata_dict)
 
         # the stream ordering of this event. None, until it has been persisted.
-        self.stream_ordering = None  # type: Optional[int]
+        self.stream_ordering: Optional[int] = None
 
         # whether this event is an outlier (ie, whether we have the state at that point
         # in the DAG)
         self.outlier = False
 
-    out_of_band_membership = DictProperty("out_of_band_membership")  # type: bool
-    send_on_behalf_of = DictProperty("send_on_behalf_of")  # type: str
-    recheck_redaction = DictProperty("recheck_redaction")  # type: bool
-    soft_failed = DictProperty("soft_failed")  # type: bool
-    proactively_send = DictProperty("proactively_send")  # type: bool
-    redacted = DictProperty("redacted")  # type: bool
-    txn_id = DictProperty("txn_id")  # type: str
-    token_id = DictProperty("token_id")  # type: str
-    historical = DictProperty("historical")  # type: bool
+    out_of_band_membership: bool = DictProperty("out_of_band_membership")
+    send_on_behalf_of: str = DictProperty("send_on_behalf_of")
+    recheck_redaction: bool = DictProperty("recheck_redaction")
+    soft_failed: bool = DictProperty("soft_failed")
+    proactively_send: bool = DictProperty("proactively_send")
+    redacted: bool = DictProperty("redacted")
+    txn_id: str = DictProperty("txn_id")
+    token_id: int = DictProperty("token_id")
+    historical: bool = DictProperty("historical")
 
     # XXX: These are set by StreamWorkerStore._set_before_and_after.
     # I'm pretty sure that these are never persisted to the database, so shouldn't
     # be here
-    before = DictProperty("before")  # type: RoomStreamToken
-    after = DictProperty("after")  # type: RoomStreamToken
-    order = DictProperty("order")  # type: Tuple[int, int]
+    before: RoomStreamToken = DictProperty("before")
+    after: RoomStreamToken = DictProperty("after")
+    order: Tuple[int, int] = DictProperty("order")
 
     def get_dict(self) -> JsonDict:
         return dict(self._dict)
@@ -291,6 +291,20 @@ class EventBase(metaclass=abc.ABCMeta):
 
         return pdu_json
 
+    def get_templated_pdu_json(self) -> JsonDict:
+        """
+        Return a JSON object suitable for a templated event, as used in the
+        make_{join,leave,knock} workflow.
+        """
+        # By using _dict directly we don't pull in signatures/unsigned.
+        template_json = dict(self._dict)
+        # The hashes (similar to the signature) need to be recalculated by the
+        # joining/leaving/knocking server after (potentially) modifying the
+        # event.
+        template_json.pop("hashes")
+
+        return template_json
+
     def __set__(self, instance, value):
         raise AttributeError("Unrecognized attribute %s" % (instance,))
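
The new get_templated_pdu_json() above is what the make_join/make_leave/make_knock
responders in federation_server.py (later in this diff) now return instead of a fully
signed PDU. A rough sketch of the idea using plain dicts (not real event objects): the
template omits signatures/unsigned and drops the hashes, since the requesting server has
to recalculate both after filling in the event:

    def make_template(event_dict: dict) -> dict:
        """Strip the fields that the joining/leaving/knocking server recomputes."""
        template = {
            k: v for k, v in event_dict.items() if k not in ("signatures", "unsigned")
        }
        template.pop("hashes", None)
        return template

    pdu = {"type": "m.room.member", "content": {}, "hashes": {"sha256": "..."}, "signatures": {}}
    template = make_template(pdu)
    assert "hashes" not in template and "signatures" not in template
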
 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 81bf8615b7..87e2bb123b 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import attr
 from nacl.signing import SigningKey
 
-from synapse.api.auth import Auth
 from synapse.api.constants import MAX_DEPTH
 from synapse.api.errors import UnsupportedRoomVersionError
 from synapse.api.room_versions import (
@@ -34,10 +33,14 @@ from synapse.types import EventID, JsonDict
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
+if TYPE_CHECKING:
+    from synapse.handlers.event_auth import EventAuthHandler
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
 
 
-@attr.s(slots=True, cmp=False, frozen=True)
+@attr.s(slots=True, cmp=False, frozen=True, auto_attribs=True)
 class EventBuilder:
     """A format independent event builder used to build up the event content
     before signing the event.
@@ -62,31 +65,30 @@ class EventBuilder:
         _signing_key: The signing key to use to sign the event as the server
     """
 
-    _state = attr.ib(type=StateHandler)
-    _auth = attr.ib(type=Auth)
-    _store = attr.ib(type=DataStore)
-    _clock = attr.ib(type=Clock)
-    _hostname = attr.ib(type=str)
-    _signing_key = attr.ib(type=SigningKey)
+    _state: StateHandler
+    _event_auth_handler: "EventAuthHandler"
+    _store: DataStore
+    _clock: Clock
+    _hostname: str
+    _signing_key: SigningKey
 
-    room_version = attr.ib(type=RoomVersion)
+    room_version: RoomVersion
 
-    room_id = attr.ib(type=str)
-    type = attr.ib(type=str)
-    sender = attr.ib(type=str)
+    room_id: str
+    type: str
+    sender: str
 
-    content = attr.ib(default=attr.Factory(dict), type=JsonDict)
-    unsigned = attr.ib(default=attr.Factory(dict), type=JsonDict)
+    content: JsonDict = attr.Factory(dict)
+    unsigned: JsonDict = attr.Factory(dict)
 
     # These only exist on a subset of events, so they raise AttributeError if
     # someone tries to get them when they don't exist.
-    _state_key = attr.ib(default=None, type=Optional[str])
-    _redacts = attr.ib(default=None, type=Optional[str])
-    _origin_server_ts = attr.ib(default=None, type=Optional[int])
+    _state_key: Optional[str] = None
+    _redacts: Optional[str] = None
+    _origin_server_ts: Optional[int] = None
 
-    internal_metadata = attr.ib(
-        default=attr.Factory(lambda: _EventInternalMetadata({})),
-        type=_EventInternalMetadata,
+    internal_metadata: _EventInternalMetadata = attr.Factory(
+        lambda: _EventInternalMetadata({})
     )
 
     @property
@@ -123,17 +125,19 @@ class EventBuilder:
             state_ids = await self._state.get_current_state_ids(
                 self.room_id, prev_event_ids
             )
-            auth_event_ids = self._auth.compute_auth_events(self, state_ids)
+            auth_event_ids = self._event_auth_handler.compute_auth_events(
+                self, state_ids
+            )
 
         format_version = self.room_version.event_format
         if format_version == EventFormatVersions.V1:
             # The types of auth/prev events changes between event versions.
-            auth_events = await self._store.add_event_hashes(
-                auth_event_ids
-            )  # type: Union[List[str], List[Tuple[str, Dict[str, str]]]]
-            prev_events = await self._store.add_event_hashes(
-                prev_event_ids
-            )  # type: Union[List[str], List[Tuple[str, Dict[str, str]]]]
+            auth_events: Union[
+                List[str], List[Tuple[str, Dict[str, str]]]
+            ] = await self._store.add_event_hashes(auth_event_ids)
+            prev_events: Union[
+                List[str], List[Tuple[str, Dict[str, str]]]
+            ] = await self._store.add_event_hashes(prev_event_ids)
         else:
             auth_events = auth_event_ids
             prev_events = prev_event_ids
@@ -152,7 +156,7 @@ class EventBuilder:
         # the db)
         depth = min(depth, MAX_DEPTH)
 
-        event_dict = {
+        event_dict: Dict[str, Any] = {
             "auth_events": auth_events,
             "prev_events": prev_events,
             "type": self.type,
@@ -162,7 +166,7 @@ class EventBuilder:
             "unsigned": self.unsigned,
             "depth": depth,
             "prev_state": [],
-        }  # type: Dict[str, Any]
+        }
 
         if self.is_state():
             event_dict["state_key"] = self._state_key
@@ -184,24 +188,23 @@ class EventBuilder:
 
 
 class EventBuilderFactory:
-    def __init__(self, hs):
+    def __init__(self, hs: "HomeServer"):
         self.clock = hs.get_clock()
         self.hostname = hs.hostname
         self.signing_key = hs.signing_key
 
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
-        self.auth = hs.get_auth()
+        self._event_auth_handler = hs.get_event_auth_handler()
 
-    def new(self, room_version, key_values):
+    def new(self, room_version: str, key_values: dict) -> EventBuilder:
         """Generate an event builder appropriate for the given room version
 
         Deprecated: use for_room_version with a RoomVersion object instead
 
         Args:
-            room_version (str): Version of the room that we're creating an event builder
-                for
-            key_values (dict): Fields used as the basis of the new event
+            room_version: Version of the room that we're creating an event builder for
+            key_values: Fields used as the basis of the new event
 
         Returns:
             EventBuilder
@@ -212,13 +215,15 @@ class EventBuilderFactory:
             raise UnsupportedRoomVersionError()
         return self.for_room_version(v, key_values)
 
-    def for_room_version(self, room_version, key_values):
+    def for_room_version(
+        self, room_version: RoomVersion, key_values: dict
+    ) -> EventBuilder:
         """Generate an event builder appropriate for the given room version
 
         Args:
-            room_version (synapse.api.room_versions.RoomVersion):
+            room_version:
                 Version of the room that we're creating an event builder for
-            key_values (dict): Fields used as the basis of the new event
+            key_values: Fields used as the basis of the new event
 
         Returns:
             EventBuilder
@@ -226,7 +231,7 @@ class EventBuilderFactory:
         return EventBuilder(
             store=self.store,
             state=self.state,
-            auth=self.auth,
+            event_auth_handler=self._event_auth_handler,
             clock=self.clock,
             hostname=self.hostname,
             signing_key=self.signing_key,
@@ -286,15 +291,15 @@ def create_local_event_from_event_dict(
 _event_id_counter = 0
 
 
-def _create_event_id(clock, hostname):
+def _create_event_id(clock: Clock, hostname: str) -> str:
     """Create a new event ID
 
     Args:
-        clock (Clock)
-        hostname (str): The server name for the event ID
+        clock
+        hostname: The server name for the event ID
 
     Returns:
-        str
+        The new event ID
     """
 
     global _event_id_counter
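
The EventBuilder changes above also move from explicit attr.ib() declarations to attrs'
auto_attribs style. A small standalone illustration (not Synapse code) of the two
equivalent forms:

    import attr

    # Old style: fields declared with attr.ib() and a `type=` argument.
    @attr.s(slots=True, frozen=True)
    class OldStyle:
        room_id = attr.ib(type=str)
        content = attr.ib(default=attr.Factory(dict), type=dict)

    # New style: with auto_attribs=True, annotated class attributes become
    # attrs fields, and attr.Factory still supplies mutable defaults.
    @attr.s(slots=True, frozen=True, auto_attribs=True)
    class NewStyle:
        room_id: str
        content: dict = attr.Factory(dict)

    assert NewStyle("!room:example.org").content == {}
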
diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py
index efec16c226..57f1d53fa8 100644
--- a/synapse/events/spamcheck.py
+++ b/synapse/events/spamcheck.py
@@ -76,7 +76,7 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"):
     """Wrapper that loads spam checkers configured using the old configuration, and
     registers the spam checker hooks they implement.
     """
-    spam_checkers = []  # type: List[Any]
+    spam_checkers: List[Any] = []
     api = hs.get_module_api()
     for module, config in hs.config.spam_checkers:
         # Older spam checkers don't accept the `api` argument, so we
@@ -239,7 +239,7 @@ class SpamChecker:
             will be used as the error message returned to the user.
         """
         for callback in self._check_event_for_spam_callbacks:
-            res = await callback(event)  # type: Union[bool, str]
+            res: Union[bool, str] = await callback(event)
             if res:
                 return res
 
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index ed09c6af1f..c767d30627 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -86,7 +86,7 @@ class FederationClient(FederationBase):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
-        self.pdu_destination_tried = {}  # type: Dict[str, Dict[str, int]]
+        self.pdu_destination_tried: Dict[str, Dict[str, int]] = {}
         self._clock.looping_call(self._clear_tried_cache, 60 * 1000)
         self.state = hs.get_state_handler()
         self.transport_layer = hs.get_federation_transport_client()
@@ -94,13 +94,13 @@ class FederationClient(FederationBase):
         self.hostname = hs.hostname
         self.signing_key = hs.signing_key
 
-        self._get_pdu_cache = ExpiringCache(
+        self._get_pdu_cache: ExpiringCache[str, EventBase] = ExpiringCache(
             cache_name="get_pdu_cache",
             clock=self._clock,
             max_len=1000,
             expiry_ms=120 * 1000,
             reset_expiry_on_get=False,
-        )  # type: ExpiringCache[str, EventBase]
+        )
 
     def _clear_tried_cache(self):
         """Clear pdu_destination_tried cache"""
@@ -293,10 +293,10 @@ class FederationClient(FederationBase):
                     transaction_data,
                 )
 
-                pdu_list = [
+                pdu_list: List[EventBase] = [
                     event_from_pdu_json(p, room_version, outlier=outlier)
                     for p in transaction_data["pdus"]
-                ]  # type: List[EventBase]
+                ]
 
                 if pdu_list and pdu_list[0]:
                     pdu = pdu_list[0]
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 742d29291e..29619aeeb8 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -108,9 +108,9 @@ class FederationServer(FederationBase):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
-        self.auth = hs.get_auth()
         self.handler = hs.get_federation_handler()
         self.state = hs.get_state_handler()
+        self._event_auth_handler = hs.get_event_auth_handler()
 
         self.device_handler = hs.get_device_handler()
 
@@ -122,12 +122,12 @@ class FederationServer(FederationBase):
 
         # origins that we are currently processing a transaction from.
         # a dict from origin to txn id.
-        self._active_transactions = {}  # type: Dict[str, str]
+        self._active_transactions: Dict[str, str] = {}
 
         # We cache results for transaction with the same ID
-        self._transaction_resp_cache = ResponseCache(
+        self._transaction_resp_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "fed_txn_handler", timeout_ms=30000
-        )  # type: ResponseCache[Tuple[str, str]]
+        )
 
         self.transaction_actions = TransactionActions(self.store)
 
@@ -135,12 +135,12 @@ class FederationServer(FederationBase):
 
         # We cache responses to state queries, as they take a while and often
         # come in waves.
-        self._state_resp_cache = ResponseCache(
-            hs.get_clock(), "state_resp", timeout_ms=30000
-        )  # type: ResponseCache[Tuple[str, Optional[str]]]
-        self._state_ids_resp_cache = ResponseCache(
+        self._state_resp_cache: ResponseCache[
+            Tuple[str, Optional[str]]
+        ] = ResponseCache(hs.get_clock(), "state_resp", timeout_ms=30000)
+        self._state_ids_resp_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "state_ids_resp", timeout_ms=30000
-        )  # type: ResponseCache[Tuple[str, str]]
+        )
 
         self._federation_metrics_domains = (
             hs.config.federation.federation_metrics_domains
@@ -148,6 +148,41 @@ class FederationServer(FederationBase):
 
         self._room_prejoin_state_types = hs.config.api.room_prejoin_state
 
+        # Whether we have started handling old events in the staging area.
+        self._started_handling_of_staged_events = False
+
+    @wrap_as_background_process("_handle_old_staged_events")
+    async def _handle_old_staged_events(self) -> None:
+        """Handle old staged events by fetching all rooms that have staged
+        events and start the processing of each of those rooms.
+        """
+
+        # Get all the room IDs with staged events.
+        room_ids = await self.store.get_all_rooms_with_staged_incoming_events()
+
+        # We then shuffle them so that if there are multiple instances doing
+        # this work they're less likely to collide.
+        random.shuffle(room_ids)
+
+        for room_id in room_ids:
+            room_version = await self.store.get_room_version(room_id)
+
+            # Try and acquire the processing lock for the room, if we get it start a
+            # background process for handling the events in the room.
+            lock = await self.store.try_acquire_lock(
+                _INBOUND_EVENT_HANDLING_LOCK_NAME, room_id
+            )
+            if lock:
+                logger.info("Handling old staged inbound events in %s", room_id)
+                self._process_incoming_pdus_in_room_inner(
+                    room_id,
+                    room_version,
+                    lock,
+                )
+
+            # We pause a bit so that we don't start handling all rooms at once.
+            await self._clock.sleep(random.uniform(0, 0.1))
+
     async def on_backfill_request(
         self, origin: str, room_id: str, versions: List[str], limit: int
     ) -> Tuple[int, Dict[str, Any]]:
@@ -166,6 +201,12 @@ class FederationServer(FederationBase):
     async def on_incoming_transaction(
         self, origin: str, transaction_data: JsonDict
     ) -> Tuple[int, Dict[str, Any]]:
+        # If we receive a transaction we should make sure that we kick off handling
+        # any old events in the staging area.
+        if not self._started_handling_of_staged_events:
+            self._started_handling_of_staged_events = True
+            self._handle_old_staged_events()
+
         # keep this as early as possible to make the calculated origin ts as
         # accurate as possible.
         request_time = self._clock.time_msec()
@@ -296,7 +337,7 @@ class FederationServer(FederationBase):
 
         origin_host, _ = parse_server_name(origin)
 
-        pdus_by_room = {}  # type: Dict[str, List[EventBase]]
+        pdus_by_room: Dict[str, List[EventBase]] = {}
 
         newest_pdu_ts = 0
 
@@ -369,22 +410,21 @@ class FederationServer(FederationBase):
 
         async def process_pdu(pdu: EventBase) -> JsonDict:
             event_id = pdu.event_id
-            with pdu_process_time.time():
-                with nested_logging_context(event_id):
-                    try:
-                        await self._handle_received_pdu(origin, pdu)
-                        return {}
-                    except FederationError as e:
-                        logger.warning("Error handling PDU %s: %s", event_id, e)
-                        return {"error": str(e)}
-                    except Exception as e:
-                        f = failure.Failure()
-                        logger.error(
-                            "Failed to handle PDU %s",
-                            event_id,
-                            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
-                        )
-                        return {"error": str(e)}
+            with nested_logging_context(event_id):
+                try:
+                    await self._handle_received_pdu(origin, pdu)
+                    return {}
+                except FederationError as e:
+                    logger.warning("Error handling PDU %s: %s", event_id, e)
+                    return {"error": str(e)}
+                except Exception as e:
+                    f = failure.Failure()
+                    logger.error(
+                        "Failed to handle PDU %s",
+                        event_id,
+                        exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                    )
+                    return {"error": str(e)}
 
         await concurrently_execute(
             process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT
@@ -421,7 +461,7 @@ class FederationServer(FederationBase):
         origin_host, _ = parse_server_name(origin)
         await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = await self.auth.check_host_in_room(room_id, origin)
+        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
@@ -454,7 +494,7 @@ class FederationServer(FederationBase):
         origin_host, _ = parse_server_name(origin)
         await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = await self.auth.check_host_in_room(room_id, origin)
+        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
@@ -476,9 +516,9 @@ class FederationServer(FederationBase):
         self, room_id: str, event_id: Optional[str]
     ) -> Dict[str, list]:
         if event_id:
-            pdus = await self.handler.get_state_for_pdu(
+            pdus: Iterable[EventBase] = await self.handler.get_state_for_pdu(
                 room_id, event_id
-            )  # type: Iterable[EventBase]
+            )
         else:
             pdus = (await self.state.get_current_state(room_id)).values()
 
@@ -522,8 +562,7 @@ class FederationServer(FederationBase):
             raise IncompatibleRoomVersionError(room_version=room_version)
 
         pdu = await self.handler.on_make_join_request(origin, room_id, user_id)
-        time_now = self._clock.time_msec()
-        return {"event": pdu.get_pdu_json(time_now), "room_version": room_version}
+        return {"event": pdu.get_templated_pdu_json(), "room_version": room_version}
 
     async def on_invite_request(
         self, origin: str, content: JsonDict, room_version_id: str
@@ -571,8 +610,7 @@ class FederationServer(FederationBase):
 
         room_version = await self.store.get_room_version_id(room_id)
 
-        time_now = self._clock.time_msec()
-        return {"event": pdu.get_pdu_json(time_now), "room_version": room_version}
+        return {"event": pdu.get_templated_pdu_json(), "room_version": room_version}
 
     async def on_send_leave_request(
         self, origin: str, content: JsonDict, room_id: str
@@ -619,9 +657,8 @@ class FederationServer(FederationBase):
             )
 
         pdu = await self.handler.on_make_knock_request(origin, room_id, user_id)
-        time_now = self._clock.time_msec()
         return {
-            "event": pdu.get_pdu_json(time_now),
+            "event": pdu.get_templated_pdu_json(),
             "room_version": room_version.identifier,
         }
 
@@ -751,7 +788,7 @@ class FederationServer(FederationBase):
         log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
         results = await self.store.claim_e2e_one_time_keys(query)
 
-        json_result = {}  # type: Dict[str, Dict[str, dict]]
+        json_result: Dict[str, Dict[str, dict]] = {}
         for user_id, device_keys in results.items():
             for device_id, keys in device_keys.items():
                 for key_id, json_str in keys.items():
@@ -883,32 +920,39 @@ class FederationServer(FederationBase):
         room_id: str,
         room_version: RoomVersion,
         lock: Lock,
-        latest_origin: str,
-        latest_event: EventBase,
+        latest_origin: Optional[str] = None,
+        latest_event: Optional[EventBase] = None,
     ) -> None:
         """Process events in the staging area for the given room.
 
         The latest_origin and latest_event args are the latest origin and event
-        received.
+        received (or None to simply pull the next event from the database).
         """
 
         # The common path is for the event we just received to be the only event in
         # the room, so instead of pulling the event out of the DB and parsing
         # the event we just pull out the next event ID and check if that matches.
-        next_origin, next_event_id = await self.store.get_next_staged_event_id_for_room(
-            room_id
-        )
-        if next_origin == latest_origin and next_event_id == latest_event.event_id:
-            origin = latest_origin
-            event = latest_event
-        else:
+        if latest_event is not None and latest_origin is not None:
+            (
+                next_origin,
+                next_event_id,
+            ) = await self.store.get_next_staged_event_id_for_room(room_id)
+            if next_origin != latest_origin or next_event_id != latest_event.event_id:
+                latest_origin = None
+                latest_event = None
+
+        if latest_origin is None or latest_event is None:
             next = await self.store.get_next_staged_event_for_room(
                 room_id, room_version
             )
             if not next:
+                await lock.release()
                 return
 
             origin, event = next
+        else:
+            origin = latest_origin
+            event = latest_event
 
         # We loop round until there are no more events in the room in the
         # staging area, or we fail to get the lock (which means another process
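
The restructuring above lets the function be invoked without a freshly received event (both `latest_*` arguments now default to `None`, meaning "just pull the next event from the database"), and it releases the per-room lock before returning when the staging area turns out to be empty. An illustrative sketch of the selection logic, with simplified names rather than Synapse's actual storage API:

    # Illustrative sketch; names are simplified and do not match Synapse's
    # storage API exactly.
    from typing import Optional, Tuple

    async def pick_next_staged_event(
        store,
        room_id: str,
        latest: Optional[Tuple[str, "EventBase"]],  # (origin, event) just received, if any
        lock,
    ) -> Optional[Tuple[str, "EventBase"]]:
        if latest is not None:
            origin, event = latest
            # Fast path: if the event we already hold in memory is next in line,
            # skip re-fetching and re-parsing it from the staging table.
            next_origin, next_event_id = await store.get_next_staged_event_id_for_room(room_id)
            if next_origin == origin and next_event_id == event.event_id:
                return origin, event
        nxt = await store.get_next_staged_event_for_room(room_id)
        if nxt is None:
            # Nothing left to process: release the per-room lock before bailing
            # out so another worker can claim the room later.
            await lock.release()
            return None
        return nxt
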
@@ -932,9 +976,13 @@ class FederationServer(FederationBase):
                         exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
                     )
 
-                await self.store.remove_received_event_from_staging(
+                received_ts = await self.store.remove_received_event_from_staging(
                     origin, event.event_id
                 )
+                if received_ts is not None:
+                    pdu_process_time.observe(
+                        (self._clock.time_msec() - received_ts) / 1000
+                    )
 
             # We need to do this check outside the lock to avoid a race between
             # a new event being inserted by another instance and it attempting
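
`remove_received_event_from_staging` now reports when the event originally entered the staging area, and the elapsed time is recorded on `pdu_process_time`. The metric's definition is outside this hunk; the sketch below assumes it is a Prometheus Histogram with roughly this shape:

    # Sketch of the metric pattern; the histogram name here is assumed, the real
    # definition lives elsewhere in federation_server.py.
    import time
    from prometheus_client import Histogram

    pdu_process_time = Histogram(
        "synapse_federation_server_pdu_process_time",
        "Time taken to process an event in the federation staging area (seconds)",
    )

    def observe_staging_latency(received_ts_ms: int, now_ms: int) -> None:
        # The store reports when the PDU entered staging (milliseconds); the
        # histogram is fed seconds, hence the division by 1000.
        pdu_process_time.observe((now_ms - received_ts_ms) / 1000)

    now = int(time.time() * 1000)
    observe_staging_latency(received_ts_ms=now - 250, now_ms=now)
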
@@ -1068,17 +1116,13 @@ class FederationHandlerRegistry:
         self._get_query_client = ReplicationGetQueryRestServlet.make_client(hs)
         self._send_edu = ReplicationFederationSendEduRestServlet.make_client(hs)
 
-        self.edu_handlers = (
-            {}
-        )  # type: Dict[str, Callable[[str, dict], Awaitable[None]]]
-        self.query_handlers = (
-            {}
-        )  # type: Dict[str, Callable[[dict], Awaitable[JsonDict]]]
+        self.edu_handlers: Dict[str, Callable[[str, dict], Awaitable[None]]] = {}
+        self.query_handlers: Dict[str, Callable[[dict], Awaitable[JsonDict]]] = {}
 
         # Map from type to instance names that we should route EDU handling to.
         # We randomly choose one instance from the list to route to for each new
         # EDU received.
-        self._edu_type_to_instance = {}  # type: Dict[str, List[str]]
+        self._edu_type_to_instance: Dict[str, List[str]] = {}
 
     def register_edu_handler(
         self, edu_type: str, handler: Callable[[str, JsonDict], Awaitable[None]]
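
Most of the churn in this commit is the same mechanical change seen here: `# type:` comments become inline variable annotations (PEP 526), which removes the awkward parenthesised wrapping that long type comments forced. For example:

    from typing import Awaitable, Callable, Dict

    # Before: the annotation lives in a comment, so long types force odd wrapping.
    edu_handlers = (
        {}
    )  # type: Dict[str, Callable[[str, dict], Awaitable[None]]]

    # After (PEP 526): the annotation is part of the assignment itself.
    edu_handlers: Dict[str, Callable[[str, dict], Awaitable[None]]] = {}
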
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 65d76ea974..1fbf325fdc 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -71,34 +71,32 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         # We may have multiple federation sender instances, so we need to track
         # their positions separately.
         self._sender_instances = hs.config.worker.federation_shard_config.instances
-        self._sender_positions = {}  # type: Dict[str, int]
+        self._sender_positions: Dict[str, int] = {}
 
         # Pending presence map user_id -> UserPresenceState
-        self.presence_map = {}  # type: Dict[str, UserPresenceState]
+        self.presence_map: Dict[str, UserPresenceState] = {}
 
         # Stores the destinations we need to explicitly send presence to about a
         # given user.
         # Stream position -> (user_id, destinations)
-        self.presence_destinations = (
-            SortedDict()
-        )  # type: SortedDict[int, Tuple[str, Iterable[str]]]
+        self.presence_destinations: SortedDict[
+            int, Tuple[str, Iterable[str]]
+        ] = SortedDict()
 
         # (destination, key) -> EDU
-        self.keyed_edu = {}  # type: Dict[Tuple[str, tuple], Edu]
+        self.keyed_edu: Dict[Tuple[str, tuple], Edu] = {}
 
         # stream position -> (destination, key)
-        self.keyed_edu_changed = (
-            SortedDict()
-        )  # type: SortedDict[int, Tuple[str, tuple]]
+        self.keyed_edu_changed: SortedDict[int, Tuple[str, tuple]] = SortedDict()
 
-        self.edus = SortedDict()  # type: SortedDict[int, Edu]
+        self.edus: SortedDict[int, Edu] = SortedDict()
 
         # stream ID for the next entry into keyed_edu_changed/edus.
         self.pos = 1
 
         # map from stream ID to the time that stream entry was generated, so that we
         # can clear out entries after a while
-        self.pos_time = SortedDict()  # type: SortedDict[int, int]
+        self.pos_time: SortedDict[int, int] = SortedDict()
 
         # EVERYTHING IS SAD. In particular, python only makes new scopes when
         # we make a new function, so we need to make a new function so the inner
@@ -291,7 +289,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
 
         # list of tuple(int, BaseFederationRow), where the first is the position
         # of the federation stream.
-        rows = []  # type: List[Tuple[int, BaseFederationRow]]
+        rows: List[Tuple[int, BaseFederationRow]] = []
 
         # Fetch presence to send to destinations
         i = self.presence_destinations.bisect_right(from_token)
@@ -445,11 +443,11 @@ class EduRow(BaseFederationRow, namedtuple("EduRow", ("edu",))):  # Edu
         buff.edus.setdefault(self.edu.destination, []).append(self.edu)
 
 
-_rowtypes = (
+_rowtypes: Tuple[Type[BaseFederationRow], ...] = (
     PresenceDestinationsRow,
     KeyedEduRow,
     EduRow,
-)  # type: Tuple[Type[BaseFederationRow], ...]
+)
 
 TypeToRow = {Row.TypeId: Row for Row in _rowtypes}
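
Several of the send-queue buffers are keyed by stream position, and `sortedcontainers.SortedDict` keeps those keys ordered so "everything after token X" is a bisect rather than a full scan (as in the `presence_destinations.bisect_right(from_token)` call earlier in this file). A small self-contained example of that access pattern:

    # Replaying entries after a stream token with a SortedDict (sortedcontainers
    # is already a Synapse dependency).
    from sortedcontainers import SortedDict

    presence_destinations: "SortedDict[int, tuple]" = SortedDict()
    presence_destinations[5] = ("@a:example.org", ["remote1"])
    presence_destinations[9] = ("@b:example.org", ["remote2"])
    presence_destinations[12] = ("@c:example.org", ["remote3"])

    from_token, to_token = 5, 12
    keys = presence_destinations.keys()
    # bisect_right gives the index of the first entry strictly after the token.
    start = presence_destinations.bisect_right(from_token)
    end = presence_destinations.bisect_right(to_token)
    for idx in range(start, end):
        pos = keys[idx]  # the keys view supports positional indexing
        user_id, destinations = presence_destinations[pos]
        print(pos, user_id, destinations)  # replays positions 9 and 12, skips 5
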
 
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index deb40f4610..d980e0d986 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -14,9 +14,12 @@
 
 import abc
 import logging
+from collections import OrderedDict
 from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Set, Tuple
 
+import attr
 from prometheus_client import Counter
+from typing_extensions import Literal
 
 from twisted.internet import defer
 
@@ -33,8 +36,12 @@ from synapse.metrics import (
     event_processing_loop_room_count,
     events_processed_counter,
 )
-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics.background_process_metrics import (
+    run_as_background_process,
+    wrap_as_background_process,
+)
 from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
+from synapse.util import Clock
 from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
@@ -137,6 +144,84 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
 
+@attr.s
+class _PresenceQueue:
+    """A queue of destinations that need to be woken up due to new presence
+    updates.
+
+    Staggers waking up of per destination queues to ensure that we don't attempt
+    to start TLS connections with many hosts all at once, leading to pinned CPU.
+    """
+
+    # The maximum duration in seconds between queuing up a destination and it
+    # being woken up.
+    _MAX_TIME_IN_QUEUE = 30.0
+
+    # The maximum duration in seconds between waking up consecutive destination
+    # queues.
+    _MAX_DELAY = 0.1
+
+    sender: "FederationSender" = attr.ib()
+    clock: Clock = attr.ib()
+    queue: "OrderedDict[str, Literal[None]]" = attr.ib(factory=OrderedDict)
+    processing: bool = attr.ib(default=False)
+
+    def add_to_queue(self, destination: str) -> None:
+        """Add a destination to the queue to be woken up."""
+
+        self.queue[destination] = None
+
+        if not self.processing:
+            self._handle()
+
+    @wrap_as_background_process("_PresenceQueue.handle")
+    async def _handle(self) -> None:
+        """Background process to drain the queue."""
+
+        if not self.queue:
+            return
+
+        assert not self.processing
+        self.processing = True
+
+        try:
+            # We start with a delay that should drain the queue quickly enough that
+            # we process all destinations in the queue in _MAX_TIME_IN_QUEUE
+            # seconds.
+            #
+            # We also add an upper bound to the delay, to gracefully handle the
+            # case where the queue only has a few entries in it.
+            current_sleep_seconds = min(
+                self._MAX_DELAY, self._MAX_TIME_IN_QUEUE / len(self.queue)
+            )
+
+            while self.queue:
+                destination, _ = self.queue.popitem(last=False)
+
+                queue = self.sender._get_per_destination_queue(destination)
+
+                if not queue._new_data_to_send:
+                    # The per destination queue has already been woken up.
+                    continue
+
+                queue.attempt_new_transaction()
+
+                await self.clock.sleep(current_sleep_seconds)
+
+                if not self.queue:
+                    break
+
+                # More destinations may have been added to the queue, so we may
+                # need to reduce the delay to ensure everything gets processed
+                # within _MAX_TIME_IN_QUEUE seconds.
+                current_sleep_seconds = min(
+                    current_sleep_seconds, self._MAX_TIME_IN_QUEUE / len(self.queue)
+                )
+
+        finally:
+            self.processing = False
+
+
 class FederationSender(AbstractFederationSender):
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
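
The new `_PresenceQueue` wakes per-destination queues one at a time: `add_to_queue` fires `_handle` off as a background process (via `wrap_as_background_process`), and `_handle` sleeps between wake-ups, recomputing the sleep so the whole backlog still drains within `_MAX_TIME_IN_QUEUE` seconds. A toy model of just the pacing, outside Synapse:

    # Toy model of the pacing: wake queued destinations one at a time, but never
    # let a destination wait more than MAX_TIME_IN_QUEUE seconds overall.
    import asyncio
    from collections import OrderedDict

    MAX_TIME_IN_QUEUE = 30.0
    MAX_DELAY = 0.1

    async def drain(queue: "OrderedDict[str, None]", wake) -> None:
        if not queue:
            return
        # Small enough to get through the current backlog within the budget.
        delay = min(MAX_DELAY, MAX_TIME_IN_QUEUE / len(queue))
        while queue:
            destination, _ = queue.popitem(last=False)  # FIFO order
            wake(destination)
            await asyncio.sleep(delay)
            if not queue:
                break
            # New destinations may have been queued meanwhile; shrink the delay
            # if needed so the remaining backlog still drains in time.
            delay = min(delay, MAX_TIME_IN_QUEUE / len(queue))

    queue: "OrderedDict[str, None]" = OrderedDict((d, None) for d in ("hs1", "hs2", "hs3"))
    asyncio.run(drain(queue, wake=lambda d: print("waking", d)))
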
@@ -148,14 +233,14 @@ class FederationSender(AbstractFederationSender):
         self.clock = hs.get_clock()
         self.is_mine_id = hs.is_mine_id
 
-        self._presence_router = None  # type: Optional[PresenceRouter]
+        self._presence_router: Optional["PresenceRouter"] = None
         self._transaction_manager = TransactionManager(hs)
 
         self._instance_name = hs.get_instance_name()
         self._federation_shard_config = hs.config.worker.federation_shard_config
 
         # map from destination to PerDestinationQueue
-        self._per_destination_queues = {}  # type: Dict[str, PerDestinationQueue]
+        self._per_destination_queues: Dict[str, PerDestinationQueue] = {}
 
         LaterGauge(
             "synapse_federation_transaction_queue_pending_destinations",
@@ -192,9 +277,7 @@ class FederationSender(AbstractFederationSender):
         # awaiting a call to flush_read_receipts_for_room. The presence of an entry
         # here for a given room means that we are rate-limiting RR flushes to that room,
         # and that there is a pending call to _flush_rrs_for_room in the system.
-        self._queues_awaiting_rr_flush_by_room = (
-            {}
-        )  # type: Dict[str, Set[PerDestinationQueue]]
+        self._queues_awaiting_rr_flush_by_room: Dict[str, Set[PerDestinationQueue]] = {}
 
         self._rr_txn_interval_per_room_ms = (
             1000.0 / hs.config.federation_rr_transactions_per_room_per_second
@@ -210,6 +293,8 @@ class FederationSender(AbstractFederationSender):
 
         self._external_cache = hs.get_external_cache()
 
+        self._presence_queue = _PresenceQueue(self, self.clock)
+
     def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue:
         """Get or create a PerDestinationQueue for the given destination
 
@@ -265,7 +350,7 @@ class FederationSender(AbstractFederationSender):
                     if not event.internal_metadata.should_proactively_send():
                         return
 
-                    destinations = None  # type: Optional[Set[str]]
+                    destinations: Optional[Set[str]] = None
                     if not event.prev_event_ids():
                         # If there are no prev event IDs then the state is empty
                         # and so no remote servers in the room
@@ -331,7 +416,7 @@ class FederationSender(AbstractFederationSender):
                         for event in events:
                             await handle_event(event)
 
-                events_by_room = {}  # type: Dict[str, List[EventBase]]
+                events_by_room: Dict[str, List[EventBase]] = {}
                 for event in events:
                     events_by_room.setdefault(event.room_id, []).append(event)
 
@@ -519,7 +604,12 @@ class FederationSender(AbstractFederationSender):
                 self._instance_name, destination
             ):
                 continue
-            self._get_per_destination_queue(destination).send_presence(states)
+
+            self._get_per_destination_queue(destination).send_presence(
+                states, start_loop=False
+            )
+
+            self._presence_queue.add_to_queue(destination)
 
     def build_and_send_edu(
         self,
@@ -628,7 +718,7 @@ class FederationSender(AbstractFederationSender):
         In order to reduce load spikes, adds a delay between each destination.
         """
 
-        last_processed = None  # type: Optional[str]
+        last_processed: Optional[str] = None
 
         while True:
             destinations_to_wake = (
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3a2efd56ee..c11d1f6d31 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -105,34 +105,34 @@ class PerDestinationQueue:
         # catch-up at startup.
         # New events will only be sent once this is finished, at which point
         # _catching_up is flipped to False.
-        self._catching_up = True  # type: bool
+        self._catching_up: bool = True
 
         # The stream_ordering of the most recent PDU that was discarded due to
         # being in catch-up mode.
-        self._catchup_last_skipped = 0  # type: int
+        self._catchup_last_skipped: int = 0
 
         # Cache of the last successfully-transmitted stream ordering for this
         # destination (we are the only updater so this is safe)
-        self._last_successful_stream_ordering = None  # type: Optional[int]
+        self._last_successful_stream_ordering: Optional[int] = None
 
         # a queue of pending PDUs
-        self._pending_pdus = []  # type: List[EventBase]
+        self._pending_pdus: List[EventBase] = []
 
         # XXX this is never actually used: see
         # https://github.com/matrix-org/synapse/issues/7549
-        self._pending_edus = []  # type: List[Edu]
+        self._pending_edus: List[Edu] = []
 
         # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered
         # based on their key (e.g. typing events by room_id)
         # Map of (edu_type, key) -> Edu
-        self._pending_edus_keyed = {}  # type: Dict[Tuple[str, Hashable], Edu]
+        self._pending_edus_keyed: Dict[Tuple[str, Hashable], Edu] = {}
 
         # Map of user_id -> UserPresenceState of pending presence to be sent to this
         # destination
-        self._pending_presence = {}  # type: Dict[str, UserPresenceState]
+        self._pending_presence: Dict[str, UserPresenceState] = {}
 
         # room_id -> receipt_type -> user_id -> receipt_dict
-        self._pending_rrs = {}  # type: Dict[str, Dict[str, Dict[str, dict]]]
+        self._pending_rrs: Dict[str, Dict[str, Dict[str, dict]]] = {}
         self._rrs_pending_flush = False
 
         # stream_id of last successfully sent to-device message.
@@ -171,14 +171,24 @@ class PerDestinationQueue:
 
         self.attempt_new_transaction()
 
-    def send_presence(self, states: Iterable[UserPresenceState]) -> None:
-        """Add presence updates to the queue. Start the transmission loop if necessary.
+    def send_presence(
+        self, states: Iterable[UserPresenceState], start_loop: bool = True
+    ) -> None:
+        """Add presence updates to the queue.
+
+        Args:
+            states: Presence updates to send
+            start_loop: Whether to start the transmission loop if not already
+                running.
-
-        Args:
-            states: presence to send
-        """
+        """
         self._pending_presence.update({state.user_id: state for state in states})
-        self.attempt_new_transaction()
+        self._new_data_to_send = True
+
+        if start_loop:
+            self.attempt_new_transaction()
 
     def queue_read_receipt(self, receipt: ReadReceipt) -> None:
         """Add a RR to the list to be sent. Doesn't start the transmission loop yet
@@ -243,7 +253,7 @@ class PerDestinationQueue:
         )
 
     async def _transaction_transmission_loop(self) -> None:
-        pending_pdus = []  # type: List[EventBase]
+        pending_pdus: List[EventBase] = []
         try:
             self.transmission_loop_running = True
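
This is the per-destination half of the presence change above: `send_presence(..., start_loop=False)` only buffers the updates and flags `_new_data_to_send`; the `_PresenceQueue` wakes the queue later, skipping destinations whose flag shows they have already been woken. A much-simplified sketch of that split (not the real `PerDestinationQueue`):

    from typing import Dict, Iterable

    class TinyDestinationQueue:
        def __init__(self, destination: str) -> None:
            self.destination = destination
            self._pending_presence: Dict[str, str] = {}
            self._new_data_to_send = False

        def send_presence(self, states: Iterable[str], start_loop: bool = True) -> None:
            # Buffer the updates and note that there is something to send.
            for user_id in states:
                self._pending_presence[user_id] = user_id
            self._new_data_to_send = True
            if start_loop:
                self.attempt_new_transaction()

        def attempt_new_transaction(self) -> None:
            # Stand-in for the real transmission loop; clearing the flag is what
            # lets the presence queue skip destinations that were already woken.
            self._new_data_to_send = False
            print(f"sending {len(self._pending_presence)} presence updates to {self.destination}")

    q = TinyDestinationQueue("remote.example.org")
    q.send_presence(["@alice:example.org"], start_loop=False)  # buffered only
    q.attempt_new_transaction()  # later, woken via the presence queue
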
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index c9e7c57461..98b1bf77fd 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -395,9 +395,9 @@ class TransportLayerClient:
             # this uses MSC2197 (Search Filtering over Federation)
             path = _create_v1_path("/publicRooms")
 
-            data = {
+            data: Dict[str, Any] = {
                 "include_all_networks": "true" if include_all_networks else "false"
-            }  # type: Dict[str, Any]
+            }
             if third_party_instance_id:
                 data["third_party_instance_id"] = third_party_instance_id
             if limit:
@@ -423,9 +423,9 @@ class TransportLayerClient:
         else:
             path = _create_v1_path("/publicRooms")
 
-            args = {
+            args: Dict[str, Any] = {
                 "include_all_networks": "true" if include_all_networks else "false"
-            }  # type: Dict[str, Any]
+            }
             if third_party_instance_id:
                 args["third_party_instance_id"] = (third_party_instance_id,)
             if limit:
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index d37d9565fc..2974d4d0cc 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -1013,7 +1013,7 @@ class PublicRoomList(BaseFederationServlet):
         if not self.allow_access:
             raise FederationDeniedError(origin)
 
-        limit = int(content.get("limit", 100))  # type: Optional[int]
+        limit: Optional[int] = int(content.get("limit", 100))
         since_token = content.get("since", None)
         search_filter = content.get("filter", None)
 
@@ -1095,7 +1095,9 @@ class FederationGroupsProfileServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
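
All of the group servlets below now pass `required=True`, so a request without `requester_user_id` is rejected up front (and mypy sees a plain `str` instead of `Optional[str]` flowing into `get_domain_from_id`). A simplified re-implementation of the behaviour being relied on; Synapse's real helper lives in `synapse.http.servlet` and does rather more (defaults, encodings, allowed values):

    from typing import Dict, List, Optional

    class BadRequest(Exception):
        pass

    def parse_string_from_args(
        args: Dict[bytes, List[bytes]], name: str, required: bool = False
    ) -> Optional[str]:
        values = args.get(name.encode("ascii"))
        if not values:
            if required:
                # Fail fast with a client error instead of returning None and
                # blowing up later in get_domain_from_id().
                raise BadRequest(f"Missing string query parameter {name}")
            return None
        return values[0].decode("utf-8")

    query = {b"requester_user_id": [b"@admin:example.org"]}
    print(parse_string_from_args(query, "requester_user_id", required=True))
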
 
@@ -1110,7 +1112,9 @@ class FederationGroupsProfileServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1131,7 +1135,9 @@ class FederationGroupsSummaryServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1152,7 +1158,9 @@ class FederationGroupsRoomsServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1174,7 +1182,9 @@ class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet):
         group_id: str,
         room_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1192,7 +1202,9 @@ class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet):
         group_id: str,
         room_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1220,7 +1232,9 @@ class FederationGroupsAddRoomsConfigServlet(BaseGroupsServerServlet):
         room_id: str,
         config_key: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1243,7 +1257,9 @@ class FederationGroupsUsersServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1264,7 +1280,9 @@ class FederationGroupsInvitedUsersServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1288,7 +1306,9 @@ class FederationGroupsInviteServlet(BaseGroupsServerServlet):
         group_id: str,
         user_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1354,7 +1374,9 @@ class FederationGroupsRemoveUserServlet(BaseGroupsServerServlet):
         group_id: str,
         user_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1487,7 +1509,9 @@ class FederationGroupsSummaryRoomsServlet(BaseGroupsServerServlet):
         category_id: str,
         room_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1523,7 +1547,9 @@ class FederationGroupsSummaryRoomsServlet(BaseGroupsServerServlet):
         category_id: str,
         room_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1549,7 +1575,9 @@ class FederationGroupsCategoriesServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1571,7 +1599,9 @@ class FederationGroupsCategoryServlet(BaseGroupsServerServlet):
         group_id: str,
         category_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1589,7 +1619,9 @@ class FederationGroupsCategoryServlet(BaseGroupsServerServlet):
         group_id: str,
         category_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1618,7 +1650,9 @@ class FederationGroupsCategoryServlet(BaseGroupsServerServlet):
         group_id: str,
         category_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1644,7 +1678,9 @@ class FederationGroupsRolesServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1666,7 +1702,9 @@ class FederationGroupsRoleServlet(BaseGroupsServerServlet):
         group_id: str,
         role_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1682,7 +1720,9 @@ class FederationGroupsRoleServlet(BaseGroupsServerServlet):
         group_id: str,
         role_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1713,7 +1753,9 @@ class FederationGroupsRoleServlet(BaseGroupsServerServlet):
         group_id: str,
         role_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1750,7 +1792,9 @@ class FederationGroupsSummaryUsersServlet(BaseGroupsServerServlet):
         role_id: str,
         user_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1784,7 +1828,9 @@ class FederationGroupsSummaryUsersServlet(BaseGroupsServerServlet):
         role_id: str,
         user_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1825,7 +1871,9 @@ class FederationGroupsSettingJoinPolicyServlet(BaseGroupsServerServlet):
         query: Dict[bytes, List[bytes]],
         group_id: str,
     ) -> Tuple[int, JsonDict]:
-        requester_user_id = parse_string_from_args(query, "requester_user_id")
+        requester_user_id = parse_string_from_args(
+            query, "requester_user_id", required=True
+        )
         if get_domain_from_id(requester_user_id) != origin:
             raise SynapseError(403, "requester_user_id doesn't match origin")
 
@@ -1943,7 +1991,7 @@ class RoomComplexityServlet(BaseFederationServlet):
         return 200, complexity
 
 
-FEDERATION_SERVLET_CLASSES = (
+FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationSendServlet,
     FederationEventServlet,
     FederationStateV1Servlet,
@@ -1971,15 +2019,13 @@ FEDERATION_SERVLET_CLASSES = (
     FederationSpaceSummaryServlet,
     FederationV1SendKnockServlet,
     FederationMakeKnockServlet,
-)  # type: Tuple[Type[BaseFederationServlet], ...]
+)
 
-OPENID_SERVLET_CLASSES = (
-    OpenIdUserInfo,
-)  # type: Tuple[Type[BaseFederationServlet], ...]
+OPENID_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (OpenIdUserInfo,)
 
-ROOM_LIST_CLASSES = (PublicRoomList,)  # type: Tuple[Type[PublicRoomList], ...]
+ROOM_LIST_CLASSES: Tuple[Type[PublicRoomList], ...] = (PublicRoomList,)
 
-GROUP_SERVER_SERVLET_CLASSES = (
+GROUP_SERVER_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationGroupsProfileServlet,
     FederationGroupsSummaryServlet,
     FederationGroupsRoomsServlet,
@@ -1998,19 +2044,19 @@ GROUP_SERVER_SERVLET_CLASSES = (
     FederationGroupsAddRoomsServlet,
     FederationGroupsAddRoomsConfigServlet,
     FederationGroupsSettingJoinPolicyServlet,
-)  # type: Tuple[Type[BaseFederationServlet], ...]
+)
 
 
-GROUP_LOCAL_SERVLET_CLASSES = (
+GROUP_LOCAL_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationGroupsLocalInviteServlet,
     FederationGroupsRemoveLocalUserServlet,
     FederationGroupsBulkPublicisedServlet,
-)  # type: Tuple[Type[BaseFederationServlet], ...]
+)
 
 
-GROUP_ATTESTATION_SERVLET_CLASSES = (
+GROUP_ATTESTATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationGroupsRenewAttestaionServlet,
-)  # type: Tuple[Type[BaseFederationServlet], ...]
+)
 
 
 DEFAULT_SERVLET_GROUPS = (
diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py
index a06d060ebf..3dc55ab861 100644
--- a/synapse/groups/groups_server.py
+++ b/synapse/groups/groups_server.py
@@ -707,9 +707,9 @@ class GroupsServerHandler(GroupsServerWorkerHandler):
         See accept_invite, join_group.
         """
         if not self.hs.is_mine_id(user_id):
-            local_attestation = self.attestations.create_attestation(
-                group_id, user_id
-            )  # type: Optional[JsonDict]
+            local_attestation: Optional[
+                JsonDict
+            ] = self.attestations.create_attestation(group_id, user_id)
 
             remote_attestation = content["attestation"]
 
@@ -868,9 +868,9 @@ class GroupsServerHandler(GroupsServerWorkerHandler):
                 remote_attestation, user_id=requester_user_id, group_id=group_id
             )
 
-            local_attestation = self.attestations.create_attestation(
-                group_id, requester_user_id
-            )  # type: Optional[JsonDict]
+            local_attestation: Optional[
+                JsonDict
+            ] = self.attestations.create_attestation(group_id, requester_user_id)
         else:
             local_attestation = None
             remote_attestation = None
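
Same annotation migration as elsewhere, with one wrinkle worth noting: the `Optional[JsonDict]` annotation has to sit on the assignment in the `if` branch, because that is the first binding mypy sees; without it the variable would be inferred as `JsonDict` and the `= None` in the `else` branch would fail type checking. In miniature:

    from typing import Optional

    def example(is_remote: bool) -> Optional[dict]:
        if is_remote:
            # Annotate the first assignment: it fixes the variable's type as
            # Optional[dict] for the whole function, so the None below is fine.
            local_attestation: Optional[dict] = {"sig": "..."}
        else:
            local_attestation = None
        return local_attestation
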
diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py
index d800e16912..525f3d39b1 100644
--- a/synapse/handlers/_base.py
+++ b/synapse/handlers/_base.py
@@ -38,10 +38,10 @@ class BaseHandler:
     """
 
     def __init__(self, hs: "HomeServer"):
-        self.store = hs.get_datastore()  # type: synapse.storage.DataStore
+        self.store = hs.get_datastore()
         self.auth = hs.get_auth()
         self.notifier = hs.get_notifier()
-        self.state_handler = hs.get_state_handler()  # type: synapse.state.StateHandler
+        self.state_handler = hs.get_state_handler()
         self.distributor = hs.get_distributor()
         self.clock = hs.get_clock()
         self.hs = hs
@@ -55,12 +55,12 @@ class BaseHandler:
         # Check whether ratelimiting room admin message redaction is enabled
         # by the presence of rate limits in the config
         if self.hs.config.rc_admin_redaction:
-            self.admin_redaction_ratelimiter = Ratelimiter(
+            self.admin_redaction_ratelimiter: Optional[Ratelimiter] = Ratelimiter(
                 store=self.store,
                 clock=self.clock,
                 rate_hz=self.hs.config.rc_admin_redaction.per_second,
                 burst_count=self.hs.config.rc_admin_redaction.burst_count,
-            )  # type: Optional[Ratelimiter]
+            )
         else:
             self.admin_redaction_ratelimiter = None
 
diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py
index d752cf34f0..078accd634 100644
--- a/synapse/handlers/account_validity.py
+++ b/synapse/handlers/account_validity.py
@@ -15,9 +15,11 @@
 import email.mime.multipart
 import email.utils
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple
+from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple
 
-from synapse.api.errors import StoreError, SynapseError
+from twisted.web.http import Request
+
+from synapse.api.errors import AuthError, StoreError, SynapseError
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.types import UserID
 from synapse.util import stringutils
@@ -27,6 +29,15 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# Types for callbacks to be registered via the module api
+IS_USER_EXPIRED_CALLBACK = Callable[[str], Awaitable[Optional[bool]]]
+ON_USER_REGISTRATION_CALLBACK = Callable[[str], Awaitable]
+# Temporary hooks to allow for a transition from `/_matrix/client` endpoints
+# to `/_synapse/client/account_validity`. See `register_account_validity_callbacks`.
+ON_LEGACY_SEND_MAIL_CALLBACK = Callable[[str], Awaitable]
+ON_LEGACY_RENEW_CALLBACK = Callable[[str], Awaitable[Tuple[bool, bool, int]]]
+ON_LEGACY_ADMIN_REQUEST = Callable[[Request], Awaitable]
+
 
 class AccountValidityHandler:
     def __init__(self, hs: "HomeServer"):
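
The callback aliases above describe the hooks a module can supply: `register_account_validity_callbacks` (added further down in this file) stores them, and `is_user_expired` / `on_user_registration` fan out to them, falling back to the built-in `account_validity` table only when no module expresses an opinion. A hedged sketch of what the module side might look like; how the module obtains `store`/`clock` and reaches the registration entry point is assumed here, not shown in this diff:

    from typing import Optional

    EXPIRY_MS = 30 * 24 * 3600 * 1000  # hypothetical 30-day account lifetime

    class MyAccountValidityModule:
        def __init__(self, store, clock):
            self._store = store  # the module's own storage, not Synapse's table
            self._clock = clock

        async def is_user_expired(self, user_id: str) -> Optional[bool]:
            expiry_ts = await self._store.get_expiry(user_id)
            if expiry_ts is None:
                # None means "no opinion": the handler tries the next callback,
                # then falls back to the legacy account_validity table.
                return None
            return self._clock.time_msec() >= expiry_ts

        async def on_user_registration(self, user_id: str) -> None:
            await self._store.set_expiry(user_id, self._clock.time_msec() + EXPIRY_MS)
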
@@ -70,6 +81,99 @@ class AccountValidityHandler:
             if hs.config.run_background_tasks:
                 self.clock.looping_call(self._send_renewal_emails, 30 * 60 * 1000)
 
+        self._is_user_expired_callbacks: List[IS_USER_EXPIRED_CALLBACK] = []
+        self._on_user_registration_callbacks: List[ON_USER_REGISTRATION_CALLBACK] = []
+        self._on_legacy_send_mail_callback: Optional[
+            ON_LEGACY_SEND_MAIL_CALLBACK
+        ] = None
+        self._on_legacy_renew_callback: Optional[ON_LEGACY_RENEW_CALLBACK] = None
+
+        # The legacy admin requests callback isn't a protected attribute because we need
+        # to access it from the admin servlet, which is outside of this handler.
+        self.on_legacy_admin_request_callback: Optional[ON_LEGACY_ADMIN_REQUEST] = None
+
+    def register_account_validity_callbacks(
+        self,
+        is_user_expired: Optional[IS_USER_EXPIRED_CALLBACK] = None,
+        on_user_registration: Optional[ON_USER_REGISTRATION_CALLBACK] = None,
+        on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None,
+        on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None,
+        on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None,
+    ):
+        """Register callbacks from module for each hook."""
+        if is_user_expired is not None:
+            self._is_user_expired_callbacks.append(is_user_expired)
+
+        if on_user_registration is not None:
+            self._on_user_registration_callbacks.append(on_user_registration)
+
+        # The builtin account validity feature exposes 3 endpoints (send_mail, renew, and
+        # an admin one). As part of moving the feature into a module, we need to change
+        # the path from /_matrix/client/unstable/account_validity/... to
+        # /_synapse/client/account_validity, because:
+        #
+        #   * the feature isn't part of the Matrix spec thus shouldn't live under /_matrix
+        #   * the way we register servlets means that modules can't register resources
+        #     under /_matrix/client
+        #
+        # We need to allow for a transition period between the old and new endpoints
+        # in order to allow for clients to update (and for emails to be processed).
+        #
+        # Once the email-account-validity module is loaded, it will take control of account
+        # validity by moving the rows from our `account_validity` table into its own table.
+        #
+        # Therefore, we need to allow modules (in practice just the one implementing the
+        # email-based account validity) to temporarily hook into the legacy endpoints so we
+        # can route the traffic coming into the old endpoints into the module, which is
+        # why we have the following three temporary hooks.
+        if on_legacy_send_mail is not None:
+            if self._on_legacy_send_mail_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_send_mail twice")
+
+            self._on_legacy_send_mail_callback = on_legacy_send_mail
+
+        if on_legacy_renew is not None:
+            if self._on_legacy_renew_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_renew twice")
+
+            self._on_legacy_renew_callback = on_legacy_renew
+
+        if on_legacy_admin_request is not None:
+            if self.on_legacy_admin_request_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_admin_request twice")
+
+            self.on_legacy_admin_request_callback = on_legacy_admin_request
+
+    async def is_user_expired(self, user_id: str) -> bool:
+        """Checks if a user has expired against third-party modules.
+
+        Args:
+            user_id: The user to check the expiry of.
+
+        Returns:
+            Whether the user has expired.
+        """
+        for callback in self._is_user_expired_callbacks:
+            expired = await callback(user_id)
+            if expired is not None:
+                return expired
+
+        if self._account_validity_enabled:
+            # If no module could determine whether the user has expired and the legacy
+            # configuration is enabled, fall back to it.
+            return await self.store.is_account_expired(user_id, self.clock.time_msec())
+
+        return False
+
+    async def on_user_registration(self, user_id: str):
+        """Tell third-party modules about a user's registration.
+
+        Args:
+            user_id: The ID of the newly registered user.
+        """
+        for callback in self._on_user_registration_callbacks:
+            await callback(user_id)
+
     @wrap_as_background_process("send_renewals")
     async def _send_renewal_emails(self) -> None:
         """Gets the list of users whose account is expiring in the amount of time
@@ -95,6 +199,17 @@ class AccountValidityHandler:
         Raises:
             SynapseError if the user is not set to renew.
         """
+        # If a module supports sending a renewal email from here, do that, otherwise do
+        # the legacy dance.
+        if self._on_legacy_send_mail_callback is not None:
+            await self._on_legacy_send_mail_callback(user_id)
+            return
+
+        if not self._account_validity_renew_by_email_enabled:
+            raise AuthError(
+                403, "Account renewal via email is disabled on this server."
+            )
+
         expiration_ts = await self.store.get_expiration_ts_for_user(user_id)
 
         # If this user isn't set to be expired, raise an error.
@@ -209,6 +324,10 @@ class AccountValidityHandler:
         token is considered stale. A token is stale if the 'token_used_ts_ms' db column
         is non-null.
 
+        This method exists to support handling the legacy account validity /renew
+        endpoint. If a module implements the on_legacy_renew callback, then this process
+        is delegated to the module instead.
+
         Args:
             renewal_token: Token sent with the renewal request.
         Returns:
@@ -218,6 +337,11 @@ class AccountValidityHandler:
               * An int representing the user's expiry timestamp as milliseconds since the
                 epoch, or 0 if the token was invalid.
         """
+        # If a module supports triggering a renew from here, do that, otherwise do the
+        # legacy dance.
+        if self._on_legacy_renew_callback is not None:
+            return await self._on_legacy_renew_callback(renewal_token)
+
         try:
             (
                 user_id,
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index f72ded038e..bfa7f2c545 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -62,9 +62,16 @@ class AdminHandler(BaseHandler):
         if ret:
             profile = await self.store.get_profileinfo(user.localpart)
             threepids = await self.store.user_get_threepids(user.to_string())
+            external_ids = [
+                ({"auth_provider": auth_provider, "external_id": external_id})
+                for auth_provider, external_id in await self.store.get_external_ids_by_user(
+                    user.to_string()
+                )
+            ]
             ret["displayname"] = profile.display_name
             ret["avatar_url"] = profile.avatar_url
             ret["threepids"] = threepids
+            ret["external_ids"] = external_ids
         return ret
 
     async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any:
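
With the hunk above, the admin user-info response gains an `external_ids` list built from the `(auth_provider, external_id)` pairs returned by `get_external_ids_by_user`. Roughly, with every value below made up for illustration:

    # Hypothetical shape of the admin user-info dict after this change.
    ret = {
        "displayname": "Alice",
        "avatar_url": "mxc://example.org/abc123",
        "threepids": [{"medium": "email", "address": "alice@example.org"}],
        "external_ids": [
            {"auth_provider": "oidc", "external_id": "alice.employee.1234"},
            {"auth_provider": "saml", "external_id": "u-5678"},
        ],
    }
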
@@ -132,7 +139,7 @@ class AdminHandler(BaseHandler):
             to_key = RoomStreamToken(None, stream_ordering)
 
             # Events that we've processed in this room
-            written_events = set()  # type: Set[str]
+            written_events: Set[str] = set()
 
             # We need to track gaps in the events stream so that we can then
             # write out the state at those events. We do this by keeping track
@@ -145,7 +152,7 @@ class AdminHandler(BaseHandler):
             # The reverse mapping to above, i.e. map from unseen event to events
             # that have the unseen event in their prev_events, i.e. the unseen
             # events "children".
-            unseen_to_child_events = {}  # type: Dict[str, Set[str]]
+            unseen_to_child_events: Dict[str, Set[str]] = {}
 
             # We fetch events in the room the user could see by fetching *all*
             # events that we have and then filtering, this isn't the most
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 862638cc4f..21a17cd2e8 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -96,7 +96,7 @@ class ApplicationServicesHandler:
                         self.current_max, limit
                     )
 
-                    events_by_room = {}  # type: Dict[str, List[EventBase]]
+                    events_by_room: Dict[str, List[EventBase]] = {}
                     for event in events:
                         events_by_room.setdefault(event.room_id, []).append(event)
 
@@ -275,7 +275,7 @@ class ApplicationServicesHandler:
     async def _handle_presence(
         self, service: ApplicationService, users: Collection[Union[str, UserID]]
     ) -> List[JsonDict]:
-        events = []  # type: List[JsonDict]
+        events: List[JsonDict] = []
         presence_source = self.event_sources.sources["presence"]
         from_key = await self.store.get_type_stream_id_for_appservice(
             service, "presence"
@@ -375,7 +375,7 @@ class ApplicationServicesHandler:
         self, only_protocol: Optional[str] = None
     ) -> Dict[str, JsonDict]:
         services = self.store.get_app_services()
-        protocols = {}  # type: Dict[str, List[JsonDict]]
+        protocols: Dict[str, List[JsonDict]] = {}
 
         # Collect up all the individual protocol responses out of the ASes
         for s in services:
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index e2ac595a62..22a8552241 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -191,7 +191,7 @@ class AuthHandler(BaseHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
-        self.checkers = {}  # type: Dict[str, UserInteractiveAuthChecker]
+        self.checkers: Dict[str, UserInteractiveAuthChecker] = {}
         for auth_checker_class in INTERACTIVE_AUTH_CHECKERS:
             inst = auth_checker_class(hs)
             if inst.is_enabled():
@@ -296,7 +296,7 @@ class AuthHandler(BaseHandler):
 
         # A mapping of user ID to extra attributes to include in the login
         # response.
-        self._extra_attributes = {}  # type: Dict[str, SsoLoginExtraAttributes]
+        self._extra_attributes: Dict[str, SsoLoginExtraAttributes] = {}
 
     async def validate_user_via_ui_auth(
         self,
@@ -500,7 +500,7 @@ class AuthHandler(BaseHandler):
                 all the stages in any of the permitted flows.
         """
 
-        sid = None  # type: Optional[str]
+        sid: Optional[str] = None
         authdict = clientdict.pop("auth", {})
         if "session" in authdict:
             sid = authdict["session"]
@@ -588,9 +588,9 @@ class AuthHandler(BaseHandler):
             )
 
         # check auth type currently being presented
-        errordict = {}  # type: Dict[str, Any]
+        errordict: Dict[str, Any] = {}
         if "type" in authdict:
-            login_type = authdict["type"]  # type: str
+            login_type: str = authdict["type"]
             try:
                 result = await self._check_auth_dict(authdict, clientip)
                 if result:
@@ -766,7 +766,7 @@ class AuthHandler(BaseHandler):
             LoginType.TERMS: self._get_params_terms,
         }
 
-        params = {}  # type: Dict[str, Any]
+        params: Dict[str, Any] = {}
 
         for f in public_flows:
             for stage in f:
@@ -1530,9 +1530,9 @@ class AuthHandler(BaseHandler):
         except StoreError:
             raise SynapseError(400, "Unknown session ID: %s" % (session_id,))
 
-        user_id_to_verify = await self.get_session_data(
+        user_id_to_verify: str = await self.get_session_data(
             session_id, UIAuthSessionDataConstants.REQUEST_USER_ID
-        )  # type: str
+        )
 
         idps = await self.hs.get_sso_handler().get_identity_providers_for_user(
             user_id_to_verify
diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index 7346ccfe93..0325f86e20 100644
--- a/synapse/handlers/cas.py
+++ b/synapse/handlers/cas.py
@@ -40,7 +40,7 @@ class CasError(Exception):
 
     def __str__(self):
         if self.error_description:
-            return "{}: {}".format(self.error, self.error_description)
+            return f"{self.error}: {self.error_description}"
         return self.error
 
 
@@ -171,7 +171,7 @@ class CasHandler:
 
         # Iterate through the nodes and pull out the user and any extra attributes.
         user = None
-        attributes = {}  # type: Dict[str, List[Optional[str]]]
+        attributes: Dict[str, List[Optional[str]]] = {}
         for child in root[0]:
             if child.tag.endswith("user"):
                 user = child.text
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 95bdc5902a..46ee834407 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -452,7 +452,7 @@ class DeviceHandler(DeviceWorkerHandler):
             user_id
         )
 
-        hosts = set()  # type: Set[str]
+        hosts: Set[str] = set()
         if self.hs.is_mine_id(user_id):
             hosts.update(get_domain_from_id(u) for u in users_who_share_room)
             hosts.discard(self.server_name)
@@ -613,20 +613,20 @@ class DeviceListUpdater:
         self._remote_edu_linearizer = Linearizer(name="remote_device_list")
 
         # user_id -> list of updates waiting to be handled.
-        self._pending_updates = (
-            {}
-        )  # type: Dict[str, List[Tuple[str, str, Iterable[str], JsonDict]]]
+        self._pending_updates: Dict[
+            str, List[Tuple[str, str, Iterable[str], JsonDict]]
+        ] = {}
 
         # Recently seen stream ids. We don't bother keeping these in the DB,
         # but they're useful to have them about to reduce the number of spurious
         # resyncs.
-        self._seen_updates = ExpiringCache(
+        self._seen_updates: ExpiringCache[str, Set[str]] = ExpiringCache(
             cache_name="device_update_edu",
             clock=self.clock,
             max_len=10000,
             expiry_ms=30 * 60 * 1000,
             iterable=True,
-        )  # type: ExpiringCache[str, Set[str]]
+        )
 
         # Attempt to resync out of sync device lists every 30s.
         self._resync_retry_in_progress = False
@@ -755,7 +755,7 @@ class DeviceListUpdater:
         """Given a list of updates for a user figure out if we need to do a full
         resync, or whether we have enough data that we can just apply the delta.
         """
-        seen_updates = self._seen_updates.get(user_id, set())  # type: Set[str]
+        seen_updates: Set[str] = self._seen_updates.get(user_id, set())
 
         extremity = await self.store.get_device_list_last_stream_id_for_remote(user_id)
 
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 580b941595..679b47f081 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -203,7 +203,7 @@ class DeviceMessageHandler:
         log_kv({"number_of_to_device_messages": len(messages)})
         set_tag("sender", sender_user_id)
         local_messages = {}
-        remote_messages = {}  # type: Dict[str, Dict[str, Dict[str, JsonDict]]]
+        remote_messages: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
         for user_id, by_device in messages.items():
             # Ratelimit local cross-user key requests by the sending device.
             if (
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 4064a2b859..d487fee627 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -22,6 +22,7 @@ from synapse.api.errors import (
     CodeMessageException,
     Codes,
     NotFoundError,
+    RequestSendFailed,
     ShadowBanError,
     StoreError,
     SynapseError,
@@ -236,9 +237,9 @@ class DirectoryHandler(BaseHandler):
     async def get_association(self, room_alias: RoomAlias) -> JsonDict:
         room_id = None
         if self.hs.is_mine(room_alias):
-            result = await self.get_association_from_room_alias(
-                room_alias
-            )  # type: Optional[RoomAliasMapping]
+            result: Optional[
+                RoomAliasMapping
+            ] = await self.get_association_from_room_alias(room_alias)
 
             if result:
                 room_id = result.room_id
@@ -252,12 +253,14 @@ class DirectoryHandler(BaseHandler):
                     retry_on_dns_fail=False,
                     ignore_backoff=True,
                 )
+            except RequestSendFailed:
+                raise SynapseError(502, "Failed to fetch alias")
             except CodeMessageException as e:
                 logging.warning("Error retrieving alias")
                 if e.code == 404:
                     fed_result = None
                 else:
-                    raise
+                    raise SynapseError(502, "Failed to fetch alias")
 
             if fed_result and "room_id" in fed_result and "servers" in fed_result:
                 room_id = fed_result["room_id"]
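
The directory change above converts transport-level failures (`RequestSendFailed`) and non-404 remote errors during a federated alias lookup into a 502, so clients see a gateway-style error instead of an internal 500 when the fault lies with the remote server. The pattern, with the exception classes stubbed out here rather than imported from `synapse.api.errors`:

    class RequestSendFailed(Exception):
        pass

    class CodeMessageException(Exception):
        def __init__(self, code: int):
            self.code = code

    class SynapseError(Exception):
        def __init__(self, code: int, msg: str):
            self.code, self.msg = code, msg

    async def lookup_remote_alias(fetch, room_alias: str):
        try:
            return await fetch(room_alias)
        except RequestSendFailed:
            # The request never reached the remote server: a bad gateway, not
            # an internal error on our side.
            raise SynapseError(502, "Failed to fetch alias")
        except CodeMessageException as e:
            if e.code == 404:
                return None  # the alias genuinely doesn't exist remotely
            # Any other remote error is likewise the remote side's problem.
            raise SynapseError(502, "Failed to fetch alias")
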
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 3972849d4d..d92370859f 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -115,9 +115,9 @@ class E2eKeysHandler:
                 the number of in-flight queries at a time.
         """
         with await self._query_devices_linearizer.queue((from_user_id, from_device_id)):
-            device_keys_query = query_body.get(
+            device_keys_query: Dict[str, Iterable[str]] = query_body.get(
                 "device_keys", {}
-            )  # type: Dict[str, Iterable[str]]
+            )
 
             # separate users by domain.
             # make a map from domain to user_id to device_ids
@@ -136,7 +136,7 @@ class E2eKeysHandler:
 
             # First get local devices.
             # A map of destination -> failure response.
-            failures = {}  # type: Dict[str, JsonDict]
+            failures: Dict[str, JsonDict] = {}
             results = {}
             if local_query:
                 local_result = await self.query_local_devices(local_query)
@@ -151,11 +151,9 @@ class E2eKeysHandler:
 
             # Now attempt to get any remote devices from our local cache.
             # A map of destination -> user ID -> device IDs.
-            remote_queries_not_in_cache = (
-                {}
-            )  # type: Dict[str, Dict[str, Iterable[str]]]
+            remote_queries_not_in_cache: Dict[str, Dict[str, Iterable[str]]] = {}
             if remote_queries:
-                query_list = []  # type: List[Tuple[str, Optional[str]]]
+                query_list: List[Tuple[str, Optional[str]]] = []
                 for user_id, device_ids in remote_queries.items():
                     if device_ids:
                         query_list.extend(
@@ -362,9 +360,9 @@ class E2eKeysHandler:
             A map from user_id -> device_id -> device details
         """
         set_tag("local_query", query)
-        local_query = []  # type: List[Tuple[str, Optional[str]]]
+        local_query: List[Tuple[str, Optional[str]]] = []
 
-        result_dict = {}  # type: Dict[str, Dict[str, dict]]
+        result_dict: Dict[str, Dict[str, dict]] = {}
         for user_id, device_ids in query.items():
             # we use UserID.from_string to catch invalid user ids
             if not self.is_mine(UserID.from_string(user_id)):
@@ -402,9 +400,9 @@ class E2eKeysHandler:
         self, query_body: Dict[str, Dict[str, Optional[List[str]]]]
     ) -> JsonDict:
         """Handle a device key query from a federated server"""
-        device_keys_query = query_body.get(
+        device_keys_query: Dict[str, Optional[List[str]]] = query_body.get(
             "device_keys", {}
-        )  # type: Dict[str, Optional[List[str]]]
+        )
         res = await self.query_local_devices(device_keys_query)
         ret = {"device_keys": res}
 
@@ -421,8 +419,8 @@ class E2eKeysHandler:
     async def claim_one_time_keys(
         self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: int
     ) -> JsonDict:
-        local_query = []  # type: List[Tuple[str, str, str]]
-        remote_queries = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
+        local_query: List[Tuple[str, str, str]] = []
+        remote_queries: Dict[str, Dict[str, Dict[str, str]]] = {}
 
         for user_id, one_time_keys in query.get("one_time_keys", {}).items():
             # we use UserID.from_string to catch invalid user ids
@@ -439,8 +437,8 @@ class E2eKeysHandler:
         results = await self.store.claim_e2e_one_time_keys(local_query)
 
         # A map of user ID -> device ID -> key ID -> key.
-        json_result = {}  # type: Dict[str, Dict[str, Dict[str, JsonDict]]]
-        failures = {}  # type: Dict[str, JsonDict]
+        json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        failures: Dict[str, JsonDict] = {}
         for user_id, device_keys in results.items():
             for device_id, keys in device_keys.items():
                 for key_id, json_str in keys.items():
@@ -768,8 +766,8 @@ class E2eKeysHandler:
         Raises:
             SynapseError: if the input is malformed
         """
-        signature_list = []  # type: List[SignatureListItem]
-        failures = {}  # type: Dict[str, Dict[str, JsonDict]]
+        signature_list: List["SignatureListItem"] = []
+        failures: Dict[str, Dict[str, JsonDict]] = {}
         if not signatures:
             return signature_list, failures
 
@@ -930,8 +928,8 @@ class E2eKeysHandler:
         Raises:
             SynapseError: if the input is malformed
         """
-        signature_list = []  # type: List[SignatureListItem]
-        failures = {}  # type: Dict[str, Dict[str, JsonDict]]
+        signature_list: List["SignatureListItem"] = []
+        failures: Dict[str, Dict[str, JsonDict]] = {}
         if not signatures:
             return signature_list, failures
 
@@ -1300,7 +1298,7 @@ class SigningKeyEduUpdater:
         self._remote_edu_linearizer = Linearizer(name="remote_signing_key")
 
         # user_id -> list of updates waiting to be handled.
-        self._pending_updates = {}  # type: Dict[str, List[Tuple[JsonDict, JsonDict]]]
+        self._pending_updates: Dict[str, List[Tuple[JsonDict, JsonDict]]] = {}
 
     async def incoming_signing_key_update(
         self, origin: str, edu_content: JsonDict
@@ -1349,7 +1347,7 @@ class SigningKeyEduUpdater:
                 # This can happen since we batch updates
                 return
 
-            device_ids = []  # type: List[str]
+            device_ids: List[str] = []
 
             logger.info("pending updates: %r", pending_updates)
 
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 989996b628..41dbdfd0a1 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -11,8 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, Collection, Optional
+from typing import TYPE_CHECKING, Collection, List, Optional, Union
 
+from synapse import event_auth
 from synapse.api.constants import (
     EventTypes,
     JoinRules,
@@ -20,9 +21,11 @@ from synapse.api.constants import (
     RestrictedJoinRuleTypes,
 )
 from synapse.api.errors import AuthError
-from synapse.api.room_versions import RoomVersion
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.events import EventBase
+from synapse.events.builder import EventBuilder
 from synapse.types import StateMap
+from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -34,8 +37,63 @@ class EventAuthHandler:
     """
 
     def __init__(self, hs: "HomeServer"):
+        self._clock = hs.get_clock()
         self._store = hs.get_datastore()
 
+    async def check_from_context(
+        self, room_version: str, event, context, do_sig_check=True
+    ) -> None:
+        auth_event_ids = event.auth_event_ids()
+        auth_events_by_id = await self._store.get_events(auth_event_ids)
+        auth_events = {(e.type, e.state_key): e for e in auth_events_by_id.values()}
+
+        room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
+        event_auth.check(
+            room_version_obj, event, auth_events=auth_events, do_sig_check=do_sig_check
+        )
+
+    def compute_auth_events(
+        self,
+        event: Union[EventBase, EventBuilder],
+        current_state_ids: StateMap[str],
+        for_verification: bool = False,
+    ) -> List[str]:
+        """Given an event and current state return the list of event IDs used
+        to auth an event.
+
+        If `for_verification` is False then only return auth events that
+        should be added to the event's `auth_events`.
+
+        Returns:
+            List of event IDs.
+        """
+
+        if event.type == EventTypes.Create:
+            return []
+
+        # Currently we ignore the `for_verification` flag even though there are
+        # some situations where we can drop particular auth events when adding
+        # to the event's `auth_events` (e.g. joins pointing to previous joins
+        # when room is publicly joinable). Dropping event IDs has the
+        # advantage that the auth chain for the room grows slower, but we use
+        # the auth chain in state resolution v2 to order events, which means
+        # care must be taken if dropping events to ensure that it doesn't
+        # introduce undesirable "state reset" behaviour.
+        #
+        # All of which sounds a bit tricky so we don't bother for now.
+
+        auth_ids = []
+        for etype, state_key in event_auth.auth_types_for_event(event):
+            auth_ev_id = current_state_ids.get((etype, state_key))
+            if auth_ev_id:
+                auth_ids.append(auth_ev_id)
+
+        return auth_ids
+
+    async def check_host_in_room(self, room_id: str, host: str) -> bool:
+        with Measure(self._clock, "check_host_in_room"):
+            return await self._store.is_host_joined(room_id, host)
+
     async def check_restricted_join_rules(
         self,
         state_ids: StateMap[str],
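
Note on the event_auth.py hunk above: the new EventAuthHandler methods centralise logic that previously lived on the Auth object (check_from_context, compute_auth_events, check_host_in_room). A hedged sketch of how a caller is expected to use them; `hs`, `event`, `context` and `room_id` are assumed to come from the surrounding handler.

    async def example_usage(hs, event, context, room_id):
        # Sketch only, not Synapse code: shows the intended call sites for the
        # methods added to EventAuthHandler above.
        event_auth_handler = hs.get_event_auth_handler()

        # Membership check, replacing self.auth.check_host_in_room(...).
        in_room = await event_auth_handler.check_host_in_room(room_id, hs.hostname)

        # Auth-event selection, replacing self.auth.compute_auth_events(...).
        prev_state_ids = await context.get_prev_state_ids()
        auth_event_ids = event_auth_handler.compute_auth_events(
            event, prev_state_ids, for_verification=True
        )
        return in_room, auth_event_ids
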
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index f134f1e234..4b3f037072 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -93,7 +93,7 @@ class EventStreamHandler(BaseHandler):
 
             # When the user joins a new room, or another user joins a currently
             # joined room, we need to send down presence for those users.
-            to_add = []  # type: List[JsonDict]
+            to_add: List[JsonDict] = []
             for event in events:
                 if not isinstance(event, EventBase):
                     continue
@@ -103,9 +103,9 @@ class EventStreamHandler(BaseHandler):
                     # Send down presence.
                     if event.state_key == auth_user_id:
                         # Send down presence for everyone in the room.
-                        users = await self.store.get_users_in_room(
+                        users: Iterable[str] = await self.store.get_users_in_room(
                             event.room_id
-                        )  # type: Iterable[str]
+                        )
                     else:
                         users = [event.state_key]
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index d929c65131..cf389be3e4 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -181,7 +181,7 @@ class FederationHandler(BaseHandler):
 
         # When joining a room we need to queue any events for that room up.
         # For each room, a list of (pdu, origin) tuples.
-        self.room_queues = {}  # type: Dict[str, List[Tuple[EventBase, str]]]
+        self.room_queues: Dict[str, List[Tuple[EventBase, str]]] = {}
         self._room_pdu_linearizer = Linearizer("fed_room_pdu")
 
         self._room_backfill = Linearizer("room_backfill")
@@ -250,7 +250,9 @@ class FederationHandler(BaseHandler):
         #
         # Note that if we were never in the room then we would have already
         # dropped the event, since we wouldn't know the room version.
-        is_in_room = await self.auth.check_host_in_room(room_id, self.server_name)
+        is_in_room = await self._event_auth_handler.check_host_in_room(
+            room_id, self.server_name
+        )
         if not is_in_room:
             logger.info(
                 "Ignoring PDU from %s as we're not in the room",
@@ -366,7 +368,7 @@ class FederationHandler(BaseHandler):
                     ours = await self.state_store.get_state_groups_ids(room_id, seen)
 
                     # state_maps is a list of mappings from (type, state_key) to event_id
-                    state_maps = list(ours.values())  # type: List[StateMap[str]]
+                    state_maps: List[StateMap[str]] = list(ours.values())
 
                     # we don't need this any more, let's delete it.
                     del ours
@@ -733,7 +735,7 @@ class FederationHandler(BaseHandler):
         # we need to make sure we re-load from the database to get the rejected
         # state correct.
         fetched_events.update(
-            (await self.store.get_events(missing_desired_events, allow_rejected=True))
+            await self.store.get_events(missing_desired_events, allow_rejected=True)
         )
 
         # check for events which were in the wrong room.
@@ -843,7 +845,7 @@ class FederationHandler(BaseHandler):
                 # exact key to expect. Otherwise check it matches any key we
                 # have for that device.
 
-                current_keys = []  # type: Container[str]
+                current_keys: Container[str] = []
 
                 if device:
                     keys = device.get("keys", {}).get("keys", {})
@@ -1183,7 +1185,7 @@ class FederationHandler(BaseHandler):
                 if e_type == EventTypes.Member and event.membership == Membership.JOIN
             ]
 
-            joined_domains = {}  # type: Dict[str, int]
+            joined_domains: Dict[str, int] = {}
             for u, d in joined_users:
                 try:
                     dom = get_domain_from_id(u)
@@ -1312,7 +1314,7 @@ class FederationHandler(BaseHandler):
 
         room_version = await self.store.get_room_version(room_id)
 
-        event_map = {}  # type: Dict[str, EventBase]
+        event_map: Dict[str, EventBase] = {}
 
         async def get_event(event_id: str):
             with nested_logging_context(event_id):
@@ -1412,12 +1414,15 @@ class FederationHandler(BaseHandler):
 
         Invites must be signed by the invitee's server before distribution.
         """
-        pdu = await self.federation_client.send_invite(
-            destination=target_host,
-            room_id=event.room_id,
-            event_id=event.event_id,
-            pdu=event,
-        )
+        try:
+            pdu = await self.federation_client.send_invite(
+                destination=target_host,
+                room_id=event.room_id,
+                event_id=event.event_id,
+                pdu=event,
+            )
+        except RequestSendFailed:
+            raise SynapseError(502, f"Can't connect to server {target_host}")
 
         return pdu
 
@@ -1591,7 +1596,7 @@ class FederationHandler(BaseHandler):
 
         # Ask the remote server to create a valid knock event for us. Once received,
         # we sign the event
-        params = {"ver": supported_room_versions}  # type: Dict[str, Iterable[str]]
+        params: Dict[str, Iterable[str]] = {"ver": supported_room_versions}
         origin, event, event_format_version = await self._make_and_verify_event(
             target_hosts, room_id, knockee, Membership.KNOCK, content, params=params
         )
@@ -1674,7 +1679,9 @@ class FederationHandler(BaseHandler):
         room_version = await self.store.get_room_version_id(room_id)
 
         # now check that we are *still* in the room
-        is_in_room = await self.auth.check_host_in_room(room_id, self.server_name)
+        is_in_room = await self._event_auth_handler.check_host_in_room(
+            room_id, self.server_name
+        )
         if not is_in_room:
             logger.info(
                 "Got /make_join request for room %s we are no longer in",
@@ -1705,7 +1712,7 @@ class FederationHandler(BaseHandler):
 
         # The remote hasn't signed it yet, obviously. We'll do the full checks
         # when we get the event back in `on_send_join_request`
-        await self.auth.check_from_context(
+        await self._event_auth_handler.check_from_context(
             room_version, event, context, do_sig_check=False
         )
 
@@ -1877,7 +1884,7 @@ class FederationHandler(BaseHandler):
         try:
             # The remote hasn't signed it yet, obviously. We'll do the full checks
             # when we get the event back in `on_send_leave_request`
-            await self.auth.check_from_context(
+            await self._event_auth_handler.check_from_context(
                 room_version, event, context, do_sig_check=False
             )
         except AuthError as e:
@@ -1939,7 +1946,7 @@ class FederationHandler(BaseHandler):
         try:
             # The remote hasn't signed it yet, obviously. We'll do the full checks
             # when we get the event back in `on_send_knock_request`
-            await self.auth.check_from_context(
+            await self._event_auth_handler.check_from_context(
                 room_version, event, context, do_sig_check=False
             )
         except AuthError as e:
@@ -2111,7 +2118,7 @@ class FederationHandler(BaseHandler):
     async def on_backfill_request(
         self, origin: str, room_id: str, pdu_list: List[str], limit: int
     ) -> List[EventBase]:
-        in_room = await self.auth.check_host_in_room(room_id, origin)
+        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
@@ -2146,7 +2153,9 @@ class FederationHandler(BaseHandler):
         )
 
         if event:
-            in_room = await self.auth.check_host_in_room(event.room_id, origin)
+            in_room = await self._event_auth_handler.check_host_in_room(
+                event.room_id, origin
+            )
             if not in_room:
                 raise AuthError(403, "Host not in room.")
 
@@ -2444,14 +2453,14 @@ class FederationHandler(BaseHandler):
             state_sets_d = await self.state_store.get_state_groups(
                 event.room_id, extrem_ids
             )
-            state_sets = list(state_sets_d.values())  # type: List[Iterable[EventBase]]
+            state_sets: List[Iterable[EventBase]] = list(state_sets_d.values())
             state_sets.append(state)
             current_states = await self.state_handler.resolve_events(
                 room_version, state_sets, event
             )
-            current_state_ids = {
+            current_state_ids: StateMap[str] = {
                 k: e.event_id for k, e in current_states.items()
-            }  # type: StateMap[str]
+            }
         else:
             current_state_ids = await self.state_handler.get_current_state_ids(
                 event.room_id, latest_event_ids=extrem_ids
@@ -2499,7 +2508,7 @@ class FederationHandler(BaseHandler):
         latest_events: List[str],
         limit: int,
     ) -> List[EventBase]:
-        in_room = await self.auth.check_host_in_room(room_id, origin)
+        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
         if not in_room:
             raise AuthError(403, "Host not in room.")
 
@@ -2562,7 +2571,7 @@ class FederationHandler(BaseHandler):
 
         if not auth_events:
             prev_state_ids = await context.get_prev_state_ids()
-            auth_events_ids = self.auth.compute_auth_events(
+            auth_events_ids = self._event_auth_handler.compute_auth_events(
                 event, prev_state_ids, for_verification=True
             )
             auth_events_x = await self.store.get_events(auth_events_ids)
@@ -2808,7 +2817,7 @@ class FederationHandler(BaseHandler):
         """
         # exclude the state key of the new event from the current_state in the context.
         if event.is_state():
-            event_key = (event.type, event.state_key)  # type: Optional[Tuple[str, str]]
+            event_key: Optional[Tuple[str, str]] = (event.type, event.state_key)
         else:
             event_key = None
         state_updates = {
@@ -2991,7 +3000,7 @@ class FederationHandler(BaseHandler):
             "state_key": target_user_id,
         }
 
-        if await self.auth.check_host_in_room(room_id, self.hs.hostname):
+        if await self._event_auth_handler.check_host_in_room(room_id, self.hs.hostname):
             room_version = await self.store.get_room_version_id(room_id)
             builder = self.event_builder_factory.new(room_version, event_dict)
 
@@ -3011,7 +3020,9 @@ class FederationHandler(BaseHandler):
             event.internal_metadata.send_on_behalf_of = self.hs.hostname
 
             try:
-                await self.auth.check_from_context(room_version, event, context)
+                await self._event_auth_handler.check_from_context(
+                    room_version, event, context
+                )
             except AuthError as e:
                 logger.warning("Denying new third party invite %r because %s", event, e)
                 raise e
@@ -3023,9 +3034,13 @@ class FederationHandler(BaseHandler):
             await member_handler.send_membership_event(None, event, context)
         else:
             destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
-            await self.federation_client.forward_third_party_invite(
-                destinations, room_id, event_dict
-            )
+
+            try:
+                await self.federation_client.forward_third_party_invite(
+                    destinations, room_id, event_dict
+                )
+            except (RequestSendFailed, HttpResponseException):
+                raise SynapseError(502, "Failed to forward third party invite")
 
     async def on_exchange_third_party_invite_request(
         self, event_dict: JsonDict
@@ -3054,7 +3069,9 @@ class FederationHandler(BaseHandler):
         )
 
         try:
-            await self.auth.check_from_context(room_version, event, context)
+            await self._event_auth_handler.check_from_context(
+                room_version, event, context
+            )
         except AuthError as e:
             logger.warning("Denying third party invite %r because %s", event, e)
             raise e
@@ -3139,10 +3156,10 @@ class FederationHandler(BaseHandler):
 
         logger.debug("Checking auth on event %r", event.content)
 
-        last_exception = None  # type: Optional[Exception]
+        last_exception: Optional[Exception] = None
 
         # for each public key in the 3pid invite event
-        for public_key_object in self.hs.get_auth().get_public_keys(invite_event):
+        for public_key_object in event_auth.get_public_keys(invite_event):
             try:
                 # for each sig on the third_party_invite block of the actual invite
                 for server, signature_block in signed["signatures"].items():
diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index 157f2ff218..1a6c5c64a2 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -214,7 +214,7 @@ class GroupsLocalWorkerHandler:
     async def bulk_get_publicised_groups(
         self, user_ids: Iterable[str], proxy: bool = True
     ) -> JsonDict:
-        destinations = {}  # type: Dict[str, Set[str]]
+        destinations: Dict[str, Set[str]] = {}
         local_users = set()
 
         for user_id in user_ids:
@@ -227,7 +227,7 @@ class GroupsLocalWorkerHandler:
             raise SynapseError(400, "Some user_ids are not local")
 
         results = {}
-        failed_results = []  # type: List[str]
+        failed_results: List[str] = []
         for destination, dest_user_ids in destinations.items():
             try:
                 r = await self.transport_client.bulk_get_publicised_groups(
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 33d16fbf9c..0961dec5ab 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -302,7 +302,7 @@ class IdentityHandler(BaseHandler):
             )
 
         url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server,)
-        url_bytes = "/_matrix/identity/api/v1/3pid/unbind".encode("ascii")
+        url_bytes = b"/_matrix/identity/api/v1/3pid/unbind"
 
         content = {
             "mxid": mxid,
@@ -695,7 +695,7 @@ class IdentityHandler(BaseHandler):
                 return data["mxid"]
         except RequestTimedOutError:
             raise SynapseError(500, "Timed out contacting identity server")
-        except IOError as e:
+        except OSError as e:
             logger.warning("Error from v1 identity server lookup: %s" % (e,))
 
         return None
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 76242865ae..5d49640760 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -46,9 +46,17 @@ class InitialSyncHandler(BaseHandler):
         self.state = hs.get_state_handler()
         self.clock = hs.get_clock()
         self.validator = EventValidator()
-        self.snapshot_cache = ResponseCache(
-            hs.get_clock(), "initial_sync_cache"
-        )  # type: ResponseCache[Tuple[str, Optional[StreamToken], Optional[StreamToken], str, Optional[int], bool, bool]]
+        self.snapshot_cache: ResponseCache[
+            Tuple[
+                str,
+                Optional[StreamToken],
+                Optional[StreamToken],
+                str,
+                Optional[int],
+                bool,
+                bool,
+            ]
+        ] = ResponseCache(hs.get_clock(), "initial_sync_cache")
         self._event_serializer = hs.get_event_client_serializer()
         self.storage = hs.get_storage()
         self.state_store = self.storage.state
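
Note on the initial_sync.py hunk above: when the key type is a wide tuple, the annotation moves to the left-hand side of the assignment and can be wrapped across lines, as shown. A toy sketch of the same shape, using a stand-in generic cache class rather than Synapse's ResponseCache:

    from typing import Dict, Generic, Optional, Tuple, TypeVar

    K = TypeVar("K")

    class ToyCache(Generic[K]):
        # Stand-in for a generic, key-typed response cache.
        def __init__(self, name: str) -> None:
            self.name = name
            self._entries: Dict[K, object] = {}

    # The attribute annotation carries the full key tuple type inline.
    snapshot_cache: ToyCache[Tuple[str, Optional[int], bool]] = ToyCache("initial_sync_cache")
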
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index db12abd59d..c7fe4ff89e 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -81,7 +81,7 @@ class MessageHandler:
 
         # The scheduled call to self._expire_event. None if no call is currently
         # scheduled.
-        self._scheduled_expiry = None  # type: Optional[IDelayedCall]
+        self._scheduled_expiry: Optional[IDelayedCall] = None
 
         if not hs.config.worker_app:
             run_as_background_process(
@@ -196,9 +196,7 @@ class MessageHandler:
                 room_state_events = await self.state_store.get_state_for_events(
                     [event.event_id], state_filter=state_filter
                 )
-                room_state = room_state_events[
-                    event.event_id
-                ]  # type: Mapping[Any, EventBase]
+                room_state: Mapping[Any, EventBase] = room_state_events[event.event_id]
             else:
                 raise AuthError(
                     403,
@@ -385,6 +383,7 @@ class EventCreationHandler:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
         self.auth = hs.get_auth()
+        self._event_auth_handler = hs.get_event_auth_handler()
         self.store = hs.get_datastore()
         self.storage = hs.get_storage()
         self.state = hs.get_state_handler()
@@ -420,9 +419,9 @@ class EventCreationHandler:
         self.action_generator = hs.get_action_generator()
 
         self.spam_checker = hs.get_spam_checker()
-        self.third_party_event_rules = (
+        self.third_party_event_rules: "ThirdPartyEventRules" = (
             self.hs.get_third_party_event_rules()
-        )  # type: ThirdPartyEventRules
+        )
 
         self._block_events_without_consent_error = (
             self.config.block_events_without_consent_error
@@ -439,7 +438,7 @@ class EventCreationHandler:
         #
         # map from room id to time-of-last-attempt.
         #
-        self._rooms_to_exclude_from_dummy_event_insertion = {}  # type: Dict[str, int]
+        self._rooms_to_exclude_from_dummy_event_insertion: Dict[str, int] = {}
         # The number of forward extremities before a dummy event is sent.

         self._dummy_events_threshold = hs.config.dummy_events_threshold
 
@@ -464,9 +463,7 @@ class EventCreationHandler:
         # Stores the state groups we've recently added to the joined hosts
         # external cache. Note that the timeout must be significantly less than
         # the TTL on the external cache.
-        self._external_cache_joined_hosts_updates = (
-            None
-        )  # type: Optional[ExpiringCache]
+        self._external_cache_joined_hosts_updates: Optional[ExpiringCache] = None
         if self._external_cache.is_enabled():
             self._external_cache_joined_hosts_updates = ExpiringCache(
                 "_external_cache_joined_hosts_updates",
@@ -509,12 +506,17 @@ class EventCreationHandler:
                 Should normally be left as None, which will cause them to be calculated
                 based on the room state at the prev_events.
 
+                If non-None, prev_event_ids must also be provided.
+
             require_consent: Whether to check if the requester has
                 consented to the privacy policy.
 
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
+            historical: Indicates whether the message is being inserted
+                back in time around some existing events. This is used to skip
+                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -581,6 +583,9 @@ class EventCreationHandler:
         # Strip down the auth_event_ids to only what we need to auth the event.
         # For example, we don't need extra m.room.member that don't match event.sender
         if auth_event_ids is not None:
+            # If auth events are provided, prev events must be also.
+            assert prev_event_ids is not None
+
             temp_event = await builder.build(
                 prev_event_ids=prev_event_ids,
                 auth_event_ids=auth_event_ids,
@@ -592,7 +597,7 @@ class EventCreationHandler:
                 (e.type, e.state_key): e.event_id for e in auth_events
             }
             # Actually strip down and use the necessary auth events
-            auth_event_ids = self.auth.compute_auth_events(
+            auth_event_ids = self._event_auth_handler.compute_auth_events(
                 event=temp_event,
                 current_state_ids=auth_event_state_map,
                 for_verification=False,
@@ -766,6 +771,7 @@ class EventCreationHandler:
         txn_id: Optional[str] = None,
         ignore_shadow_ban: bool = False,
         outlier: bool = False,
+        historical: bool = False,
         depth: Optional[int] = None,
     ) -> Tuple[EventBase, int]:
         """
@@ -784,6 +790,8 @@ class EventCreationHandler:
                 The event ids to use as the auth_events for the new event.
                 Should normally be left as None, which will cause them to be calculated
                 based on the room state at the prev_events.
+
+                If non-None, prev_event_ids must also be provided.
             ratelimit: Whether to rate limit this send.
             txn_id: The transaction ID.
             ignore_shadow_ban: True if shadow-banned users should be allowed to
@@ -791,6 +799,9 @@ class EventCreationHandler:
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
+            historical: Indicates whether the message is being inserted
+                back in time around some existing events. This is used to skip
+                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -839,6 +850,7 @@ class EventCreationHandler:
                 prev_event_ids=prev_event_ids,
                 auth_event_ids=auth_event_ids,
                 outlier=outlier,
+                historical=historical,
                 depth=depth,
             )
 
@@ -1049,7 +1061,9 @@ class EventCreationHandler:
             assert event.content["membership"] == Membership.LEAVE
         else:
             try:
-                await self.auth.check_from_context(room_version, event, context)
+                await self._event_auth_handler.check_from_context(
+                    room_version, event, context
+                )
             except AuthError as err:
                 logger.warning("Denying new event %r because %s", event, err)
                 raise err
@@ -1281,7 +1295,7 @@ class EventCreationHandler:
             # Validate a newly added alias or newly added alt_aliases.
 
             original_alias = None
-            original_alt_aliases = []  # type: List[str]
+            original_alt_aliases: List[str] = []
 
             original_event_id = event.unsigned.get("replaces_state")
             if original_event_id:
@@ -1374,7 +1388,7 @@ class EventCreationHandler:
                     raise AuthError(403, "Redacting server ACL events is not permitted")
 
             prev_state_ids = await context.get_prev_state_ids()
-            auth_events_ids = self.auth.compute_auth_events(
+            auth_events_ids = self._event_auth_handler.compute_auth_events(
                 event, prev_state_ids, for_verification=True
             )
             auth_events_map = await self.store.get_events(auth_events_ids)
@@ -1584,11 +1598,13 @@ class EventCreationHandler:
         for k, v in original_event.internal_metadata.get_dict().items():
             setattr(builder.internal_metadata, k, v)
 
-        # the event type hasn't changed, so there's no point in re-calculating the
-        # auth events.
+        # modules can send new state events, so we re-calculate the auth events just in
+        # case.
+        prev_event_ids = await self.store.get_prev_events_for_room(builder.room_id)
+
         event = await builder.build(
-            prev_event_ids=original_event.prev_event_ids(),
-            auth_event_ids=original_event.auth_event_ids(),
+            prev_event_ids=prev_event_ids,
+            auth_event_ids=None,
         )
 
         # we rebuild the event context, to be on the safe side. If nothing else,
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index ee6e41c0e4..eca8f16040 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -72,26 +72,26 @@ _SESSION_COOKIES = [
     (b"oidc_session_no_samesite", b"HttpOnly"),
 ]
 
+
 #: A token exchanged from the token endpoint, as per RFC6749 sec 5.1. and
 #: OpenID.Core sec 3.1.3.3.
-Token = TypedDict(
-    "Token",
-    {
-        "access_token": str,
-        "token_type": str,
-        "id_token": Optional[str],
-        "refresh_token": Optional[str],
-        "expires_in": int,
-        "scope": Optional[str],
-    },
-)
+class Token(TypedDict):
+    access_token: str
+    token_type: str
+    id_token: Optional[str]
+    refresh_token: Optional[str]
+    expires_in: int
+    scope: Optional[str]
+
 
 #: A JWK, as per RFC7517 sec 4. The type could be more precise than that, but
 #: there is no real point of doing this in our case.
 JWK = Dict[str, str]
 
+
 #: A JWK Set, as per RFC7517 sec 5.
-JWKS = TypedDict("JWKS", {"keys": List[JWK]})
+class JWKS(TypedDict):
+    keys: List[JWK]
 
 
 class OidcHandler:
@@ -105,9 +105,9 @@ class OidcHandler:
         assert provider_confs
 
         self._token_generator = OidcSessionTokenGenerator(hs)
-        self._providers = {
+        self._providers: Dict[str, "OidcProvider"] = {
             p.idp_id: OidcProvider(hs, self._token_generator, p) for p in provider_confs
-        }  # type: Dict[str, OidcProvider]
+        }
 
     async def load_metadata(self) -> None:
         """Validate the config and load the metadata from the remote endpoint.
@@ -178,7 +178,7 @@ class OidcHandler:
         # are two.
 
         for cookie_name, _ in _SESSION_COOKIES:
-            session = request.getCookie(cookie_name)  # type: Optional[bytes]
+            session: Optional[bytes] = request.getCookie(cookie_name)
             if session is not None:
                 break
         else:
@@ -255,7 +255,7 @@ class OidcError(Exception):
 
     def __str__(self):
         if self.error_description:
-            return "{}: {}".format(self.error, self.error_description)
+            return f"{self.error}: {self.error_description}"
         return self.error
 
 
@@ -277,7 +277,7 @@ class OidcProvider:
         self._token_generator = token_generator
 
         self._config = provider
-        self._callback_url = hs.config.oidc_callback_url  # type: str
+        self._callback_url: str = hs.config.oidc_callback_url
 
         # Calculate the prefix for OIDC callback paths based on the public_baseurl.
         # We'll insert this into the Path= parameter of any session cookies we set.
@@ -290,7 +290,7 @@ class OidcProvider:
         self._scopes = provider.scopes
         self._user_profile_method = provider.user_profile_method
 
-        client_secret = None  # type: Union[None, str, JwtClientSecret]
+        client_secret: Optional[Union[str, JwtClientSecret]] = None
         if provider.client_secret:
             client_secret = provider.client_secret
         elif provider.client_secret_jwt_key:
@@ -305,7 +305,7 @@ class OidcProvider:
             provider.client_id,
             client_secret,
             provider.client_auth_method,
-        )  # type: ClientAuth
+        )
         self._client_auth_method = provider.client_auth_method
 
         # cache of metadata for the identity provider (endpoint uris, mostly). This is
@@ -324,7 +324,7 @@ class OidcProvider:
         self._allow_existing_users = provider.allow_existing_users
 
         self._http_client = hs.get_proxied_http_client()
-        self._server_name = hs.config.server_name  # type: str
+        self._server_name: str = hs.config.server_name
 
         # identifier for the external_ids table
         self.idp_id = provider.idp_id
@@ -639,7 +639,7 @@ class OidcProvider:
             )
             logger.warning(description)
             # Body was still valid JSON. Might be useful to log it for debugging.
-            logger.warning("Code exchange response: {resp!r}".format(resp=resp))
+            logger.warning("Code exchange response: %r", resp)
             raise OidcError("server_error", description)
 
         return resp
@@ -1217,10 +1217,12 @@ class OidcSessionData:
     ui_auth_session_id = attr.ib(type=str)
 
 
-UserAttributeDict = TypedDict(
-    "UserAttributeDict",
-    {"localpart": Optional[str], "display_name": Optional[str], "emails": List[str]},
-)
+class UserAttributeDict(TypedDict):
+    localpart: Optional[str]
+    display_name: Optional[str]
+    emails: List[str]
+
+
 C = TypeVar("C")
 
 
@@ -1381,7 +1383,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         if display_name == "":
             display_name = None
 
-        emails = []  # type: List[str]
+        emails: List[str] = []
         email = render_template_field(self._config.email_template)
         if email:
             emails.append(email)
@@ -1391,7 +1393,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         )
 
     async def get_extra_attributes(self, userinfo: UserInfo, token: Token) -> JsonDict:
-        extras = {}  # type: Dict[str, str]
+        extras: Dict[str, str] = {}
         for key, template in self._config.extra_attributes.items():
             try:
                 extras[key] = template.render(user=userinfo).strip()
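
Note on the oidc.py hunks above: they also replace the functional TypedDict spelling with the class-based spelling, which reads more naturally once a type has several fields; the two forms are equivalent. A small sketch with a made-up type (TypedDict is importable from typing on Python 3.8+, or from typing_extensions on older versions):

    from typing import List, Optional, TypedDict

    # Functional form, as used before this change.
    ExampleBefore = TypedDict(
        "ExampleBefore", {"localpart": Optional[str], "emails": List[str]}
    )

    # Class-based form, as used after this change; the fields are identical.
    class ExampleAfter(TypedDict):
        localpart: Optional[str]
        emails: List[str]
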
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 1e1186c29e..1dbafd253d 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -81,9 +81,9 @@ class PaginationHandler:
         self._server_name = hs.hostname
 
         self.pagination_lock = ReadWriteLock()
-        self._purges_in_progress_by_room = set()  # type: Set[str]
+        self._purges_in_progress_by_room: Set[str] = set()
         # map from purge id to PurgeStatus
-        self._purges_by_id = {}  # type: Dict[str, PurgeStatus]
+        self._purges_by_id: Dict[str, PurgeStatus] = {}
         self._event_serializer = hs.get_event_client_serializer()
 
         self._retention_default_max_lifetime = hs.config.retention_default_max_lifetime
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 44ed7a0712..016c5df2ca 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -378,14 +378,14 @@ class WorkerPresenceHandler(BasePresenceHandler):
 
         # The number of ongoing syncs on this process, by user id.
         # Empty if _presence_enabled is false.
-        self._user_to_num_current_syncs = {}  # type: Dict[str, int]
+        self._user_to_num_current_syncs: Dict[str, int] = {}
 
         self.notifier = hs.get_notifier()
         self.instance_id = hs.get_instance_id()
 
         # user_id -> last_sync_ms. Lists the users that have stopped syncing but
         # we haven't notified the presence writer of that yet
-        self.users_going_offline = {}  # type: Dict[str, int]
+        self.users_going_offline: Dict[str, int] = {}
 
         self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs)
         self._set_state_client = ReplicationPresenceSetState.make_client(hs)
@@ -650,7 +650,7 @@ class PresenceHandler(BasePresenceHandler):
 
         # Set of users who have presence in the `user_to_current_state` that
         # have not yet been persisted
-        self.unpersisted_users_changes = set()  # type: Set[str]
+        self.unpersisted_users_changes: Set[str] = set()
 
         hs.get_reactor().addSystemEventTrigger(
             "before",
@@ -664,7 +664,7 @@ class PresenceHandler(BasePresenceHandler):
 
         # Keeps track of the number of *ongoing* syncs on this process. While
         # this is non zero a user will never go offline.
-        self.user_to_num_current_syncs = {}  # type: Dict[str, int]
+        self.user_to_num_current_syncs: Dict[str, int] = {}
 
         # Keeps track of the number of *ongoing* syncs on other processes.
         # While any sync is ongoing on another process the user will never
@@ -674,8 +674,8 @@ class PresenceHandler(BasePresenceHandler):
         # we assume that all the sync requests on that process have stopped.
         # Stored as a dict from process_id to set of user_id, and a dict of
         # process_id to millisecond timestamp last updated.
-        self.external_process_to_current_syncs = {}  # type: Dict[str, Set[str]]
-        self.external_process_last_updated_ms = {}  # type: Dict[str, int]
+        self.external_process_to_current_syncs: Dict[str, Set[str]] = {}
+        self.external_process_last_updated_ms: Dict[str, int] = {}
 
         self.external_sync_linearizer = Linearizer(name="external_sync_linearizer")
 
@@ -1581,9 +1581,7 @@ class PresenceEventSource:
 
             # The set of users that we're interested in and that have had a presence update.
             # We'll actually pull the presence updates for these users at the end.
-            interested_and_updated_users = (
-                set()
-            )  # type: Union[Set[str], FrozenSet[str]]
+            interested_and_updated_users: Union[Set[str], FrozenSet[str]] = set()
 
             if from_key:
                 # First get all users that have had a presence update
@@ -1950,8 +1948,8 @@ async def get_interested_parties(
         A 2-tuple of `(room_ids_to_states, users_to_states)`,
         with each item being a dict of `entity_name` -> `[UserPresenceState]`
     """
-    room_ids_to_states = {}  # type: Dict[str, List[UserPresenceState]]
-    users_to_states = {}  # type: Dict[str, List[UserPresenceState]]
+    room_ids_to_states: Dict[str, List[UserPresenceState]] = {}
+    users_to_states: Dict[str, List[UserPresenceState]] = {}
     for state in states:
         room_ids = await store.get_rooms_for_user(state.user_id)
         for room_id in room_ids:
@@ -2063,12 +2061,12 @@ class PresenceFederationQueue:
         # stream_id, destinations, user_ids)`. We don't store the full states
         # for efficiency, and remote workers will already have the full states
         # cached.
-        self._queue = []  # type: List[Tuple[int, int, Collection[str], Set[str]]]
+        self._queue: List[Tuple[int, int, Collection[str], Set[str]]] = []
 
         self._next_id = 1
 
         # Map from instance name to current token
-        self._current_tokens = {}  # type: Dict[str, int]
+        self._current_tokens: Dict[str, int] = {}
 
         if self._queue_presence_updates:
             self._clock.looping_call(self._clear_queue, self._CLEAR_ITEMS_EVERY_MS)
@@ -2168,7 +2166,7 @@ class PresenceFederationQueue:
         # handle the case where `from_token` stream ID has already been dropped.
         start_idx = max(from_token + 1 - self._next_id, -len(self._queue))
 
-        to_send = []  # type: List[Tuple[int, Tuple[str, str]]]
+        to_send: List[Tuple[int, Tuple[str, str]]] = []
         limited = False
         new_id = upto_token
         for _, stream_id, destinations, user_ids in self._queue[start_idx:]:
@@ -2216,7 +2214,7 @@ class PresenceFederationQueue:
         if not self._federation:
             return
 
-        hosts_to_users = {}  # type: Dict[str, Set[str]]
+        hosts_to_users: Dict[str, Set[str]] = {}
         for row in rows:
             hosts_to_users.setdefault(row.destination, set()).add(row.user_id)
 
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 05b4a97b59..20a033d0ba 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -197,7 +197,7 @@ class ProfileHandler(BaseHandler):
                 400, "Displayname is too long (max %i)" % (MAX_DISPLAYNAME_LEN,)
             )
 
-        displayname_to_set = new_displayname  # type: Optional[str]
+        displayname_to_set: Optional[str] = new_displayname
         if new_displayname == "":
             displayname_to_set = None
 
@@ -286,7 +286,7 @@ class ProfileHandler(BaseHandler):
                 400, "Avatar URL is too long (max %i)" % (MAX_AVATAR_URL_LEN,)
             )
 
-        avatar_url_to_set = new_avatar_url  # type: Optional[str]
+        avatar_url_to_set: Optional[str] = new_avatar_url
         if new_avatar_url == "":
             avatar_url_to_set = None
 
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index f782d9db32..283483fc2c 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -30,6 +30,8 @@ class ReceiptsHandler(BaseHandler):
 
         self.server_name = hs.config.server_name
         self.store = hs.get_datastore()
+        self.event_auth_handler = hs.get_event_auth_handler()
+
         self.hs = hs
 
         # We only need to poke the federation sender explicitly if its on the
@@ -59,6 +61,19 @@ class ReceiptsHandler(BaseHandler):
         """Called when we receive an EDU of type m.receipt from a remote HS."""
         receipts = []
         for room_id, room_values in content.items():
+            # If we're not in the room just ditch the event entirely. This is
+            # probably an old server that has come back and thinks we're still in
+            # the room (or we've been rejoined to the room by a state reset).
+            is_in_room = await self.event_auth_handler.check_host_in_room(
+                room_id, self.server_name
+            )
+            if not is_in_room:
+                logger.info(
+                    "Ignoring receipt from %s as we're not in the room",
+                    origin,
+                )
+                continue
+
             for receipt_type, users in room_values.items():
                 for user_id, user_values in users.items():
                     if get_domain_from_id(user_id) != origin:
@@ -83,8 +98,8 @@ class ReceiptsHandler(BaseHandler):
 
     async def _handle_new_receipts(self, receipts: List[ReadReceipt]) -> bool:
         """Takes a list of receipts, stores them and informs the notifier."""
-        min_batch_id = None  # type: Optional[int]
-        max_batch_id = None  # type: Optional[int]
+        min_batch_id: Optional[int] = None
+        max_batch_id: Optional[int] = None
 
         for receipt in receipts:
             res = await self.store.insert_receipt(
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 26ef016179..8cf614136e 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -55,15 +55,12 @@ login_counter = Counter(
     ["guest", "auth_provider"],
 )
 
-LoginDict = TypedDict(
-    "LoginDict",
-    {
-        "device_id": str,
-        "access_token": str,
-        "valid_until_ms": Optional[int],
-        "refresh_token": Optional[str],
-    },
-)
+
+class LoginDict(TypedDict):
+    device_id: str
+    access_token: str
+    valid_until_ms: Optional[int]
+    refresh_token: Optional[str]
 
 
 class RegistrationHandler(BaseHandler):
@@ -77,6 +74,7 @@ class RegistrationHandler(BaseHandler):
         self.identity_handler = self.hs.get_identity_handler()
         self.ratelimiter = hs.get_registration_ratelimiter()
         self.macaroon_gen = hs.get_macaroon_generator()
+        self._account_validity_handler = hs.get_account_validity_handler()
         self._server_notices_mxid = hs.config.server_notices_mxid
         self._server_name = hs.hostname
 
@@ -700,6 +698,10 @@ class RegistrationHandler(BaseHandler):
                 shadow_banned=shadow_banned,
             )
 
+            # Only call the account validity module(s) on the main process, to avoid
+            # repeating e.g. database writes on all of the workers.
+            await self._account_validity_handler.on_user_registration(user_id)
+
     async def register_device(
         self,
         user_id: str,
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 835d874cee..64656fda22 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -83,10 +83,11 @@ class RoomCreationHandler(BaseHandler):
         self.spam_checker = hs.get_spam_checker()
         self.event_creation_handler = hs.get_event_creation_handler()
         self.room_member_handler = hs.get_room_member_handler()
+        self._event_auth_handler = hs.get_event_auth_handler()
         self.config = hs.config
 
         # Room state based off defined presets
-        self._presets_dict = {
+        self._presets_dict: Dict[str, Dict[str, Any]] = {
             RoomCreationPreset.PRIVATE_CHAT: {
                 "join_rules": JoinRules.INVITE,
                 "history_visibility": HistoryVisibility.SHARED,
@@ -108,7 +109,7 @@ class RoomCreationHandler(BaseHandler):
                 "guest_can_join": False,
                 "power_level_content_override": {},
             },
-        }  # type: Dict[str, Dict[str, Any]]
+        }
 
         # Modify presets to selectively enable encryption by default per homeserver config
         for preset_name, preset_config in self._presets_dict.items():
@@ -126,9 +127,9 @@ class RoomCreationHandler(BaseHandler):
         # If a user tries to update the same room multiple times in quick
         # succession, only process the first attempt and return its result to
         # subsequent requests
-        self._upgrade_response_cache = ResponseCache(
+        self._upgrade_response_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "room_upgrade", timeout_ms=FIVE_MINUTES_IN_MS
-        )  # type: ResponseCache[Tuple[str, str]]
+        )
         self._server_notices_mxid = hs.config.server_notices_mxid
 
         self.third_party_event_rules = hs.get_third_party_event_rules()
@@ -226,7 +227,7 @@ class RoomCreationHandler(BaseHandler):
             },
         )
         old_room_version = await self.store.get_room_version_id(old_room_id)
-        await self.auth.check_from_context(
+        await self._event_auth_handler.check_from_context(
             old_room_version, tombstone_event, tombstone_context
         )
 
@@ -376,10 +377,10 @@ class RoomCreationHandler(BaseHandler):
         if not await self.spam_checker.user_may_create_room(user_id):
             raise SynapseError(403, "You are not permitted to create rooms")
 
-        creation_content = {
+        creation_content: JsonDict = {
             "room_version": new_room_version.identifier,
             "predecessor": {"room_id": old_room_id, "event_id": tombstone_event_id},
-        }  # type: JsonDict
+        }
 
         # Check if old room was non-federatable
 
@@ -935,7 +936,7 @@ class RoomCreationHandler(BaseHandler):
                 etype=EventTypes.PowerLevels, content=pl_content
             )
         else:
-            power_level_content = {
+            power_level_content: JsonDict = {
                 "users": {creator_id: 100},
                 "users_default": 0,
                 "events": {
@@ -954,7 +955,7 @@ class RoomCreationHandler(BaseHandler):
                 "kick": 50,
                 "redact": 50,
                 "invite": 50,
-            }  # type: JsonDict
+            }
 
             if config["original_invitees_have_ops"]:
                 for invitee in invite_list:
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 5e3ef7ce3a..6284bcdfbc 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -20,7 +20,12 @@ import msgpack
 from unpaddedbase64 import decode_base64, encode_base64
 
 from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
-from synapse.api.errors import Codes, HttpResponseException
+from synapse.api.errors import (
+    Codes,
+    HttpResponseException,
+    RequestSendFailed,
+    SynapseError,
+)
 from synapse.types import JsonDict, ThirdPartyInstanceID
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.response_cache import ResponseCache
@@ -42,12 +47,12 @@ class RoomListHandler(BaseHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.enable_room_list_search = hs.config.enable_room_list_search
-        self.response_cache = ResponseCache(
-            hs.get_clock(), "room_list"
-        )  # type: ResponseCache[Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]]]
-        self.remote_response_cache = ResponseCache(
-            hs.get_clock(), "remote_room_list", timeout_ms=30 * 1000
-        )  # type: ResponseCache[Tuple[str, Optional[int], Optional[str], bool, Optional[str]]]
+        self.response_cache: ResponseCache[
+            Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]]
+        ] = ResponseCache(hs.get_clock(), "room_list")
+        self.remote_response_cache: ResponseCache[
+            Tuple[str, Optional[int], Optional[str], bool, Optional[str]]
+        ] = ResponseCache(hs.get_clock(), "remote_room_list", timeout_ms=30 * 1000)
 
     async def get_local_public_room_list(
         self,
@@ -134,10 +139,10 @@ class RoomListHandler(BaseHandler):
         if since_token:
             batch_token = RoomListNextBatch.from_token(since_token)
 
-            bounds = (
+            bounds: Optional[Tuple[int, str]] = (
                 batch_token.last_joined_members,
                 batch_token.last_room_id,
-            )  # type: Optional[Tuple[int, str]]
+            )
             forwards = batch_token.direction_is_forward
             has_batch_token = True
         else:
@@ -177,7 +182,7 @@ class RoomListHandler(BaseHandler):
 
         results = [build_room_entry(r) for r in results]
 
-        response = {}  # type: JsonDict
+        response: JsonDict = {}
         num_results = len(results)
         if limit is not None:
             more_to_come = num_results == probing_limit
@@ -417,14 +422,17 @@ class RoomListHandler(BaseHandler):
         repl_layer = self.hs.get_federation_client()
         if search_filter:
             # We can't cache when asking for search
-            return await repl_layer.get_public_rooms(
-                server_name,
-                limit=limit,
-                since_token=since_token,
-                search_filter=search_filter,
-                include_all_networks=include_all_networks,
-                third_party_instance_id=third_party_instance_id,
-            )
+            try:
+                return await repl_layer.get_public_rooms(
+                    server_name,
+                    limit=limit,
+                    since_token=since_token,
+                    search_filter=search_filter,
+                    include_all_networks=include_all_networks,
+                    third_party_instance_id=third_party_instance_id,
+                )
+            except (RequestSendFailed, HttpResponseException):
+                raise SynapseError(502, "Failed to fetch room list")
 
         key = (
             server_name,
diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py
index 80ba65b9e0..e6e71e9729 100644
--- a/synapse/handlers/saml.py
+++ b/synapse/handlers/saml.py
@@ -83,7 +83,7 @@ class SamlHandler(BaseHandler):
         self.unstable_idp_brand = None
 
         # a map from saml session id to Saml2SessionData object
-        self._outstanding_requests_dict = {}  # type: Dict[str, Saml2SessionData]
+        self._outstanding_requests_dict: Dict[str, Saml2SessionData] = {}
 
         self._sso_handler = hs.get_sso_handler()
         self._sso_handler.register_identity_provider(self)
@@ -372,7 +372,7 @@ class SamlHandler(BaseHandler):
 
 
 DOT_REPLACE_PATTERN = re.compile(
-    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),))
+    "[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)
 )
 
 
@@ -386,10 +386,10 @@ def dot_replace_for_mxid(username: str) -> str:
     return username
 
 
-MXID_MAPPER_MAP = {
+MXID_MAPPER_MAP: Dict[str, Callable[[str], str]] = {
     "hexencode": map_username_to_mxid_localpart,
     "dotreplace": dot_replace_for_mxid,
-}  # type: Dict[str, Callable[[str], str]]
+}
 
 
 @attr.s
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 4e718d3f63..8226d6f5a1 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -192,7 +192,7 @@ class SearchHandler(BaseHandler):
         # If doing a subset of all rooms search, check if any of the rooms
         # are from an upgraded room, and search their contents as well
         if search_filter.rooms:
-            historical_room_ids = []  # type: List[str]
+            historical_room_ids: List[str] = []
             for room_id in search_filter.rooms:
                 # Add any previous rooms to the search if they exist
                 ids = await self.get_old_rooms_from_upgraded_room(room_id)
@@ -216,9 +216,9 @@ class SearchHandler(BaseHandler):
         rank_map = {}  # event_id -> rank of event
         allowed_events = []
         # Holds result of grouping by room, if applicable
-        room_groups = {}  # type: Dict[str, JsonDict]
+        room_groups: Dict[str, JsonDict] = {}
         # Holds result of grouping by sender, if applicable
-        sender_group = {}  # type: Dict[str, JsonDict]
+        sender_group: Dict[str, JsonDict] = {}
 
         # Holds the next_batch for the entire result set if one of those exists
         global_next_batch = None
@@ -262,7 +262,7 @@ class SearchHandler(BaseHandler):
                 s["results"].append(e.event_id)
 
         elif order_by == "recent":
-            room_events = []  # type: List[EventBase]
+            room_events: List[EventBase] = []
             i = 0
 
             pagination_token = batch_token
diff --git a/synapse/handlers/space_summary.py b/synapse/handlers/space_summary.py
index 266f369883..5f7d4602bd 100644
--- a/synapse/handlers/space_summary.py
+++ b/synapse/handlers/space_summary.py
@@ -24,6 +24,7 @@ from synapse.api.constants import (
     EventContentFields,
     EventTypes,
     HistoryVisibility,
+    JoinRules,
     Membership,
     RoomTypes,
 )
@@ -89,14 +90,14 @@ class SpaceSummaryHandler:
         room_queue = deque((_RoomQueueEntry(room_id, ()),))
 
         # rooms we have already processed
-        processed_rooms = set()  # type: Set[str]
+        processed_rooms: Set[str] = set()
 
         # events we have already processed. We don't necessarily have their event ids,
         # so instead we key on (room id, state key)
-        processed_events = set()  # type: Set[Tuple[str, str]]
+        processed_events: Set[Tuple[str, str]] = set()
 
-        rooms_result = []  # type: List[JsonDict]
-        events_result = []  # type: List[JsonDict]
+        rooms_result: List[JsonDict] = []
+        events_result: List[JsonDict] = []
 
         while room_queue and len(rooms_result) < MAX_ROOMS:
             queue_entry = room_queue.popleft()
@@ -150,14 +151,21 @@ class SpaceSummaryHandler:
                     # The room should only be included in the summary if:
                     #     a. the user is in the room;
                     #     b. the room is world readable; or
-                    #     c. the user is in a space that has been granted access to
-                    #        the room.
+                    #     c. the user could join the room, e.g. the join rules
+                    #        are set to public or the user is in a space that
+                    #        has been granted access to the room.
                     #
                     # Note that we know the user is not in the root room (which is
                     # why the remote call was made in the first place), but the user
                     # could be in one of the children rooms and we just didn't know
                     # about the link.
-                    include_room = room.get("world_readable") is True
+
+                    # The API doesn't return the room version so assume that a
+                    # join rule of knock is valid.
+                    include_room = (
+                        room.get("join_rules") in (JoinRules.PUBLIC, JoinRules.KNOCK)
+                        or room.get("world_readable") is True
+                    )
 
                     # Check if the user is a member of any of the allowed spaces
                     # from the response.
@@ -264,10 +272,10 @@ class SpaceSummaryHandler:
         # the set of rooms that we should not walk further. Initialise it with the
         # excluded-rooms list; we will add other rooms as we process them so that
         # we do not loop.
-        processed_rooms = set(exclude_rooms)  # type: Set[str]
+        processed_rooms: Set[str] = set(exclude_rooms)
 
-        rooms_result = []  # type: List[JsonDict]
-        events_result = []  # type: List[JsonDict]
+        rooms_result: List[JsonDict] = []
+        events_result: List[JsonDict] = []
 
         while room_queue and len(rooms_result) < MAX_ROOMS:
             room_id = room_queue.popleft()
@@ -345,7 +353,7 @@ class SpaceSummaryHandler:
             max_children = MAX_ROOMS_PER_SPACE
 
         now = self._clock.time_msec()
-        events_result = []  # type: List[JsonDict]
+        events_result: List[JsonDict] = []
         for edge_event in itertools.islice(child_events, max_children):
             events_result.append(
                 await self._event_serializer.serialize_event(
@@ -420,9 +428,8 @@ class SpaceSummaryHandler:
 
         It should be included if:
 
-        * The requester is joined or invited to the room.
-        * The requester can join without an invite (per MSC3083).
-        * The origin server has any user that is joined or invited to the room.
+        * The requester is joined or can join the room (per MSC3173).
+        * The origin server has any user that is joined or can join the room.
         * The history visibility is set to world readable.
 
         Args:
@@ -441,13 +448,39 @@ class SpaceSummaryHandler:
 
         # If there's no state for the room, it isn't known.
         if not state_ids:
+            # The user might have a pending invite for the room.
+            if requester and await self._store.get_invite_for_local_user_in_room(
+                requester, room_id
+            ):
+                return True
+
             logger.info("room %s is unknown, omitting from summary", room_id)
             return False
 
         room_version = await self._store.get_room_version(room_id)
 
-        # if we have an authenticated requesting user, first check if they are able to view
-        # stripped state in the room.
+        # Include the room if it has join rules of public or knock.
+        join_rules_event_id = state_ids.get((EventTypes.JoinRules, ""))
+        if join_rules_event_id:
+            join_rules_event = await self._store.get_event(join_rules_event_id)
+            join_rule = join_rules_event.content.get("join_rule")
+            if join_rule == JoinRules.PUBLIC or (
+                room_version.msc2403_knocking and join_rule == JoinRules.KNOCK
+            ):
+                return True
+
+        # Include the room if it is peekable.
+        hist_vis_event_id = state_ids.get((EventTypes.RoomHistoryVisibility, ""))
+        if hist_vis_event_id:
+            hist_vis_ev = await self._store.get_event(hist_vis_event_id)
+            hist_vis = hist_vis_ev.content.get("history_visibility")
+            if hist_vis == HistoryVisibility.WORLD_READABLE:
+                return True
+
+        # Otherwise we need to check information specific to the user or server.
+
+        # If we have an authenticated requesting user, check if they are a member
+        # of the room (or can join the room).
         if requester:
             member_event_id = state_ids.get((EventTypes.Member, requester), None)
 
@@ -470,9 +503,11 @@ class SpaceSummaryHandler:
                     return True
 
         # If this is a request over federation, check if the host is in the room or
-        # is in one of the spaces specified via the join rules.
+        # has a user who could join the room.
         elif origin:
-            if await self._auth.check_host_in_room(room_id, origin):
+            if await self._event_auth_handler.check_host_in_room(
+                room_id, origin
+            ) or await self._store.is_host_invited(room_id, origin):
                 return True
 
             # Alternately, if the host has a user in any of the spaces specified
@@ -485,21 +520,15 @@ class SpaceSummaryHandler:
                     await self._event_auth_handler.get_rooms_that_allow_join(state_ids)
                 )
                 for space_id in allowed_rooms:
-                    if await self._auth.check_host_in_room(space_id, origin):
+                    if await self._event_auth_handler.check_host_in_room(
+                        space_id, origin
+                    ):
                         return True
 
-        # otherwise, check if the room is peekable
-        hist_vis_event_id = state_ids.get((EventTypes.RoomHistoryVisibility, ""), None)
-        if hist_vis_event_id:
-            hist_vis_ev = await self._store.get_event(hist_vis_event_id)
-            hist_vis = hist_vis_ev.content.get("history_visibility")
-            if hist_vis == HistoryVisibility.WORLD_READABLE:
-                return True
-
         logger.info(
-            "room %s is unpeekable and user %s is not a member / not allowed to join, omitting from summary",
+            "room %s is unpeekable and requester %s is not a member / not allowed to join, omitting from summary",
             room_id,
-            requester,
+            requester or origin,
         )
         return False
 
@@ -533,6 +562,7 @@ class SpaceSummaryHandler:
             "canonical_alias": stats["canonical_alias"],
             "num_joined_members": stats["joined_members"],
             "avatar_url": stats["avatar"],
+            "join_rules": stats["join_rules"],
             "world_readable": (
                 stats["history_visibility"] == HistoryVisibility.WORLD_READABLE
             ),
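
The space summary changes above broaden the accessibility check: a room is included if its join rule is public (or knock, on room versions that support knocking), if it is world readable, or, failing that, if the requester has a membership or invite. A simplified pure-function sketch of that decision order; it deliberately ignores the MSC3083 restricted-join and federation-origin branches handled by the real handler:

    from typing import Optional

    def room_is_accessible(
        join_rule: Optional[str],
        history_visibility: Optional[str],
        requester_membership: Optional[str],
        knock_supported: bool,
    ) -> bool:
        # 1. Anyone can join (or knock, where the room version allows it).
        if join_rule == "public" or (knock_supported and join_rule == "knock"):
            return True
        # 2. Peekable rooms are always safe to summarise.
        if history_visibility == "world_readable":
            return True
        # 3. Otherwise fall back to the requester's own membership or invite.
        return requester_membership in ("join", "invite")
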
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 0b297e54c4..1b855a685c 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -202,10 +202,10 @@ class SsoHandler:
         self._mapping_lock = Linearizer(name="sso_user_mapping", clock=hs.get_clock())
 
         # a map from session id to session data
-        self._username_mapping_sessions = {}  # type: Dict[str, UsernameMappingSession]
+        self._username_mapping_sessions: Dict[str, UsernameMappingSession] = {}
 
         # map from idp_id to SsoIdentityProvider
-        self._identity_providers = {}  # type: Dict[str, SsoIdentityProvider]
+        self._identity_providers: Dict[str, SsoIdentityProvider] = {}
 
         self._consent_at_registration = hs.config.consent.user_consent_at_registration
 
@@ -296,7 +296,7 @@ class SsoHandler:
             )
 
         # if the client chose an IdP, use that
-        idp = None  # type: Optional[SsoIdentityProvider]
+        idp: Optional[SsoIdentityProvider] = None
         if idp_id:
             idp = self._identity_providers.get(idp_id)
             if not idp:
@@ -669,9 +669,9 @@ class SsoHandler:
             remote_user_id,
         )
 
-        user_id_to_verify = await self._auth_handler.get_session_data(
+        user_id_to_verify: str = await self._auth_handler.get_session_data(
             ui_auth_session_id, UIAuthSessionDataConstants.REQUEST_USER_ID
-        )  # type: str
+        )
 
         if not user_id:
             logger.warning(
@@ -793,7 +793,7 @@ class SsoHandler:
         session.use_display_name = use_display_name
 
         emails_from_idp = set(session.emails)
-        filtered_emails = set()  # type: Set[str]
+        filtered_emails: Set[str] = set()
 
         # we iterate through the list rather than just building a set intersection, so
         # that we can log attempts to use unknown addresses
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 4e45d1da57..3fd89af2a4 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -45,12 +45,11 @@ class StatsHandler:
         self.clock = hs.get_clock()
         self.notifier = hs.get_notifier()
         self.is_mine_id = hs.is_mine_id
-        self.stats_bucket_size = hs.config.stats_bucket_size
 
         self.stats_enabled = hs.config.stats_enabled
 
         # The current position in the current_state_delta stream
-        self.pos = None  # type: Optional[int]
+        self.pos: Optional[int] = None
 
         # Guard to ensure we only process deltas one at a time
         self._is_processing = False
@@ -106,20 +105,6 @@ class StatsHandler:
                 room_deltas = {}
                 user_deltas = {}
 
-            # Then count deltas for total_events and total_event_bytes.
-            (
-                room_count,
-                user_count,
-            ) = await self.store.get_changes_room_total_events_and_bytes(
-                self.pos, max_pos
-            )
-
-            for room_id, fields in room_count.items():
-                room_deltas.setdefault(room_id, Counter()).update(fields)
-
-            for user_id, fields in user_count.items():
-                user_deltas.setdefault(user_id, Counter()).update(fields)
-
             logger.debug("room_deltas: %s", room_deltas)
             logger.debug("user_deltas: %s", user_deltas)
 
@@ -146,10 +131,10 @@ class StatsHandler:
             mapping from room/user ID to changes in the various fields.
         """
 
-        room_to_stats_deltas = {}  # type: Dict[str, CounterType[str]]
-        user_to_stats_deltas = {}  # type: Dict[str, CounterType[str]]
+        room_to_stats_deltas: Dict[str, CounterType[str]] = {}
+        user_to_stats_deltas: Dict[str, CounterType[str]] = {}
 
-        room_to_state_updates = {}  # type: Dict[str, Dict[str, Any]]
+        room_to_state_updates: Dict[str, Dict[str, Any]] = {}
 
         for delta in deltas:
             typ = delta["type"]
@@ -179,14 +164,12 @@ class StatsHandler:
                 )
                 continue
 
-            event_content = {}  # type: JsonDict
+            event_content: JsonDict = {}
 
-            sender = None
             if event_id is not None:
                 event = await self.store.get_event(event_id, allow_none=True)
                 if event:
                     event_content = event.content or {}
-                    sender = event.sender
 
             # All the values in this dict are deltas (RELATIVE changes)
             room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())
@@ -244,12 +227,6 @@ class StatsHandler:
                     room_stats_delta["joined_members"] += 1
                 elif membership == Membership.INVITE:
                     room_stats_delta["invited_members"] += 1
-
-                    if sender and self.is_mine_id(sender):
-                        user_to_stats_deltas.setdefault(sender, Counter())[
-                            "invites_sent"
-                        ] += 1
-
                 elif membership == Membership.LEAVE:
                     room_stats_delta["left_members"] += 1
                 elif membership == Membership.BAN:
@@ -279,10 +256,6 @@ class StatsHandler:
                 room_state["is_federatable"] = (
                     event_content.get("m.federate", True) is True
                 )
-                if sender and self.is_mine_id(sender):
-                    user_to_stats_deltas.setdefault(sender, Counter())[
-                        "rooms_created"
-                    ] += 1
             elif typ == EventTypes.JoinRules:
                 room_state["join_rules"] = event_content.get("join_rule")
             elif typ == EventTypes.RoomHistoryVisibility:
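
The stats handler accumulates its per-room and per-user deltas in Counter objects keyed by ID (see the room_to_stats_deltas / user_to_stats_deltas annotations above). A standalone sketch of that accumulation pattern, with illustrative field and room names:

    from collections import Counter
    from typing import Dict
    from typing import Counter as CounterType

    room_to_stats_deltas: Dict[str, CounterType[str]] = {}

    def bump(room_id: str, field: str, amount: int = 1) -> None:
        # setdefault gives each room its own Counter; Counter treats missing
        # fields as zero, so deltas can be applied without initialisation.
        room_to_stats_deltas.setdefault(room_id, Counter())[field] += amount

    bump("!room:example.org", "joined_members")
    bump("!room:example.org", "invited_members")
    # -> {'!room:example.org': Counter({'joined_members': 1, 'invited_members': 1})}
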
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index b9a0361059..150a4f291e 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -278,12 +278,14 @@ class SyncHandler:
         self.state_store = self.storage.state
 
         # ExpiringCache((User, Device)) -> LruCache(user_id => event_id)
-        self.lazy_loaded_members_cache = ExpiringCache(
+        self.lazy_loaded_members_cache: ExpiringCache[
+            Tuple[str, Optional[str]], LruCache[str, str]
+        ] = ExpiringCache(
             "lazy_loaded_members_cache",
             self.clock,
             max_len=0,
             expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE,
-        )  # type: ExpiringCache[Tuple[str, Optional[str]], LruCache[str, str]]
+        )
 
     async def wait_for_sync_for_user(
         self,
@@ -440,7 +442,7 @@ class SyncHandler:
             )
             now_token = now_token.copy_and_replace("typing_key", typing_key)
 
-            ephemeral_by_room = {}  # type: JsonDict
+            ephemeral_by_room: JsonDict = {}
 
             for event in typing:
                 # we want to exclude the room_id from the event, but modifying the
@@ -502,7 +504,7 @@ class SyncHandler:
                 # We check if there are any state events, if there are then we pass
                 # all current state events to the filter_events function. This is to
                 # ensure that we always include current state in the timeline
-                current_state_ids = frozenset()  # type: FrozenSet[str]
+                current_state_ids: FrozenSet[str] = frozenset()
                 if any(e.is_state() for e in recents):
                     current_state_ids_map = await self.store.get_current_state_ids(
                         room_id
@@ -783,9 +785,9 @@ class SyncHandler:
     def get_lazy_loaded_members_cache(
         self, cache_key: Tuple[str, Optional[str]]
     ) -> LruCache[str, str]:
-        cache = self.lazy_loaded_members_cache.get(
+        cache: Optional[LruCache[str, str]] = self.lazy_loaded_members_cache.get(
             cache_key
-        )  # type: Optional[LruCache[str, str]]
+        )
         if cache is None:
             logger.debug("creating LruCache for %r", cache_key)
             cache = LruCache(LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE)
@@ -984,7 +986,7 @@ class SyncHandler:
                     if t[0] == EventTypes.Member:
                         cache.set(t[1], event_id)
 
-        state = {}  # type: Dict[str, EventBase]
+        state: Dict[str, EventBase] = {}
         if state_ids:
             state = await self.store.get_events(list(state_ids.values()))
 
@@ -1088,8 +1090,8 @@ class SyncHandler:
 
         logger.debug("Fetching OTK data")
         device_id = sync_config.device_id
-        one_time_key_counts = {}  # type: JsonDict
-        unused_fallback_key_types = []  # type: List[str]
+        one_time_key_counts: JsonDict = {}
+        unused_fallback_key_types: List[str] = []
         if device_id:
             one_time_key_counts = await self.store.count_e2e_one_time_keys(
                 user_id, device_id
@@ -1437,7 +1439,7 @@ class SyncHandler:
         )
 
         if block_all_room_ephemeral:
-            ephemeral_by_room = {}  # type: Dict[str, List[JsonDict]]
+            ephemeral_by_room: Dict[str, List[JsonDict]] = {}
         else:
             now_token, ephemeral_by_room = await self.ephemeral_by_room(
                 sync_result_builder,
@@ -1468,7 +1470,7 @@ class SyncHandler:
 
         # If there is ignored users account data and it matches the proper type,
         # then use it.
-        ignored_users = frozenset()  # type: FrozenSet[str]
+        ignored_users: FrozenSet[str] = frozenset()
         if ignored_account_data:
             ignored_users_data = ignored_account_data.get("ignored_users", {})
             if isinstance(ignored_users_data, dict):
@@ -1586,7 +1588,7 @@ class SyncHandler:
             user_id, since_token.room_key, now_token.room_key
         )
 
-        mem_change_events_by_room_id = {}  # type: Dict[str, List[EventBase]]
+        mem_change_events_by_room_id: Dict[str, List[EventBase]] = {}
         for event in rooms_changed:
             mem_change_events_by_room_id.setdefault(event.room_id, []).append(event)
 
@@ -1599,7 +1601,7 @@ class SyncHandler:
             logger.debug(
                 "Membership changes in %s: [%s]",
                 room_id,
-                ", ".join(("%s (%s)" % (e.event_id, e.membership) for e in events)),
+                ", ".join("%s (%s)" % (e.event_id, e.membership) for e in events),
             )
 
             non_joins = [e for e in events if e.membership != Membership.JOIN]
@@ -1722,7 +1724,7 @@ class SyncHandler:
                 # This is all screaming out for a refactor, as the logic here is
                 # subtle and the moving parts numerous.
                 if leave_event.internal_metadata.is_out_of_band_membership():
-                    batch_events = [leave_event]  # type: Optional[List[EventBase]]
+                    batch_events: Optional[List[EventBase]] = [leave_event]
                 else:
                     batch_events = None
 
@@ -1971,7 +1973,7 @@ class SyncHandler:
             room_id, batch, sync_config, since_token, now_token, full_state=full_state
         )
 
-        summary = {}  # type: Optional[JsonDict]
+        summary: Optional[JsonDict] = {}
 
         # we include a summary in room responses when we're lazy loading
         # members (as the client otherwise doesn't have enough info to form
@@ -1995,7 +1997,7 @@ class SyncHandler:
             )
 
         if room_builder.rtype == "joined":
-            unread_notifications = {}  # type: Dict[str, int]
+            unread_notifications: Dict[str, int] = {}
             room_sync = JoinedSyncResult(
                 room_id=room_id,
                 timeline=batch,
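
The sync handler hunks above show the other common shape of the annotation migration: a value fetched from a cache is annotated as Optional so mypy accepts the create-on-miss fallback. A standalone sketch using plain dicts as stand-ins for Synapse's ExpiringCache and LruCache:

    from typing import Dict, Optional, Tuple

    _caches: Dict[Tuple[str, Optional[str]], Dict[str, str]] = {}

    def get_members_cache(key: Tuple[str, Optional[str]]) -> Dict[str, str]:
        # The annotation tells mypy that .get() may return None before the
        # create-on-miss branch assigns a fresh cache.
        cache: Optional[Dict[str, str]] = _caches.get(key)
        if cache is None:
            cache = {}
            _caches[key] = cache
        return cache
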
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index e22393adc4..0cb651a400 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -68,11 +68,11 @@ class FollowerTypingHandler:
             )
 
         # map room IDs to serial numbers
-        self._room_serials = {}  # type: Dict[str, int]
+        self._room_serials: Dict[str, int] = {}
         # map room IDs to sets of users currently typing
-        self._room_typing = {}  # type: Dict[str, Set[str]]
+        self._room_typing: Dict[str, Set[str]] = {}
 
-        self._member_last_federation_poke = {}  # type: Dict[RoomMember, int]
+        self._member_last_federation_poke: Dict[RoomMember, int] = {}
         self.wheel_timer = WheelTimer(bucket_size=5000)
         self._latest_room_serial = 0
 
@@ -208,6 +208,7 @@ class TypingWriterHandler(FollowerTypingHandler):
 
         self.auth = hs.get_auth()
         self.notifier = hs.get_notifier()
+        self.event_auth_handler = hs.get_event_auth_handler()
 
         self.hs = hs
 
@@ -216,7 +217,7 @@ class TypingWriterHandler(FollowerTypingHandler):
         hs.get_distributor().observe("user_left_room", self.user_left_room)
 
         # clock time we expect to stop
-        self._member_typing_until = {}  # type: Dict[RoomMember, int]
+        self._member_typing_until: Dict[RoomMember, int] = {}
 
         # caches which room_ids changed at which serials
         self._typing_stream_change_cache = StreamChangeCache(
@@ -326,6 +327,19 @@ class TypingWriterHandler(FollowerTypingHandler):
         room_id = content["room_id"]
         user_id = content["user_id"]
 
+        # If we're not in the room just ditch the event entirely. This is
+        # probably an old server that has come back and thinks we're still in
+        # the room (or we've been rejoined to the room by a state reset).
+        is_in_room = await self.event_auth_handler.check_host_in_room(
+            room_id, self.server_name
+        )
+        if not is_in_room:
+            logger.info(
+                "Ignoring typing update from %s as we're not in the room",
+                origin,
+            )
+            return
+
         member = RoomMember(user_id=user_id, room_id=room_id)
 
         # Check that the string is a valid user id
@@ -391,9 +405,9 @@ class TypingWriterHandler(FollowerTypingHandler):
         if last_id == current_id:
             return [], current_id, False
 
-        changed_rooms = self._typing_stream_change_cache.get_all_entities_changed(
-            last_id
-        )  # type: Optional[Iterable[str]]
+        changed_rooms: Optional[
+            Iterable[str]
+        ] = self._typing_stream_change_cache.get_all_entities_changed(last_id)
 
         if changed_rooms is None:
             changed_rooms = self._room_serials
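
The typing-writer change above drops incoming federation typing notifications for rooms this server is no longer in. A rough standalone sketch of the guard; is_host_in_room and apply_update are hypothetical async callables standing in for the event-auth check and the real update path:

    import logging
    from typing import Awaitable, Callable

    logger = logging.getLogger(__name__)

    async def on_typing_edu(
        origin: str,
        room_id: str,
        user_id: str,
        is_host_in_room: Callable[[str], Awaitable[bool]],
        apply_update: Callable[[str, str], Awaitable[None]],
    ) -> None:
        # Ditch the EDU if we are not in the room: the sender is probably an old
        # server that still thinks we are joined (or a state reset re-added us).
        if not await is_host_in_room(room_id):
            logger.info("Ignoring typing update from %s for %s", origin, room_id)
            return
        await apply_update(room_id, user_id)
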
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index dacc4f3076..6edb1da50a 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -52,7 +52,7 @@ class UserDirectoryHandler(StateDeltasHandler):
         self.search_all_users = hs.config.user_directory_search_all_users
         self.spam_checker = hs.get_spam_checker()
         # The current position in the current_state_delta stream
-        self.pos = None  # type: Optional[int]
+        self.pos: Optional[int] = None
 
         # Guard to ensure we only process deltas one at a time
         self._is_processing = False
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index ed4671b7de..578fc48ef4 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -69,7 +69,7 @@ def _get_requested_host(request: IRequest) -> bytes:
         return hostname
 
     # no Host header, use the address/port that the request arrived on
-    host = request.getHost()  # type: Union[address.IPv4Address, address.IPv6Address]
+    host: Union[address.IPv4Address, address.IPv6Address] = request.getHost()
 
     hostname = host.host.encode("ascii")
 
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 1ca6624fd5..2ac76b15c2 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -160,7 +160,7 @@ class _IPBlacklistingResolver:
     def resolveHostName(
         self, recv: IResolutionReceiver, hostname: str, portNumber: int = 0
     ) -> IResolutionReceiver:
-        addresses = []  # type: List[IAddress]
+        addresses: List[IAddress] = []
 
         def _callback() -> None:
             has_bad_ip = False
@@ -333,9 +333,9 @@ class SimpleHttpClient:
         if self._ip_blacklist:
             # If we have an IP blacklist, we need to use a DNS resolver which
             # filters out blacklisted IP addresses, to prevent DNS rebinding.
-            self.reactor = BlacklistingReactorWrapper(
+            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
                 hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
-            )  # type: ISynapseReactor
+            )
         else:
             self.reactor = hs.get_reactor()
 
@@ -349,14 +349,14 @@ class SimpleHttpClient:
         pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
         pool.cachedConnectionTimeout = 2 * 60
 
-        self.agent = ProxyAgent(
+        self.agent: IAgent = ProxyAgent(
             self.reactor,
             hs.get_reactor(),
             connectTimeout=15,
             contextFactory=self.hs.get_http_client_context_factory(),
             pool=pool,
             use_proxy=use_proxy,
-        )  # type: IAgent
+        )
 
         if self._ip_blacklist:
             # If we have an IP blacklist, we then install the blacklisting Agent
@@ -411,7 +411,7 @@ class SimpleHttpClient:
                         cooperator=self._cooperator,
                     )
 
-                request_deferred = treq.request(
+                request_deferred: defer.Deferred = treq.request(
                     method,
                     uri,
                     agent=self.agent,
@@ -421,7 +421,7 @@ class SimpleHttpClient:
                     # response bodies.
                     unbuffered=True,
                     **self._extra_treq_args,
-                )  # type: defer.Deferred
+                )
 
                 # we use our own timeout mechanism rather than treq's as a workaround
                 # for https://twistedmatrix.com/trac/ticket/9534.
@@ -772,7 +772,7 @@ class BodyExceededMaxSize(Exception):
 class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol):
     """A protocol which immediately errors upon receiving data."""
 
-    transport = None  # type: Optional[ITCPTransport]
+    transport: Optional[ITCPTransport] = None
 
     def __init__(self, deferred: defer.Deferred):
         self.deferred = deferred
@@ -798,7 +798,7 @@ class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol):
 class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
     """A protocol which reads body to a stream, erroring if the body exceeds a maximum size."""
 
-    transport = None  # type: Optional[ITCPTransport]
+    transport: Optional[ITCPTransport] = None
 
     def __init__(
         self, stream: ByteWriteable, deferred: defer.Deferred, max_size: Optional[int]
diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py
index 20d39a4ea6..43f2140429 100644
--- a/synapse/http/federation/well_known_resolver.py
+++ b/synapse/http/federation/well_known_resolver.py
@@ -70,10 +70,8 @@ WELL_KNOWN_RETRY_ATTEMPTS = 3
 logger = logging.getLogger(__name__)
 
 
-_well_known_cache = TTLCache("well-known")  # type: TTLCache[bytes, Optional[bytes]]
-_had_valid_well_known_cache = TTLCache(
-    "had-valid-well-known"
-)  # type: TTLCache[bytes, bool]
+_well_known_cache: TTLCache[bytes, Optional[bytes]] = TTLCache("well-known")
+_had_valid_well_known_cache: TTLCache[bytes, bool] = TTLCache("had-valid-well-known")
 
 
 @attr.s(slots=True, frozen=True)
@@ -130,9 +128,10 @@ class WellKnownResolver:
         # requests for the same server in parallel?
         try:
             with Measure(self._clock, "get_well_known"):
-                result, cache_period = await self._fetch_well_known(
-                    server_name
-                )  # type: Optional[bytes], float
+                result: Optional[bytes]
+                cache_period: float
+
+                result, cache_period = await self._fetch_well_known(server_name)
 
         except _FetchWellKnownFailure as e:
             if prev_result and e.temporary:
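
The well-known resolver hunk above illustrates one wrinkle of the migration: PEP 526 does not allow annotating the targets of a tuple-unpacking assignment inline, so each name is declared on its own line first. A minimal standalone example of that pattern:

    from typing import Optional, Tuple

    def fetch_well_known() -> Tuple[Optional[bytes], float]:
        # Hypothetical fetch returning a body (or None) and a cache period.
        return b"{}", 3600.0

    result: Optional[bytes]
    cache_period: float
    result, cache_period = fetch_well_known()
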
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index b8849c0150..2efa15bf04 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -43,6 +43,7 @@ from twisted.internet import defer
 from twisted.internet.error import DNSLookupError
 from twisted.internet.interfaces import IReactorTime
 from twisted.internet.task import _EPSILON, Cooperator
+from twisted.web.client import ResponseFailed
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IBodyProducer, IResponse
 
@@ -105,7 +106,7 @@ class ByteParser(ByteWriteable, Generic[T], abc.ABC):
     the parsed data.
     """
 
-    CONTENT_TYPE = abc.abstractproperty()  # type: str  # type: ignore
+    CONTENT_TYPE: str = abc.abstractproperty()  # type: ignore
     """The expected content type of the response, e.g. `application/json`. If
     the content type doesn't match we fail the request.
     """
@@ -262,6 +263,15 @@ async def _handle_response(
             request.uri.decode("ascii"),
         )
         raise RequestSendFailed(e, can_retry=True) from e
+    except ResponseFailed as e:
+        logger.warning(
+            "{%s} [%s] Failed to read response - %s %s",
+            request.txn_id,
+            request.destination,
+            request.method,
+            request.uri.decode("ascii"),
+        )
+        raise RequestSendFailed(e, can_retry=True) from e
     except Exception as e:
         logger.warning(
             "{%s} [%s] Error reading response %s %s: %s",
@@ -317,11 +327,11 @@ class MatrixFederationHttpClient:
 
         # We need to use a DNS resolver which filters out blacklisted IP
         # addresses, to prevent DNS rebinding.
-        self.reactor = BlacklistingReactorWrapper(
+        self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
             hs.get_reactor(),
             hs.config.federation_ip_range_whitelist,
             hs.config.federation_ip_range_blacklist,
-        )  # type: ISynapseReactor
+        )
 
         user_agent = hs.version_string
         if hs.config.user_agent_suffix:
@@ -494,7 +504,7 @@ class MatrixFederationHttpClient:
         )
 
         # Inject the span into the headers
-        headers_dict = {}  # type: Dict[bytes, List[bytes]]
+        headers_dict: Dict[bytes, List[bytes]] = {}
         opentracing.inject_header_dict(headers_dict, request.destination)
 
         headers_dict[b"User-Agent"] = [self.version_string_bytes]
@@ -523,9 +533,9 @@ class MatrixFederationHttpClient:
                             destination_bytes, method_bytes, url_to_sign_bytes, json
                         )
                         data = encode_canonical_json(json)
-                        producer = QuieterFileBodyProducer(
+                        producer: Optional[IBodyProducer] = QuieterFileBodyProducer(
                             BytesIO(data), cooperator=self._cooperator
-                        )  # type: Optional[IBodyProducer]
+                        )
                     else:
                         producer = None
                         auth_headers = self.build_auth_headers(
@@ -1137,6 +1147,24 @@ class MatrixFederationHttpClient:
                 msg,
             )
             raise SynapseError(502, msg, Codes.TOO_LARGE)
+        except defer.TimeoutError as e:
+            logger.warning(
+                "{%s} [%s] Timed out reading response - %s %s",
+                request.txn_id,
+                request.destination,
+                request.method,
+                request.uri.decode("ascii"),
+            )
+            raise RequestSendFailed(e, can_retry=True) from e
+        except ResponseFailed as e:
+            logger.warning(
+                "{%s} [%s] Failed to read response - %s %s",
+                request.txn_id,
+                request.destination,
+                request.method,
+                request.uri.decode("ascii"),
+            )
+            raise RequestSendFailed(e, can_retry=True) from e
         except Exception as e:
             logger.warning(
                 "{%s} [%s] Error reading response: %s",
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 7dfae8b786..f7193e60bd 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -117,7 +117,8 @@ class ProxyAgent(_AgentBase):
             https_proxy = proxies["https"].encode() if "https" in proxies else None
             no_proxy = proxies["no"] if "no" in proxies else None
 
-        # Parse credentials from https proxy connection string if present
+        # Parse credentials from http and https proxy connection string if present
+        self.http_proxy_creds, http_proxy = parse_username_password(http_proxy)
         self.https_proxy_creds, https_proxy = parse_username_password(https_proxy)
 
         self.http_proxy_endpoint = _http_proxy_endpoint(
@@ -171,7 +172,7 @@ class ProxyAgent(_AgentBase):
         """
         uri = uri.strip()
         if not _VALID_URI.match(uri):
-            raise ValueError("Invalid URI {!r}".format(uri))
+            raise ValueError(f"Invalid URI {uri!r}")
 
         parsed_uri = URI.fromBytes(uri)
         pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
@@ -189,6 +190,15 @@ class ProxyAgent(_AgentBase):
             and self.http_proxy_endpoint
             and not should_skip_proxy
         ):
+            # Determine whether we need to set Proxy-Authorization headers
+            if self.http_proxy_creds:
+                # Set a Proxy-Authorization header
+                if headers is None:
+                    headers = Headers()
+                headers.addRawHeader(
+                    b"Proxy-Authorization",
+                    self.http_proxy_creds.as_proxy_authorization_value(),
+                )
             # Cache *all* connections under the same key, since we are only
             # connecting to a single destination, the proxy:
             pool_key = ("http-proxy", self.http_proxy_endpoint)
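
The proxyagent change above starts parsing credentials out of the HTTP proxy URL (not just the HTTPS one) and sends them as a Proxy-Authorization header when routing requests through that proxy. A stdlib-only sketch of the idea; the real agent uses its own parse_username_password and ProxyCredentials helpers rather than urllib:

    import base64
    from typing import Optional
    from urllib.parse import urlparse

    def proxy_authorization_value(proxy_url: str) -> Optional[bytes]:
        """Build a basic-auth Proxy-Authorization value from user:pass in a proxy URL."""
        parsed = urlparse(proxy_url)
        if not parsed.username:
            return None
        creds = f"{parsed.username}:{parsed.password or ''}".encode("ascii")
        return b"Basic " + base64.b64encode(creds)

    # proxy_authorization_value("http://bob:secret@proxy.example.com:8888")
    #   -> b'Basic Ym9iOnNlY3JldA=='
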
diff --git a/synapse/http/server.py b/synapse/http/server.py
index efbc6d5b25..b79fa722e9 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -81,7 +81,7 @@ def return_json_error(f: failure.Failure, request: SynapseRequest) -> None:
 
     if f.check(SynapseError):
         # mypy doesn't understand that f.check asserts the type.
-        exc = f.value  # type: SynapseError  # type: ignore
+        exc: SynapseError = f.value  # type: ignore
         error_code = exc.code
         error_dict = exc.error_dict()
 
@@ -132,7 +132,7 @@ def return_html_error(
     """
     if f.check(CodeMessageException):
         # mypy doesn't understand that f.check asserts the type.
-        cme = f.value  # type: CodeMessageException  # type: ignore
+        cme: CodeMessageException = f.value  # type: ignore
         code = cme.code
         msg = cme.msg
 
@@ -404,7 +404,7 @@ class JsonResource(DirectServeJsonResource):
             key word arguments to pass to the callback
         """
         # At this point the path must be bytes.
-        request_path_bytes = request.path  # type: bytes  # type: ignore
+        request_path_bytes: bytes = request.path  # type: ignore
         request_path = request_path_bytes.decode("ascii")
         # Treat HEAD requests as GET requests.
         request_method = request.method
@@ -557,7 +557,7 @@ class _ByteProducer:
         request: Request,
         iterator: Iterator[bytes],
     ):
-        self._request = request  # type: Optional[Request]
+        self._request: Optional[Request] = request
         self._iterator = iterator
         self._paused = False
 
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 6ba2ce1e53..04560fb589 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -205,7 +205,7 @@ def parse_string(
             parameter is present, must be one of a list of allowed values and
             is not one of those allowed values.
     """
-    args = request.args  # type: Dict[bytes, List[bytes]]  # type: ignore
+    args: Dict[bytes, List[bytes]] = request.args  # type: ignore
     return parse_string_from_args(
         args,
         name,
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 40754b7bea..190084e8aa 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -64,16 +64,16 @@ class SynapseRequest(Request):
     def __init__(self, channel, *args, max_request_body_size=1024, **kw):
         Request.__init__(self, channel, *args, **kw)
         self._max_request_body_size = max_request_body_size
-        self.site = channel.site  # type: SynapseSite
+        self.site: SynapseSite = channel.site
         self._channel = channel  # this is used by the tests
         self.start_time = 0.0
 
         # The requester, if authenticated. For federation requests this is the
         # server name, for client requests this is the Requester object.
-        self._requester = None  # type: Optional[Union[Requester, str]]
+        self._requester: Optional[Union[Requester, str]] = None
 
         # we can't yet create the logcontext, as we don't know the method.
-        self.logcontext = None  # type: Optional[LoggingContext]
+        self.logcontext: Optional[LoggingContext] = None
 
         global _next_request_seq
         self.request_seq = _next_request_seq
@@ -152,7 +152,7 @@ class SynapseRequest(Request):
         Returns:
             The redacted URI as a string.
         """
-        uri = self.uri  # type: Union[bytes, str]
+        uri: Union[bytes, str] = self.uri
         if isinstance(uri, bytes):
             uri = uri.decode("ascii", errors="replace")
         return redact_uri(uri)
@@ -167,7 +167,7 @@ class SynapseRequest(Request):
         Returns:
             The request method as a string.
         """
-        method = self.method  # type: Union[bytes, str]
+        method: Union[bytes, str] = self.method
         if isinstance(method, bytes):
             return self.method.decode("ascii")
         return method
@@ -384,7 +384,7 @@ class SynapseRequest(Request):
         # authenticated (e.g. an admin is puppetting a user) then we log both.
         requester, authenticated_entity = self.get_authenticated_entity()
         if authenticated_entity:
-            requester = "{}.{}".format(authenticated_entity, requester)
+            requester = f"{authenticated_entity}.{requester}"
 
         self.site.access_logger.log(
             log_level,
@@ -434,8 +434,8 @@ class XForwardedForRequest(SynapseRequest):
     """
 
     # the client IP and ssl flag, as extracted from the headers.
-    _forwarded_for = None  # type: Optional[_XForwardedForAddress]
-    _forwarded_https = False  # type: bool
+    _forwarded_for: "Optional[_XForwardedForAddress]" = None
+    _forwarded_https: bool = False
 
     def requestReceived(self, command, path, version):
         # this method is called by the Channel once the full request has been
diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
index c515690b38..8202d0494d 100644
--- a/synapse/logging/_remote.py
+++ b/synapse/logging/_remote.py
@@ -110,9 +110,9 @@ class RemoteHandler(logging.Handler):
         self.port = port
         self.maximum_buffer = maximum_buffer
 
-        self._buffer = deque()  # type: Deque[logging.LogRecord]
-        self._connection_waiter = None  # type: Optional[Deferred]
-        self._producer = None  # type: Optional[LogProducer]
+        self._buffer: Deque[logging.LogRecord] = deque()
+        self._connection_waiter: Optional[Deferred] = None
+        self._producer: Optional[LogProducer] = None
 
         # Connect without DNS lookups if it's a direct IP.
         if _reactor is None:
@@ -123,9 +123,9 @@ class RemoteHandler(logging.Handler):
         try:
             ip = ip_address(self.host)
             if isinstance(ip, IPv4Address):
-                endpoint = TCP4ClientEndpoint(
+                endpoint: IStreamClientEndpoint = TCP4ClientEndpoint(
                     _reactor, self.host, self.port
-                )  # type: IStreamClientEndpoint
+                )
             elif isinstance(ip, IPv6Address):
                 endpoint = TCP6ClientEndpoint(_reactor, self.host, self.port)
             else:
@@ -165,7 +165,7 @@ class RemoteHandler(logging.Handler):
         def writer(result: Protocol) -> None:
             # Force recognising transport as a Connection and not the more
             # generic ITransport.
-            transport = result.transport  # type: Connection  # type: ignore
+            transport: Connection = result.transport  # type: ignore
 
             # We have a connection. If we already have a producer, and its
             # transport is the same, just trigger a resumeProducing.
@@ -188,7 +188,7 @@ class RemoteHandler(logging.Handler):
             self._producer.resumeProducing()
             self._connection_waiter = None
 
-        deferred = self._service.whenConnected(failAfterFailures=1)  # type: Deferred
+        deferred: Deferred = self._service.whenConnected(failAfterFailures=1)
         deferred.addCallbacks(writer, fail)
         self._connection_waiter = deferred
 
diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py
index c7a971a9d6..b9933a1528 100644
--- a/synapse/logging/_structured.py
+++ b/synapse/logging/_structured.py
@@ -63,7 +63,7 @@ def parse_drain_configs(
             DrainType.CONSOLE_JSON,
             DrainType.FILE_JSON,
         ):
-            formatter = "json"  # type: Optional[str]
+            formatter: Optional[str] = "json"
         elif logging_type in (
             DrainType.CONSOLE_JSON_TERSE,
             DrainType.NETWORK_JSON_TERSE,
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 7fc11a9ac2..18ac507802 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -113,13 +113,13 @@ class ContextResourceUsage:
             self.reset()
         else:
             # FIXME: mypy can't infer the types set via reset() above, so specify explicitly for now
-            self.ru_utime = copy_from.ru_utime  # type: float
-            self.ru_stime = copy_from.ru_stime  # type: float
-            self.db_txn_count = copy_from.db_txn_count  # type: int
+            self.ru_utime: float = copy_from.ru_utime
+            self.ru_stime: float = copy_from.ru_stime
+            self.db_txn_count: int = copy_from.db_txn_count
 
-            self.db_txn_duration_sec = copy_from.db_txn_duration_sec  # type: float
-            self.db_sched_duration_sec = copy_from.db_sched_duration_sec  # type: float
-            self.evt_db_fetch_count = copy_from.evt_db_fetch_count  # type: int
+            self.db_txn_duration_sec: float = copy_from.db_txn_duration_sec
+            self.db_sched_duration_sec: float = copy_from.db_sched_duration_sec
+            self.evt_db_fetch_count: int = copy_from.evt_db_fetch_count
 
     def copy(self) -> "ContextResourceUsage":
         return ContextResourceUsage(copy_from=self)
@@ -289,12 +289,12 @@ class LoggingContext:
 
         # The thread resource usage when the logcontext became active. None
         # if the context is not currently active.
-        self.usage_start = None  # type: Optional[resource._RUsage]
+        self.usage_start: Optional[resource._RUsage] = None
 
         self.main_thread = get_thread_id()
         self.request = None
         self.tag = ""
-        self.scope = None  # type: Optional[_LogContextScope]
+        self.scope: Optional["_LogContextScope"] = None
 
         # keep track of whether we have hit the __exit__ block for this context
         # (suggesting that the thing that created the context thinks it should
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 140ed711e3..ecd51f1b4a 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -251,7 +251,7 @@ try:
             except Exception:
                 logger.exception("Failed to report span")
 
-    RustReporter = _WrappedRustReporter  # type: Optional[Type[_WrappedRustReporter]]
+    RustReporter: Optional[Type[_WrappedRustReporter]] = _WrappedRustReporter
 except ImportError:
     RustReporter = None
 
@@ -286,7 +286,7 @@ class SynapseBaggage:
 # Block everything by default
 # A regex which matches the server_names to expose traces for.
 # None means 'block everything'.
-_homeserver_whitelist = None  # type: Optional[Pattern[str]]
+_homeserver_whitelist: Optional[Pattern[str]] = None
 
 # Util methods
 
@@ -374,7 +374,7 @@ def init_tracer(hs: "HomeServer"):
 
     config = JaegerConfig(
         config=hs.config.jaeger_config,
-        service_name="{} {}".format(hs.config.server_name, hs.get_instance_name()),
+        service_name=f"{hs.config.server_name} {hs.get_instance_name()}",
         scope_manager=LogContextScopeManager(hs.config),
         metrics_factory=PrometheusMetricsFactory(),
     )
@@ -662,7 +662,7 @@ def inject_header_dict(
 
     span = opentracing.tracer.active_span
 
-    carrier = {}  # type: Dict[str, str]
+    carrier: Dict[str, str] = {}
     opentracing.tracer.inject(span.context, opentracing.Format.HTTP_HEADERS, carrier)
 
     for key, value in carrier.items():
@@ -704,7 +704,7 @@ def get_active_span_text_map(destination=None):
     if destination and not whitelisted_homeserver(destination):
         return {}
 
-    carrier = {}  # type: Dict[str, str]
+    carrier: Dict[str, str] = {}
     opentracing.tracer.inject(
         opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier
     )
@@ -718,7 +718,7 @@ def active_span_context_as_string():
     Returns:
         The active span context encoded as a string.
     """
-    carrier = {}  # type: Dict[str, str]
+    carrier: Dict[str, str] = {}
     if opentracing:
         opentracing.tracer.inject(
             opentracing.tracer.active_span.context, opentracing.Format.TEXT_MAP, carrier
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index fef2846669..f237b8a236 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -46,7 +46,7 @@ logger = logging.getLogger(__name__)
 METRICS_PREFIX = "/_synapse/metrics"
 
 running_on_pypy = platform.python_implementation() == "PyPy"
-all_gauges = {}  # type: Dict[str, Union[LaterGauge, InFlightGauge]]
+all_gauges: "Dict[str, Union[LaterGauge, InFlightGauge]]" = {}
 
 HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
 
@@ -130,7 +130,7 @@ class InFlightGauge:
         )
 
         # Counts number of in flight blocks for a given set of label values
-        self._registrations = {}  # type: Dict
+        self._registrations: Dict = {}
 
         # Protects access to _registrations
         self._lock = threading.Lock()
@@ -248,7 +248,7 @@ class GaugeBucketCollector:
 
         # We initially set this to None. We won't report metrics until
         # this has been initialised after a successful data update
-        self._metric = None  # type: Optional[GaugeHistogramMetricFamily]
+        self._metric: Optional[GaugeHistogramMetricFamily] = None
 
         registry.register(self)
 
diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py
index 8002be56e0..bb9bcb5592 100644
--- a/synapse/metrics/_exposition.py
+++ b/synapse/metrics/_exposition.py
@@ -34,7 +34,7 @@ from twisted.web.resource import Resource
 
 from synapse.util import caches
 
-CONTENT_TYPE_LATEST = str("text/plain; version=0.0.4; charset=utf-8")
+CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
 
 
 INF = float("inf")
@@ -55,8 +55,8 @@ def floatToGoString(d):
         # Go switches to exponents sooner than Python.
         # We only need to care about positive values for le/quantile.
         if d > 0 and dot > 6:
-            mantissa = "{0}.{1}{2}".format(s[0], s[1:dot], s[dot + 1 :]).rstrip("0.")
-            return "{0}e+0{1}".format(mantissa, dot - 1)
+            mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.")
+            return f"{mantissa}e+0{dot - 1}"
         return s
 
 
@@ -65,7 +65,7 @@ def sample_line(line, name):
         labelstr = "{{{0}}}".format(
             ",".join(
                 [
-                    '{0}="{1}"'.format(
+                    '{}="{}"'.format(
                         k,
                         v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""),
                     )
@@ -78,10 +78,8 @@ def sample_line(line, name):
     timestamp = ""
     if line.timestamp is not None:
         # Convert to milliseconds.
-        timestamp = " {0:d}".format(int(float(line.timestamp) * 1000))
-    return "{0}{1} {2}{3}\n".format(
-        name, labelstr, floatToGoString(line.value), timestamp
-    )
+        timestamp = f" {int(float(line.timestamp) * 1000):d}"
+    return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp)
 
 
 def generate_latest(registry, emit_help=False):
@@ -118,14 +116,14 @@ def generate_latest(registry, emit_help=False):
         # Output in the old format for compatibility.
         if emit_help:
             output.append(
-                "# HELP {0} {1}\n".format(
+                "# HELP {} {}\n".format(
                     mname,
                     metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
                 )
             )
-        output.append("# TYPE {0} {1}\n".format(mname, mtype))
+        output.append(f"# TYPE {mname} {mtype}\n")
 
-        om_samples = {}  # type: Dict[str, List[str]]
+        om_samples: Dict[str, List[str]] = {}
         for s in metric.samples:
             for suffix in ["_created", "_gsum", "_gcount"]:
                 if s.name == metric.name + suffix:
@@ -143,13 +141,13 @@ def generate_latest(registry, emit_help=False):
         for suffix, lines in sorted(om_samples.items()):
             if emit_help:
                 output.append(
-                    "# HELP {0}{1} {2}\n".format(
+                    "# HELP {}{} {}\n".format(
                         metric.name,
                         suffix,
                         metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
                     )
                 )
-            output.append("# TYPE {0}{1} gauge\n".format(metric.name, suffix))
+            output.append(f"# TYPE {metric.name}{suffix} gauge\n")
             output.extend(lines)
 
         # Get rid of the weird colon things while we're at it
@@ -163,12 +161,12 @@ def generate_latest(registry, emit_help=False):
         # Also output in the new format, if it's different.
         if emit_help:
             output.append(
-                "# HELP {0} {1}\n".format(
+                "# HELP {} {}\n".format(
                     mnewname,
                     metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
                 )
             )
-        output.append("# TYPE {0} {1}\n".format(mnewname, mtype))
+        output.append(f"# TYPE {mnewname} {mtype}\n")
 
         for s in metric.samples:
             # Get rid of the OpenMetrics specific samples (we should already have
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index de96ca0821..3a14260752 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -93,7 +93,7 @@ _background_process_db_sched_duration = Counter(
 # map from description to a counter, so that we can name our logcontexts
 # incrementally. (It actually duplicates _background_process_start_count, but
 # it's much simpler to do so than to try to combine them.)
-_background_process_counts = {}  # type: Dict[str, int]
+_background_process_counts: Dict[str, int] = {}
 
 # Set of all running background processes that became active since the
 # last time metrics were scraped (i.e. background processes that performed some
@@ -103,7 +103,7 @@ _background_process_counts = {}  # type: Dict[str, int]
 # background processes stacking up behind a lock or linearizer, where we then
 # only need to iterate over and update metrics for the process that have
 # actually been active and can ignore the idle ones.
-_background_processes_active_since_last_scrape = set()  # type: Set[_BackgroundProcess]
+_background_processes_active_since_last_scrape: "Set[_BackgroundProcess]" = set()
 
 # A lock that covers the above set and dict
 _bg_metrics_lock = threading.Lock()
@@ -137,8 +137,7 @@ class _Collector:
             _background_process_db_txn_duration,
             _background_process_db_sched_duration,
         ):
-            for r in m.collect():
-                yield r
+            yield from m.collect()
 
 
 REGISTRY.register(_Collector())
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 721c45abac..5df9349134 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -12,18 +12,42 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import email.utils
 import logging
-from typing import TYPE_CHECKING, Any, Generator, Iterable, List, Optional, Tuple
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+)
+
+import jinja2
 
 from twisted.internet import defer
 from twisted.web.resource import IResource
 
 from synapse.events import EventBase
 from synapse.http.client import SimpleHttpClient
+from synapse.http.server import (
+    DirectServeHtmlResource,
+    DirectServeJsonResource,
+    respond_with_html,
+)
+from synapse.http.servlet import parse_json_object_from_request
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage.database import DatabasePool, LoggingTransaction
+from synapse.storage.databases.main.roommember import ProfileInfo
 from synapse.storage.state import StateFilter
-from synapse.types import JsonDict, UserID, create_requester
+from synapse.types import JsonDict, Requester, UserID, create_requester
+from synapse.util import Clock
+from synapse.util.caches.descriptors import cached
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -33,7 +57,20 @@ This package defines the 'stable' API which can be used by extension modules whi
 are loaded into Synapse.
 """
 
-__all__ = ["errors", "make_deferred_yieldable", "run_in_background", "ModuleApi"]
+__all__ = [
+    "errors",
+    "make_deferred_yieldable",
+    "parse_json_object_from_request",
+    "respond_with_html",
+    "run_in_background",
+    "cached",
+    "UserID",
+    "DatabasePool",
+    "LoggingTransaction",
+    "DirectServeHtmlResource",
+    "DirectServeJsonResource",
+    "ModuleApi",
+]
 
 logger = logging.getLogger(__name__)
 
@@ -52,12 +89,27 @@ class ModuleApi:
         self._server_name = hs.hostname
         self._presence_stream = hs.get_event_sources().sources["presence"]
         self._state = hs.get_state_handler()
+        self._clock: Clock = hs.get_clock()
+        self._send_email_handler = hs.get_send_email_handler()
+
+        try:
+            app_name = self._hs.config.email_app_name
+
+            self._from_string = self._hs.config.email_notif_from % {"app": app_name}
+        except (KeyError, TypeError):
+            # If substitution fails (which can happen if the string contains
+            # placeholders other than just "app", or if the value being substituted
+            # is not a string), fall back to the bare string.
+            self._from_string = self._hs.config.email_notif_from
+
+        self._raw_from = email.utils.parseaddr(self._from_string)[1]
 
         # We expose these as properties below in order to attach a helpful docstring.
-        self._http_client = hs.get_simple_http_client()  # type: SimpleHttpClient
+        self._http_client: SimpleHttpClient = hs.get_simple_http_client()
         self._public_room_list_manager = PublicRoomListManager(hs)
 
         self._spam_checker = hs.get_spam_checker()
+        self._account_validity_handler = hs.get_account_validity_handler()
 
     #################################################################################
     # The following methods should only be called during the module's initialisation.
@@ -67,6 +119,11 @@ class ModuleApi:
         """Registers callbacks for spam checking capabilities."""
         return self._spam_checker.register_callbacks
 
+    @property
+    def register_account_validity_callbacks(self):
+        """Registers callbacks for account validity capabilities."""
+        return self._account_validity_handler.register_account_validity_callbacks
+
     def register_web_resource(self, path: str, resource: IResource):
         """Registers a web resource to be served at the given path.
 
@@ -101,22 +158,56 @@ class ModuleApi:
         """
         return self._public_room_list_manager
 
-    def get_user_by_req(self, req, allow_guest=False):
+    @property
+    def public_baseurl(self) -> str:
+        """The configured public base URL for this homeserver."""
+        return self._hs.config.public_baseurl
+
+    @property
+    def email_app_name(self) -> str:
+        """The application name configured in the homeserver's configuration."""
+        return self._hs.config.email.email_app_name
+
+    async def get_user_by_req(
+        self,
+        req: SynapseRequest,
+        allow_guest: bool = False,
+        allow_expired: bool = False,
+    ) -> Requester:
         """Check the access_token provided for a request
 
         Args:
-            req (twisted.web.server.Request): Incoming HTTP request
-            allow_guest (bool): True if guest users should be allowed. If this
+            req: Incoming HTTP request
+            allow_guest: True if guest users should be allowed. If this
                 is False, and the access token is for a guest user, an
                 AuthError will be thrown
+            allow_expired: True if expired users should be allowed. If this
+                is False, and the access token is for an expired user, an
+                AuthError will be thrown
+
         Returns:
-            twisted.internet.defer.Deferred[synapse.types.Requester]:
-                the requester for this request
+            The requester for this request
+
         Raises:
-            synapse.api.errors.AuthError: if no user by that token exists,
+            InvalidClientCredentialsError: if no user by that token exists,
                 or the token is invalid.
         """
-        return self._auth.get_user_by_req(req, allow_guest)
+        return await self._auth.get_user_by_req(
+            req,
+            allow_guest,
+            allow_expired=allow_expired,
+        )
+
+    async def is_user_admin(self, user_id: str) -> bool:
+        """Checks if a user is a server admin.
+
+        Args:
+            user_id: The Matrix ID of the user to check.
+
+        Returns:
+            True if the user is a server admin, False otherwise.
+        """
+        return await self._store.is_server_admin(UserID.from_string(user_id))
 
     def get_qualified_user_id(self, username):
         """Qualify a user id, if necessary
@@ -134,6 +225,32 @@ class ModuleApi:
             return username
         return UserID(username, self._hs.hostname).to_string()
 
+    async def get_profile_for_user(self, localpart: str) -> ProfileInfo:
+        """Look up the profile info for the user with the given localpart.
+
+        Args:
+            localpart: The localpart to look up profile information for.
+
+        Returns:
+            The profile information (i.e. display name and avatar URL).
+        """
+        return await self._store.get_profileinfo(localpart)
+
+    async def get_threepids_for_user(self, user_id: str) -> List[Dict[str, str]]:
+        """Look up the threepids (email addresses and phone numbers) associated with the
+        given Matrix user ID.
+
+        Args:
+            user_id: The Matrix user ID to look up threepids for.
+
+        Returns:
+            A list of threepids, each represented by a dictionary with a
+            "medium" key whose value is "email" for email addresses and
+            "msisdn" for phone numbers, and an "address" key whose value is the
+            threepid's address.
+        """
+        return await self._store.user_get_threepids(user_id)
+
     def check_user_exists(self, user_id):
         """Check if user exists.
 
@@ -464,6 +581,88 @@ class ModuleApi:
                 presence_events, destination
             )
 
+    def looping_background_call(
+        self,
+        f: Callable,
+        msec: float,
+        *args,
+        desc: Optional[str] = None,
+        **kwargs,
+    ):
+        """Wraps a function as a background process and calls it repeatedly.
+
+        Waits `msec` initially before calling `f` for the first time.
+
+        Args:
+            f: The function to call repeatedly. f can be either synchronous or
+                asynchronous, and must follow Synapse's logcontext rules.
+                More info about logcontexts is available at
+                https://matrix-org.github.io/synapse/latest/log_contexts.html
+            msec: How long to wait between calls in milliseconds.
+            *args: Positional arguments to pass to the function.
+            desc: The background task's description. Defaults to the function's name.
+            **kwargs: Keyword arguments to pass to the function.
+        """
+        if desc is None:
+            desc = f.__name__
+
+        if self._hs.config.run_background_tasks:
+            self._clock.looping_call(
+                run_as_background_process,
+                msec,
+                desc,
+                f,
+                *args,
+                **kwargs,
+            )
+        else:
+            logger.warning(
+                "Not running looping call %s as the configuration forbids it",
+                f,
+            )
+
+    async def send_mail(
+        self,
+        recipient: str,
+        subject: str,
+        html: str,
+        text: str,
+    ):
+        """Send an email on behalf of the homeserver.
+
+        Args:
+            recipient: The email address for the recipient.
+            subject: The email's subject.
+            html: The email's HTML content.
+            text: The email's text content.
+        """
+        await self._send_email_handler.send_email(
+            email_address=recipient,
+            subject=subject,
+            app_name=self.email_app_name,
+            html=html,
+            text=text,
+        )
+
+    def read_templates(
+        self,
+        filenames: List[str],
+        custom_template_directory: Optional[str] = None,
+    ) -> List[jinja2.Template]:
+        """Read and load the content of the template files at the given location.
+        By default, Synapse will look for these templates in its configured template
+        directory, but another directory to search in can be provided.
+
+        Args:
+            filenames: The names of the template files to look for.
+            custom_template_directory: An additional directory to look for the files in.
+
+        Returns:
+            A list containing the loaded templates, in the same order as the
+            filenames parameter.
+        """
+        return self._hs.config.read_templates(filenames, custom_template_directory)
+
 
 class PublicRoomListManager:
     """Contains methods for adding to, removing from and querying whether a room
diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py
index 02bbb0be39..98ea911a81 100644
--- a/synapse/module_api/errors.py
+++ b/synapse/module_api/errors.py
@@ -14,5 +14,9 @@
 
 """Exception types which are exposed as part of the stable module API"""
 
-from synapse.api.errors import RedirectException, SynapseError  # noqa: F401
+from synapse.api.errors import (  # noqa: F401
+    InvalidClientCredentialsError,
+    RedirectException,
+    SynapseError,
+)
 from synapse.config._base import ConfigError  # noqa: F401
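
InvalidClientCredentialsError is now part of the stable module API, matching the updated Raises section of ModuleApi.get_user_by_req. A hedged sketch of a module-provided resource catching it; the _async_render_GET handler name follows the DirectServeJsonResource convention and is an assumption here, not something this patch defines.

    from synapse.module_api import DirectServeJsonResource, ModuleApi
    from synapse.module_api.errors import InvalidClientCredentialsError


    class WhoAmIResource(DirectServeJsonResource):
        def __init__(self, api: ModuleApi):
            super().__init__()
            self._api = api

        async def _async_render_GET(self, request):
            try:
                requester = await self._api.get_user_by_req(request)
            except InvalidClientCredentialsError:
                return 401, {"error": "Missing or invalid access token"}
            return 200, {"user_id": requester.user.to_string()}
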
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 3c3cc47631..c5fbebc17d 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -203,21 +203,21 @@ class Notifier:
     UNUSED_STREAM_EXPIRY_MS = 10 * 60 * 1000
 
     def __init__(self, hs: "synapse.server.HomeServer"):
-        self.user_to_user_stream = {}  # type: Dict[str, _NotifierUserStream]
-        self.room_to_user_streams = {}  # type: Dict[str, Set[_NotifierUserStream]]
+        self.user_to_user_stream: Dict[str, _NotifierUserStream] = {}
+        self.room_to_user_streams: Dict[str, Set[_NotifierUserStream]] = {}
 
         self.hs = hs
         self.storage = hs.get_storage()
         self.event_sources = hs.get_event_sources()
         self.store = hs.get_datastore()
-        self.pending_new_room_events = []  # type: List[_PendingRoomEventEntry]
+        self.pending_new_room_events: List[_PendingRoomEventEntry] = []
 
         # Called when there are new things to stream over replication
-        self.replication_callbacks = []  # type: List[Callable[[], None]]
+        self.replication_callbacks: List[Callable[[], None]] = []
 
         # Called when remote servers have come back online after having been
         # down.
-        self.remote_server_up_callbacks = []  # type: List[Callable[[str], None]]
+        self.remote_server_up_callbacks: List[Callable[[str], None]] = []
 
         self.clock = hs.get_clock()
         self.appservice_handler = hs.get_application_service_handler()
@@ -237,7 +237,7 @@ class Notifier:
         # when rendering the metrics page, which is likely once per minute at
         # most when scraping it.
         def count_listeners():
-            all_user_streams = set()  # type: Set[_NotifierUserStream]
+            all_user_streams: Set[_NotifierUserStream] = set()
 
             for streams in list(self.room_to_user_streams.values()):
                 all_user_streams |= streams
@@ -329,8 +329,8 @@ class Notifier:
         pending = self.pending_new_room_events
         self.pending_new_room_events = []
 
-        users = set()  # type: Set[UserID]
-        rooms = set()  # type: Set[str]
+        users: Set[UserID] = set()
+        rooms: Set[str] = set()
 
         for entry in pending:
             if entry.event_pos.persisted_after(max_room_stream_token):
@@ -580,7 +580,7 @@ class Notifier:
             if after_token == before_token:
                 return EventStreamResult([], (from_token, from_token))
 
-            events = []  # type: List[EventBase]
+            events: List[EventBase] = []
             end_token = from_token
 
             for name, source in self.event_sources.sources.items():
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 350646f458..c337e530d3 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -104,7 +104,7 @@ class BulkPushRuleEvaluator:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
         self.store = hs.get_datastore()
-        self.auth = hs.get_auth()
+        self._event_auth_handler = hs.get_event_auth_handler()
 
         # Used by `RulesForRoom` to ensure only one thing mutates the cache at a
         # time. Keyed off room_id.
@@ -172,7 +172,7 @@ class BulkPushRuleEvaluator:
             # not having a power level event is an extreme edge case
             auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)}
         else:
-            auth_events_ids = self.auth.compute_auth_events(
+            auth_events_ids = self._event_auth_handler.compute_auth_events(
                 event, prev_state_ids, for_verification=False
             )
             auth_events_dict = await self.store.get_events(auth_events_ids)
@@ -194,7 +194,7 @@ class BulkPushRuleEvaluator:
         count_as_unread = _should_count_as_unread(event, context)
 
         rules_by_user = await self._get_rules_for_event(event, context)
-        actions_by_user = {}  # type: Dict[str, List[Union[dict, str]]]
+        actions_by_user: Dict[str, List[Union[dict, str]]] = {}
 
         room_members = await self.store.get_joined_users_from_context(event, context)
 
@@ -207,7 +207,7 @@ class BulkPushRuleEvaluator:
             event, len(room_members), sender_power_level, power_levels
         )
 
-        condition_cache = {}  # type: Dict[str, bool]
+        condition_cache: Dict[str, bool] = {}
 
         # If the event is not a state event check if any users ignore the sender.
         if not event.is_state():
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 2ee0ccd58a..1fc9716a34 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -26,10 +26,10 @@ def format_push_rules_for_user(user: UserID, ruleslist) -> Dict[str, Dict[str, l
     # We're going to be mutating this a lot, so do a deep copy
     ruleslist = copy.deepcopy(ruleslist)
 
-    rules = {
+    rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = {
         "global": {},
         "device": {},
-    }  # type: Dict[str, Dict[str, List[Dict[str, Any]]]]
+    }
 
     rules["global"] = _add_empty_priority_class_arrays(rules["global"])
 
diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py
index 99a18874d1..e08e125cb8 100644
--- a/synapse/push/emailpusher.py
+++ b/synapse/push/emailpusher.py
@@ -66,8 +66,8 @@ class EmailPusher(Pusher):
 
         self.store = self.hs.get_datastore()
         self.email = pusher_config.pushkey
-        self.timed_call = None  # type: Optional[IDelayedCall]
-        self.throttle_params = {}  # type: Dict[str, ThrottleParams]
+        self.timed_call: Optional[IDelayedCall] = None
+        self.throttle_params: Dict[str, ThrottleParams] = {}
         self._inited = False
 
         self._is_processing = False
@@ -168,7 +168,7 @@ class EmailPusher(Pusher):
             )
         )
 
-        soonest_due_at = None  # type: Optional[int]
+        soonest_due_at: Optional[int] = None
 
         if not unprocessed:
             await self.save_last_stream_ordering_and_success(self.max_stream_ordering)
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 06bf5f8ada..36aabd8422 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -71,7 +71,7 @@ class HttpPusher(Pusher):
         self.data = pusher_config.data
         self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC
         self.failing_since = pusher_config.failing_since
-        self.timed_call = None  # type: Optional[IDelayedCall]
+        self.timed_call: Optional[IDelayedCall] = None
         self._is_processing = False
         self._group_unread_count_by_room = hs.config.push_group_unread_count_by_room
         self._pusherpool = hs.get_pusherpool()
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 5f9ea5003a..7be5fe1e9b 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -110,7 +110,7 @@ class Mailer:
         self.state_handler = self.hs.get_state_handler()
         self.storage = hs.get_storage()
         self.app_name = app_name
-        self.email_subjects = hs.config.email_subjects  # type: EmailSubjectConfig
+        self.email_subjects: EmailSubjectConfig = hs.config.email_subjects
 
         logger.info("Created Mailer for app_name %s" % app_name)
 
@@ -230,7 +230,7 @@ class Mailer:
             [pa["event_id"] for pa in push_actions]
         )
 
-        notifs_by_room = {}  # type: Dict[str, List[Dict[str, Any]]]
+        notifs_by_room: Dict[str, List[Dict[str, Any]]] = {}
         for pa in push_actions:
             notifs_by_room.setdefault(pa["room_id"], []).append(pa)
 
@@ -356,13 +356,13 @@ class Mailer:
 
         room_name = await calculate_room_name(self.store, room_state_ids, user_id)
 
-        room_vars = {
+        room_vars: Dict[str, Any] = {
             "title": room_name,
             "hash": string_ordinal_total(room_id),  # See sender avatar hash
             "notifs": [],
             "invite": is_invite,
             "link": self._make_room_link(room_id),
-        }  # type: Dict[str, Any]
+        }
 
         if not is_invite:
             for n in notifs:
@@ -460,9 +460,9 @@ class Mailer:
         type_state_key = ("m.room.member", event.sender)
         sender_state_event_id = room_state_ids.get(type_state_key)
         if sender_state_event_id:
-            sender_state_event = await self.store.get_event(
+            sender_state_event: Optional[EventBase] = await self.store.get_event(
                 sender_state_event_id
-            )  # type: Optional[EventBase]
+            )
         else:
             # Attempt to check the historical state for the room.
             historical_state = await self.state_store.get_state_for_event(
diff --git a/synapse/push/presentable_names.py b/synapse/push/presentable_names.py
index 412941393f..0510c1cbd5 100644
--- a/synapse/push/presentable_names.py
+++ b/synapse/push/presentable_names.py
@@ -199,7 +199,7 @@ def name_from_member_event(member_event: EventBase) -> str:
 
 
 def _state_as_two_level_dict(state: StateMap[str]) -> Dict[str, Dict[str, str]]:
-    ret = {}  # type: Dict[str, Dict[str, str]]
+    ret: Dict[str, Dict[str, str]] = {}
     for k, v in state.items():
         ret.setdefault(k[0], {})[k[1]] = v
     return ret
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 98b90a4f51..7a8dc63976 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -195,9 +195,9 @@ class PushRuleEvaluatorForEvent:
 
 
 # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
-regex_cache = LruCache(
+regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache(
     50000, "regex_push_cache"
-)  # type: LruCache[Tuple[str, bool, bool], Pattern]
+)
 
 
 def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
diff --git a/synapse/push/pusher.py b/synapse/push/pusher.py
index c51938b8cf..021275437c 100644
--- a/synapse/push/pusher.py
+++ b/synapse/push/pusher.py
@@ -31,13 +31,13 @@ class PusherFactory:
         self.hs = hs
         self.config = hs.config
 
-        self.pusher_types = {
+        self.pusher_types: Dict[str, Callable[[HomeServer, PusherConfig], Pusher]] = {
             "http": HttpPusher
-        }  # type: Dict[str, Callable[[HomeServer, PusherConfig], Pusher]]
+        }
 
         logger.info("email enable notifs: %r", hs.config.email_enable_notifs)
         if hs.config.email_enable_notifs:
-            self.mailers = {}  # type: Dict[str, Mailer]
+            self.mailers: Dict[str, Mailer] = {}
 
             self._notif_template_html = hs.config.email_notif_template_html
             self._notif_template_text = hs.config.email_notif_template_text
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 579fcdf472..85621f33ef 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -62,10 +62,6 @@ class PusherPool:
         self.store = self.hs.get_datastore()
         self.clock = self.hs.get_clock()
 
-        self._account_validity_enabled = (
-            hs.config.account_validity.account_validity_enabled
-        )
-
         # We shard the handling of push notifications by user ID.
         self._pusher_shard_config = hs.config.push.pusher_shard_config
         self._instance_name = hs.get_instance_name()
@@ -87,7 +83,9 @@ class PusherPool:
         self._last_room_stream_id_seen = self.store.get_room_max_stream_ordering()
 
         # map from user id to app_id:pushkey to pusher
-        self.pushers = {}  # type: Dict[str, Dict[str, Pusher]]
+        self.pushers: Dict[str, Dict[str, Pusher]] = {}
+
+        self._account_validity_handler = hs.get_account_validity_handler()
 
     def start(self) -> None:
         """Starts the pushers off in a background process."""
@@ -238,12 +236,9 @@ class PusherPool:
 
             for u in users_affected:
                 # Don't push if the user account has expired
-                if self._account_validity_enabled:
-                    expired = await self.store.is_account_expired(
-                        u, self.clock.time_msec()
-                    )
-                    if expired:
-                        continue
+                expired = await self._account_validity_handler.is_user_expired(u)
+                if expired:
+                    continue
 
                 if u in self.pushers:
                     for p in self.pushers[u].values():
@@ -268,12 +263,9 @@ class PusherPool:
 
             for u in users_affected:
                 # Don't push if the user account has expired
-                if self._account_validity_enabled:
-                    expired = await self.store.is_account_expired(
-                        u, self.clock.time_msec()
-                    )
-                    if expired:
-                        continue
+                expired = await self._account_validity_handler.is_user_expired(u)
+                if expired:
+                    continue
 
                 if u in self.pushers:
                     for p in self.pushers[u].values():
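
The pusher pool no longer checks account expiry against the store directly; it asks the account-validity handler, which in turn consults any callbacks registered through the new ModuleApi.register_account_validity_callbacks property. A sketch of such a callback follows; the is_user_expired keyword name is an assumption inferred from the handler call used above.

    from typing import Optional

    from synapse.module_api import ModuleApi


    class ExpiryModule:
        def __init__(self, config: dict, api: ModuleApi):
            api.register_account_validity_callbacks(
                is_user_expired=self.is_user_expired,
            )

        async def is_user_expired(self, user_id: str) -> Optional[bool]:
            # True would make the pusher pool skip pushes for this user;
            # None means this module has no opinion.
            return None
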
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 271c17c226..cdcbdd772b 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -115,7 +115,7 @@ CONDITIONAL_REQUIREMENTS = {
     "cache_memory": ["pympler"],
 }
 
-ALL_OPTIONAL_REQUIREMENTS = set()  # type: Set[str]
+ALL_OPTIONAL_REQUIREMENTS: Set[str] = set()
 
 for name, optional_deps in CONDITIONAL_REQUIREMENTS.items():
     # Exclude systemd as it's a system-based requirement.
@@ -193,7 +193,7 @@ def check_requirements(for_feature=None):
     if not for_feature:
         # Check the optional dependencies are up to date. We allow them to not be
         # installed.
-        OPTS = sum(CONDITIONAL_REQUIREMENTS.values(), [])  # type: List[str]
+        OPTS: List[str] = sum(CONDITIONAL_REQUIREMENTS.values(), [])
 
         for dependency in OPTS:
             try:
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index f13a7c23b4..25589b0042 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -85,17 +85,17 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             is received.
     """
 
-    NAME = abc.abstractproperty()  # type: str  # type: ignore
-    PATH_ARGS = abc.abstractproperty()  # type: Tuple[str, ...]  # type: ignore
+    NAME: str = abc.abstractproperty()  # type: ignore
+    PATH_ARGS: Tuple[str, ...] = abc.abstractproperty()  # type: ignore
     METHOD = "POST"
     CACHE = True
     RETRY_ON_TIMEOUT = True
 
     def __init__(self, hs: "HomeServer"):
         if self.CACHE:
-            self.response_cache = ResponseCache(
+            self.response_cache: ResponseCache[str] = ResponseCache(
                 hs.get_clock(), "repl." + self.NAME, timeout_ms=30 * 60 * 1000
-            )  # type: ResponseCache[str]
+            )
 
         # We reserve `instance_name` as a parameter to sending requests, so we
         # assert here that sub classes don't try and use the name.
@@ -232,7 +232,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                 # have a good idea that the request has either succeeded or failed on
                 # the master, and so whether we should clean up or not.
                 while True:
-                    headers = {}  # type: Dict[bytes, List[bytes]]
+                    headers: Dict[bytes, List[bytes]] = {}
                     # Add an authorization header, if configured.
                     if replication_secret:
                         headers[b"Authorization"] = [b"Bearer " + replication_secret]
diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py
index faa99387a7..e460dd85cd 100644
--- a/synapse/replication/slave/storage/_base.py
+++ b/synapse/replication/slave/storage/_base.py
@@ -27,7 +27,9 @@ class BaseSlavedStore(CacheInvalidationWorkerStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
         if isinstance(self.database_engine, PostgresEngine):
-            self._cache_id_gen = MultiWriterIdGenerator(
+            self._cache_id_gen: Optional[
+                MultiWriterIdGenerator
+            ] = MultiWriterIdGenerator(
                 db_conn,
                 database,
                 stream_name="caches",
@@ -41,7 +43,7 @@ class BaseSlavedStore(CacheInvalidationWorkerStore):
                 ],
                 sequence_name="cache_invalidation_stream_seq",
                 writers=[],
-            )  # type: Optional[MultiWriterIdGenerator]
+            )
         else:
             self._cache_id_gen = None
 
diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py
index 13ed87adc4..436d39c320 100644
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@@ -23,9 +23,9 @@ class SlavedClientIpStore(BaseSlavedStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
 
-        self.client_ip_last_seen = LruCache(
+        self.client_ip_last_seen: LruCache[tuple, int] = LruCache(
             cache_name="client_ip_last_seen", max_size=50000
-        )  # type: LruCache[tuple, int]
+        )
 
     async def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):
         now = int(self._clock.time_msec())
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 62d7809175..9d4859798b 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -121,13 +121,13 @@ class ReplicationDataHandler:
         self._pusher_pool = hs.get_pusherpool()
         self._presence_handler = hs.get_presence_handler()
 
-        self.send_handler = None  # type: Optional[FederationSenderHandler]
+        self.send_handler: Optional[FederationSenderHandler] = None
         if hs.should_send_federation():
             self.send_handler = FederationSenderHandler(hs)
 
         # Map from stream to list of deferreds waiting for the stream to
         # arrive at a particular position. The lists are sorted by stream position.
-        self._streams_to_waiters = {}  # type: Dict[str, List[Tuple[int, Deferred]]]
+        self._streams_to_waiters: Dict[str, List[Tuple[int, Deferred]]] = {}
 
     async def on_rdata(
         self, stream_name: str, instance_name: str, token: int, rows: list
@@ -173,7 +173,7 @@ class ReplicationDataHandler:
             if entities:
                 self.notifier.on_new_event("to_device_key", token, users=entities)
         elif stream_name == DeviceListsStream.NAME:
-            all_room_ids = set()  # type: Set[str]
+            all_room_ids: Set[str] = set()
             for row in rows:
                 if row.entity.startswith("@"):
                     room_ids = await self.store.get_rooms_for_user(row.entity)
@@ -201,7 +201,7 @@ class ReplicationDataHandler:
                 if row.data.rejected:
                     continue
 
-                extra_users = ()  # type: Tuple[UserID, ...]
+                extra_users: Tuple[UserID, ...] = ()
                 if row.data.type == EventTypes.Member and row.data.state_key:
                     extra_users = (UserID.from_string(row.data.state_key),)
 
@@ -348,7 +348,7 @@ class FederationSenderHandler:
 
         # Stores the latest position in the federation stream we've gotten up
         # to. This is always set before we use it.
-        self.federation_position = None  # type: Optional[int]
+        self.federation_position: Optional[int] = None
 
         self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")
 
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 505d450e19..1311b013da 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -34,7 +34,7 @@ class Command(metaclass=abc.ABCMeta):
     A full command line on the wire is constructed from `NAME + " " + to_line()`
     """
 
-    NAME = None  # type: str
+    NAME: str
 
     @classmethod
     @abc.abstractmethod
@@ -380,7 +380,7 @@ class RemoteServerUpCommand(_SimpleCommand):
     NAME = "REMOTE_SERVER_UP"
 
 
-_COMMANDS = (
+_COMMANDS: Tuple[Type[Command], ...] = (
     ServerCommand,
     RdataCommand,
     PositionCommand,
@@ -393,7 +393,7 @@ _COMMANDS = (
     UserIpCommand,
     RemoteServerUpCommand,
     ClearUserSyncsCommand,
-)  # type: Tuple[Type[Command], ...]
+)
 
 # Map of command name to command type.
 COMMAND_MAP = {cmd.NAME: cmd for cmd in _COMMANDS}
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 2ad7a200bb..eae4515363 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -105,12 +105,12 @@ class ReplicationCommandHandler:
             hs.get_instance_name() in hs.config.worker.writers.presence
         )
 
-        self._streams = {
+        self._streams: Dict[str, Stream] = {
             stream.NAME: stream(hs) for stream in STREAMS_MAP.values()
-        }  # type: Dict[str, Stream]
+        }
 
         # List of streams that this instance is the source of
-        self._streams_to_replicate = []  # type: List[Stream]
+        self._streams_to_replicate: List[Stream] = []
 
         for stream in self._streams.values():
             if hs.config.redis.redis_enabled and stream.NAME == CachesStream.NAME:
@@ -180,14 +180,14 @@ class ReplicationCommandHandler:
 
         # Map of stream name to batched updates. See RdataCommand for info on
         # how batching works.
-        self._pending_batches = {}  # type: Dict[str, List[Any]]
+        self._pending_batches: Dict[str, List[Any]] = {}
 
         # The factory used to create connections.
-        self._factory = None  # type: Optional[ReconnectingClientFactory]
+        self._factory: Optional[ReconnectingClientFactory] = None
 
         # The currently connected connections. (The list of places we need to send
         # outgoing replication commands to.)
-        self._connections = []  # type: List[IReplicationConnection]
+        self._connections: List[IReplicationConnection] = []
 
         LaterGauge(
             "synapse_replication_tcp_resource_total_connections",
@@ -200,7 +200,7 @@ class ReplicationCommandHandler:
         # them in order in a separate background process.
 
         # the streams which are currently being processed by _unsafe_process_queue
-        self._processing_streams = set()  # type: Set[str]
+        self._processing_streams: Set[str] = set()
 
         # for each stream, a queue of commands that are awaiting processing, and the
         # connection that they arrived on.
@@ -210,7 +210,7 @@ class ReplicationCommandHandler:
 
         # For each connection, the incoming stream names that have received a POSITION
         # from that connection.
-        self._streams_by_connection = {}  # type: Dict[IReplicationConnection, Set[str]]
+        self._streams_by_connection: Dict[IReplicationConnection, Set[str]] = {}
 
         LaterGauge(
             "synapse_replication_tcp_command_queue",
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 6e3705364f..8c80153ab6 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -102,7 +102,7 @@ tcp_outbound_commands_counter = Counter(
 
 # A list of all connected protocols. This allows us to send metrics about the
 # connections.
-connected_connections = []  # type: List[BaseReplicationStreamProtocol]
+connected_connections: "List[BaseReplicationStreamProtocol]" = []
 
 
 logger = logging.getLogger(__name__)
@@ -146,15 +146,15 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
 
     # The transport is going to be an ITCPTransport, but that doesn't have the
     # (un)registerProducer methods; those are only on the implementation.
-    transport = None  # type: Connection
+    transport: Connection
 
     delimiter = b"\n"
 
     # Valid commands we expect to receive
-    VALID_INBOUND_COMMANDS = []  # type: Collection[str]
+    VALID_INBOUND_COMMANDS: Collection[str] = []
 
     # Valid commands we can send
-    VALID_OUTBOUND_COMMANDS = []  # type: Collection[str]
+    VALID_OUTBOUND_COMMANDS: Collection[str] = []
 
     max_line_buffer = 10000
 
@@ -165,7 +165,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         self.last_received_command = self.clock.time_msec()
         self.last_sent_command = 0
         # When we requested the connection be closed
-        self.time_we_closed = None  # type: Optional[int]
+        self.time_we_closed: Optional[int] = None
 
         self.received_ping = False  # Have we received a ping from the other side
 
@@ -175,10 +175,10 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         self.conn_id = random_string(5)  # To dedupe in case of name clashes.
 
         # List of pending commands to send once we've established the connection
-        self.pending_commands = []  # type: List[Command]
+        self.pending_commands: List[Command] = []
 
         # The LoopingCall for sending pings.
-        self._send_ping_loop = None  # type: Optional[task.LoopingCall]
+        self._send_ping_loop: Optional[task.LoopingCall] = None
 
         # a logcontext which we use for processing incoming commands. We declare it as a
         # background process so that the CPU stats get reported to prometheus.
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index 6a2c2655e4..8c0df627c8 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -57,7 +57,7 @@ class ConstantProperty(Generic[T, V]):
     it.
     """
 
-    constant = attr.ib()  # type: V
+    constant: V = attr.ib()
 
     def __get__(self, obj: Optional[T], objtype: Optional[Type[T]] = None) -> V:
         return self.constant
@@ -91,9 +91,9 @@ class RedisSubscriber(txredisapi.SubscriberProtocol):
             commands.
     """
 
-    synapse_handler = None  # type: ReplicationCommandHandler
-    synapse_stream_name = None  # type: str
-    synapse_outbound_redis_connection = None  # type: txredisapi.RedisProtocol
+    synapse_handler: "ReplicationCommandHandler"
+    synapse_stream_name: str
+    synapse_outbound_redis_connection: txredisapi.RedisProtocol
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index b03824925a..3716c41bea 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -85,9 +85,9 @@ class Stream:
     time it was called.
     """
 
-    NAME = None  # type: str  # The name of the stream
+    NAME: str  # The name of the stream
     # The type of the row. Used by the default impl of parse_row.
-    ROW_TYPE = None  # type: Any
+    ROW_TYPE: Any = None
 
     @classmethod
     def parse_row(cls, row: StreamRow):
@@ -283,9 +283,7 @@ class PresenceStream(Stream):
 
             assert isinstance(presence_handler, PresenceHandler)
 
-            update_function = (
-                presence_handler.get_all_presence_updates
-            )  # type: UpdateFunction
+            update_function: UpdateFunction = presence_handler.get_all_presence_updates
         else:
             # Query presence writer process
             update_function = make_http_update_function(hs, self.NAME)
@@ -334,9 +332,9 @@ class TypingStream(Stream):
         if writer_instance == hs.get_instance_name():
             # On the writer, query the typing handler
             typing_writer_handler = hs.get_typing_writer_handler()
-            update_function = (
-                typing_writer_handler.get_all_typing_updates
-            )  # type: Callable[[str, int, int, int], Awaitable[Tuple[List[Tuple[int, Any]], int, bool]]]
+            update_function: Callable[
+                [str, int, int, int], Awaitable[Tuple[List[Tuple[int, Any]], int, bool]]
+            ] = typing_writer_handler.get_all_typing_updates
             current_token_function = typing_writer_handler.get_current_token
         else:
             # Query the typing writer process
diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py
index e7e87bac92..a030e9299e 100644
--- a/synapse/replication/tcp/streams/events.py
+++ b/synapse/replication/tcp/streams/events.py
@@ -65,7 +65,7 @@ class BaseEventsStreamRow:
     """
 
     # Unique string that identifies the type. Must be overridden in subclasses.
-    TypeId = None  # type: str
+    TypeId: str
 
     @classmethod
     def from_data(cls, data):
@@ -103,10 +103,10 @@ class EventsStreamCurrentStateRow(BaseEventsStreamRow):
     event_id = attr.ib()  # str, optional
 
 
-_EventRows = (
+_EventRows: Tuple[Type[BaseEventsStreamRow], ...] = (
     EventsStreamEventRow,
     EventsStreamCurrentStateRow,
-)  # type: Tuple[Type[BaseEventsStreamRow], ...]
+)
 
 TypeToRow = {Row.TypeId: Row for Row in _EventRows}
 
@@ -157,9 +157,9 @@ class EventsStream(Stream):
 
         # now we fetch up to that many rows from the events table
 
-        event_rows = await self._store.get_all_new_forward_event_rows(
+        event_rows: List[Tuple] = await self._store.get_all_new_forward_event_rows(
             instance_name, from_token, current_token, target_row_count
-        )  # type: List[Tuple]
+        )
 
         # we rely on get_all_new_forward_event_rows strictly honouring the limit, so
         # that we know it is safe to just take upper_limit = event_rows[-1][0].
@@ -172,7 +172,7 @@ class EventsStream(Stream):
 
         if len(event_rows) == target_row_count:
             limited = True
-            upper_limit = event_rows[-1][0]  # type: int
+            upper_limit: int = event_rows[-1][0]
         else:
             limited = False
             upper_limit = current_token
@@ -191,30 +191,30 @@ class EventsStream(Stream):
         # finally, fetch the ex-outliers rows. We assume there are few enough of these
         # not to bother with the limit.
 
-        ex_outliers_rows = await self._store.get_ex_outlier_stream_rows(
+        ex_outliers_rows: List[Tuple] = await self._store.get_ex_outlier_stream_rows(
             instance_name, from_token, upper_limit
-        )  # type: List[Tuple]
+        )
 
         # we now need to turn the raw database rows returned into tuples suitable
         # for the replication protocol (basically, we add an identifier to
         # distinguish the row type). At the same time, we can limit the event_rows
         # to the max stream_id from state_rows.
 
-        event_updates = (
+        event_updates: Iterable[Tuple[int, Tuple]] = (
             (stream_id, (EventsStreamEventRow.TypeId, rest))
             for (stream_id, *rest) in event_rows
             if stream_id <= upper_limit
-        )  # type: Iterable[Tuple[int, Tuple]]
+        )
 
-        state_updates = (
+        state_updates: Iterable[Tuple[int, Tuple]] = (
             (stream_id, (EventsStreamCurrentStateRow.TypeId, rest))
             for (stream_id, *rest) in state_rows
-        )  # type: Iterable[Tuple[int, Tuple]]
+        )
 
-        ex_outliers_updates = (
+        ex_outliers_updates: Iterable[Tuple[int, Tuple]] = (
             (stream_id, (EventsStreamEventRow.TypeId, rest))
             for (stream_id, *rest) in ex_outliers_rows
-        )  # type: Iterable[Tuple[int, Tuple]]
+        )
 
         # we need to return a sorted list, so merge them together.
         updates = list(heapq.merge(event_updates, state_updates, ex_outliers_updates))
diff --git a/synapse/replication/tcp/streams/federation.py b/synapse/replication/tcp/streams/federation.py
index 096a85d363..c445af9bd9 100644
--- a/synapse/replication/tcp/streams/federation.py
+++ b/synapse/replication/tcp/streams/federation.py
@@ -51,9 +51,9 @@ class FederationStream(Stream):
             current_token = current_token_without_instance(
                 federation_sender.get_current_token
             )
-            update_function = (
-                federation_sender.get_replication_rows
-            )  # type: Callable[[str, int, int, int], Awaitable[Tuple[List[Tuple[int, Any]], int, bool]]]
+            update_function: Callable[
+                [str, int, int, int], Awaitable[Tuple[List[Tuple[int, Any]], int, bool]]
+            ] = federation_sender.get_replication_rows
 
         elif hs.should_send_federation():
             # federation sender: Query master process
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index f0cddd2d2c..40ee33646c 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -402,9 +402,9 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet):
 
         # Get the room ID from the identifier.
         try:
-            remote_room_hosts = [
+            remote_room_hosts: Optional[List[str]] = [
                 x.decode("ascii") for x in request.args[b"server_name"]
-            ]  # type: Optional[List[str]]
+            ]
         except Exception:
             remote_room_hosts = None
         room_id, remote_room_hosts = await self.resolve_room_id(
@@ -462,6 +462,7 @@ class MakeRoomAdminRestServlet(ResolveRoomIdMixin, RestServlet):
         super().__init__(hs)
         self.hs = hs
         self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
         self.event_creation_handler = hs.get_event_creation_handler()
         self.state_handler = hs.get_state_handler()
         self.is_mine_id = hs.is_mine_id
@@ -500,7 +501,13 @@ class MakeRoomAdminRestServlet(ResolveRoomIdMixin, RestServlet):
             admin_user_id = None
 
             for admin_user in reversed(admin_users):
-                if room_state.get((EventTypes.Member, admin_user)):
+                (
+                    current_membership_type,
+                    _,
+                ) = await self.store.get_local_current_membership_for_user_in_room(
+                    admin_user, room_id
+                )
+                if current_membership_type == "join":
                     admin_user_id = admin_user
                     break
 
@@ -652,9 +659,7 @@ class RoomEventContextServlet(RestServlet):
         filter_str = parse_string(request, "filter", encoding="utf-8")
         if filter_str:
             filter_json = urlparse.unquote(filter_str)
-            event_filter = Filter(
-                json_decoder.decode(filter_json)
-            )  # type: Optional[Filter]
+            event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json))
         else:
             event_filter = None
 
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 7d75564758..589e47fa47 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -357,7 +357,7 @@ class UserRegisterServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
         self.auth_handler = hs.get_auth_handler()
         self.reactor = hs.get_reactor()
-        self.nonces = {}  # type: Dict[str, int]
+        self.nonces: Dict[str, int] = {}
         self.hs = hs
 
     def _clear_old_nonces(self):
@@ -560,16 +560,24 @@ class AccountValidityRenewServlet(RestServlet):
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
-        body = parse_json_object_from_request(request)
+        if self.account_activity_handler.on_legacy_admin_request_callback:
+            expiration_ts = await (
+                self.account_activity_handler.on_legacy_admin_request_callback(request)
+            )
+        else:
+            body = parse_json_object_from_request(request)
 
-        if "user_id" not in body:
-            raise SynapseError(400, "Missing property 'user_id' in the request body")
+            if "user_id" not in body:
+                raise SynapseError(
+                    400,
+                    "Missing property 'user_id' in the request body",
+                )
 
-        expiration_ts = await self.account_activity_handler.renew_account_for_user(
-            body["user_id"],
-            body.get("expiration_ts"),
-            not body.get("enable_renewal_emails", True),
-        )
+            expiration_ts = await self.account_activity_handler.renew_account_for_user(
+                body["user_id"],
+                body.get("expiration_ts"),
+                not body.get("enable_renewal_emails", True),
+            )
 
         res = {"expiration_ts": expiration_ts}
         return 200, res
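
The admin renewal servlet now defers to a module-registered legacy admin-request callback when one exists: the callback receives the incoming SynapseRequest and must return the new expiration timestamp in milliseconds. A hypothetical callback along those lines; the on_legacy_admin_request keyword and the renewal policy are assumptions based on the handler attribute name used above.

    import logging
    import time

    from synapse.module_api import ModuleApi, parse_json_object_from_request


    class LegacyRenewModule:
        def __init__(self, config: dict, api: ModuleApi):
            api.register_account_validity_callbacks(
                on_legacy_admin_request=self.on_legacy_admin_request,
            )

        async def on_legacy_admin_request(self, request) -> int:
            body = parse_json_object_from_request(request)
            user_id = body["user_id"]
            # Illustrative policy: extend the account by 30 days from now.
            new_expiration_ts = int(time.time() * 1000) + 30 * 24 * 60 * 60 * 1000
            # A real module would persist the new expiry for the user here.
            logging.info("Renewing %s until %d", user_id, new_expiration_ts)
            return new_expiration_ts
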
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index cbcb60fe31..11567bf32c 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -44,19 +44,14 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-LoginResponse = TypedDict(
-    "LoginResponse",
-    {
-        "user_id": str,
-        "access_token": str,
-        "home_server": str,
-        "expires_in_ms": Optional[int],
-        "refresh_token": Optional[str],
-        "device_id": str,
-        "well_known": Optional[Dict[str, Any]],
-    },
-    total=False,
-)
+class LoginResponse(TypedDict, total=False):
+    user_id: str
+    access_token: str
+    home_server: str
+    expires_in_ms: Optional[int]
+    refresh_token: Optional[str]
+    device_id: str
+    well_known: Optional[Dict[str, Any]]
 
 
 class LoginRestServlet(RestServlet):
@@ -121,7 +116,7 @@ class LoginRestServlet(RestServlet):
             flows.append({"type": LoginRestServlet.CAS_TYPE})
 
         if self.cas_enabled or self.saml2_enabled or self.oidc_enabled:
-            sso_flow = {
+            sso_flow: JsonDict = {
                 "type": LoginRestServlet.SSO_TYPE,
                 "identity_providers": [
                     _get_auth_flow_dict_for_idp(
@@ -129,7 +124,7 @@ class LoginRestServlet(RestServlet):
                     )
                     for idp in self._sso_handler.get_identity_providers().values()
                 ],
-            }  # type: JsonDict
+            }
 
             if self._msc2858_enabled:
                 # backwards-compatibility support for clients which don't
@@ -150,9 +145,7 @@ class LoginRestServlet(RestServlet):
             # login flow types returned.
             flows.append({"type": LoginRestServlet.TOKEN_TYPE})
 
-        flows.extend(
-            ({"type": t} for t in self.auth_handler.get_supported_login_types())
-        )
+        flows.extend({"type": t} for t in self.auth_handler.get_supported_login_types())
 
         flows.append({"type": LoginRestServlet.APPSERVICE_TYPE})
 
@@ -447,7 +440,7 @@ def _get_auth_flow_dict_for_idp(
         use_unstable_brands: whether we should use brand identifiers suitable
            for the unstable API
     """
-    e = {"id": idp.idp_id, "name": idp.idp_name}  # type: JsonDict
+    e: JsonDict = {"id": idp.idp_id, "name": idp.idp_name}
     if idp.idp_icon:
         e["icon"] = idp.idp_icon
     if idp.idp_brand:
@@ -561,7 +554,7 @@ class SsoRedirectServlet(RestServlet):
             finish_request(request)
             return
 
-        args = request.args  # type: Dict[bytes, List[bytes]]  # type: ignore
+        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
         client_redirect_url = parse_bytes_from_args(args, "redirectUrl", required=True)
         sso_url = await self._sso_handler.handle_redirect_request(
             request,
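
The LoginResponse change above is purely syntactic: the class-based TypedDict is equivalent to the functional form it replaces, and total=False keeps every key optional. A small illustration with a subset of the fields (values are placeholders, and the typing_extensions import mirrors what this file appears to use):

    from typing import Optional

    from typing_extensions import TypedDict


    class LoginResponse(TypedDict, total=False):
        user_id: str
        access_token: str
        expires_in_ms: Optional[int]


    # total=False: a partial response still satisfies the type.
    resp: LoginResponse = {
        "user_id": "@alice:example.com",
        "access_token": "syt_placeholder",
    }
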
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 92ebe838fd..31a1193cd3 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -29,6 +29,7 @@ from synapse.api.errors import (
     SynapseError,
 )
 from synapse.api.filtering import Filter
+from synapse.appservice import ApplicationService
 from synapse.events.utils import format_event_for_client_v2
 from synapse.http.servlet import (
     RestServlet,
@@ -47,11 +48,13 @@ from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import (
     JsonDict,
+    Requester,
     RoomAlias,
     RoomID,
     StreamToken,
     ThirdPartyInstanceID,
     UserID,
+    create_requester,
 )
 from synapse.util import json_decoder
 from synapse.util.stringutils import parse_and_validate_server_name, random_string
@@ -309,7 +312,7 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()
 
-    async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
+    async def _inherit_depth_from_prev_ids(self, prev_event_ids) -> int:
         (
             most_recent_prev_event_id,
             most_recent_prev_event_depth,
@@ -349,6 +352,54 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
 
         return depth
 
+    def _create_insertion_event_dict(
+        self, sender: str, room_id: str, origin_server_ts: int
+    ):
+        """Creates an event dict for an "insertion" event with the proper fields
+        and a random chunk ID.
+
+        Args:
+            sender: The event author MXID
+            room_id: The room ID that the event belongs to
+            origin_server_ts: Timestamp when the event was sent
+
+        Returns:
+            The new event dictionary.
+        """
+
+        next_chunk_id = random_string(8)
+        insertion_event = {
+            "type": EventTypes.MSC2716_INSERTION,
+            "sender": sender,
+            "room_id": room_id,
+            "content": {
+                EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
+                EventContentFields.MSC2716_HISTORICAL: True,
+            },
+            "origin_server_ts": origin_server_ts,
+        }
+
+        return insertion_event
+
+    async def _create_requester_for_user_id_from_app_service(
+        self, user_id: str, app_service: ApplicationService
+    ) -> Requester:
+        """Creates a new requester for the given user_id
+        and validates that the app service is allowed to control
+        the given user.
+
+        Args:
+            user_id: The author MXID that the app service is controlling
+            app_service: The app service that controls the user
+
+        Returns:
+            Requester object
+        """
+
+        await self.auth.validate_appservice_can_control_user_id(app_service, user_id)
+
+        return create_requester(user_id, app_service=app_service)
+
     async def on_POST(self, request, room_id):
         requester = await self.auth.get_user_by_req(request, allow_guest=False)
 
@@ -414,7 +465,9 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
             if event_dict["type"] == EventTypes.Member:
                 membership = event_dict["content"].get("membership", None)
                 event_id, _ = await self.room_member_handler.update_membership(
-                    requester,
+                    await self._create_requester_for_user_id_from_app_service(
+                        state_event["sender"], requester.app_service
+                    ),
                     target=UserID.from_string(event_dict["state_key"]),
                     room_id=room_id,
                     action=membership,
@@ -434,7 +487,9 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
                     event,
                     _,
                 ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                    requester,
+                    await self._create_requester_for_user_id_from_app_service(
+                        state_event["sender"], requester.app_service
+                    ),
                     event_dict,
                     outlier=True,
                     prev_event_ids=[fake_prev_event_id],
@@ -449,37 +504,73 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
 
         events_to_create = body["events"]
 
-        # If provided, connect the chunk to the last insertion point
-        # The chunk ID passed in comes from the chunk_id in the
-        # "insertion" event from the previous chunk.
+        prev_event_ids = prev_events_from_query
+        inherited_depth = await self._inherit_depth_from_prev_ids(
+            prev_events_from_query
+        )
+
+        # Figure out which chunk to connect to. If they passed in
+        # chunk_id_from_query let's use it. The chunk ID passed in comes
+        # from the chunk_id in the "insertion" event from the previous chunk.
+        last_event_in_chunk = events_to_create[-1]
+        chunk_id_to_connect_to = chunk_id_from_query
+        base_insertion_event = None
         if chunk_id_from_query:
-            last_event_in_chunk = events_to_create[-1]
-            last_event_in_chunk["content"][
-                EventContentFields.MSC2716_CHUNK_ID
-            ] = chunk_id_from_query
+            # TODO: Verify the chunk_id_from_query corresponds to an insertion event
+            pass
+        # Otherwise, create an insertion event to act as a starting point.
+        #
+        # We don't always have an insertion event to start hanging more history
+        # off of (ideally there would be one in the main DAG, but that's not the
+        # case if we're wanting to add history to e.g. existing rooms without
+        # an insertion event), in which case we just create a new insertion event
+        # that can then get pointed to by a "marker" event later.
+        else:
+            base_insertion_event_dict = self._create_insertion_event_dict(
+                sender=requester.user.to_string(),
+                room_id=room_id,
+                origin_server_ts=last_event_in_chunk["origin_server_ts"],
+            )
+            base_insertion_event_dict["prev_events"] = prev_event_ids.copy()
+
+            (
+                base_insertion_event,
+                _,
+            ) = await self.event_creation_handler.create_and_send_nonmember_event(
+                await self._create_requester_for_user_id_from_app_service(
+                    base_insertion_event_dict["sender"],
+                    requester.app_service,
+                ),
+                base_insertion_event_dict,
+                prev_event_ids=base_insertion_event_dict.get("prev_events"),
+                auth_event_ids=auth_event_ids,
+                historical=True,
+                depth=inherited_depth,
+            )
+
+            chunk_id_to_connect_to = base_insertion_event["content"][
+                EventContentFields.MSC2716_NEXT_CHUNK_ID
+            ]
 
-        # Add an "insertion" event to the start of each chunk (next to the oldest
+        # Connect this current chunk to the insertion event from the previous chunk
+        last_event_in_chunk["content"][
+            EventContentFields.MSC2716_CHUNK_ID
+        ] = chunk_id_to_connect_to
+
+        # Add an "insertion" event to the start of each chunk (next to the oldest-in-time
         # event in the chunk) so the next chunk can be connected to this one.
-        next_chunk_id = random_string(64)
-        insertion_event = {
-            "type": EventTypes.MSC2716_INSERTION,
-            "sender": requester.user.to_string(),
-            "content": {
-                EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id,
-                EventContentFields.MSC2716_HISTORICAL: True,
-            },
+        insertion_event = self._create_insertion_event_dict(
+            sender=requester.user.to_string(),
+            room_id=room_id,
             # Since the insertion event is put at the start of the chunk,
-            # where the oldest event is, copy the origin_server_ts from
+            # where the oldest-in-time event is, copy the origin_server_ts from
             # the first event we're inserting
-            "origin_server_ts": events_to_create[0]["origin_server_ts"],
-        }
+            origin_server_ts=events_to_create[0]["origin_server_ts"],
+        )
         # Prepend the insertion event to the start of the chunk
         events_to_create = [insertion_event] + events_to_create
 
-        inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query)
-
         event_ids = []
-        prev_event_ids = prev_events_from_query
         events_to_persist = []
         for ev in events_to_create:
             assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"])
@@ -498,7 +589,9 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
             }
 
             event, context = await self.event_creation_handler.create_event(
-                requester,
+                await self._create_requester_for_user_id_from_app_service(
+                    ev["sender"], requester.app_service
+                ),
                 event_dict,
                 prev_event_ids=event_dict.get("prev_events"),
                 auth_event_ids=auth_event_ids,
@@ -528,15 +621,23 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
         # where topological_ordering is just depth.
         for (event, context) in reversed(events_to_persist):
             ev = await self.event_creation_handler.handle_new_client_event(
-                requester=requester,
+                await self._create_requester_for_user_id_from_app_service(
+                    event["sender"], requester.app_service
+                ),
                 event=event,
                 context=context,
             )
 
+        # Add the base_insertion_event to the bottom of the list we return
+        if base_insertion_event is not None:
+            event_ids.append(base_insertion_event.event_id)
+
         return 200, {
             "state_events": auth_event_ids,
             "events": event_ids,
-            "next_chunk_id": next_chunk_id,
+            "next_chunk_id": insertion_event["content"][
+                EventContentFields.MSC2716_NEXT_CHUNK_ID
+            ],
         }
 
     def on_GET(self, request, room_id):
@@ -682,7 +783,7 @@ class PublicRoomListRestServlet(TransactionRestServlet):
         server = parse_string(request, "server", default=None)
         content = parse_json_object_from_request(request)
 
-        limit = int(content.get("limit", 100))  # type: Optional[int]
+        limit: Optional[int] = int(content.get("limit", 100))
         since_token = content.get("since", None)
         search_filter = content.get("filter", None)
 
@@ -828,9 +929,7 @@ class RoomMessageListRestServlet(RestServlet):
         filter_str = parse_string(request, "filter", encoding="utf-8")
         if filter_str:
             filter_json = urlparse.unquote(filter_str)
-            event_filter = Filter(
-                json_decoder.decode(filter_json)
-            )  # type: Optional[Filter]
+            event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json))
             if (
                 event_filter
                 and event_filter.filter_json.get("event_format", "client")
@@ -943,9 +1042,7 @@ class RoomEventContextServlet(RestServlet):
         filter_str = parse_string(request, "filter", encoding="utf-8")
         if filter_str:
             filter_json = urlparse.unquote(filter_str)
-            event_filter = Filter(
-                json_decoder.decode(filter_json)
-            )  # type: Optional[Filter]
+            event_filter: Optional[Filter] = Filter(json_decoder.decode(filter_json))
         else:
             event_filter = None
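The batch-send changes above revolve around MSC2716 "insertion" events: each chunk of history is prepended with an insertion event advertising a fresh next_chunk_id, and the next (older) chunk connects back to it by stamping that ID onto its last event as chunk_id. A minimal standalone sketch of that chaining, assuming the unstable MSC2716 field names; the helper below is illustrative and is not Synapse's _create_insertion_event_dict:

    from secrets import token_urlsafe

    def make_insertion_event(sender: str, origin_server_ts: int) -> dict:
        # Illustrative only: an insertion event advertising the chunk ID that
        # the next chunk of (older) history should connect to.
        return {
            "type": "org.matrix.msc2716.insertion",
            "sender": sender,
            "origin_server_ts": origin_server_ts,
            "content": {
                "org.matrix.msc2716.next_chunk_id": token_urlsafe(32),
                "org.matrix.msc2716.historical": True,
            },
        }

    insertion = make_insertion_event("@app:example.org", 1_626_000_000_000)

    # The last event of the following (older) chunk then points back at it:
    last_event_in_next_chunk = {"type": "m.room.message", "content": {"body": "hi"}}
    last_event_in_next_chunk["content"]["org.matrix.msc2716.chunk_id"] = insertion[
        "content"
    ]["org.matrix.msc2716.next_chunk_id"]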
 
diff --git a/synapse/rest/client/v2_alpha/account_validity.py b/synapse/rest/client/v2_alpha/account_validity.py
index 2d1ad3d3fb..3ebe401861 100644
--- a/synapse/rest/client/v2_alpha/account_validity.py
+++ b/synapse/rest/client/v2_alpha/account_validity.py
@@ -14,7 +14,7 @@
 
 import logging
 
-from synapse.api.errors import AuthError, SynapseError
+from synapse.api.errors import SynapseError
 from synapse.http.server import respond_with_html
 from synapse.http.servlet import RestServlet
 
@@ -92,11 +92,6 @@ class AccountValiditySendMailServlet(RestServlet):
         )
 
     async def on_POST(self, request):
-        if not self.account_validity_renew_by_email_enabled:
-            raise AuthError(
-                403, "Account renewal via email is disabled on this server."
-            )
-
         requester = await self.auth.get_user_by_req(request, allow_expired=True)
         user_id = requester.user.to_string()
         await self.account_activity_handler.send_renewal_email_to_user(user_id)
diff --git a/synapse/rest/client/v2_alpha/sendtodevice.py b/synapse/rest/client/v2_alpha/sendtodevice.py
index f8dcee603c..d537d811d8 100644
--- a/synapse/rest/client/v2_alpha/sendtodevice.py
+++ b/synapse/rest/client/v2_alpha/sendtodevice.py
@@ -59,7 +59,7 @@ class SendToDeviceRestServlet(servlet.RestServlet):
             requester, message_type, content["messages"]
         )
 
-        response = (200, {})  # type: Tuple[int, dict]
+        response: Tuple[int, dict] = (200, {})
         return response
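Most of the remaining hunks in this diff are mechanical: PEP 484 type comments are replaced with PEP 526 inline variable annotations. Both forms mean the same thing to mypy; the inline form is just less fragile and sits more naturally next to a trailing # type: ignore when one is still needed. A trivial before/after sketch:

    from typing import Dict, List, Optional, Tuple

    # Old style, as removed throughout this diff (a type comment):
    #     response = (200, {})  # type: Tuple[int, dict]

    # New style, as added (an inline annotation):
    response: Tuple[int, dict] = (200, {})
    args: Dict[bytes, List[bytes]] = {}
    start_time: Optional[int] = None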
 
 
diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py
index e52570cd8e..4282e2b228 100644
--- a/synapse/rest/consent/consent_resource.py
+++ b/synapse/rest/consent/consent_resource.py
@@ -117,7 +117,7 @@ class ConsentResource(DirectServeHtmlResource):
         has_consented = False
         public_version = username == ""
         if not public_version:
-            args = request.args  # type: Dict[bytes, List[bytes]]
+            args: Dict[bytes, List[bytes]] = request.args
             userhmac_bytes = parse_bytes_from_args(args, "h", required=True)
 
             self._check_hash(username, userhmac_bytes)
@@ -154,7 +154,7 @@ class ConsentResource(DirectServeHtmlResource):
         """
         version = parse_string(request, "v", required=True)
         username = parse_string(request, "u", required=True)
-        args = request.args  # type: Dict[bytes, List[bytes]]
+        args: Dict[bytes, List[bytes]] = request.args
         userhmac = parse_bytes_from_args(args, "h", required=True)
 
         self._check_hash(username, userhmac)
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index d56a1ae482..63a40b1852 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -97,7 +97,7 @@ class RemoteKey(DirectServeJsonResource):
     async def _async_render_GET(self, request):
         if len(request.postpath) == 1:
             (server,) = request.postpath
-            query = {server.decode("ascii"): {}}  # type: dict
+            query: dict = {server.decode("ascii"): {}}
         elif len(request.postpath) == 2:
             server, key_id = request.postpath
             minimum_valid_until_ts = parse_integer(request, "minimum_valid_until_ts")
@@ -141,7 +141,7 @@ class RemoteKey(DirectServeJsonResource):
         time_now_ms = self.clock.time_msec()
 
         # Note that the value is unused.
-        cache_misses = {}  # type: Dict[str, Dict[str, int]]
+        cache_misses: Dict[str, Dict[str, int]] = {}
         for (server_name, key_id, _), results in cached.items():
             results = [(result["ts_added_ms"], result) for result in results]
 
diff --git a/synapse/rest/media/v1/__init__.py b/synapse/rest/media/v1/__init__.py
index d20186bbd0..3dd16d4bb5 100644
--- a/synapse/rest/media/v1/__init__.py
+++ b/synapse/rest/media/v1/__init__.py
@@ -17,7 +17,7 @@ import PIL.Image
 # check for JPEG support.
 try:
     PIL.Image._getdecoder("rgb", "jpeg", None)
-except IOError as e:
+except OSError as e:
     if str(e).startswith("decoder jpeg not available"):
         raise Exception(
             "FATAL: jpeg codec not supported. Install pillow correctly! "
@@ -32,7 +32,7 @@ except Exception:
 # check for PNG support.
 try:
     PIL.Image._getdecoder("rgb", "zip", None)
-except IOError as e:
+except OSError as e:
     if str(e).startswith("decoder zip not available"):
         raise Exception(
             "FATAL: zip codec not supported. Install pillow correctly! "
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 0fb4cd81f1..90364ebcf7 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -49,7 +49,7 @@ TEXT_CONTENT_TYPES = [
 def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
     try:
         # The type on postpath seems incorrect in Twisted 21.2.0.
-        postpath = request.postpath  # type: List[bytes]  # type: ignore
+        postpath: List[bytes] = request.postpath  # type: ignore
         assert postpath
 
         # This allows users to append e.g. /test.png to the URL. Useful for
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 21c43c340c..4f702f890c 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -78,16 +78,16 @@ class MediaRepository:
 
         Thumbnailer.set_limits(self.max_image_pixels)
 
-        self.primary_base_path = hs.config.media_store_path  # type: str
-        self.filepaths = MediaFilePaths(self.primary_base_path)  # type: MediaFilePaths
+        self.primary_base_path: str = hs.config.media_store_path
+        self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path)
 
         self.dynamic_thumbnails = hs.config.dynamic_thumbnails
         self.thumbnail_requirements = hs.config.thumbnail_requirements
 
         self.remote_media_linearizer = Linearizer(name="media_remote")
 
-        self.recently_accessed_remotes = set()  # type: Set[Tuple[str, str]]
-        self.recently_accessed_locals = set()  # type: Set[str]
+        self.recently_accessed_remotes: Set[Tuple[str, str]] = set()
+        self.recently_accessed_locals: Set[str] = set()
 
         self.federation_domain_whitelist = hs.config.federation_domain_whitelist
 
@@ -711,7 +711,7 @@ class MediaRepository:
 
         # We deduplicate the thumbnail sizes by ignoring the cropped versions if
         # they have the same dimensions as a scaled one.
-        thumbnails = {}  # type: Dict[Tuple[int, int, str], str]
+        thumbnails: Dict[Tuple[int, int, str], str] = {}
         for r_width, r_height, r_method, r_type in requirements:
             if r_method == "crop":
                 thumbnails.setdefault((r_width, r_height, r_type), r_method)
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index c7fd97c46c..56cdc1b4ed 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -191,7 +191,7 @@ class MediaStorage:
 
         for provider in self.storage_providers:
             for path in paths:
-                res = await provider.fetch(path, file_info)  # type: Any
+                res: Any = await provider.fetch(path, file_info)
                 if res:
                     logger.debug("Streaming %s from %s", path, provider)
                     return res
@@ -233,7 +233,7 @@ class MediaStorage:
             os.makedirs(dirname)
 
         for provider in self.storage_providers:
-            res = await provider.fetch(path, file_info)  # type: Any
+            res: Any = await provider.fetch(path, file_info)
             if res:
                 with res:
                     consumer = BackgroundFileConsumer(
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0adfb1a70f..8e7fead3a2 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -169,12 +169,12 @@ class PreviewUrlResource(DirectServeJsonResource):
 
         # memory cache mapping urls to an ObservableDeferred returning
         # JSON-encoded OG metadata
-        self._cache = ExpiringCache(
+        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
             cache_name="url_previews",
             clock=self.clock,
             # don't spider URLs more often than once an hour
             expiry_ms=ONE_HOUR,
-        )  # type: ExpiringCache[str, ObservableDeferred]
+        )
 
         if self._worker_run_media_background_jobs:
             self._cleaner_loop = self.clock.looping_call(
@@ -460,7 +460,7 @@ class PreviewUrlResource(DirectServeJsonResource):
         file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
 
         # If this URL can be accessed via oEmbed, use that instead.
-        url_to_download = url  # type: Optional[str]
+        url_to_download: Optional[str] = url
         oembed_url = self._get_oembed_url(url)
         if oembed_url:
             # The result might be a new URL to download, or it might be HTML content.
@@ -788,7 +788,7 @@ def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]:
     # "og:video:height" : "720",
     # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3",
 
-    og = {}  # type: Dict[str, Optional[str]]
+    og: Dict[str, Optional[str]] = {}
     for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
         if "content" in tag.attrib:
             # if we've got more than 50 tags, someone is taking the piss
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index 62dc4aae2d..146adca8f1 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -61,11 +61,11 @@ class UploadResource(DirectServeJsonResource):
                 errcode=Codes.TOO_LARGE,
             )
 
-        args = request.args  # type: Dict[bytes, List[bytes]]  # type: ignore
+        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
         upload_name_bytes = parse_bytes_from_args(args, "filename")
         if upload_name_bytes:
             try:
-                upload_name = upload_name_bytes.decode("utf8")  # type: Optional[str]
+                upload_name: Optional[str] = upload_name_bytes.decode("utf8")
             except UnicodeDecodeError:
                 raise SynapseError(
                     msg="Invalid UTF-8 filename parameter: %r" % (upload_name), code=400
@@ -89,7 +89,7 @@ class UploadResource(DirectServeJsonResource):
         # TODO(markjh): parse content-disposition
 
         try:
-            content = request.content  # type: IO  # type: ignore
+            content: IO = request.content  # type: ignore
             content_uri = await self.media_repo.create_content(
                 media_type, upload_name, content, content_length, requester.user
             )
diff --git a/synapse/rest/synapse/client/pick_username.py b/synapse/rest/synapse/client/pick_username.py
index 9b002cc15e..ab24ec0a8e 100644
--- a/synapse/rest/synapse/client/pick_username.py
+++ b/synapse/rest/synapse/client/pick_username.py
@@ -118,9 +118,9 @@ class AccountDetailsResource(DirectServeHtmlResource):
             use_display_name = parse_boolean(request, "use_display_name", default=False)
 
             try:
-                emails_to_use = [
+                emails_to_use: List[str] = [
                     val.decode("utf-8") for val in request.args.get(b"use_email", [])
-                ]  # type: List[str]
+                ]
             except ValueError:
                 raise SynapseError(400, "Query parameter use_email must be utf-8")
         except SynapseError as e:
diff --git a/synapse/server.py b/synapse/server.py
index 2c27d2a7e8..095dba9ad0 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -247,15 +247,15 @@ class HomeServer(metaclass=abc.ABCMeta):
         # the key we use to sign events and requests
         self.signing_key = config.key.signing_key[0]
         self.config = config
-        self._listening_services = []  # type: List[twisted.internet.tcp.Port]
-        self.start_time = None  # type: Optional[int]
+        self._listening_services: List[twisted.internet.tcp.Port] = []
+        self.start_time: Optional[int] = None
 
         self._instance_id = random_string(5)
         self._instance_name = config.worker.instance_name
 
         self.version_string = version_string
 
-        self.datastores = None  # type: Optional[Databases]
+        self.datastores: Optional[Databases] = None
 
         self._module_web_resources: Dict[str, IResource] = {}
         self._module_web_resources_consumed = False
diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py
index e65f6f88fe..4e0f814035 100644
--- a/synapse/server_notices/consent_server_notices.py
+++ b/synapse/server_notices/consent_server_notices.py
@@ -34,7 +34,7 @@ class ConsentServerNotices:
         self._server_notices_manager = hs.get_server_notices_manager()
         self._store = hs.get_datastore()
 
-        self._users_in_progress = set()  # type: Set[str]
+        self._users_in_progress: Set[str] = set()
 
         self._current_consent_version = hs.config.user_consent_version
         self._server_notice_content = hs.config.user_consent_server_notice_content
diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py
index e4b0bc5c72..073b0d754f 100644
--- a/synapse/server_notices/resource_limits_server_notices.py
+++ b/synapse/server_notices/resource_limits_server_notices.py
@@ -205,7 +205,7 @@ class ResourceLimitsServerNotices:
             # The user has yet to join the server notices room
             pass
 
-        referenced_events = []  # type: List[str]
+        referenced_events: List[str] = []
         if pinned_state_event is not None:
             referenced_events = list(pinned_state_event.content.get("pinned", []))
 
diff --git a/synapse/server_notices/server_notices_sender.py b/synapse/server_notices/server_notices_sender.py
index c875b15b32..cdf0973d05 100644
--- a/synapse/server_notices/server_notices_sender.py
+++ b/synapse/server_notices/server_notices_sender.py
@@ -32,10 +32,12 @@ class ServerNoticesSender(WorkerServerNoticesSender):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
-        self._server_notices = (
+        self._server_notices: Iterable[
+            Union[ConsentServerNotices, ResourceLimitsServerNotices]
+        ] = (
             ConsentServerNotices(hs),
             ResourceLimitsServerNotices(hs),
-        )  # type: Iterable[Union[ConsentServerNotices, ResourceLimitsServerNotices]]
+        )
 
     async def on_user_syncing(self, user_id: str) -> None:
         """Called when the user performs a sync operation.
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index a1770f620e..6223daf522 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -309,9 +309,9 @@ class StateHandler:
 
         if old_state:
             # if we're given the state before the event, then we use that
-            state_ids_before_event = {
+            state_ids_before_event: StateMap[str] = {
                 (s.type, s.state_key): s.event_id for s in old_state
-            }  # type: StateMap[str]
+            }
             state_group_before_event = None
             state_group_before_event_prev_group = None
             deltas_to_state_group_before_event = None
@@ -513,23 +513,25 @@ class StateResolutionHandler:
         self.resolve_linearizer = Linearizer(name="state_resolve_lock")
 
         # dict of set of event_ids -> _StateCacheEntry.
-        self._state_cache = ExpiringCache(
+        self._state_cache: ExpiringCache[
+            FrozenSet[int], _StateCacheEntry
+        ] = ExpiringCache(
             cache_name="state_cache",
             clock=self.clock,
             max_len=100000,
             expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000,
             iterable=True,
             reset_expiry_on_get=True,
-        )  # type: ExpiringCache[FrozenSet[int], _StateCacheEntry]
+        )
 
         #
         # stuff for tracking time spent on state-res by room
         #
 
         # tracks the amount of work done on state res per room
-        self._state_res_metrics = defaultdict(
+        self._state_res_metrics: DefaultDict[str, _StateResMetrics] = defaultdict(
             _StateResMetrics
-        )  # type: DefaultDict[str, _StateResMetrics]
+        )
 
         self.clock.looping_call(self._report_metrics, 120 * 1000)
 
@@ -700,9 +702,9 @@ class StateResolutionHandler:
         items = self._state_res_metrics.items()
 
         # log the N biggest rooms
-        biggest = heapq.nlargest(
+        biggest: List[Tuple[str, _StateResMetrics]] = heapq.nlargest(
             n_to_log, items, key=lambda i: extract_key(i[1])
-        )  # type: List[Tuple[str, _StateResMetrics]]
+        )
         metrics_logger.debug(
             "%i biggest rooms for state-res by %s: %s",
             len(biggest),
@@ -754,7 +756,7 @@ def _make_state_cache_entry(
 
     # failing that, look for the closest match.
     prev_group = None
-    delta_ids = None  # type: Optional[StateMap[str]]
+    delta_ids: Optional[StateMap[str]] = None
 
     for old_group, old_state in state_groups_ids.items():
         n_delta_ids = {k: v for k, v in new_state.items() if old_state.get(k) != v}
diff --git a/synapse/state/v1.py b/synapse/state/v1.py
index 318e998813..267193cedf 100644
--- a/synapse/state/v1.py
+++ b/synapse/state/v1.py
@@ -159,7 +159,7 @@ def _seperate(
     """
     state_set_iterator = iter(state_sets)
     unconflicted_state = dict(next(state_set_iterator))
-    conflicted_state = {}  # type: MutableStateMap[Set[str]]
+    conflicted_state: MutableStateMap[Set[str]] = {}
 
     for state_set in state_set_iterator:
         for key, value in state_set.items():
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 008644cd98..e66e6571c8 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -276,7 +276,7 @@ async def _get_auth_chain_difference(
     # event IDs if they appear in the `event_map`. This is the intersection of
     # the event's auth chain with the events in the `event_map` *plus* their
     # auth event IDs.
-    events_to_auth_chain = {}  # type: Dict[str, Set[str]]
+    events_to_auth_chain: Dict[str, Set[str]] = {}
     for event in event_map.values():
         chain = {event.event_id}
         events_to_auth_chain[event.event_id] = chain
@@ -301,17 +301,17 @@ async def _get_auth_chain_difference(
         # ((type, state_key)->event_id) mappings; and (b) we have stripped out
         # unpersisted events and replaced them with the persisted events in
         # their auth chain.
-        state_sets_ids = []  # type: List[Set[str]]
+        state_sets_ids: List[Set[str]] = []
 
         # For each state set, the unpersisted event IDs reachable (by their auth
         # chain) from the events in that set.
-        unpersisted_set_ids = []  # type: List[Set[str]]
+        unpersisted_set_ids: List[Set[str]] = []
 
         for state_set in state_sets:
-            set_ids = set()  # type: Set[str]
+            set_ids: Set[str] = set()
             state_sets_ids.append(set_ids)
 
-            unpersisted_ids = set()  # type: Set[str]
+            unpersisted_ids: Set[str] = set()
             unpersisted_set_ids.append(unpersisted_ids)
 
             for event_id in state_set.values():
@@ -334,7 +334,7 @@ async def _get_auth_chain_difference(
         union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
         intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
 
-        difference_from_event_map = union - intersection  # type: Collection[str]
+        difference_from_event_map: Collection[str] = union - intersection
     else:
         difference_from_event_map = ()
         state_sets_ids = [set(state_set.values()) for state_set in state_sets]
@@ -458,7 +458,7 @@ async def _reverse_topological_power_sort(
         The sorted list
     """
 
-    graph = {}  # type: Dict[str, Set[str]]
+    graph: Dict[str, Set[str]] = {}
     for idx, event_id in enumerate(event_ids, start=1):
         await _add_event_and_auth_chain_to_graph(
             graph, room_id, event_id, event_map, state_res_store, auth_diff
@@ -657,7 +657,7 @@ async def _get_mainline_depth_for_event(
     """
 
     room_id = event.room_id
-    tmp_event = event  # type: Optional[EventBase]
+    tmp_event: Optional[EventBase] = event
 
     # We do an iterative search, replacing `event` with the power level event in its
     # auth events (if any)
@@ -767,7 +767,7 @@ def lexicographical_topological_sort(
     # outgoing edges, c.f.
     # https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
     outdegree_map = graph
-    reverse_graph = {}  # type: Dict[str, Set[str]]
+    reverse_graph: Dict[str, Set[str]] = {}
 
     # Lists of nodes with zero out degree. Is actually a tuple of
     # `(key(node), node)` so that sorting does the right thing
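For context on the outdegree_map / reverse_graph bookkeeping above: lexicographical_topological_sort is Kahn's algorithm run over outgoing edges, with a heap so ties are broken deterministically. A standalone sketch of the same idea (simplified, not Synapse's exact implementation; it assumes every node appears as a key of the graph and sorts by the node name itself rather than a caller-supplied key):

    import heapq
    from typing import Dict, Iterator, Set

    def lexicographical_topological_sort(graph: Dict[str, Set[str]]) -> Iterator[str]:
        # graph[node] is the set of nodes that `node` points at (outgoing edges).
        reverse: Dict[str, Set[str]] = {node: set() for node in graph}
        outdegree = {node: len(edges) for node, edges in graph.items()}
        for node, edges in graph.items():
            for dest in edges:
                reverse[dest].add(node)

        # Start from the nodes with no outgoing edges, smallest name first.
        zero = [node for node, degree in outdegree.items() if degree == 0]
        heapq.heapify(zero)
        while zero:
            node = heapq.heappop(zero)
            yield node
            for parent in reverse[node]:
                outdegree[parent] -= 1
                if outdegree[parent] == 0:
                    heapq.heappush(zero, parent)

    print(list(lexicographical_topological_sort({"a": {"b"}, "b": set(), "c": {"b"}})))
    # -> ['b', 'a', 'c']: 'b' is emitted before the nodes that point at it.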
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 142787fdfd..82b31d24f1 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -92,14 +92,12 @@ class BackgroundUpdater:
         self.db_pool = database
 
         # if a background update is currently running, its name.
-        self._current_background_update = None  # type: Optional[str]
-
-        self._background_update_performance = (
-            {}
-        )  # type: Dict[str, BackgroundUpdatePerformance]
-        self._background_update_handlers = (
-            {}
-        )  # type: Dict[str, Callable[[JsonDict, int], Awaitable[int]]]
+        self._current_background_update: Optional[str] = None
+
+        self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
+        self._background_update_handlers: Dict[
+            str, Callable[[JsonDict, int], Awaitable[int]]
+        ] = {}
         self._all_done = False
 
     def start_doing_background_updates(self) -> None:
@@ -411,7 +409,7 @@ class BackgroundUpdater:
             c.execute(sql)
 
         if isinstance(self.db_pool.engine, engines.PostgresEngine):
-            runner = create_index_psql  # type: Optional[Callable[[Connection], None]]
+            runner: Optional[Callable[[Connection], None]] = create_index_psql
         elif psql_only:
             runner = None
         else:
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index d470cdacde..ccf9ac51ef 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -111,7 +111,7 @@ def make_conn(
     db_config: DatabaseConnectionConfig,
     engine: BaseDatabaseEngine,
     default_txn_name: str,
-) -> Connection:
+) -> "LoggingDatabaseConnection":
     """Make a new connection to the database and return it.
 
     Returns:
@@ -670,8 +670,8 @@ class DatabasePool:
         Returns:
             The result of func
         """
-        after_callbacks = []  # type: List[_CallbackListEntry]
-        exception_callbacks = []  # type: List[_CallbackListEntry]
+        after_callbacks: List[_CallbackListEntry] = []
+        exception_callbacks: List[_CallbackListEntry] = []
 
         if not current_context():
             logger.warning("Starting db txn '%s' from sentinel context", desc)
@@ -907,7 +907,7 @@ class DatabasePool:
         # The sort is to ensure that we don't rely on dictionary iteration
         # order.
         keys, vals = zip(
-            *[zip(*(sorted(i.items(), key=lambda kv: kv[0]))) for i in values if i]
+            *(zip(*(sorted(i.items(), key=lambda kv: kv[0]))) for i in values if i)
         )
 
         for k in keys:
@@ -1090,7 +1090,7 @@ class DatabasePool:
                 return False
 
         # We didn't find any existing rows, so insert a new one
-        allvalues = {}  # type: Dict[str, Any]
+        allvalues: Dict[str, Any] = {}
         allvalues.update(keyvalues)
         allvalues.update(values)
         allvalues.update(insertion_values)
@@ -1121,7 +1121,7 @@ class DatabasePool:
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
         """
-        allvalues = {}  # type: Dict[str, Any]
+        allvalues: Dict[str, Any] = {}
         allvalues.update(keyvalues)
         allvalues.update(insertion_values or {})
 
@@ -1257,7 +1257,7 @@ class DatabasePool:
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
         """
-        allnames = []  # type: List[str]
+        allnames: List[str] = []
         allnames.extend(key_names)
         allnames.extend(value_names)
 
@@ -1566,7 +1566,7 @@ class DatabasePool:
         """
         keyvalues = keyvalues or {}
 
-        results = []  # type: List[Dict[str, Any]]
+        results: List[Dict[str, Any]] = []
 
         if not iterable:
             return results
@@ -1978,7 +1978,7 @@ class DatabasePool:
             raise ValueError("order_direction must be one of 'ASC' or 'DESC'.")
 
         where_clause = "WHERE " if filters or keyvalues or exclude_keyvalues else ""
-        arg_list = []  # type: List[Any]
+        arg_list: List[Any] = []
         if filters:
             where_clause += " AND ".join("%s LIKE ?" % (k,) for k in filters)
             arg_list += list(filters.values())
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 9f182c2a89..e2d1b758bd 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -48,9 +48,7 @@ def _make_exclusive_regex(
     ]
     if exclusive_user_regexes:
         exclusive_user_regex = "|".join("(" + r + ")" for r in exclusive_user_regexes)
-        exclusive_user_pattern = re.compile(
-            exclusive_user_regex
-        )  # type: Optional[Pattern]
+        exclusive_user_pattern: Optional[Pattern] = re.compile(exclusive_user_regex)
     else:
         # We handle this case specially otherwise the constructed regex
         # will always match
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 50e7ddd735..c55508867d 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -203,9 +203,7 @@ class DeviceInboxWorkerStore(SQLBaseStore):
             "delete_messages_for_device", delete_messages_for_device_txn
         )
 
-        log_kv(
-            {"message": "deleted {} messages for device".format(count), "count": count}
-        )
+        log_kv({"message": f"deleted {count} messages for device", "count": count})
 
         # Update the cache, ensuring that we only ever increase the value
         last_deleted_stream_id = self._last_device_delete_cache.get(
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 0e3dd4e9ca..78ae68ec68 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -247,7 +247,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore):
 
         txn.execute(sql, query_params)
 
-        result = {}  # type: Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]
+        result: Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]] = {}
         for (user_id, device_id, display_name, key_json) in txn:
             if include_deleted_devices:
                 deleted_devices.remove((user_id, device_id))
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index f23f8c6ecf..d39368c20e 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -16,6 +16,8 @@ import logging
 from queue import Empty, PriorityQueue
 from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple
 
+from prometheus_client import Gauge
+
 from synapse.api.constants import MAX_DEPTH
 from synapse.api.errors import StoreError
 from synapse.api.room_versions import RoomVersion
@@ -32,6 +34,16 @@ from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.iterutils import batch_iter
 
+oldest_pdu_in_federation_staging = Gauge(
+    "synapse_federation_server_oldest_inbound_pdu_in_staging",
+    "The age in seconds since we received the oldest pdu in the federation staging area",
+)
+
+number_pdus_in_federation_queue = Gauge(
+    "synapse_federation_server_number_inbound_pdu_in_staging",
+    "The total number of events in the inbound federation staging",
+)
+
 logger = logging.getLogger(__name__)
 
 
@@ -50,9 +62,11 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             )
 
         # Cache of event ID to list of auth event IDs and their depths.
-        self._event_auth_cache = LruCache(
+        self._event_auth_cache: LruCache[str, List[Tuple[str, int]]] = LruCache(
             500000, "_event_auth_cache", size_callback=len
-        )  # type: LruCache[str, List[Tuple[str, int]]]
+        )
+
+        self._clock.looping_call(self._get_stats_for_federation_staging, 30 * 1000)
 
     async def get_auth_chain(
         self, room_id: str, event_ids: Collection[str], include_given: bool = False
@@ -123,10 +137,10 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         initial_events = set(event_ids)
 
         # All the events that we've found that are reachable from the events.
-        seen_events = set()  # type: Set[str]
+        seen_events: Set[str] = set()
 
         # A map from chain ID to max sequence number of the given events.
-        event_chains = {}  # type: Dict[int, int]
+        event_chains: Dict[int, int] = {}
 
         sql = """
             SELECT event_id, chain_id, sequence_number
@@ -168,7 +182,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         """
 
         # A map from chain ID to max sequence number *reachable* from any event ID.
-        chains = {}  # type: Dict[int, int]
+        chains: Dict[int, int] = {}
 
         # Add all linked chains reachable from initial set of chains.
         for batch in batch_iter(event_chains, 1000):
@@ -339,14 +353,14 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         initial_events = set(state_sets[0]).union(*state_sets[1:])
 
         # Map from event_id -> (chain ID, seq no)
-        chain_info = {}  # type: Dict[str, Tuple[int, int]]
+        chain_info: Dict[str, Tuple[int, int]] = {}
 
         # Map from chain ID -> seq no -> event Id
-        chain_to_event = {}  # type: Dict[int, Dict[int, str]]
+        chain_to_event: Dict[int, Dict[int, str]] = {}
 
         # All the chains that we've found that are reachable from the state
         # sets.
-        seen_chains = set()  # type: Set[int]
+        seen_chains: Set[int] = set()
 
         sql = """
             SELECT event_id, chain_id, sequence_number
@@ -378,9 +392,9 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
 
         # Corresponds to `state_sets`, except as a map from chain ID to max
         # sequence number reachable from the state set.
-        set_to_chain = []  # type: List[Dict[int, int]]
+        set_to_chain: List[Dict[int, int]] = []
         for state_set in state_sets:
-            chains = {}  # type: Dict[int, int]
+            chains: Dict[int, int] = {}
             set_to_chain.append(chains)
 
             for event_id in state_set:
@@ -432,7 +446,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
 
         # Mapping from chain ID to the range of sequence numbers that should be
         # pulled from the database.
-        chain_to_gap = {}  # type: Dict[int, Tuple[int, int]]
+        chain_to_gap: Dict[int, Tuple[int, int]] = {}
 
         for chain_id in seen_chains:
             min_seq_no = min(chains.get(chain_id, 0) for chains in set_to_chain)
@@ -541,7 +555,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         }
 
         # The sorted list of events whose auth chains we should walk.
-        search = []  # type: List[Tuple[int, str]]
+        search: List[Tuple[int, str]] = []
 
         # We need to get the depth of the initial events for sorting purposes.
         sql = """
@@ -564,7 +578,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         search.sort()
 
         # Map from event to its auth events
-        event_to_auth_events = {}  # type: Dict[str, Set[str]]
+        event_to_auth_events: Dict[str, Set[str]] = {}
 
         base_sql = """
             SELECT a.event_id, auth_id, depth
@@ -1075,16 +1089,62 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         self,
         origin: str,
         event_id: str,
-    ) -> None:
-        """Remove the given event from the staging area"""
-        await self.db_pool.simple_delete(
-            table="federation_inbound_events_staging",
-            keyvalues={
-                "origin": origin,
-                "event_id": event_id,
-            },
-            desc="remove_received_event_from_staging",
-        )
+    ) -> Optional[int]:
+        """Remove the given event from the staging area.
+
+        Returns:
+            The received_ts of the row that was deleted, if any.
+        """
+        if self.db_pool.engine.supports_returning:
+
+            def _remove_received_event_from_staging_txn(txn):
+                sql = """
+                    DELETE FROM federation_inbound_events_staging
+                    WHERE origin = ? AND event_id = ?
+                    RETURNING received_ts
+                """
+
+                txn.execute(sql, (origin, event_id))
+                return txn.fetchone()
+
+            row = await self.db_pool.runInteraction(
+                "remove_received_event_from_staging",
+                _remove_received_event_from_staging_txn,
+                db_autocommit=True,
+            )
+            if row is None:
+                return None
+
+            return row[0]
+
+        else:
+
+            def _remove_received_event_from_staging_txn(txn):
+                received_ts = self.db_pool.simple_select_one_onecol_txn(
+                    txn,
+                    table="federation_inbound_events_staging",
+                    keyvalues={
+                        "origin": origin,
+                        "event_id": event_id,
+                    },
+                    retcol="received_ts",
+                    allow_none=True,
+                )
+                self.db_pool.simple_delete_txn(
+                    txn,
+                    table="federation_inbound_events_staging",
+                    keyvalues={
+                        "origin": origin,
+                        "event_id": event_id,
+                    },
+                )
+
+                return received_ts
+
+            return await self.db_pool.runInteraction(
+                "remove_received_event_from_staging",
+                _remove_received_event_from_staging_txn,
+            )
 
     async def get_next_staged_event_id_for_room(
         self,
@@ -1147,6 +1207,42 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
 
         return origin, event
 
+    async def get_all_rooms_with_staged_incoming_events(self) -> List[str]:
+        """Get the room IDs of all events currently staged."""
+        return await self.db_pool.simple_select_onecol(
+            table="federation_inbound_events_staging",
+            keyvalues={},
+            retcol="DISTINCT room_id",
+            desc="get_all_rooms_with_staged_incoming_events",
+        )
+
+    @wrap_as_background_process("_get_stats_for_federation_staging")
+    async def _get_stats_for_federation_staging(self):
+        """Update the prometheus metrics for the inbound federation staging area."""
+
+        def _get_stats_for_federation_staging_txn(txn):
+            txn.execute(
+                "SELECT coalesce(count(*), 0) FROM federation_inbound_events_staging"
+            )
+            (count,) = txn.fetchone()
+
+            txn.execute(
+                "SELECT coalesce(min(received_ts), 0) FROM federation_inbound_events_staging"
+            )
+
+            (received_ts,) = txn.fetchone()
+
+            age = self._clock.time_msec() - received_ts
+
+            return count, age
+
+        count, age = await self.db_pool.runInteraction(
+            "_get_stats_for_federation_staging", _get_stats_for_federation_staging_txn
+        )
+
+        number_pdus_in_federation_queue.set(count)
+        oldest_pdu_in_federation_staging.set(age)
+
 
 class EventFederationStore(EventFederationWorkerStore):
     """Responsible for storing and serving up the various graphs associated
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index d1237c65cc..55caa6bbe7 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -759,7 +759,7 @@ class EventPushActionsWorkerStore(SQLBaseStore):
         # object because we might not have the same number of rows in each of them. To do
         # this, we use a dict indexed on the user ID and room ID to make it easier to
         # populate.
-        summaries = {}  # type: Dict[Tuple[str, str], _EventPushSummary]
+        summaries: Dict[Tuple[str, str], _EventPushSummary] = {}
         for row in txn:
             summaries[(row[0], row[1])] = _EventPushSummary(
                 unread_count=row[2],
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 897fa06639..a396a201d4 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -109,10 +109,8 @@ class PersistEventsStore:
 
         # Ideally we'd move these ID gens here, unfortunately some other ID
         # generators are chained off them so doing so is a bit of a PITA.
-        self._backfill_id_gen = (
-            self.store._backfill_id_gen
-        )  # type: MultiWriterIdGenerator
-        self._stream_id_gen = self.store._stream_id_gen  # type: MultiWriterIdGenerator
+        self._backfill_id_gen: MultiWriterIdGenerator = self.store._backfill_id_gen
+        self._stream_id_gen: MultiWriterIdGenerator = self.store._stream_id_gen
 
         # This should only exist on instances that are configured to write
         assert (
@@ -221,7 +219,7 @@ class PersistEventsStore:
         Returns:
             Filtered event ids
         """
-        results = []  # type: List[str]
+        results: List[str] = []
 
         def _get_events_which_are_prevs_txn(txn, batch):
             sql = """
@@ -508,7 +506,7 @@ class PersistEventsStore:
         """
 
         # Map from event ID to chain ID/sequence number.
-        chain_map = {}  # type: Dict[str, Tuple[int, int]]
+        chain_map: Dict[str, Tuple[int, int]] = {}
 
         # Set of event IDs to calculate chain ID/seq numbers for.
         events_to_calc_chain_id_for = set(event_to_room_id)
@@ -817,8 +815,8 @@ class PersistEventsStore:
         #      new chain if the sequence number has already been allocated.
         #
 
-        existing_chains = set()  # type: Set[int]
-        tree = []  # type: List[Tuple[str, Optional[str]]]
+        existing_chains: Set[int] = set()
+        tree: List[Tuple[str, Optional[str]]] = []
 
         # We need to do this in a topologically sorted order as we want to
         # generate chain IDs/sequence numbers of an event's auth events before
@@ -848,7 +846,7 @@ class PersistEventsStore:
         )
         txn.execute(sql % (clause,), args)
 
-        chain_to_max_seq_no = {row[0]: row[1] for row in txn}  # type: Dict[Any, int]
+        chain_to_max_seq_no: Dict[Any, int] = {row[0]: row[1] for row in txn}
 
         # Allocate the new events chain ID/sequence numbers.
         #
@@ -858,8 +856,8 @@ class PersistEventsStore:
         # number of new chain IDs in one call, replacing all temporary
         # objects with real allocated chain IDs.
 
-        unallocated_chain_ids = set()  # type: Set[object]
-        new_chain_tuples = {}  # type: Dict[str, Tuple[Any, int]]
+        unallocated_chain_ids: Set[object] = set()
+        new_chain_tuples: Dict[str, Tuple[Any, int]] = {}
         for event_id, auth_event_id in tree:
             # If we reference an auth_event_id we fetch the allocated chain ID,
             # either from the existing `chain_map` or the newly generated
@@ -870,7 +868,7 @@ class PersistEventsStore:
                 if not existing_chain_id:
                     existing_chain_id = chain_map[auth_event_id]
 
-            new_chain_tuple = None  # type: Optional[Tuple[Any, int]]
+            new_chain_tuple: Optional[Tuple[Any, int]] = None
             if existing_chain_id:
                 # We found a chain ID/sequence number candidate, check it's
                 # not already taken.
@@ -897,9 +895,9 @@ class PersistEventsStore:
         )
 
         # Map from potentially temporary chain ID to real chain ID
-        chain_id_to_allocated_map = dict(
+        chain_id_to_allocated_map: Dict[Any, int] = dict(
             zip(unallocated_chain_ids, newly_allocated_chain_ids)
-        )  # type: Dict[Any, int]
+        )
         chain_id_to_allocated_map.update((c, c) for c in existing_chains)
 
         return {
@@ -1175,9 +1173,9 @@ class PersistEventsStore:
         Returns:
             list[(EventBase, EventContext)]: filtered list
         """
-        new_events_and_contexts = (
-            OrderedDict()
-        )  # type: OrderedDict[str, Tuple[EventBase, EventContext]]
+        new_events_and_contexts: OrderedDict[
+            str, Tuple[EventBase, EventContext]
+        ] = OrderedDict()
         for event, context in events_and_contexts:
             prev_event_context = new_events_and_contexts.get(event.event_id)
             if prev_event_context:
@@ -1205,7 +1203,7 @@ class PersistEventsStore:
                 we are persisting
             backfilled (bool): True if the events were backfilled
         """
-        depth_updates = {}  # type: Dict[str, int]
+        depth_updates: Dict[str, int] = {}
         for event, context in events_and_contexts:
             # Remove any existing cache entries for the event_ids
             txn.call_after(self.store._invalidate_get_event_cache, event.event_id)
@@ -1580,11 +1578,11 @@ class PersistEventsStore:
         # invalidate the cache for the redacted event
         txn.call_after(self.store._invalidate_get_event_cache, event.redacts)
 
-        self.db_pool.simple_insert_txn(
+        self.db_pool.simple_upsert_txn(
             txn,
             table="redactions",
+            keyvalues={"event_id": event.event_id},
             values={
-                "event_id": event.event_id,
                 "redacts": event.redacts,
                 "received_ts": self._clock.time_msec(),
             },
@@ -1885,7 +1883,7 @@ class PersistEventsStore:
                 ),
             )
 
-            room_to_event_ids = {}  # type: Dict[str, List[str]]
+            room_to_event_ids: Dict[str, List[str]] = {}
             for e, _ in events_and_contexts:
                 room_to_event_ids.setdefault(e.room_id, []).append(e.event_id)
 
@@ -2012,10 +2010,6 @@ class PersistEventsStore:
 
         Forward extremities are handled when we first start persisting the events.
         """
-        events_by_room = {}  # type: Dict[str, List[EventBase]]
-        for ev in events:
-            events_by_room.setdefault(ev.room_id, []).append(ev)
-
         query = (
             "INSERT INTO event_backward_extremities (event_id, room_id)"
             " SELECT ?, ? WHERE NOT EXISTS ("
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index da3a7df27b..6fcb2b8353 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -29,13 +29,18 @@ from synapse.types import JsonDict
 logger = logging.getLogger(__name__)
 
 
-_REPLACE_STREAM_ORDRING_SQL_COMMANDS = (
+_REPLACE_STREAM_ORDERING_SQL_COMMANDS = (
     # there should be no leftover rows without a stream_ordering2, but just in case...
     "UPDATE events SET stream_ordering2 = stream_ordering WHERE stream_ordering2 IS NULL",
-    # finally, we can drop the rule and switch the columns
+    # now we can drop the rule and switch the columns
     "DROP RULE populate_stream_ordering2 ON events",
     "ALTER TABLE events DROP COLUMN stream_ordering",
     "ALTER TABLE events RENAME COLUMN stream_ordering2 TO stream_ordering",
+    # ... and finally, rename the indexes into place for consistency with sqlite
+    "ALTER INDEX event_contains_url_index2 RENAME TO event_contains_url_index",
+    "ALTER INDEX events_order_room2 RENAME TO events_order_room",
+    "ALTER INDEX events_room_stream2 RENAME TO events_room_stream",
+    "ALTER INDEX events_ts2 RENAME TO events_ts",
 )
 
 
@@ -45,6 +50,10 @@ class _BackgroundUpdates:
     DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities"
     POPULATE_STREAM_ORDERING2 = "populate_stream_ordering2"
     INDEX_STREAM_ORDERING2 = "index_stream_ordering2"
+    INDEX_STREAM_ORDERING2_CONTAINS_URL = "index_stream_ordering2_contains_url"
+    INDEX_STREAM_ORDERING2_ROOM_ORDER = "index_stream_ordering2_room_order"
+    INDEX_STREAM_ORDERING2_ROOM_STREAM = "index_stream_ordering2_room_stream"
+    INDEX_STREAM_ORDERING2_TS = "index_stream_ordering2_ts"
     REPLACE_STREAM_ORDERING_COLUMN = "replace_stream_ordering_column"
 
 
@@ -155,12 +164,16 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             self._purged_chain_cover_index,
         )
 
+        ################################################################################
+
         # bg updates for replacing stream_ordering with a BIGINT
         # (these only run on postgres.)
+
         self.db_pool.updates.register_background_update_handler(
             _BackgroundUpdates.POPULATE_STREAM_ORDERING2,
             self._background_populate_stream_ordering2,
         )
+        # CREATE UNIQUE INDEX events_stream_ordering ON events(stream_ordering2);
         self.db_pool.updates.register_background_index_update(
             _BackgroundUpdates.INDEX_STREAM_ORDERING2,
             index_name="events_stream_ordering",
@@ -168,11 +181,42 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             columns=["stream_ordering2"],
             unique=True,
         )
+        # CREATE INDEX event_contains_url_index ON events(room_id, topological_ordering, stream_ordering) WHERE contains_url = true AND outlier = false;
+        self.db_pool.updates.register_background_index_update(
+            _BackgroundUpdates.INDEX_STREAM_ORDERING2_CONTAINS_URL,
+            index_name="event_contains_url_index2",
+            table="events",
+            columns=["room_id", "topological_ordering", "stream_ordering2"],
+            where_clause="contains_url = true AND outlier = false",
+        )
+        # CREATE INDEX events_order_room ON events(room_id, topological_ordering, stream_ordering);
+        self.db_pool.updates.register_background_index_update(
+            _BackgroundUpdates.INDEX_STREAM_ORDERING2_ROOM_ORDER,
+            index_name="events_order_room2",
+            table="events",
+            columns=["room_id", "topological_ordering", "stream_ordering2"],
+        )
+        # CREATE INDEX events_room_stream ON events(room_id, stream_ordering);
+        self.db_pool.updates.register_background_index_update(
+            _BackgroundUpdates.INDEX_STREAM_ORDERING2_ROOM_STREAM,
+            index_name="events_room_stream2",
+            table="events",
+            columns=["room_id", "stream_ordering2"],
+        )
+        # CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering);
+        self.db_pool.updates.register_background_index_update(
+            _BackgroundUpdates.INDEX_STREAM_ORDERING2_TS,
+            index_name="events_ts2",
+            table="events",
+            columns=["origin_server_ts", "stream_ordering2"],
+        )
         self.db_pool.updates.register_background_update_handler(
             _BackgroundUpdates.REPLACE_STREAM_ORDERING_COLUMN,
             self._background_replace_stream_ordering_column,
         )
 
+        ################################################################################
+
     async def _background_reindex_fields_sender(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
         max_stream_id = progress["max_stream_id_exclusive"]
@@ -916,9 +960,9 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
         event_to_types = {row[0]: (row[1], row[2]) for row in rows}
 
         # Calculate the new last position we've processed up to.
-        new_last_depth = rows[-1][3] if rows else last_depth  # type: int
-        new_last_stream = rows[-1][4] if rows else last_stream  # type: int
-        new_last_room_id = rows[-1][5] if rows else ""  # type: str
+        new_last_depth: int = rows[-1][3] if rows else last_depth
+        new_last_stream: int = rows[-1][4] if rows else last_stream
+        new_last_room_id: str = rows[-1][5] if rows else ""
 
         # Map from room_id to last depth/stream_ordering processed for the room,
         # excluding the last room (which we're likely still processing). We also
@@ -945,7 +989,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             retcols=("event_id", "auth_id"),
         )
 
-        event_to_auth_chain = {}  # type: Dict[str, List[str]]
+        event_to_auth_chain: Dict[str, List[str]] = {}
         for row in auth_events:
             event_to_auth_chain.setdefault(row["event_id"], []).append(row["auth_id"])
 
@@ -1098,10 +1142,20 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
         """Drop the old 'stream_ordering' column and rename 'stream_ordering2' into its place."""
 
         def process(txn: Cursor) -> None:
-            for sql in _REPLACE_STREAM_ORDRING_SQL_COMMANDS:
+            for sql in _REPLACE_STREAM_ORDERING_SQL_COMMANDS:
                 logger.info("completing stream_ordering migration: %s", sql)
                 txn.execute(sql)
 
+        # ANALYZE the new column to build stats on it, to encourage PostgreSQL to use the
+        # indexes on it.
+        # We need to pass `execute` a dummy function to handle the txn's result, otherwise
+        # it tries to call fetchall() on it and fails because there's no result to fetch.
+        await self.db_pool.execute(
+            "background_analyze_new_stream_ordering_column",
+            lambda txn: None,
+            "ANALYZE events(stream_ordering2)",
+        )
+
         await self.db_pool.runInteraction(
             "_background_replace_stream_ordering_column", process
         )
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 86fd79f3a6..5ff29530bb 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1468,10 +1468,10 @@ class EventsWorkerStore(SQLBaseStore):
         # we need to make sure that, for every stream id in the results, we get *all*
         # the rows with that stream id.
 
-        rows = await self.db_pool.runInteraction(
+        rows: List[Tuple] = await self.db_pool.runInteraction(
             "get_all_updated_current_state_deltas",
             get_all_updated_current_state_deltas_txn,
-        )  # type: List[Tuple]
+        )
 
         # if we've got fewer rows than the limit, we're good
         if len(rows) < target_row_count:
@@ -1572,7 +1572,7 @@ class EventsWorkerStore(SQLBaseStore):
         """
 
         mapping = {}
-        txn_id_to_event = {}  # type: Dict[Tuple[str, int, str], str]
+        txn_id_to_event: Dict[Tuple[str, int, str], str] = {}
 
         for event in events:
             token_id = getattr(event.internal_metadata, "token_id", None)
diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py
index 66ad363bfb..e70d3649ff 100644
--- a/synapse/storage/databases/main/group_server.py
+++ b/synapse/storage/databases/main/group_server.py
@@ -27,8 +27,11 @@ from synapse.util import json_encoder
 _DEFAULT_CATEGORY_ID = ""
 _DEFAULT_ROLE_ID = ""
 
+
 # A room in a group.
-_RoomInGroup = TypedDict("_RoomInGroup", {"room_id": str, "is_public": bool})
+class _RoomInGroup(TypedDict):
+    room_id: str
+    is_public: bool
 
 
 class GroupServerWorkerStore(SQLBaseStore):
@@ -92,6 +95,7 @@ class GroupServerWorkerStore(SQLBaseStore):
               "is_public": False                    # Whether this is a public room or not
             }
         """
+
         # TODO: Pagination
 
         def _get_rooms_in_group_txn(txn):
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index e76188328c..774861074c 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -310,14 +310,25 @@ class Lock:
         _excinst: Optional[BaseException],
         _exctb: Optional[TracebackType],
     ) -> bool:
+        await self.release()
+
+        return False
+
+    async def release(self) -> None:
+        """Release the lock.
+
+        This is automatically called when using the lock as a context manager.
+        """
+
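+        # Releasing is idempotent: if the lock has already been dropped there is
+        # nothing more to do.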
+        if self._dropped:
+            return
+
         if self._looping_call.running:
             self._looping_call.stop()
 
         await self._store._drop_lock(self._lock_name, self._lock_key, self._token)
         self._dropped = True
 
-        return False
-
     def __del__(self) -> None:
         if not self._dropped:
             # We should not be dropped without the lock being released (unless
diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py
index c3f551d377..dc0bbc56ac 100644
--- a/synapse/storage/databases/main/metrics.py
+++ b/synapse/storage/databases/main/metrics.py
@@ -316,11 +316,140 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
 
         return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
 
+    async def count_r30v2_users(self) -> Dict[str, int]:
+        """
+        Counts the number of 30 day retained users, defined as users that:
+         - Appear more than once in the past 60 days
+         - Have more than 30 days between the most and least recent appearances that
+           occurred in the past 60 days.
+
+        (This is the second version of this metric, hence R30 'v2'.)
+
+        Returns:
+             A mapping from client type to the number of 30-day retained users for that client.
+
+             The dict keys are:
+              - "all" (a combined number of users across any and all clients)
+              - "android" (Element Android)
+              - "ios" (Element iOS)
+              - "electron" (Element Desktop)
+              - "web" (any web application -- it's not possible to distinguish Element Web here)
+        """
+
+        def _count_r30v2_users(txn):
+            thirty_days_in_secs = 86400 * 30
+            now = int(self._clock.time())
+            sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs
+            one_day_from_now_in_secs = now + 86400
+
+            # This is the 'per-platform' count.
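+            # The nested CASE expression below classifies each user_agent into a client
+            # type: Element/Riot agents are split into electron/android/ios, and anything
+            # else that looks like a browser (mozilla/gecko) is counted as 'web'.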
+            sql = """
+                SELECT
+                    client_type,
+                    count(client_type)
+                FROM
+                    (
+                        SELECT
+                            user_id,
+                            CASE
+                                WHEN
+                                    LOWER(user_agent) LIKE '%%riot%%' OR
+                                    LOWER(user_agent) LIKE '%%element%%'
+                                    THEN CASE
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%electron%%'
+                                            THEN 'electron'
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%android%%'
+                                            THEN 'android'
+                                        WHEN
+                                            LOWER(user_agent) LIKE '%%ios%%'
+                                            THEN 'ios'
+                                        ELSE 'unknown'
+                                    END
+                                WHEN
+                                    LOWER(user_agent) LIKE '%%mozilla%%' OR
+                                    LOWER(user_agent) LIKE '%%gecko%%'
+                                    THEN 'web'
+                                ELSE 'unknown'
+                            END as client_type
+                        FROM
+                            user_daily_visits
+                        WHERE
+                            timestamp > ?
+                            AND
+                            timestamp < ?
+                        GROUP BY
+                            user_id,
+                            client_type
+                        HAVING
+                            max(timestamp) - min(timestamp) > ?
+                    ) AS temp
+                GROUP BY
+                    client_type
+                ;
+            """
+
+            # We initialise all the client types to zero, so we get an explicit
+            # zero if they don't appear in the query results
+            results = {"ios": 0, "android": 0, "web": 0, "electron": 0}
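+            # The timestamps in user_daily_visits are in milliseconds, so convert the
+            # second-based bounds (and the 30-day threshold) before passing them in.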
+            txn.execute(
+                sql,
+                (
+                    sixty_days_ago_in_secs * 1000,
+                    one_day_from_now_in_secs * 1000,
+                    thirty_days_in_secs * 1000,
+                ),
+            )
+
+            for row in txn:
+                if row[0] == "unknown":
+                    continue
+                results[row[0]] = row[1]
+
+            # This is the 'all users' count.
+            sql = """
+                SELECT COUNT(*) FROM (
+                    SELECT
+                        1
+                    FROM
+                        user_daily_visits
+                    WHERE
+                        timestamp > ?
+                        AND
+                        timestamp < ?
+                    GROUP BY
+                        user_id
+                    HAVING
+                        max(timestamp) - min(timestamp) > ?
+                ) AS r30_users
+            """
+
+            txn.execute(
+                sql,
+                (
+                    sixty_days_ago_in_secs * 1000,
+                    one_day_from_now_in_secs * 1000,
+                    thirty_days_in_secs * 1000,
+                ),
+            )
+            row = txn.fetchone()
+            if row is None:
+                results["all"] = 0
+            else:
+                results["all"] = row[0]
+
+            return results
+
+        return await self.db_pool.runInteraction(
+            "count_r30v2_users", _count_r30v2_users
+        )
+
     def _get_start_of_day(self):
         """
         Returns millisecond unixtime for start of UTC day.
         """
-        now = time.gmtime()
+        now = time.gmtime(self._clock.time())
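+        # (Using the homeserver's clock rather than the system clock directly makes this
+        # easier to exercise in tests with a controlled clock.)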
         today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
         return today_start * 1000
 
@@ -352,7 +481,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
                     ) udv
                     ON u.user_id = udv.user_id AND u.device_id=udv.device_id
                     INNER JOIN users ON users.name=u.user_id
-                    WHERE last_seen > ? AND last_seen <= ?
+                    WHERE ? <= last_seen AND last_seen < ?
                     AND udv.timestamp IS NULL AND users.is_guest=0
                     AND users.appservice_id IS NULL
                     GROUP BY u.user_id, u.device_id
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index 9b4e95e134..ba7075caa5 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -73,20 +73,20 @@ class ProfileWorkerStore(SQLBaseStore):
     async def set_profile_displayname(
         self, user_localpart: str, new_displayname: Optional[str]
     ) -> None:
-        await self.db_pool.simple_update_one(
+        await self.db_pool.simple_upsert(
             table="profiles",
             keyvalues={"user_id": user_localpart},
-            updatevalues={"displayname": new_displayname},
+            values={"displayname": new_displayname},
             desc="set_profile_displayname",
         )
 
     async def set_profile_avatar_url(
         self, user_localpart: str, new_avatar_url: Optional[str]
     ) -> None:
-        await self.db_pool.simple_update_one(
+        await self.db_pool.simple_upsert(
             table="profiles",
             keyvalues={"user_id": user_localpart},
-            updatevalues={"avatar_url": new_avatar_url},
+            values={"avatar_url": new_avatar_url},
             desc="set_profile_avatar_url",
         )
 
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 7fb7780d0f..664c65dac5 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -115,7 +115,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         logger.info("[purge] looking for events to delete")
 
         should_delete_expr = "state_key IS NULL"
-        should_delete_params = ()  # type: Tuple[Any, ...]
+        should_delete_params: Tuple[Any, ...] = ()
         if not delete_local_events:
             should_delete_expr += " AND event_id NOT LIKE ?"
 
@@ -215,6 +215,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "event_relations",
             "event_search",
             "rejections",
+            "redactions",
         ):
             logger.info("[purge] removing events from %s", table)
 
@@ -392,7 +393,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "room_memberships",
             "room_stats_state",
             "room_stats_current",
-            "room_stats_historical",
             "room_stats_earliest_token",
             "rooms",
             "stream_ordering_to_exterm",
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index db52176337..a7fb8cd848 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -79,9 +79,9 @@ class PushRulesWorkerStore(
         super().__init__(database, db_conn, hs)
 
         if hs.config.worker.worker_app is None:
-            self._push_rules_stream_id_gen = StreamIdGenerator(
-                db_conn, "push_rules_stream", "stream_id"
-            )  # type: Union[StreamIdGenerator, SlavedIdTracker]
+            self._push_rules_stream_id_gen: Union[
+                StreamIdGenerator, SlavedIdTracker
+            ] = StreamIdGenerator(db_conn, "push_rules_stream", "stream_id")
         else:
             self._push_rules_stream_id_gen = SlavedIdTracker(
                 db_conn, "push_rules_stream", "stream_id"
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index e31c5864ac..6ad1a0cf7f 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -1744,7 +1744,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
 
             items = keyvalues.items()
             where_clause = " AND ".join(k + " = ?" for k, _ in items)
-            values = [v for _, v in items]  # type: List[Union[str, int]]
+            values: List[Union[str, int]] = [v for _, v in items]
             # Conveniently, refresh_tokens and access_tokens both use the user_id and device_id fields. Only caveat
             # is the `except_token_id` param that is tricky to get right, so for now we're just using the same where
             # clause and values before we handle that. This seems to be only used in the "set password" handler.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 9f0d64a325..6ddafe5434 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -25,6 +25,7 @@ from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.search import SearchStore
+from synapse.storage.types import Cursor
 from synapse.types import JsonDict, ThirdPartyInstanceID
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
@@ -1022,10 +1023,22 @@ class RoomWorkerStore(SQLBaseStore):
         )
 
 
-class RoomBackgroundUpdateStore(SQLBaseStore):
+class _BackgroundUpdates:
     REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
     ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column"
+    POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2"
+    REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth"
+
+
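+# SQL run once `min_depth2` is fully populated: drop the trigger and function that kept
+# it in sync, then swap the new column into place of the old one.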
+_REPLACE_ROOM_DEPTH_SQL_COMMANDS = (
+    "DROP TRIGGER populate_min_depth2_trigger ON room_depth",
+    "DROP FUNCTION populate_min_depth2()",
+    "ALTER TABLE room_depth DROP COLUMN min_depth",
+    "ALTER TABLE room_depth RENAME COLUMN min_depth2 TO min_depth",
+)
+
 
+class RoomBackgroundUpdateStore(SQLBaseStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
 
@@ -1037,15 +1050,25 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
         )
 
         self.db_pool.updates.register_background_update_handler(
-            self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE,
+            _BackgroundUpdates.REMOVE_TOMESTONED_ROOMS_BG_UPDATE,
             self._remove_tombstoned_rooms_from_directory,
         )
 
         self.db_pool.updates.register_background_update_handler(
-            self.ADD_ROOMS_ROOM_VERSION_COLUMN,
+            _BackgroundUpdates.ADD_ROOMS_ROOM_VERSION_COLUMN,
             self._background_add_rooms_room_version_column,
         )
 
+        # BG updates to change the type of room_depth.min_depth
+        self.db_pool.updates.register_background_update_handler(
+            _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2,
+            self._background_populate_room_depth_min_depth2,
+        )
+        self.db_pool.updates.register_background_update_handler(
+            _BackgroundUpdates.REPLACE_ROOM_DEPTH_MIN_DEPTH,
+            self._background_replace_room_depth_min_depth,
+        )
+
     async def _background_insert_retention(self, progress, batch_size):
         """Retrieves a list of all rooms within a range and inserts an entry for each of
         them into the room_retention table.
@@ -1164,7 +1187,9 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
                 new_last_room_id = room_id
 
             self.db_pool.updates._background_update_progress_txn(
-                txn, self.ADD_ROOMS_ROOM_VERSION_COLUMN, {"room_id": new_last_room_id}
+                txn,
+                _BackgroundUpdates.ADD_ROOMS_ROOM_VERSION_COLUMN,
+                {"room_id": new_last_room_id},
             )
 
             return False
@@ -1176,7 +1201,7 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
 
         if end:
             await self.db_pool.updates._end_background_update(
-                self.ADD_ROOMS_ROOM_VERSION_COLUMN
+                _BackgroundUpdates.ADD_ROOMS_ROOM_VERSION_COLUMN
             )
 
         return batch_size
@@ -1215,7 +1240,7 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
 
         if not rooms:
             await self.db_pool.updates._end_background_update(
-                self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE
+                _BackgroundUpdates.REMOVE_TOMESTONED_ROOMS_BG_UPDATE
             )
             return 0
 
@@ -1224,7 +1249,7 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
             await self.set_room_is_public(room_id, False)
 
         await self.db_pool.updates._background_update_progress(
-            self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE, {"room_id": rooms[-1]}
+            _BackgroundUpdates.REMOVE_TOMESTONED_ROOMS_BG_UPDATE, {"room_id": rooms[-1]}
         )
 
         return len(rooms)
@@ -1268,6 +1293,71 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
 
         return max_ordering is None
 
+    async def _background_populate_room_depth_min_depth2(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Populate room_depth.min_depth2
+
+        This is to deal with the fact that min_depth was initially created as a
+        32-bit integer field.
+        """
+
+        def process(txn: Cursor) -> int:
+            last_room = progress.get("last_room", "")
+            txn.execute(
+                """
+                UPDATE room_depth SET min_depth2=min_depth
+                WHERE room_id IN (
+                   SELECT room_id FROM room_depth WHERE room_id > ?
+                   ORDER BY room_id LIMIT ?
+                )
+                RETURNING room_id;
+                """,
+                (last_room, batch_size),
+            )
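+            # rowcount tells us how many rows were updated; the RETURNING clause lets us
+            # read back the affected room IDs so we can record how far we have got.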
+            row_count = txn.rowcount
+            if row_count == 0:
+                return 0
+            last_room = max(row[0] for row in txn)
+            logger.info("populated room_depth up to %s", last_room)
+
+            self.db_pool.updates._background_update_progress_txn(
+                txn,
+                _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2,
+                {"last_room": last_room},
+            )
+            return row_count
+
+        result = await self.db_pool.runInteraction(
+            "_background_populate_min_depth2", process
+        )
+
+        if result != 0:
+            return result
+
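+        # No rows were updated, so every room has been processed: mark the background
+        # update as complete.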
+        await self.db_pool.updates._end_background_update(
+            _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2
+        )
+        return 0
+
+    async def _background_replace_room_depth_min_depth(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Drop the old 'min_depth' column and rename 'min_depth2' into its place."""
+
+        def process(txn: Cursor) -> None:
+            for sql in _REPLACE_ROOM_DEPTH_SQL_COMMANDS:
+                logger.info("completing room_depth migration: %s", sql)
+                txn.execute(sql)
+
+        await self.db_pool.runInteraction("_background_replace_room_depth", process)
+
+        await self.db_pool.updates._end_background_update(
+            _BackgroundUpdates.REPLACE_ROOM_DEPTH_MIN_DEPTH,
+        )
+
+        return 0
+
 
 class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 4eebe5257a..b6cb6836c2 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -649,7 +649,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             event_to_memberships = await self._get_joined_profiles_from_event_ids(
                 missing_member_event_ids
             )
-            users_in_room.update((row for row in event_to_memberships.values() if row))
+            users_in_room.update(row for row in event_to_memberships.values() if row)
 
         if event is not None and event.type == EventTypes.Member:
             if event.membership == Membership.JOIN:
@@ -703,13 +703,22 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
     @cached(max_entries=10000)
     async def is_host_joined(self, room_id: str, host: str) -> bool:
+        return await self._check_host_room_membership(room_id, host, Membership.JOIN)
+
+    @cached(max_entries=10000)
+    async def is_host_invited(self, room_id: str, host: str) -> bool:
+        return await self._check_host_room_membership(room_id, host, Membership.INVITE)
+
+    async def _check_host_room_membership(
+        self, room_id: str, host: str, membership: str
+    ) -> bool:
         if "%" in host or "_" in host:
             raise Exception("Invalid host name")
 
         sql = """
             SELECT state_key FROM current_state_events AS c
             INNER JOIN room_memberships AS m USING (event_id)
-            WHERE m.membership = 'join'
+            WHERE m.membership = ?
                 AND type = 'm.room.member'
                 AND c.room_id = ?
                 AND state_key LIKE ?
@@ -722,7 +731,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         like_clause = "%:" + host
 
         rows = await self.db_pool.execute(
-            "is_host_joined", None, sql, room_id, like_clause
+            "is_host_joined", None, sql, membership, room_id, like_clause
         )
 
         if not rows:
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 82a1833509..59d67c255b 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -26,7 +26,6 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import StoreError
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
-from synapse.storage.engines import PostgresEngine
 from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
 
@@ -49,14 +48,6 @@ ABSOLUTE_STATS_FIELDS = {
     "user": ("joined_rooms",),
 }
 
-# these fields are per-timeslice and so should be reset to 0 upon a new slice
-# You can draw these stats on a histogram.
-# Example: number of events sent locally during a time slice
-PER_SLICE_FIELDS = {
-    "room": ("total_events", "total_event_bytes"),
-    "user": ("invites_sent", "rooms_created", "total_events", "total_event_bytes"),
-}
-
 TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")}
 
 # these are the tables (& ID columns) which contain our actual subjects
@@ -106,7 +97,6 @@ class StatsStore(StateDeltasStore):
         self.server_name = hs.hostname
         self.clock = self.hs.get_clock()
         self.stats_enabled = hs.config.stats_enabled
-        self.stats_bucket_size = hs.config.stats_bucket_size
 
         self.stats_delta_processing_lock = DeferredLock()
 
@@ -122,22 +112,6 @@ class StatsStore(StateDeltasStore):
         self.db_pool.updates.register_noop_background_update("populate_stats_cleanup")
         self.db_pool.updates.register_noop_background_update("populate_stats_prepare")
 
-    def quantise_stats_time(self, ts):
-        """
-        Quantises a timestamp to be a multiple of the bucket size.
-
-        Args:
-            ts (int): the timestamp to quantise, in milliseconds since the Unix
-                Epoch
-
-        Returns:
-            int: a timestamp which
-              - is divisible by the bucket size;
-              - is no later than `ts`; and
-              - is the largest such timestamp.
-        """
-        return (ts // self.stats_bucket_size) * self.stats_bucket_size
-
     async def _populate_stats_process_users(self, progress, batch_size):
         """
         This is a background update which regenerates statistics for users.
@@ -288,56 +262,6 @@ class StatsStore(StateDeltasStore):
             desc="update_room_state",
         )
 
-    async def get_statistics_for_subject(
-        self, stats_type: str, stats_id: str, start: str, size: int = 100
-    ) -> List[dict]:
-        """
-        Get statistics for a given subject.
-
-        Args:
-            stats_type: The type of subject
-            stats_id: The ID of the subject (e.g. room_id or user_id)
-            start: Pagination start. Number of entries, not timestamp.
-            size: How many entries to return.
-
-        Returns:
-            A list of dicts, where the dict has the keys of
-            ABSOLUTE_STATS_FIELDS[stats_type],  and "bucket_size" and "end_ts".
-        """
-        return await self.db_pool.runInteraction(
-            "get_statistics_for_subject",
-            self._get_statistics_for_subject_txn,
-            stats_type,
-            stats_id,
-            start,
-            size,
-        )
-
-    def _get_statistics_for_subject_txn(
-        self, txn, stats_type, stats_id, start, size=100
-    ):
-        """
-        Transaction-bound version of L{get_statistics_for_subject}.
-        """
-
-        table, id_col = TYPE_TO_TABLE[stats_type]
-        selected_columns = list(
-            ABSOLUTE_STATS_FIELDS[stats_type] + PER_SLICE_FIELDS[stats_type]
-        )
-
-        slice_list = self.db_pool.simple_select_list_paginate_txn(
-            txn,
-            table + "_historical",
-            "end_ts",
-            start,
-            size,
-            retcols=selected_columns + ["bucket_size", "end_ts"],
-            keyvalues={id_col: stats_id},
-            order_direction="DESC",
-        )
-
-        return slice_list
-
     @cached()
     async def get_earliest_token_for_stats(
         self, stats_type: str, id: str
@@ -451,14 +375,10 @@ class StatsStore(StateDeltasStore):
 
         table, id_col = TYPE_TO_TABLE[stats_type]
 
-        quantised_ts = self.quantise_stats_time(int(ts))
-        end_ts = quantised_ts + self.stats_bucket_size
-
         # Lets be paranoid and check that all the given field names are known
         abs_field_names = ABSOLUTE_STATS_FIELDS[stats_type]
-        slice_field_names = PER_SLICE_FIELDS[stats_type]
         for field in chain(fields.keys(), absolute_field_overrides.keys()):
-            if field not in abs_field_names and field not in slice_field_names:
+            if field not in abs_field_names:
                 # guard against potential SQL injection dodginess
                 raise ValueError(
                     "%s is not a recognised field"
@@ -491,20 +411,6 @@ class StatsStore(StateDeltasStore):
             additive_relatives=deltas_of_absolute_fields,
         )
 
-        per_slice_additive_relatives = {
-            key: fields.get(key, 0) for key in slice_field_names
-        }
-        self._upsert_copy_from_table_with_additive_relatives_txn(
-            txn=txn,
-            into_table=table + "_historical",
-            keyvalues={id_col: stats_id},
-            extra_dst_insvalues={"bucket_size": self.stats_bucket_size},
-            extra_dst_keyvalues={"end_ts": end_ts},
-            additive_relatives=per_slice_additive_relatives,
-            src_table=table + "_current",
-            copy_columns=abs_field_names,
-        )
-
     def _upsert_with_additive_relatives_txn(
         self, txn, table, keyvalues, absolutes, additive_relatives
     ):
@@ -528,7 +434,7 @@ class StatsStore(StateDeltasStore):
             ]
 
             relative_updates = [
-                "%(field)s = EXCLUDED.%(field)s + %(table)s.%(field)s"
+                "%(field)s = EXCLUDED.%(field)s + COALESCE(%(table)s.%(field)s, 0)"
                 % {"table": table, "field": field}
                 for field in additive_relatives.keys()
             ]
@@ -568,205 +474,13 @@ class StatsStore(StateDeltasStore):
                 self.db_pool.simple_insert_txn(txn, table, merged_dict)
             else:
                 for (key, val) in additive_relatives.items():
-                    current_row[key] += val
+                    if current_row[key] is None:
+                        current_row[key] = val
+                    else:
+                        current_row[key] += val
                 current_row.update(absolutes)
                 self.db_pool.simple_update_one_txn(txn, table, keyvalues, current_row)
 
-    def _upsert_copy_from_table_with_additive_relatives_txn(
-        self,
-        txn,
-        into_table,
-        keyvalues,
-        extra_dst_keyvalues,
-        extra_dst_insvalues,
-        additive_relatives,
-        src_table,
-        copy_columns,
-    ):
-        """Updates the historic stats table with latest updates.
-
-        This involves copying "absolute" fields from the `_current` table, and
-        adding relative fields to any existing values.
-
-        Args:
-             txn: Transaction
-             into_table (str): The destination table to UPSERT the row into
-             keyvalues (dict[str, any]): Row-identifying key values
-             extra_dst_keyvalues (dict[str, any]): Additional keyvalues
-                for `into_table`.
-             extra_dst_insvalues (dict[str, any]): Additional values to insert
-                on new row creation for `into_table`.
-             additive_relatives (dict[str, any]): Fields that will be added onto
-                if existing row present. (Must be disjoint from copy_columns.)
-             src_table (str): The source table to copy from
-             copy_columns (iterable[str]): The list of columns to copy
-        """
-        if self.database_engine.can_native_upsert:
-            ins_columns = chain(
-                keyvalues,
-                copy_columns,
-                additive_relatives,
-                extra_dst_keyvalues,
-                extra_dst_insvalues,
-            )
-            sel_exprs = chain(
-                keyvalues,
-                copy_columns,
-                (
-                    "?"
-                    for _ in chain(
-                        additive_relatives, extra_dst_keyvalues, extra_dst_insvalues
-                    )
-                ),
-            )
-            keyvalues_where = ("%s = ?" % f for f in keyvalues)
-
-            sets_cc = ("%s = EXCLUDED.%s" % (f, f) for f in copy_columns)
-            sets_ar = (
-                "%s = EXCLUDED.%s + %s.%s" % (f, f, into_table, f)
-                for f in additive_relatives
-            )
-
-            sql = """
-                INSERT INTO %(into_table)s (%(ins_columns)s)
-                SELECT %(sel_exprs)s
-                FROM %(src_table)s
-                WHERE %(keyvalues_where)s
-                ON CONFLICT (%(keyvalues)s)
-                DO UPDATE SET %(sets)s
-            """ % {
-                "into_table": into_table,
-                "ins_columns": ", ".join(ins_columns),
-                "sel_exprs": ", ".join(sel_exprs),
-                "keyvalues_where": " AND ".join(keyvalues_where),
-                "src_table": src_table,
-                "keyvalues": ", ".join(
-                    chain(keyvalues.keys(), extra_dst_keyvalues.keys())
-                ),
-                "sets": ", ".join(chain(sets_cc, sets_ar)),
-            }
-
-            qargs = list(
-                chain(
-                    additive_relatives.values(),
-                    extra_dst_keyvalues.values(),
-                    extra_dst_insvalues.values(),
-                    keyvalues.values(),
-                )
-            )
-            txn.execute(sql, qargs)
-        else:
-            self.database_engine.lock_table(txn, into_table)
-            src_row = self.db_pool.simple_select_one_txn(
-                txn, src_table, keyvalues, copy_columns
-            )
-            all_dest_keyvalues = {**keyvalues, **extra_dst_keyvalues}
-            dest_current_row = self.db_pool.simple_select_one_txn(
-                txn,
-                into_table,
-                keyvalues=all_dest_keyvalues,
-                retcols=list(chain(additive_relatives.keys(), copy_columns)),
-                allow_none=True,
-            )
-
-            if dest_current_row is None:
-                merged_dict = {
-                    **keyvalues,
-                    **extra_dst_keyvalues,
-                    **extra_dst_insvalues,
-                    **src_row,
-                    **additive_relatives,
-                }
-                self.db_pool.simple_insert_txn(txn, into_table, merged_dict)
-            else:
-                for (key, val) in additive_relatives.items():
-                    src_row[key] = dest_current_row[key] + val
-                self.db_pool.simple_update_txn(
-                    txn, into_table, all_dest_keyvalues, src_row
-                )
-
-    async def get_changes_room_total_events_and_bytes(
-        self, min_pos: int, max_pos: int
-    ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]:
-        """Fetches the counts of events in the given range of stream IDs.
-
-        Args:
-            min_pos
-            max_pos
-
-        Returns:
-            Mapping of room ID to field changes.
-        """
-
-        return await self.db_pool.runInteraction(
-            "stats_incremental_total_events_and_bytes",
-            self.get_changes_room_total_events_and_bytes_txn,
-            min_pos,
-            max_pos,
-        )
-
-    def get_changes_room_total_events_and_bytes_txn(
-        self, txn, low_pos: int, high_pos: int
-    ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]:
-        """Gets the total_events and total_event_bytes counts for rooms and
-        senders, in a range of stream_orderings (including backfilled events).
-
-        Args:
-            txn
-            low_pos: Low stream ordering
-            high_pos: High stream ordering
-
-        Returns:
-            The room and user deltas for total_events/total_event_bytes in the
-            format of `stats_id` -> fields
-        """
-
-        if low_pos >= high_pos:
-            # nothing to do here.
-            return {}, {}
-
-        if isinstance(self.database_engine, PostgresEngine):
-            new_bytes_expression = "OCTET_LENGTH(json)"
-        else:
-            new_bytes_expression = "LENGTH(CAST(json AS BLOB))"
-
-        sql = """
-            SELECT events.room_id, COUNT(*) AS new_events, SUM(%s) AS new_bytes
-            FROM events INNER JOIN event_json USING (event_id)
-            WHERE (? < stream_ordering AND stream_ordering <= ?)
-                OR (? <= stream_ordering AND stream_ordering <= ?)
-            GROUP BY events.room_id
-        """ % (
-            new_bytes_expression,
-        )
-
-        txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
-
-        room_deltas = {
-            room_id: {"total_events": new_events, "total_event_bytes": new_bytes}
-            for room_id, new_events, new_bytes in txn
-        }
-
-        sql = """
-            SELECT events.sender, COUNT(*) AS new_events, SUM(%s) AS new_bytes
-            FROM events INNER JOIN event_json USING (event_id)
-            WHERE (? < stream_ordering AND stream_ordering <= ?)
-                OR (? <= stream_ordering AND stream_ordering <= ?)
-            GROUP BY events.sender
-        """ % (
-            new_bytes_expression,
-        )
-
-        txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
-
-        user_deltas = {
-            user_id: {"total_events": new_events, "total_event_bytes": new_bytes}
-            for user_id, new_events, new_bytes in txn
-            if self.hs.is_mine_id(user_id)
-        }
-
-        return room_deltas, user_deltas
-
     async def _calculate_and_set_initial_state_for_room(
         self, room_id: str
     ) -> Tuple[dict, dict, int]:
@@ -893,6 +607,7 @@ class StatsStore(StateDeltasStore):
                 "invited_members": membership_counts.get(Membership.INVITE, 0),
                 "left_members": membership_counts.get(Membership.LEAVE, 0),
                 "banned_members": membership_counts.get(Membership.BAN, 0),
+                "knocked_members": membership_counts.get(Membership.KNOCK, 0),
                 "local_users_in_room": len(local_users_in_room),
             },
         )
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 7581c7d3ff..959f13de47 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -1085,9 +1085,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta):
         # stream token (as returned by `RoomStreamToken.get_max_stream_pos`) and
         # then filtering the results.
         if from_token.topological is not None:
-            from_bound = (
-                from_token.as_historical_tuple()
-            )  # type: Tuple[Optional[int], int]
+            from_bound: Tuple[Optional[int], int] = from_token.as_historical_tuple()
         elif direction == "b":
             from_bound = (
                 None,
@@ -1099,7 +1097,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta):
                 from_token.stream,
             )
 
-        to_bound = None  # type: Optional[Tuple[Optional[int], int]]
+        to_bound: Optional[Tuple[Optional[int], int]] = None
         if to_token:
             if to_token.topological is not None:
                 to_bound = to_token.as_historical_tuple()
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index 1d62c6140f..f93ff0a545 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -42,7 +42,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
             "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"]
         )
 
-        tags_by_room = {}  # type: Dict[str, Dict[str, JsonDict]]
+        tags_by_room: Dict[str, Dict[str, JsonDict]] = {}
         for row in rows:
             room_tags = tags_by_room.setdefault(row["room_id"], {})
             room_tags[row["tag"]] = db_to_json(row["content"])
diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py
index 22c05cdde7..38bfdf5dad 100644
--- a/synapse/storage/databases/main/ui_auth.py
+++ b/synapse/storage/databases/main/ui_auth.py
@@ -224,12 +224,12 @@ class UIAuthWorkerStore(SQLBaseStore):
         self, txn: LoggingTransaction, session_id: str, key: str, value: Any
     ):
         # Get the current value.
-        result = self.db_pool.simple_select_one_txn(
+        result: Dict[str, Any] = self.db_pool.simple_select_one_txn(  # type: ignore
             txn,
             table="ui_auth_sessions",
             keyvalues={"session_id": session_id},
             retcols=("serverdict",),
-        )  # type: Dict[str, Any]  # type: ignore
+        )
 
         # Update it and add it back to the database.
         serverdict = db_to_json(result["serverdict"])
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 1882bfd9cf..20cd63c330 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -49,6 +49,12 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
         """
         ...
 
+    @property
+    @abc.abstractmethod
+    def supports_returning(self) -> bool:
+        """Do we support the `RETURNING` clause in insert/update/delete?"""
+        ...
+
     @abc.abstractmethod
     def check_database(
         self, db_conn: ConnectionType, allow_outdated_version: bool = False
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 21411c5fea..30f948a0f7 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -133,6 +133,11 @@ class PostgresEngine(BaseDatabaseEngine):
         """Do we support using `a = ANY(?)` and passing a list"""
         return True
 
+    @property
+    def supports_returning(self) -> bool:
+        """Do we support the `RETURNING` clause in insert/update/delete?"""
+        return True
+
     def is_deadlock(self, error):
         if isinstance(error, self.module.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 5fe1b205e1..70d17d4f2c 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -60,6 +60,11 @@ class Sqlite3Engine(BaseDatabaseEngine["sqlite3.Connection"]):
         """Do we support using `a = ANY(?)` and passing a list"""
         return False
 
+    @property
+    def supports_returning(self) -> bool:
+        """Do we support the `RETURNING` clause in insert/update/delete?"""
+        return self.module.sqlite_version_info >= (3, 35, 0)
+
     def check_database(self, db_conn, allow_outdated_version: bool = False):
         if not allow_outdated_version:
             version = self.module.sqlite_version_info
diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py
index 051095fea9..a39877f0d5 100644
--- a/synapse/storage/persist_events.py
+++ b/synapse/storage/persist_events.py
@@ -307,7 +307,7 @@ class EventsPersistenceStorage:
             matched the transaction ID; the existing event is returned in such
             a case.
         """
-        partitioned = {}  # type: Dict[str, List[Tuple[EventBase, EventContext]]]
+        partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
         for event, ctx in events_and_contexts:
             partitioned.setdefault(event.room_id, []).append((event, ctx))
 
@@ -384,7 +384,7 @@ class EventsPersistenceStorage:
             A dictionary of event ID to event ID we didn't persist as we already
             had another event persisted with the same TXN ID.
         """
-        replaced_events = {}  # type: Dict[str, str]
+        replaced_events: Dict[str, str] = {}
         if not events_and_contexts:
             return replaced_events
 
@@ -440,16 +440,14 @@ class EventsPersistenceStorage:
             # Set of remote users which were in rooms the server has left. We
             # should check if we still share any rooms and if not we mark their
             # device lists as stale.
-            potentially_left_users = set()  # type: Set[str]
+            potentially_left_users: Set[str] = set()
 
             if not backfilled:
                 with Measure(self._clock, "_calculate_state_and_extrem"):
                     # Work out the new "current state" for each room.
                     # We do this by working out what the new extremities are and then
                     # calculating the state from that.
-                    events_by_room = (
-                        {}
-                    )  # type: Dict[str, List[Tuple[EventBase, EventContext]]]
+                    events_by_room: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
                     for event, context in chunk:
                         events_by_room.setdefault(event.room_id, []).append(
                             (event, context)
@@ -622,9 +620,9 @@ class EventsPersistenceStorage:
         )
 
         # Remove any events which are prev_events of any existing events.
-        existing_prevs = await self.persist_events_store._get_events_which_are_prevs(
-            result
-        )  # type: Collection[str]
+        existing_prevs: Collection[
+            str
+        ] = await self.persist_events_store._get_events_which_are_prevs(result)
         result.difference_update(existing_prevs)
 
         # Finally handle the case where the new events have soft-failed prev
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 683e5e3b90..61392b9639 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -256,7 +256,7 @@ def _setup_new_database(
         for database in databases
     )
 
-    directory_entries = []  # type: List[_DirectoryListing]
+    directory_entries: List[_DirectoryListing] = []
     for directory in directories:
         directory_entries.extend(
             _DirectoryListing(file_name, os.path.join(directory, file_name))
@@ -424,10 +424,10 @@ def _upgrade_existing_database(
             directories.append(os.path.join(schema_path, database, "delta", str(v)))
 
         # Used to check if we have any duplicate file names
-        file_name_counter = Counter()  # type: CounterType[str]
+        file_name_counter: CounterType[str] = Counter()
 
         # Now find which directories have anything of interest.
-        directory_entries = []  # type: List[_DirectoryListing]
+        directory_entries: List[_DirectoryListing] = []
         for directory in directories:
             logger.debug("Looking for schema deltas in %s", directory)
             try:
@@ -639,7 +639,7 @@ def get_statements(f: Iterable[str]) -> Generator[str, None, None]:
 
 
 def executescript(txn: Cursor, schema_path: str) -> None:
-    with open(schema_path, "r") as f:
+    with open(schema_path) as f:
         execute_statements_from_stream(txn, f)
 
 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 0a53b73ccc..36340a652a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 60
+SCHEMA_VERSION = 61
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -21,6 +21,10 @@ older versions of Synapse).
 
 See `README.md <synapse/storage/schema/README.md>`_  for more information on how this
 works.
+
+Changes in SCHEMA_VERSION = 61:
+    - The `user_stats_historical` and `room_stats_historical` tables are no longer written
+      to or read (previously, they were written to but not read).
 """
 
 
diff --git a/synapse/storage/schema/main/delta/60/01recreate_stream_ordering.sql.postgres b/synapse/storage/schema/main/delta/60/01recreate_stream_ordering.sql.postgres
index 88c9f8bd0d..0edc9fe7a2 100644
--- a/synapse/storage/schema/main/delta/60/01recreate_stream_ordering.sql.postgres
+++ b/synapse/storage/schema/main/delta/60/01recreate_stream_ordering.sql.postgres
@@ -31,10 +31,15 @@ CREATE OR REPLACE RULE "populate_stream_ordering2" AS
 INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
   (6001, 'populate_stream_ordering2', '{}');
 
--- ... and another to build an index on it
+-- ... and some more to build indexes on it. These aren't really interdependent
+-- but the background_updates manager can only handle a single dependency per update.
 INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
-  (6001, 'index_stream_ordering2', '{}', 'populate_stream_ordering2');
+  (6001, 'index_stream_ordering2', '{}', 'populate_stream_ordering2'),
+  (6001, 'index_stream_ordering2_room_order', '{}', 'index_stream_ordering2'),
+  (6001, 'index_stream_ordering2_contains_url', '{}', 'index_stream_ordering2_room_order'),
+  (6001, 'index_stream_ordering2_room_stream', '{}', 'index_stream_ordering2_contains_url'),
+  (6001, 'index_stream_ordering2_ts', '{}', 'index_stream_ordering2_room_stream');
 
 -- ... and another to do the switcheroo
 INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
-  (6001, 'replace_stream_ordering_column', '{}', 'index_stream_ordering2');
+  (6001, 'replace_stream_ordering_column', '{}', 'index_stream_ordering2_ts');
diff --git a/synapse/storage/schema/main/delta/60/02change_stream_ordering_columns.sql.postgres b/synapse/storage/schema/main/delta/60/02change_stream_ordering_columns.sql.postgres
new file mode 100644
index 0000000000..630c24fd9e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/60/02change_stream_ordering_columns.sql.postgres
@@ -0,0 +1,30 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This migration is closely related to '01recreate_stream_ordering.sql.postgres'.
+--
+-- It updates the other tables which use an INTEGER to refer to a stream ordering.
+-- These tables are all small enough that a re-create is tractable.
+ALTER TABLE pushers ALTER COLUMN last_stream_ordering SET DATA TYPE BIGINT;
+ALTER TABLE federation_stream_position ALTER COLUMN stream_id SET DATA TYPE BIGINT;
+
+-- These aren't actually event stream orderings, but they are numbers where 2 billion
+-- is a bit limiting; application_services_state is tiny, and I don't want to ever have
+-- to do this again.
+ALTER TABLE application_services_state ALTER COLUMN last_txn SET DATA TYPE BIGINT;
+ALTER TABLE application_services_state ALTER COLUMN read_receipt_stream_id SET DATA TYPE BIGINT;
+ALTER TABLE application_services_state ALTER COLUMN presence_stream_id SET DATA TYPE BIGINT;
+
+
diff --git a/synapse/storage/schema/main/delta/61/01change_appservices_txns.sql.postgres b/synapse/storage/schema/main/delta/61/01change_appservices_txns.sql.postgres
new file mode 100644
index 0000000000..c8aec78e60
--- /dev/null
+++ b/synapse/storage/schema/main/delta/61/01change_appservices_txns.sql.postgres
@@ -0,0 +1,23 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- we use bigint elsewhere in the database for appservice txn ids (notably,
+-- application_services_state.last_txn), and generally we use bigints everywhere else
+-- we have monotonic counters, so let's bring this one in line.
+--
+-- assuming there aren't thousands of rows for decommissioned/non-functional ASes, this
+-- table should be pretty small, so safe to do a synchronous ALTER TABLE.
+
+ALTER TABLE application_services_txns ALTER COLUMN txn_id SET DATA TYPE BIGINT;
diff --git a/synapse/storage/schema/main/delta/61/02drop_redundant_room_depth_index.sql b/synapse/storage/schema/main/delta/61/02drop_redundant_room_depth_index.sql
new file mode 100644
index 0000000000..35ca7a40c0
--- /dev/null
+++ b/synapse/storage/schema/main/delta/61/02drop_redundant_room_depth_index.sql
@@ -0,0 +1,18 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this index is redundant; there is another UNIQUE index on this table.
+DROP INDEX IF EXISTS room_depth_room;
+
diff --git a/synapse/storage/schema/main/delta/61/03recreate_min_depth.py b/synapse/storage/schema/main/delta/61/03recreate_min_depth.py
new file mode 100644
index 0000000000..f8d7db9f2e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/61/03recreate_min_depth.py
@@ -0,0 +1,70 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This migration handles the process of changing the type of `room_depth.min_depth` to
+a BIGINT.
+"""
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    if not isinstance(database_engine, PostgresEngine):
+        # this only applies to postgres - sqlite does not distinguish between big and
+        # little ints.
+        return
+
+    # First add a new column to contain the bigger min_depth
+    cur.execute("ALTER TABLE room_depth ADD COLUMN min_depth2 BIGINT")
+
+    # Create a trigger which will keep it populated.
+    cur.execute(
+        """
+        CREATE OR REPLACE FUNCTION populate_min_depth2() RETURNS trigger AS $BODY$
+            BEGIN
+                new.min_depth2 := new.min_depth;
+                RETURN NEW;
+            END;
+        $BODY$ LANGUAGE plpgsql
+        """
+    )
+
+    cur.execute(
+        """
+        CREATE TRIGGER populate_min_depth2_trigger BEFORE INSERT OR UPDATE ON room_depth
+        FOR EACH ROW
+        EXECUTE PROCEDURE populate_min_depth2()
+        """
+    )
+
+    # Start a bg process to populate it for old rooms
+    cur.execute(
+        """
+       INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+            (6103, 'populate_room_depth_min_depth2', '{}')
+       """
+    )
+
+    # and another to switch them over once it completes.
+    cur.execute(
+        """
+        INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+            (6103, 'replace_room_depth_min_depth', '{}', 'populate_room_depth_min_depth2')
+        """
+    )
+
+
+def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    pass
diff --git a/synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres b/synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres
new file mode 100644
index 0000000000..35a153da7b
--- /dev/null
+++ b/synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres
@@ -0,0 +1,34 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- By default the postgres statistics collector massively underestimates the
+-- number of distinct state groups in the `state_groups_state` table, which can
+-- cause postgres to use table scans for queries for multiple state groups.
+--
+-- To work around this we can manually tell postgres the number of distinct state
+-- groups there are by setting `n_distinct` (a negative value here is the number
+-- of distinct values divided by the number of rows, so -0.02 means on average
+-- there are 50 rows per distinct value). We don't need a particularly
+-- accurate number here, as a) we just want it to always use index scans and b)
+-- our estimate is going to be better than the one made by the statistics
+-- collector.
+
+ALTER TABLE state_groups_state ALTER COLUMN state_group SET (n_distinct = -0.02);
+
+-- Ideally we'd do an `ANALYZE state_groups_state (state_group)` here so that
+-- the above gets picked up immediately, but that can take a bit of time so we
+-- rely on the autovacuum eventually getting run and doing that in the
+-- background for us.
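
Since the negative `n_distinct` is a ratio rather than a count, the value above scales with the table automatically. A quick worked example of the arithmetic (the row count is illustrative, not measured):

# Worked example of how Postgres interprets a negative n_distinct: it is
# -(number of distinct values / number of rows), so -0.02 means one distinct
# state_group per 50 rows on average. The row count below is illustrative.
n_distinct = -0.02
rows_per_distinct_value = 1 / abs(n_distinct)       # 50.0
total_rows = 1_000_000_000                          # assumed table size
estimated_distinct_groups = abs(n_distinct) * total_rows
print(rows_per_distinct_value, int(estimated_distinct_groups))  # 50.0 20000000
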
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index c9dce726cb..f8fbba9d38 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -91,7 +91,7 @@ class StateFilter:
         Returns:
             The new state filter.
         """
-        type_dict = {}  # type: Dict[str, Optional[Set[str]]]
+        type_dict: Dict[str, Optional[Set[str]]] = {}
         for typ, s in types:
             if typ in type_dict:
                 if type_dict[typ] is None:
@@ -194,7 +194,7 @@ class StateFilter:
         """
 
         where_clause = ""
-        where_args = []  # type: List[str]
+        where_args: List[str] = []
 
         if self.is_full():
             return where_clause, where_args
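
This hunk, and most of the remaining Python changes in this commit, convert `# type:` comments into inline (PEP 526) variable annotations. The two forms are equivalent to mypy; a minimal, self-contained illustration of the pattern being applied:

# Minimal illustration of the annotation migration applied throughout this
# commit: the old comment form and the new inline (PEP 526) form mean the
# same thing to mypy, but the inline form is parsed as real syntax.
from typing import Dict, List, Optional, Set

where_args = []  # type: List[str]              # old style
type_dict: Dict[str, Optional[Set[str]]] = {}   # new style
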
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index f1e62f9e85..c768fdea56 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -112,7 +112,7 @@ class StreamIdGenerator:
         # insertion ordering will ensure it's in the correct ordering.
         #
         # The key and values are the same, but we never look at the values.
-        self._unfinished_ids = OrderedDict()  # type: OrderedDict[int, int]
+        self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
 
     def get_next(self):
         """
@@ -236,15 +236,15 @@ class MultiWriterIdGenerator:
         # Note: If we are a negative stream then we still store all the IDs as
         # positive to make life easier for us, and simply negate the IDs when we
         # return them.
-        self._current_positions = {}  # type: Dict[str, int]
+        self._current_positions: Dict[str, int] = {}
 
         # Set of local IDs that we're still processing. The current position
         # should be less than the minimum of this set (if not empty).
-        self._unfinished_ids = set()  # type: Set[int]
+        self._unfinished_ids: Set[int] = set()
 
         # Set of local IDs that we've processed that are larger than the current
         # position, due to there being smaller unpersisted IDs.
-        self._finished_ids = set()  # type: Set[int]
+        self._finished_ids: Set[int] = set()
 
         # We track the max position where we know everything before has been
         # persisted. This is done by a) looking at the min across all instances
@@ -265,7 +265,7 @@ class MultiWriterIdGenerator:
         self._persisted_upto_position = (
             min(self._current_positions.values()) if self._current_positions else 1
         )
-        self._known_persisted_positions = []  # type: List[int]
+        self._known_persisted_positions: List[int] = []
 
         self._sequence_gen = PostgresSequenceGenerator(sequence_name)
 
@@ -465,7 +465,7 @@ class MultiWriterIdGenerator:
             self._unfinished_ids.discard(next_id)
             self._finished_ids.add(next_id)
 
-            new_cur = None  # type: Optional[int]
+            new_cur: Optional[int] = None
 
             if self._unfinished_ids:
                 # If there are unfinished IDs then the new position will be the
diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py
index 30b6b8e0ca..bb33e04fb1 100644
--- a/synapse/storage/util/sequence.py
+++ b/synapse/storage/util/sequence.py
@@ -208,10 +208,10 @@ class LocalSequenceGenerator(SequenceGenerator):
                  get_next_id_txn; should return the current maximum id
         """
         # the callback. this is cleared after it is called, so that it can be GCed.
-        self._callback = get_first_callback  # type: Optional[GetFirstCallbackType]
+        self._callback: Optional[GetFirstCallbackType] = get_first_callback
 
         # The current max value, or None if we haven't looked in the DB yet.
-        self._current_max_id = None  # type: Optional[int]
+        self._current_max_id: Optional[int] = None
         self._lock = threading.Lock()
 
     def get_next_id_txn(self, txn: Cursor) -> int:
@@ -274,7 +274,7 @@ def build_sequence_generator(
             `check_consistency` details.
     """
     if isinstance(database_engine, PostgresEngine):
-        seq = PostgresSequenceGenerator(sequence_name)  # type: SequenceGenerator
+        seq: SequenceGenerator = PostgresSequenceGenerator(sequence_name)
     else:
         seq = LocalSequenceGenerator(get_first_callback)
 
diff --git a/synapse/streams/events.py b/synapse/streams/events.py
index 20fceaa935..99b0aac2fb 100644
--- a/synapse/streams/events.py
+++ b/synapse/streams/events.py
@@ -32,9 +32,9 @@ class EventSources:
     }
 
     def __init__(self, hs):
-        self.sources = {
+        self.sources: Dict[str, Any] = {
             name: cls(hs) for name, cls in EventSources.SOURCE_TYPES.items()
-        }  # type: Dict[str, Any]
+        }
         self.store = hs.get_datastore()
 
     def get_current_token(self) -> StreamToken:
diff --git a/synapse/types.py b/synapse/types.py
index 8d2fa00f71..429bb013d2 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -182,14 +182,14 @@ def create_requester(
     )
 
 
-def get_domain_from_id(string):
+def get_domain_from_id(string: str) -> str:
     idx = string.find(":")
     if idx == -1:
         raise SynapseError(400, "Invalid ID: %r" % (string,))
     return string[idx + 1 :]
 
 
-def get_localpart_from_id(string):
+def get_localpart_from_id(string: str) -> str:
     idx = string.find(":")
     if idx == -1:
         raise SynapseError(400, "Invalid ID: %r" % (string,))
@@ -210,7 +210,7 @@ class DomainSpecificString(metaclass=abc.ABCMeta):
         'domain' : The domain part of the name
     """
 
-    SIGIL = abc.abstractproperty()  # type: str  # type: ignore
+    SIGIL: str = abc.abstractproperty()  # type: ignore
 
     localpart = attr.ib(type=str)
     domain = attr.ib(type=str)
@@ -304,7 +304,7 @@ class GroupID(DomainSpecificString):
 
     @classmethod
     def from_string(cls: Type[DS], s: str) -> DS:
-        group_id = super().from_string(s)  # type: DS # type: ignore
+        group_id: DS = super().from_string(s)  # type: ignore
 
         if not group_id.localpart:
             raise SynapseError(400, "Group ID cannot be empty", Codes.INVALID_PARAM)
@@ -577,10 +577,10 @@ class RoomStreamToken:
             entries = []
             for name, pos in self.instance_map.items():
                 instance_id = await store.get_id_for_instance(name)
-                entries.append("{}.{}".format(instance_id, pos))
+                entries.append(f"{instance_id}.{pos}")
 
             encoded_map = "~".join(entries)
-            return "m{}~{}".format(self.stream, encoded_map)
+            return f"m{self.stream}~{encoded_map}"
         else:
             return "s%d" % (self.stream,)
 
@@ -600,7 +600,7 @@ class StreamToken:
     groups_key = attr.ib(type=int)
 
     _SEPARATOR = "_"
-    START = None  # type: StreamToken
+    START: "StreamToken"
 
     @classmethod
     async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 061102c3c8..014db1355b 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -257,7 +257,7 @@ class Linearizer:
             max_count: The maximum number of concurrent accesses
         """
         if name is None:
-            self.name = id(self)  # type: Union[str, int]
+            self.name: Union[str, int] = id(self)
         else:
             self.name = name
 
@@ -269,7 +269,7 @@ class Linearizer:
         self.max_count = max_count
 
         # key_to_defer is a map from the key to a _LinearizerEntry.
-        self.key_to_defer = {}  # type: Dict[Hashable, _LinearizerEntry]
+        self.key_to_defer: Dict[Hashable, _LinearizerEntry] = {}
 
     def is_queued(self, key: Hashable) -> bool:
         """Checks whether there is a process queued up waiting"""
@@ -409,10 +409,10 @@ class ReadWriteLock:
 
     def __init__(self):
         # Latest readers queued
-        self.key_to_current_readers = {}  # type: Dict[str, Set[defer.Deferred]]
+        self.key_to_current_readers: Dict[str, Set[defer.Deferred]] = {}
 
         # Latest writer queued
-        self.key_to_current_writer = {}  # type: Dict[str, defer.Deferred]
+        self.key_to_current_writer: Dict[str, defer.Deferred] = {}
 
     async def read(self, key: str) -> ContextManager:
         new_defer = defer.Deferred()
diff --git a/synapse/util/batching_queue.py b/synapse/util/batching_queue.py
index 8fd5bfb69b..274cea7eb7 100644
--- a/synapse/util/batching_queue.py
+++ b/synapse/util/batching_queue.py
@@ -93,11 +93,11 @@ class BatchingQueue(Generic[V, R]):
         self._clock = clock
 
         # The set of keys currently being processed.
-        self._processing_keys = set()  # type: Set[Hashable]
+        self._processing_keys: Set[Hashable] = set()
 
         # The currently pending batch of values by key, with a Deferred to call
         # with the result of the corresponding `_process_batch_callback` call.
-        self._next_values = {}  # type: Dict[Hashable, List[Tuple[V, defer.Deferred]]]
+        self._next_values: Dict[Hashable, List[Tuple[V, defer.Deferred]]] = {}
 
         # The function to call with batches of values.
         self._process_batch_callback = process_batch_callback
@@ -108,9 +108,7 @@ class BatchingQueue(Generic[V, R]):
 
         number_of_keys.labels(self._name).set_function(lambda: len(self._next_values))
 
-        self._number_in_flight_metric = number_in_flight.labels(
-            self._name
-        )  # type: Gauge
+        self._number_in_flight_metric: Gauge = number_in_flight.labels(self._name)
 
     async def add_to_queue(self, value: V, key: Hashable = ()) -> R:
         """Adds the value to the queue with the given key, returning the result
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index ca36f07c20..9012034b7a 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -29,8 +29,8 @@ logger = logging.getLogger(__name__)
 TRACK_MEMORY_USAGE = False
 
 
-caches_by_name = {}  # type: Dict[str, Sized]
-collectors_by_name = {}  # type: Dict[str, CacheMetric]
+caches_by_name: Dict[str, Sized] = {}
+collectors_by_name: Dict[str, "CacheMetric"] = {}
 
 cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
diff --git a/synapse/util/caches/cached_call.py b/synapse/util/caches/cached_call.py
index a301c9e89b..891bee0b33 100644
--- a/synapse/util/caches/cached_call.py
+++ b/synapse/util/caches/cached_call.py
@@ -63,9 +63,9 @@ class CachedCall(Generic[TV]):
             f: The underlying function. Only one call to this function will be alive
                 at once (per instance of CachedCall)
         """
-        self._callable = f  # type: Optional[Callable[[], Awaitable[TV]]]
-        self._deferred = None  # type: Optional[Deferred]
-        self._result = None  # type: Union[None, Failure, TV]
+        self._callable: Optional[Callable[[], Awaitable[TV]]] = f
+        self._deferred: Optional[Deferred] = None
+        self._result: Union[None, Failure, TV] = None
 
     async def get(self) -> TV:
         """Kick off the call if necessary, and return the result"""
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index 1044139119..8c6fafc677 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -80,25 +80,25 @@ class DeferredCache(Generic[KT, VT]):
         cache_type = TreeCache if tree else dict
 
         # _pending_deferred_cache maps from the key value to a `CacheEntry` object.
-        self._pending_deferred_cache = (
-            cache_type()
-        )  # type: Union[TreeCache, MutableMapping[KT, CacheEntry]]
+        self._pending_deferred_cache: Union[
+            TreeCache, "MutableMapping[KT, CacheEntry]"
+        ] = cache_type()
 
         def metrics_cb():
             cache_pending_metric.labels(name).set(len(self._pending_deferred_cache))
 
         # cache is used for completed results and maps to the result itself, rather than
         # a Deferred.
-        self.cache = LruCache(
+        self.cache: LruCache[KT, VT] = LruCache(
             max_size=max_entries,
             cache_name=name,
             cache_type=cache_type,
             size_callback=(lambda d: len(d) or 1) if iterable else None,
             metrics_collection_callback=metrics_cb,
             apply_cache_factor_from_config=apply_cache_factor_from_config,
-        )  # type: LruCache[KT, VT]
+        )
 
-        self.thread = None  # type: Optional[threading.Thread]
+        self.thread: Optional[threading.Thread] = None
 
     @property
     def max_entries(self):
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index d77e8edeea..1e8e6b1d01 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -46,17 +46,17 @@ F = TypeVar("F", bound=Callable[..., Any])
 
 
 class _CachedFunction(Generic[F]):
-    invalidate = None  # type: Any
-    invalidate_all = None  # type: Any
-    prefill = None  # type: Any
-    cache = None  # type: Any
-    num_args = None  # type: Any
+    invalidate: Any = None
+    invalidate_all: Any = None
+    prefill: Any = None
+    cache: Any = None
+    num_args: Any = None
 
-    __name__ = None  # type: str
+    __name__: str
 
     # Note: This function signature is actually fiddled with by the synapse mypy
     # plugin to a) make it a bound method, and b) remove any `cache_context` arg.
-    __call__ = None  # type: F
+    __call__: F
 
 
 class _CacheDescriptorBase:
@@ -115,8 +115,8 @@ class _CacheDescriptorBase:
 
 
 class _LruCachedFunction(Generic[F]):
-    cache = None  # type: LruCache[CacheKey, Any]
-    __call__ = None  # type: F
+    cache: LruCache[CacheKey, Any]
+    __call__: F
 
 
 def lru_cache(
@@ -180,10 +180,10 @@ class LruCacheDescriptor(_CacheDescriptorBase):
         self.max_entries = max_entries
 
     def __get__(self, obj, owner):
-        cache = LruCache(
+        cache: LruCache[CacheKey, Any] = LruCache(
             cache_name=self.orig.__name__,
             max_size=self.max_entries,
-        )  # type: LruCache[CacheKey, Any]
+        )
 
         get_cache_key = self.cache_key_builder
         sentinel = LruCacheDescriptor._Sentinel.sentinel
@@ -271,12 +271,12 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
         self.iterable = iterable
 
     def __get__(self, obj, owner):
-        cache = DeferredCache(
+        cache: DeferredCache[CacheKey, Any] = DeferredCache(
             name=self.orig.__name__,
             max_entries=self.max_entries,
             tree=self.tree,
             iterable=self.iterable,
-        )  # type: DeferredCache[CacheKey, Any]
+        )
 
         get_cache_key = self.cache_key_builder
 
@@ -359,7 +359,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase):
 
     def __get__(self, obj, objtype=None):
         cached_method = getattr(obj, self.cached_method_name)
-        cache = cached_method.cache  # type: DeferredCache[CacheKey, Any]
+        cache: DeferredCache[CacheKey, Any] = cached_method.cache
         num_args = cached_method.num_args
 
         @functools.wraps(self.orig)
@@ -472,15 +472,15 @@ class _CacheContext:
 
     Cache = Union[DeferredCache, LruCache]
 
-    _cache_context_objects = (
-        WeakValueDictionary()
-    )  # type: WeakValueDictionary[Tuple[_CacheContext.Cache, CacheKey], _CacheContext]
+    _cache_context_objects: """WeakValueDictionary[
+        Tuple["_CacheContext.Cache", CacheKey], "_CacheContext"
+    ]""" = WeakValueDictionary()
 
     def __init__(self, cache: "_CacheContext.Cache", cache_key: CacheKey) -> None:
         self._cache = cache
         self._cache_key = cache_key
 
-    def invalidate(self):  # type: () -> None
+    def invalidate(self) -> None:
         """Invalidates the cache entry referred to by the context."""
         self._cache.invalidate(self._cache_key)
 
diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index 56d94d96ce..3f852edd7f 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -62,13 +62,13 @@ class DictionaryCache(Generic[KT, DKT]):
     """
 
     def __init__(self, name: str, max_entries: int = 1000):
-        self.cache = LruCache(
+        self.cache: LruCache[KT, DictionaryEntry] = LruCache(
             max_size=max_entries, cache_name=name, size_callback=len
-        )  # type: LruCache[KT, DictionaryEntry]
+        )
 
         self.name = name
         self.sequence = 0
-        self.thread = None  # type: Optional[threading.Thread]
+        self.thread: Optional[threading.Thread] = None
 
     def check_thread(self) -> None:
         expected_thread = self.thread
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index ac47a31cd7..bde16b8577 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -27,7 +27,7 @@ from synapse.util.caches import register_cache
 logger = logging.getLogger(__name__)
 
 
-SENTINEL = object()  # type: Any
+SENTINEL: Any = object()
 
 
 T = TypeVar("T")
@@ -71,7 +71,7 @@ class ExpiringCache(Generic[KT, VT]):
         self._expiry_ms = expiry_ms
         self._reset_expiry_on_get = reset_expiry_on_get
 
-        self._cache = OrderedDict()  # type: OrderedDict[KT, _CacheEntry]
+        self._cache: OrderedDict[KT, _CacheEntry] = OrderedDict()
 
         self.iterable = iterable
 
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index d89e9d9b1d..5c65d187b6 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import threading
+import weakref
 from functools import wraps
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Collection,
@@ -31,10 +34,19 @@ from typing import (
 
 from typing_extensions import Literal
 
+from twisted.internet import reactor
+
 from synapse.config import cache as cache_config
-from synapse.util import caches
+from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.util import Clock, caches
 from synapse.util.caches import CacheMetric, register_cache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
+from synapse.util.linked_list import ListNode
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
 
 try:
     from pympler.asizeof import Asizer
@@ -78,23 +90,129 @@ def enumerate_leaves(node, depth):
         yield node
     else:
         for n in node.values():
-            for m in enumerate_leaves(n, depth - 1):
-                yield m
+            yield from enumerate_leaves(n, depth - 1)
+
+
+P = TypeVar("P")
+
+
+class _TimedListNode(ListNode[P]):
+    """A `ListNode` that tracks last access time."""
+
+    __slots__ = ["last_access_ts_secs"]
+
+    def update_last_access(self, clock: Clock):
+        self.last_access_ts_secs = int(clock.time())
+
+
+# Whether to insert new cache entries to the global list. We only add to it if
+# time based eviction is enabled.
+USE_GLOBAL_LIST = False
+
+# A linked list of all cache entries, allowing efficient time based eviction.
+GLOBAL_ROOT = ListNode["_Node"].create_root_node()
+
+
+@wrap_as_background_process("LruCache._expire_old_entries")
+async def _expire_old_entries(clock: Clock, expiry_seconds: int):
+    """Walks the global cache list to find cache entries that haven't been
+    accessed in the given number of seconds.
+    """
+
+    now = int(clock.time())
+    node = GLOBAL_ROOT.prev_node
+    assert node is not None
+
+    i = 0
+
+    logger.debug("Searching for stale caches")
+
+    while node is not GLOBAL_ROOT:
+        # Only the root node isn't a `_TimedListNode`.
+        assert isinstance(node, _TimedListNode)
+
+        if node.last_access_ts_secs > now - expiry_seconds:
+            break
+
+        cache_entry = node.get_cache_entry()
+        next_node = node.prev_node
+
+        # The node should always have a reference to a cache entry and a valid
+        # `prev_node`, as we only drop them when we remove the node from the
+        # list.
+        assert next_node is not None
+        assert cache_entry is not None
+        cache_entry.drop_from_cache()
+
+        # If we do lots of work at once we yield to allow other stuff to happen.
+        if (i + 1) % 10000 == 0:
+            logger.debug("Waiting during drop")
+            await clock.sleep(0)
+            logger.debug("Waking during drop")
+
+        node = next_node
+
+        # If we've yielded then our current node may have been evicted, so we
+        # need to check that it's still valid.
+        if node.prev_node is None:
+            break
+
+        i += 1
+
+    logger.info("Dropped %d items from caches", i)
+
+
+def setup_expire_lru_cache_entries(hs: "HomeServer"):
+    """Start a background job that expires all cache entries if they have not
+    been accessed for the given number of seconds.
+    """
+    if not hs.config.caches.expiry_time_msec:
+        return
+
+    logger.info(
+        "Expiring LRU caches after %d seconds", hs.config.caches.expiry_time_msec / 1000
+    )
+
+    global USE_GLOBAL_LIST
+    USE_GLOBAL_LIST = True
+
+    clock = hs.get_clock()
+    clock.looping_call(
+        _expire_old_entries, 30 * 1000, clock, hs.config.caches.expiry_time_msec / 1000
+    )
 
 
 class _Node:
-    __slots__ = ["prev_node", "next_node", "key", "value", "callbacks", "memory"]
+    __slots__ = [
+        "_list_node",
+        "_global_list_node",
+        "_cache",
+        "key",
+        "value",
+        "callbacks",
+        "memory",
+    ]
 
     def __init__(
         self,
-        prev_node,
-        next_node,
+        root: "ListNode[_Node]",
         key,
         value,
+        cache: "weakref.ReferenceType[LruCache]",
+        clock: Clock,
         callbacks: Collection[Callable[[], None]] = (),
     ):
-        self.prev_node = prev_node
-        self.next_node = next_node
+        self._list_node = ListNode.insert_after(self, root)
+        self._global_list_node = None
+        if USE_GLOBAL_LIST:
+            self._global_list_node = _TimedListNode.insert_after(self, GLOBAL_ROOT)
+            self._global_list_node.update_last_access(clock)
+
+        # We store a weak reference to the cache object so that this _Node can
+        # remove itself from the cache. If the cache is dropped we ensure we
+        # remove our entries in the lists.
+        self._cache = cache
+
         self.key = key
         self.value = value
 
@@ -107,7 +225,7 @@ class _Node:
         # footprint down. Storing `None` is free as it's a singleton, while empty
         # lists are 56 bytes (and empty sets are 216 bytes, if we did the naive
         # thing and used sets).
-        self.callbacks = None  # type: Optional[List[Callable[[], None]]]
+        self.callbacks: Optional[List[Callable[[], None]]] = None
 
         self.add_callbacks(callbacks)
 
@@ -116,11 +234,16 @@ class _Node:
             self.memory = (
                 _get_size_of(key)
                 + _get_size_of(value)
+                + _get_size_of(self._list_node, recurse=False)
                 + _get_size_of(self.callbacks, recurse=False)
                 + _get_size_of(self, recurse=False)
             )
             self.memory += _get_size_of(self.memory, recurse=False)
 
+            if self._global_list_node:
+                self.memory += _get_size_of(self._global_list_node, recurse=False)
+                self.memory += _get_size_of(self._global_list_node.last_access_ts_secs)
+
     def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None:
         """Add to stored list of callbacks, removing duplicates."""
 
@@ -147,6 +270,32 @@ class _Node:
 
         self.callbacks = None
 
+    def drop_from_cache(self) -> None:
+        """Drop this node from the cache.
+
+        Ensures that the entry gets removed from the cache and that we get
+        removed from all lists.
+        """
+        cache = self._cache()
+        if not cache or not cache.pop(self.key, None):
+            # `cache.pop` should call `drop_from_lists()`, unless this Node had
+            # already been removed from the cache.
+            self.drop_from_lists()
+
+    def drop_from_lists(self) -> None:
+        """Remove this node from the cache lists."""
+        self._list_node.remove_from_list()
+
+        if self._global_list_node:
+            self._global_list_node.remove_from_list()
+
+    def move_to_front(self, clock: Clock, cache_list_root: ListNode) -> None:
+        """Moves this node to the front of all the lists it's in."""
+        self._list_node.move_after(cache_list_root)
+        if self._global_list_node:
+            self._global_list_node.move_after(GLOBAL_ROOT)
+            self._global_list_node.update_last_access(clock)
+
 
 class LruCache(Generic[KT, VT]):
     """
@@ -163,6 +312,7 @@ class LruCache(Generic[KT, VT]):
         size_callback: Optional[Callable] = None,
         metrics_collection_callback: Optional[Callable[[], None]] = None,
         apply_cache_factor_from_config: bool = True,
+        clock: Optional[Clock] = None,
     ):
         """
         Args:
@@ -188,6 +338,13 @@ class LruCache(Generic[KT, VT]):
             apply_cache_factor_from_config (bool): If true, `max_size` will be
                 multiplied by a cache factor derived from the homeserver config
         """
+        # Default `clock` to something sensible. Note that we rename it to
+        # `real_clock` so that mypy doesn't think it's still `Optional`.
+        if clock is None:
+            real_clock = Clock(reactor)
+        else:
+            real_clock = clock
+
         cache = cache_type()
         self.cache = cache  # Used for introspection.
         self.apply_cache_factor_from_config = apply_cache_factor_from_config
@@ -204,32 +361,46 @@ class LruCache(Generic[KT, VT]):
 
         # register_cache might call our "set_cache_factor" callback; there's nothing to
         # do yet when we get resized.
-        self._on_resize = None  # type: Optional[Callable[[],None]]
+        self._on_resize: Optional[Callable[[], None]] = None
 
         if cache_name is not None:
-            metrics = register_cache(
+            metrics: Optional[CacheMetric] = register_cache(
                 "lru_cache",
                 cache_name,
                 self,
                 collect_callback=metrics_collection_callback,
-            )  # type: Optional[CacheMetric]
+            )
         else:
             metrics = None
 
         # this is exposed for access from outside this class
         self.metrics = metrics
 
-        list_root = _Node(None, None, None, None)
-        list_root.next_node = list_root
-        list_root.prev_node = list_root
+        # We create a single weakref to self here so that we don't need to keep
+        # creating more each time we create a `_Node`.
+        weak_ref_to_self = weakref.ref(self)
+
+        list_root = ListNode[_Node].create_root_node()
 
         lock = threading.Lock()
 
         def evict():
             while cache_len() > self.max_size:
+                # Get the last node in the list (i.e. the oldest node).
                 todelete = list_root.prev_node
-                evicted_len = delete_node(todelete)
-                cache.pop(todelete.key, None)
+
+                # The list root should always have a valid `prev_node` if the
+                # cache is not empty.
+                assert todelete is not None
+
+                # The node should always have a reference to a cache entry, as
+                # we only drop the cache entry when we remove the node from the
+                # list.
+                node = todelete.get_cache_entry()
+                assert node is not None
+
+                evicted_len = delete_node(node)
+                cache.pop(node.key, None)
                 if metrics:
                     metrics.inc_evictions(evicted_len)
 
@@ -255,11 +426,7 @@ class LruCache(Generic[KT, VT]):
         self.len = synchronized(cache_len)
 
         def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()):
-            prev_node = list_root
-            next_node = prev_node.next_node
-            node = _Node(prev_node, next_node, key, value, callbacks)
-            prev_node.next_node = node
-            next_node.prev_node = node
+            node = _Node(list_root, key, value, weak_ref_to_self, real_clock, callbacks)
             cache[key] = node
 
             if size_callback:
@@ -268,23 +435,11 @@ class LruCache(Generic[KT, VT]):
             if caches.TRACK_MEMORY_USAGE and metrics:
                 metrics.inc_memory_usage(node.memory)
 
-        def move_node_to_front(node):
-            prev_node = node.prev_node
-            next_node = node.next_node
-            prev_node.next_node = next_node
-            next_node.prev_node = prev_node
-            prev_node = list_root
-            next_node = prev_node.next_node
-            node.prev_node = prev_node
-            node.next_node = next_node
-            prev_node.next_node = node
-            next_node.prev_node = node
-
-        def delete_node(node):
-            prev_node = node.prev_node
-            next_node = node.next_node
-            prev_node.next_node = next_node
-            next_node.prev_node = prev_node
+        def move_node_to_front(node: _Node):
+            node.move_to_front(real_clock, list_root)
+
+        def delete_node(node: _Node) -> int:
+            node.drop_from_lists()
 
             deleted_len = 1
             if size_callback:
@@ -411,10 +566,13 @@ class LruCache(Generic[KT, VT]):
 
         @synchronized
         def cache_clear() -> None:
-            list_root.next_node = list_root
-            list_root.prev_node = list_root
             for node in cache.values():
                 node.run_and_clear_callbacks()
+                node.drop_from_lists()
+
+            assert list_root.next_node == list_root
+            assert list_root.prev_node == list_root
+
             cache.clear()
             if size_callback:
                 cached_cache_len[0] = 0
@@ -484,3 +642,11 @@ class LruCache(Generic[KT, VT]):
                 self._on_resize()
             return True
         return False
+
+    def __del__(self) -> None:
+        # We're about to be deleted, so we make sure to clear up all the nodes
+        # and run callbacks, etc.
+        #
+        # This happens e.g. in the sync code where we have an expiring cache of
+        # lru caches.
+        self.clear()
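
The new expiry logic walks a single global list from the least recently used entry towards the most recent one and stops at the first entry touched inside the expiry window, so a sweep only pays for the entries it actually evicts. A stripped-down sketch of that idea, independent of Synapse's `ListNode`/`_TimedListNode` machinery (the `Entry` class and dict here are stand-ins, not the real API):

# Stand-alone sketch of "walk from the oldest entry, stop at the first recent
# one", as used by _expire_old_entries above. Entries are assumed to be kept
# in access order (oldest first), e.g. by moving them to the end on each hit.
import time
from collections import OrderedDict


class Entry:
    def __init__(self, value: str) -> None:
        self.value = value
        self.last_access = time.time()


def expire_old_entries(entries: "OrderedDict[str, Entry]", expiry_seconds: float) -> int:
    now = time.time()
    dropped = 0
    for key in list(entries):
        if entries[key].last_access > now - expiry_seconds:
            break  # everything later in the list was accessed more recently
        del entries[key]
        dropped += 1
    return dropped
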
diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py
index 34c662c4db..ed7204336f 100644
--- a/synapse/util/caches/response_cache.py
+++ b/synapse/util/caches/response_cache.py
@@ -66,7 +66,7 @@ class ResponseCache(Generic[KV]):
         # This is poorly-named: it includes both complete and incomplete results.
         # We keep complete results rather than switching to absolute values because
         # that makes it easier to cache Failure results.
-        self.pending_result_cache = {}  # type: Dict[KV, ObservableDeferred]
+        self.pending_result_cache: Dict[KV, ObservableDeferred] = {}
 
         self.clock = clock
         self.timeout_sec = timeout_ms / 1000.0
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index e81e468899..3a41a8baa6 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -45,10 +45,10 @@ class StreamChangeCache:
     ):
         self._original_max_size = max_size
         self._max_size = math.floor(max_size)
-        self._entity_to_key = {}  # type: Dict[EntityType, int]
+        self._entity_to_key: Dict[EntityType, int] = {}
 
         # map from stream id to the set of entities which changed at that stream id.
-        self._cache = SortedDict()  # type: SortedDict[int, Set[EntityType]]
+        self._cache: SortedDict[int, Set[EntityType]] = SortedDict()
 
         # the earliest stream_pos for which we can reliably answer
         # get_all_entities_changed. In other words, one less than the earliest
@@ -155,7 +155,7 @@ class StreamChangeCache:
         if stream_pos < self._earliest_known_stream_pos:
             return None
 
-        changed_entities = []  # type: List[EntityType]
+        changed_entities: List[EntityType] = []
 
         for k in self._cache.islice(start=self._cache.bisect_right(stream_pos)):
             changed_entities.extend(self._cache[k])
diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py
index a6df81ebff..4138931e7b 100644
--- a/synapse/util/caches/treecache.py
+++ b/synapse/util/caches/treecache.py
@@ -138,7 +138,6 @@ def iterate_tree_cache_entry(d):
     """
     if isinstance(d, TreeCacheNode):
         for value_d in d.values():
-            for value in iterate_tree_cache_entry(value_d):
-                yield value
+            yield from iterate_tree_cache_entry(value_d)
     else:
         yield d
diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py
index c276107d56..46afe3f934 100644
--- a/synapse/util/caches/ttlcache.py
+++ b/synapse/util/caches/ttlcache.py
@@ -23,7 +23,7 @@ from synapse.util.caches import register_cache
 
 logger = logging.getLogger(__name__)
 
-SENTINEL = object()  # type: Any
+SENTINEL: Any = object()
 
 T = TypeVar("T")
 KT = TypeVar("KT")
@@ -35,10 +35,10 @@ class TTLCache(Generic[KT, VT]):
 
     def __init__(self, cache_name: str, timer: Callable[[], float] = time.time):
         # map from key to _CacheEntry
-        self._data = {}  # type: Dict[KT, _CacheEntry]
+        self._data: Dict[KT, _CacheEntry] = {}
 
         # the _CacheEntries, sorted by expiry time
-        self._expiry_list = SortedList()  # type: SortedList[_CacheEntry]
+        self._expiry_list: SortedList[_CacheEntry] = SortedList()
 
         self._timer = timer
 
diff --git a/synapse/util/daemonize.py b/synapse/util/daemonize.py
index 31b24dd188..d8532411c2 100644
--- a/synapse/util/daemonize.py
+++ b/synapse/util/daemonize.py
@@ -31,13 +31,13 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
     # If the pidfile already exists, read the pid from it now: opening it for writing
     # below truncates the file, so we need the old pid to restore it if locking fails.
     if os.path.isfile(pid_file):
-        with open(pid_file, "r") as pid_fh:
+        with open(pid_file) as pid_fh:
             old_pid = pid_fh.read()
 
     # Create a lockfile so that only one instance of this daemon is running at any time.
     try:
         lock_fh = open(pid_file, "w")
-    except IOError:
+    except OSError:
         print("Unable to create the pidfile.")
         sys.exit(1)
 
@@ -45,7 +45,7 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
         # Try to get an exclusive lock on the file. This will fail if another process
         # has the file locked.
         fcntl.flock(lock_fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
-    except IOError:
+    except OSError:
         print("Unable to lock on the pidfile.")
         # We need to overwrite the pidfile if we got here.
         #
@@ -113,7 +113,7 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -
     try:
         lock_fh.write("%s" % (os.getpid()))
         lock_fh.flush()
-    except IOError:
+    except OSError:
         logger.error("Unable to write pid to the pidfile.")
         print("Unable to write pid to the pidfile.")
         sys.exit(1)
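
The `IOError` to `OSError` changes in this file are purely cosmetic: since Python 3.3, `IOError` has been an alias of `OSError`, so both spellings catch exactly the same exceptions. For example:

# IOError has been an alias of OSError since Python 3.3, so this rename does
# not change which exceptions are caught; OSError is just the modern spelling.
assert IOError is OSError

try:
    open("/nonexistent/dir/pidfile", "w")
except OSError as e:            # an `except IOError` clause would catch this too
    print(type(e).__name__)     # typically FileNotFoundError, a subclass of OSError
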
diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py
index 886afa9d19..8ac3eab2f5 100644
--- a/synapse/util/iterutils.py
+++ b/synapse/util/iterutils.py
@@ -68,7 +68,7 @@ def sorted_topologically(
     # This is implemented by Kahn's algorithm.
 
     degree_map = {node: 0 for node in nodes}
-    reverse_graph = {}  # type: Dict[T, Set[T]]
+    reverse_graph: Dict[T, Set[T]] = {}
 
     for node, edges in graph.items():
         if node not in degree_map:
diff --git a/synapse/util/linked_list.py b/synapse/util/linked_list.py
new file mode 100644
index 0000000000..a456b136f0
--- /dev/null
+++ b/synapse/util/linked_list.py
@@ -0,0 +1,150 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A circular doubly linked list implementation.
+"""
+
+import threading
+from typing import Generic, Optional, Type, TypeVar
+
+P = TypeVar("P")
+LN = TypeVar("LN", bound="ListNode")
+
+
+class ListNode(Generic[P]):
+    """A node in a circular doubly linked list, with an (optional) reference to
+    a cache entry.
+
+    The reference should only be `None` for the root node or if the node has
+    been removed from the list.
+    """
+
+    # A lock to protect mutating the list prev/next pointers.
+    _LOCK = threading.Lock()
+
+    # We don't use attrs here as in py3.6 you can't have `attr.s(slots=True)`
+    # and inherit from `Generic` for some reason
+    __slots__ = [
+        "cache_entry",
+        "prev_node",
+        "next_node",
+    ]
+
+    def __init__(self, cache_entry: Optional[P] = None) -> None:
+        self.cache_entry = cache_entry
+        self.prev_node: Optional[ListNode[P]] = None
+        self.next_node: Optional[ListNode[P]] = None
+
+    @classmethod
+    def create_root_node(cls: Type["ListNode[P]"]) -> "ListNode[P]":
+        """Create a new linked list by creating a "root" node, which is a node
+        that has prev_node/next_node pointing to itself and no associated cache
+        entry.
+        """
+        root = cls()
+        root.prev_node = root
+        root.next_node = root
+        return root
+
+    @classmethod
+    def insert_after(
+        cls: Type[LN],
+        cache_entry: P,
+        node: "ListNode[P]",
+    ) -> LN:
+        """Create a new list node that is placed after the given node.
+
+        Args:
+            cache_entry: The associated cache entry.
+            node: The existing node in the list to insert the new entry after.
+        """
+        new_node = cls(cache_entry)
+        with cls._LOCK:
+            new_node._refs_insert_after(node)
+        return new_node
+
+    def remove_from_list(self):
+        """Remove this node from the list."""
+        with self._LOCK:
+            self._refs_remove_node_from_list()
+
+        # We drop the reference to the cache entry to break the reference cycle
+        # between the list node and cache entry, allowing the two to be dropped
+        # immediately rather than at the next GC.
+        self.cache_entry = None
+
+    def move_after(self, node: "ListNode"):
+        """Move this node from its current location in the list to after the
+        given node.
+        """
+        with self._LOCK:
+            # We assert that both this node and the target node are still "alive".
+            assert self.prev_node
+            assert self.next_node
+            assert node.prev_node
+            assert node.next_node
+
+            assert self is not node
+
+            # Remove self from the list
+            self._refs_remove_node_from_list()
+
+            # Insert self back into the list, after target node
+            self._refs_insert_after(node)
+
+    def _refs_remove_node_from_list(self):
+        """Internal method to *just* remove the node from the list, without
+        e.g. clearing out the cache entry.
+        """
+        if self.prev_node is None or self.next_node is None:
+            # We've already been removed from the list.
+            return
+
+        prev_node = self.prev_node
+        next_node = self.next_node
+
+        prev_node.next_node = next_node
+        next_node.prev_node = prev_node
+
+        # We set these to None so that we don't get circular references,
+        # allowing us to be dropped without having to go via the GC.
+        self.prev_node = None
+        self.next_node = None
+
+    def _refs_insert_after(self, node: "ListNode"):
+        """Internal method to insert the node after the given node."""
+
+        # This method should only be called when we're not already in the list.
+        assert self.prev_node is None
+        assert self.next_node is None
+
+        # We expect the given node to be in the list and thus have valid
+        # prev/next refs.
+        assert node.next_node
+        assert node.prev_node
+
+        prev_node = node
+        next_node = node.next_node
+
+        self.prev_node = prev_node
+        self.next_node = next_node
+
+        prev_node.next_node = self
+        next_node.prev_node = self
+
+    def get_cache_entry(self) -> Optional[P]:
+        """Get the cache entry; returns None if this is the root node (i.e.
+        cache_entry is None) or if the entry has been dropped.
+        """
+        return self.cache_entry
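
A short usage sketch of the `ListNode` API added above, using arbitrary strings in place of the cache entries `LruCache` stores: create a root, insert two nodes, move one back to the front, and remove the other.

# Usage sketch for the ListNode API introduced above; the string entries are
# arbitrary stand-ins for the cache entries LruCache stores.
from synapse.util.linked_list import ListNode

root = ListNode[str].create_root_node()
a = ListNode.insert_after("a", root)    # list is root <-> a
b = ListNode.insert_after("b", root)    # list is root <-> b <-> a

a.move_after(root)                      # promote "a": root <-> a <-> b
assert root.next_node is a
assert a.get_cache_entry() == "a"

b.remove_from_list()                    # unlink "b" and drop its entry
assert b.get_cache_entry() is None
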
diff --git a/synapse/util/macaroons.py b/synapse/util/macaroons.py
index f6ebfd7e7d..d1f76e3dc5 100644
--- a/synapse/util/macaroons.py
+++ b/synapse/util/macaroons.py
@@ -39,7 +39,7 @@ def get_value_from_macaroon(macaroon: pymacaroons.Macaroon, key: str) -> str:
              caveat in the macaroon, or if the caveat was not found in the macaroon.
     """
     prefix = key + " = "
-    result = None  # type: Optional[str]
+    result: Optional[str] = None
     for caveat in macaroon.caveats:
         if not caveat.caveat_id.startswith(prefix):
             continue
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 45353d41c5..1b82dca81b 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -124,7 +124,7 @@ class Measure:
             assert isinstance(curr_context, LoggingContext)
             parent_context = curr_context
         self._logging_context = LoggingContext(str(curr_context), parent_context)
-        self.start = None  # type: Optional[int]
+        self.start: Optional[int] = None
 
     def __enter__(self) -> "Measure":
         if self.start is not None:
diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index eed0291cae..99f01e325c 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -41,7 +41,7 @@ def do_patch():
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
             start_context = current_context()
-            changes = []  # type: List[str]
+            changes: List[str] = []
             orig = orig_inline_callbacks(_check_yield_points(f, changes))
 
             try:
@@ -131,7 +131,7 @@ def _check_yield_points(f: Callable, changes: List[str]):
         gen = f(*args, **kwargs)
 
         last_yield_line_no = gen.gi_frame.f_lineno
-        result = None  # type: Any
+        result: Any = None
         while True:
             expected_context = current_context()
 
diff --git a/synapse/visibility.py b/synapse/visibility.py
index 490fb26e81..17532059e9 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -90,13 +90,13 @@ async def filter_events_for_client(
         AccountDataTypes.IGNORED_USER_LIST, user_id
     )
 
-    ignore_list = frozenset()  # type: FrozenSet[str]
+    ignore_list: FrozenSet[str] = frozenset()
     if ignore_dict_content:
         ignored_users_dict = ignore_dict_content.get("ignored_users", {})
         if isinstance(ignored_users_dict, dict):
             ignore_list = frozenset(ignored_users_dict.keys())
 
-    erased_senders = await storage.main.are_users_erased((e.sender for e in events))
+    erased_senders = await storage.main.are_users_erased(e.sender for e in events)
 
     if filter_send_to_client:
         room_ids = {e.room_id for e in events}
@@ -353,7 +353,7 @@ async def filter_events_for_server(
         )
 
     if not check_history_visibility_only:
-        erased_senders = await storage.main.are_users_erased((e.sender for e in events))
+        erased_senders = await storage.main.are_users_erased(e.sender for e in events)
     else:
         # We don't want to check whether users are erased, which is equivalent
         # to no users having been erased.
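
The two `are_users_erased` hunks only drop redundant parentheses: a generator expression that is the sole argument of a call can reuse the call's own parentheses. For example:

# A generator expression passed as the only argument to a call does not need
# its own parentheses; the two calls below are equivalent.
senders = ["@alice:example.org", "@bob:example.org"]
assert sorted(s.upper() for s in senders) == sorted((s.upper() for s in senders))
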
diff --git a/sytest-blacklist b/sytest-blacklist
index 89c4e828fd..de9986357b 100644
--- a/sytest-blacklist
+++ b/sytest-blacklist
@@ -41,9 +41,3 @@ We can't peek into rooms with invited history_visibility
 We can't peek into rooms with joined history_visibility
 Local users can peek by room alias
 Peeked rooms only turn up in the sync for the device who peeked them
-
-
-# Blacklisted due to changes made in #10272
-Outbound federation will ignore a missing event with bad JSON for room version 6
-Backfilled events whose prev_events are in a different room do not allow cross-room back-pagination
-Federation rejects inbound events where the prev_events cannot be found
diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py
new file mode 100644
index 0000000000..5527e278db
--- /dev/null
+++ b/tests/app/test_phone_stats_home.py
@@ -0,0 +1,395 @@
+import synapse
+from synapse.app.phone_stats_home import start_phone_stats_home
+from synapse.rest.client.v1 import login, room
+
+from tests import unittest
+from tests.unittest import HomeserverTestCase
+
+FIVE_MINUTES_IN_SECONDS = 300
+ONE_DAY_IN_SECONDS = 86400
+
+
+class PhoneHomeTestCase(HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    # Override the retention time for the user_ips table because otherwise it
+    # gets pruned too aggressively for our R30 test.
+    @unittest.override_config({"user_ips_max_age": "365d"})
+    def test_r30_minimum_usage(self):
+        """
+        Tests the minimum amount of interaction necessary for the R30 metric
+        to consider a user 'retained'.
+        """
+
+        # Register a user, log it in, create a room and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
+        self.helper.send(room_id, "message", tok=access_token)
+
+        # Check the R30 results do not count that user.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+        # Advance 30 days (+ 1 second, because strict inequality causes issues if we are
+        # bang on 30 days later).
+        self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
+
+        # (Make sure the user isn't somehow counted by this point.)
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+        # Send a message (this counts as activity)
+        self.helper.send(room_id, "message2", tok=access_token)
+
+        # We have to wait some time for _update_client_ips_batch to get
+        # called and update the user_ips table.
+        self.reactor.advance(2 * 60 * 60)
+
+        # *Now* the user is counted.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+        # Advance 29 days. The user has now not posted for 29 days.
+        self.reactor.advance(29 * ONE_DAY_IN_SECONDS)
+
+        # The user is still counted.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+        # Advance another day. The user has now not posted for 30 days.
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+
+        # The user is now no longer counted in R30.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+    def test_r30_minimum_usage_using_default_config(self):
+        """
+        Tests the minimum amount of interaction necessary for the R30 metric
+        to consider a user 'retained'.
+
+        N.B. This test does not override the `user_ips_max_age` config setting,
+        which defaults to 28 days.
+        """
+
+        # Register a user, log it in, create a room and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
+        self.helper.send(room_id, "message", tok=access_token)
+
+        # Check the R30 results do not count that user.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+        # Advance 30 days (+ 1 second, because strict inequality causes issues if we are
+        # bang on 30 days later).
+        self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
+
+        # (Make sure the user isn't somehow counted by this point.)
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+        # Send a message (this counts as activity)
+        self.helper.send(room_id, "message2", tok=access_token)
+
+        # We have to wait some time for _update_client_ips_batch to get
+        # called and update the user_ips table.
+        self.reactor.advance(2 * 60 * 60)
+
+        # *Now* the user is counted.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+        # Advance 27 days. The user has now not posted for 27 days.
+        self.reactor.advance(27 * ONE_DAY_IN_SECONDS)
+
+        # The user is still counted.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+        # Advance another day. The user has now not posted for 28 days.
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+
+        # The user is now no longer counted in R30.
+        # (This is because the user_ips table has been pruned, which by default
+        # only preserves the last 28 days of entries.)
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+    def test_r30_user_must_be_retained_for_at_least_a_month(self):
+        """
+        Tests that a newly-registered user must be retained for a whole month
+        before appearing in the R30 statistic, even if they post every day
+        during that time!
+        """
+        # Register a user and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
+        self.helper.send(room_id, "message", tok=access_token)
+
+        # Check the user does not contribute to R30 yet.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 0})
+
+        for _ in range(30):
+            # This loop posts a message every day for 30 days
+            self.reactor.advance(ONE_DAY_IN_SECONDS)
+            self.helper.send(room_id, "I'm still here", tok=access_token)
+
+            # Notice that the user *still* does not contribute to R30!
+            r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+            self.assertEqual(r30_results, {"all": 0})
+
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+        self.helper.send(room_id, "Still here!", tok=access_token)
+
+        # *Now* the user appears in R30.
+        r30_results = self.get_success(self.hs.get_datastore().count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
+
+
+class PhoneHomeR30V2TestCase(HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def _advance_to(self, desired_time_secs: float):
+        now = self.hs.get_clock().time()
+        assert now < desired_time_secs
+        self.reactor.advance(desired_time_secs - now)
+
+    def make_homeserver(self, reactor, clock):
+        hs = super(PhoneHomeR30V2TestCase, self).make_homeserver(reactor, clock)
+
+        # We don't want our tests to actually report statistics, so check
+        # that it's not enabled
+        assert not hs.config.report_stats
+
+        # This starts the needed data collection that we rely on to calculate
+        # R30v2 metrics.
+        start_phone_stats_home(hs)
+        return hs
+
+    def test_r30v2_minimum_usage(self):
+        """
+        Tests the minimum amount of interaction necessary for the R30v2 metric
+        to consider a user 'retained'.
+        """
+
+        # Register a user, log it in, create a room and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
+        self.helper.send(room_id, "message", tok=access_token)
+        first_post_at = self.hs.get_clock().time()
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check the R30 results do not count that user.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance 31 days.
+        # (R30v2 includes users with **more** than 30 days between the two visits,
+        #  and user_daily_visits records the timestamp as the start of the day.)
+        self.reactor.advance(31 * ONE_DAY_IN_SECONDS)
+        # Also advance 5 minutes to let another user_daily_visits update occur
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # (Make sure the user isn't somehow counted by this point.)
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Send a message (this counts as activity)
+        self.helper.send(room_id, "message2", tok=access_token)
+
+        # We have to wait a few minutes for the user_daily_visits table to
+        # be updated by a background process.
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # *Now* the user is counted.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance to JUST under 60 days after the user's first post
+        self._advance_to(first_post_at + 60 * ONE_DAY_IN_SECONDS - 5)
+
+        # Check the user is still counted.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Advance into the next day. The user's first activity is now more than 60 days old.
+        self._advance_to(first_post_at + 60 * ONE_DAY_IN_SECONDS + 5)
+
+        # Check the user is now no longer counted in R30.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+    def test_r30v2_user_must_be_retained_for_at_least_a_month(self):
+        """
+        Tests that a newly-registered user must be retained for a whole month
+        before appearing in the R30v2 statistic, even if they post every day
+        during that time!
+        """
+
+        # set a custom user-agent to impersonate Element/Android.
+        headers = (
+            (
+                "User-Agent",
+                "Element/1.1 (Linux; U; Android 9; MatrixAndroidSDK_X 0.0.1)",
+            ),
+        )
+
+        # Register a user and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!", custom_headers=headers)
+        room_id = self.helper.create_room_as(
+            room_creator=user_id, tok=access_token, custom_headers=headers
+        )
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check the user does not contribute to R30 yet.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        for _ in range(30):
+            # This loop posts a message every day for 30 days
+            self.reactor.advance(ONE_DAY_IN_SECONDS - FIVE_MINUTES_IN_SECONDS)
+            self.helper.send(
+                room_id, "I'm still here", tok=access_token, custom_headers=headers
+            )
+
+            # give time for user_daily_visits to update
+            self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+            # Notice that the user *still* does not contribute to R30!
+            r30_results = self.get_success(store.count_r30v2_users())
+            self.assertEqual(
+                r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+            )
+
+        # advance yet another day with more activity
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+        self.helper.send(
+            room_id, "Still here!", tok=access_token, custom_headers=headers
+        )
+
+        # give time for user_daily_visits to update
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # *Now* the user appears in R30.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "android": 1, "electron": 0, "ios": 0, "web": 0}
+        )
+
+    def test_r30v2_returning_dormant_users_not_counted(self):
+        """
+        Tests that dormant users (users inactive for a long time) do not
+        contribute to R30v2 when they return for just a single day.
+        This is a key difference between R30 and R30v2.
+        """
+
+        # set a custom user-agent to impersonate Element/iOS.
+        headers = (
+            (
+                "User-Agent",
+                "Riot/1.4 (iPhone; iOS 13; Scale/4.00)",
+            ),
+        )
+
+        # Register a user and send a message
+        user_id = self.register_user("u1", "secret!")
+        access_token = self.login("u1", "secret!", custom_headers=headers)
+        room_id = self.helper.create_room_as(
+            room_creator=user_id, tok=access_token, custom_headers=headers
+        )
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # the user goes inactive for 2 months
+        self.reactor.advance(60 * ONE_DAY_IN_SECONDS)
+
+        # the user returns for one day, perhaps just to check out a new feature
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # Give time for user_daily_visits table to be updated.
+        # (user_daily_visits is updated every 5 minutes using a looping call.)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        store = self.hs.get_datastore()
+
+        # Check that the user does not contribute to R30v2, even though it's been
+        # more than 30 days since registration.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
+
+        # Check that this is a situation where old R30 differs:
+        # old R30 DOES count this as 'retained'.
+        r30_results = self.get_success(store.count_r30_users())
+        self.assertEqual(r30_results, {"all": 1, "ios": 1})
+
+        # Now we want to check that the user will still be able to appear in
+        # R30v2 as long as the user performs some other activity between
+        # 30 and 60 days later.
+        self.reactor.advance(32 * ONE_DAY_IN_SECONDS)
+        self.helper.send(room_id, "message", tok=access_token, custom_headers=headers)
+
+        # (give time for tables to update)
+        self.reactor.advance(FIVE_MINUTES_IN_SECONDS)
+
+        # Check the user now satisfies the requirements to appear in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "ios": 1, "android": 0, "electron": 0, "web": 0}
+        )
+
+        # Advance to 59.5 days after the user's first R30v2-eligible activity.
+        self.reactor.advance(27.5 * ONE_DAY_IN_SECONDS)
+
+        # Check the user still appears in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 1, "ios": 1, "android": 0, "electron": 0, "web": 0}
+        )
+
+        # Advance to 60.5 days after the user's first R30v2-eligible activity.
+        self.reactor.advance(ONE_DAY_IN_SECONDS)
+
+        # Check the user no longer appears in R30v2.
+        r30_results = self.get_success(store.count_r30v2_users())
+        self.assertEqual(
+            r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
+        )
diff --git a/tests/config/test_load.py b/tests/config/test_load.py
index ebe2c05165..903c69127d 100644
--- a/tests/config/test_load.py
+++ b/tests/config/test_load.py
@@ -43,7 +43,7 @@ class ConfigLoadingTestCase(unittest.TestCase):
     def test_generates_and_loads_macaroon_secret_key(self):
         self.generate_config()
 
-        with open(self.file, "r") as f:
+        with open(self.file) as f:
             raw = yaml.safe_load(f)
         self.assertIn("macaroon_secret_key", raw)
 
@@ -120,7 +120,7 @@ class ConfigLoadingTestCase(unittest.TestCase):
     def generate_config_and_remove_lines_containing(self, needle):
         self.generate_config()
 
-        with open(self.file, "r") as f:
+        with open(self.file) as f:
             contents = f.readlines()
         contents = [line for line in contents if needle not in line]
         with open(self.file, "w") as f:
diff --git a/tests/events/test_presence_router.py b/tests/events/test_presence_router.py
index 875b0d0a11..3f41e99950 100644
--- a/tests/events/test_presence_router.py
+++ b/tests/events/test_presence_router.py
@@ -152,7 +152,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
         )
         self.assertEqual(len(presence_updates), 1)
 
-        presence_update = presence_updates[0]  # type: UserPresenceState
+        presence_update: UserPresenceState = presence_updates[0]
         self.assertEqual(presence_update.user_id, self.other_user_one_id)
         self.assertEqual(presence_update.state, "online")
         self.assertEqual(presence_update.status_msg, "boop")
@@ -274,7 +274,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
         presence_updates, _ = sync_presence(self, self.other_user_id)
         self.assertEqual(len(presence_updates), 1)
 
-        presence_update = presence_updates[0]  # type: UserPresenceState
+        presence_update: UserPresenceState = presence_updates[0]
         self.assertEqual(presence_update.user_id, self.other_user_id)
         self.assertEqual(presence_update.state, "online")
         self.assertEqual(presence_update.status_msg, "I'm online!")
@@ -285,6 +285,10 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
         presence_updates, _ = sync_presence(self, self.presence_receiving_user_two_id)
         self.assertEqual(len(presence_updates), 3)
 
+        # We stagger sending of presence, so we need to wait a bit for them to
+        # get sent out.
+        self.reactor.advance(60)
+
         # Test that sending to a remote user works
         remote_user_id = "@far_away_person:island"
 
@@ -301,6 +305,10 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
             self.module_api.send_local_online_presence_to([remote_user_id])
         )
 
+        # We stagger sending of presence, so we need to wait a bit for them to
+        # get sent out.
+        self.reactor.advance(60)
+
         # Check that the expected presence updates were sent
         # We explicitly compare using sets as we expect that calling
         # module_api.send_local_online_presence_to will create a presence
@@ -320,7 +328,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
         )
         for call in calls:
             call_args = call[0]
-            federation_transaction = call_args[0]  # type: Transaction
+            federation_transaction: Transaction = call_args[0]
 
             # Get the sent EDUs in this transaction
             edus = federation_transaction.get_dict()["edus"]
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index dfb9b3a0fa..18e92e90d7 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -734,7 +734,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
 
         self.store = hs.get_datastore()
         self.state = hs.get_state_handler()
-        self.auth = hs.get_auth()
+        self._event_auth_handler = hs.get_event_auth_handler()
 
         # We don't actually check signatures in tests, so lets just create a
         # random key to use.
@@ -846,7 +846,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
 
         builder = EventBuilder(
             state=self.state,
-            auth=self.auth,
+            event_auth_handler=self._event_auth_handler,
             store=self.store,
             clock=self.clock,
             hostname=hostname,
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index cdb41101b3..2928c4f48c 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -103,7 +103,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertIsNone(
-            (self.get_success(self.store.get_profile_displayname(self.frank.localpart)))
+            self.get_success(self.store.get_profile_displayname(self.frank.localpart))
         )
 
     def test_set_my_name_if_disabled(self):
diff --git a/tests/handlers/test_space_summary.py b/tests/handlers/test_space_summary.py
index 9771d3fb3b..3f73ad7f94 100644
--- a/tests/handlers/test_space_summary.py
+++ b/tests/handlers/test_space_summary.py
@@ -14,8 +14,18 @@
 from typing import Any, Iterable, Optional, Tuple
 from unittest import mock
 
-from synapse.api.constants import EventContentFields, RoomTypes
+from synapse.api.constants import (
+    EventContentFields,
+    EventTypes,
+    HistoryVisibility,
+    JoinRules,
+    Membership,
+    RestrictedJoinRuleTypes,
+    RoomTypes,
+)
 from synapse.api.errors import AuthError
+from synapse.api.room_versions import RoomVersions
+from synapse.events import make_event_from_dict
 from synapse.handlers.space_summary import _child_events_comparison_key
 from synapse.rest import admin
 from synapse.rest.client.v1 import login, room
@@ -117,7 +127,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         """Add a child room to a space."""
         self.helper.send_state(
             space_id,
-            event_type="m.space.child",
+            event_type=EventTypes.SpaceChild,
             body={"via": [self.hs.hostname]},
             tok=token,
             state_key=room_id,
@@ -155,26 +165,379 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         # The user cannot see the space.
         self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError)
 
-        # Joining the room causes it to be visible.
-        self.helper.join(self.space, user2, tok=token2)
+        # If the space is made world-readable it should return a result.
+        self.helper.send_state(
+            self.space,
+            event_type=EventTypes.RoomHistoryVisibility,
+            body={"history_visibility": HistoryVisibility.WORLD_READABLE},
+            tok=self.token,
+        )
         result = self.get_success(self.handler.get_space_summary(user2, self.space))
-
-        # The result should only have the space, but includes the link to the room.
-        self._assert_rooms(result, [self.space])
+        self._assert_rooms(result, [self.space, self.room])
         self._assert_events(result, [(self.space, self.room)])
 
-    def test_world_readable(self):
-        """A world-readable room is visible to everyone."""
+        # Make it not world-readable again and confirm it results in an error.
         self.helper.send_state(
             self.space,
-            event_type="m.room.history_visibility",
-            body={"history_visibility": "world_readable"},
+            event_type=EventTypes.RoomHistoryVisibility,
+            body={"history_visibility": HistoryVisibility.JOINED},
+            tok=self.token,
+        )
+        self.get_failure(self.handler.get_space_summary(user2, self.space), AuthError)
+
+        # Join the space and results should be returned.
+        self.helper.join(self.space, user2, tok=token2)
+        result = self.get_success(self.handler.get_space_summary(user2, self.space))
+        self._assert_rooms(result, [self.space, self.room])
+        self._assert_events(result, [(self.space, self.room)])
+
+    def _create_room_with_join_rule(
+        self, join_rule: str, room_version: Optional[str] = None, **extra_content
+    ) -> str:
+        """Create a room with the given join rule and add it to the space."""
+        room_id = self.helper.create_room_as(
+            self.user,
+            room_version=room_version,
             tok=self.token,
+            extra_content={
+                "initial_state": [
+                    {
+                        "type": EventTypes.JoinRules,
+                        "state_key": "",
+                        "content": {
+                            "join_rule": join_rule,
+                            **extra_content,
+                        },
+                    }
+                ]
+            },
         )
+        self._add_child(self.space, room_id, self.token)
+        return room_id
 
+    def test_filtering(self):
+        """
+        Rooms should be properly filtered to only include rooms the user has access to.
+        """
         user2 = self.register_user("user2", "pass")
+        token2 = self.login("user2", "pass")
 
-        # The space should be visible, as well as the link to the room.
+        # Create a few rooms which will have different properties.
+        public_room = self._create_room_with_join_rule(JoinRules.PUBLIC)
+        knock_room = self._create_room_with_join_rule(
+            JoinRules.KNOCK, room_version=RoomVersions.V7.identifier
+        )
+        not_invited_room = self._create_room_with_join_rule(JoinRules.INVITE)
+        invited_room = self._create_room_with_join_rule(JoinRules.INVITE)
+        self.helper.invite(invited_room, targ=user2, tok=self.token)
+        restricted_room = self._create_room_with_join_rule(
+            JoinRules.MSC3083_RESTRICTED,
+            room_version=RoomVersions.MSC3083.identifier,
+            allow=[],
+        )
+        restricted_accessible_room = self._create_room_with_join_rule(
+            JoinRules.MSC3083_RESTRICTED,
+            room_version=RoomVersions.MSC3083.identifier,
+            allow=[
+                {
+                    "type": RestrictedJoinRuleTypes.ROOM_MEMBERSHIP,
+                    "room_id": self.space,
+                    "via": [self.hs.hostname],
+                }
+            ],
+        )
+        world_readable_room = self._create_room_with_join_rule(JoinRules.INVITE)
+        self.helper.send_state(
+            world_readable_room,
+            event_type=EventTypes.RoomHistoryVisibility,
+            body={"history_visibility": HistoryVisibility.WORLD_READABLE},
+            tok=self.token,
+        )
+        joined_room = self._create_room_with_join_rule(JoinRules.INVITE)
+        self.helper.invite(joined_room, targ=user2, tok=self.token)
+        self.helper.join(joined_room, user2, tok=token2)
+
+        # Join the space.
+        self.helper.join(self.space, user2, tok=token2)
         result = self.get_success(self.handler.get_space_summary(user2, self.space))
-        self._assert_rooms(result, [self.space])
-        self._assert_events(result, [(self.space, self.room)])
+
+        self._assert_rooms(
+            result,
+            [
+                self.space,
+                self.room,
+                public_room,
+                knock_room,
+                invited_room,
+                restricted_accessible_room,
+                world_readable_room,
+                joined_room,
+            ],
+        )
+        self._assert_events(
+            result,
+            [
+                (self.space, self.room),
+                (self.space, public_room),
+                (self.space, knock_room),
+                (self.space, not_invited_room),
+                (self.space, invited_room),
+                (self.space, restricted_room),
+                (self.space, restricted_accessible_room),
+                (self.space, world_readable_room),
+                (self.space, joined_room),
+            ],
+        )
+
+    def test_complex_space(self):
+        """
+        Create a "complex" space to see how it handles things like loops and subspaces.
+        """
+        # Create an inaccessible room.
+        user2 = self.register_user("user2", "pass")
+        token2 = self.login("user2", "pass")
+        room2 = self.helper.create_room_as(user2, is_public=False, tok=token2)
+        # This is a bit odd as "user" is adding a room they don't know about, but
+        # it works for the tests.
+        self._add_child(self.space, room2, self.token)
+
+        # Create a subspace under the space with an additional room in it.
+        subspace = self.helper.create_room_as(
+            self.user,
+            tok=self.token,
+            extra_content={
+                "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE}
+            },
+        )
+        subroom = self.helper.create_room_as(self.user, tok=self.token)
+        self._add_child(self.space, subspace, token=self.token)
+        self._add_child(subspace, subroom, token=self.token)
+        # Also add the two rooms from the space into this subspace (causing loops).
+        self._add_child(subspace, self.room, token=self.token)
+        self._add_child(subspace, room2, self.token)
+
+        result = self.get_success(self.handler.get_space_summary(self.user, self.space))
+
+        # The result should include each room a single time and each link.
+        self._assert_rooms(result, [self.space, self.room, subspace, subroom])
+        self._assert_events(
+            result,
+            [
+                (self.space, self.room),
+                (self.space, room2),
+                (self.space, subspace),
+                (subspace, subroom),
+                (subspace, self.room),
+                (subspace, room2),
+            ],
+        )
+
+    def test_fed_complex(self):
+        """
+        Return data over federation and ensure that it is handled properly.
+        """
+        fed_hostname = self.hs.hostname + "2"
+        subspace = "#subspace:" + fed_hostname
+        subroom = "#subroom:" + fed_hostname
+
+        async def summarize_remote_room(
+            _self, room, suggested_only, max_children, exclude_rooms
+        ):
+            # Return some good data, and some bad data:
+            #
+            # * Event *back* to the root room.
+            # * Unrelated events / rooms
+            # * Multiple levels of events (in a not-useful order, e.g. grandchild
+            #   events before child events).
+
+            # Note that these entries are brief, but should contain enough info.
+            rooms = [
+                {
+                    "room_id": subspace,
+                    "world_readable": True,
+                    "room_type": RoomTypes.SPACE,
+                },
+                {
+                    "room_id": subroom,
+                    "world_readable": True,
+                },
+            ]
+            event_content = {"via": [fed_hostname]}
+            events = [
+                {
+                    "room_id": subspace,
+                    "state_key": subroom,
+                    "content": event_content,
+                },
+            ]
+            return rooms, events
+
+        # Add a room to the space which is on another server.
+        self._add_child(self.space, subspace, self.token)
+
+        with mock.patch(
+            "synapse.handlers.space_summary.SpaceSummaryHandler._summarize_remote_room",
+            new=summarize_remote_room,
+        ):
+            result = self.get_success(
+                self.handler.get_space_summary(self.user, self.space)
+            )
+
+        self._assert_rooms(result, [self.space, self.room, subspace, subroom])
+        self._assert_events(
+            result,
+            [
+                (self.space, self.room),
+                (self.space, subspace),
+                (subspace, subroom),
+            ],
+        )
+
+    def test_fed_filtering(self):
+        """
+        Rooms returned over federation should be properly filtered to only include
+        rooms the user has access to.
+        """
+        fed_hostname = self.hs.hostname + "2"
+        subspace = "#subspace:" + fed_hostname
+
+        # Create a few rooms which will have different properties.
+        public_room = "#public:" + fed_hostname
+        knock_room = "#knock:" + fed_hostname
+        not_invited_room = "#not_invited:" + fed_hostname
+        invited_room = "#invited:" + fed_hostname
+        restricted_room = "#restricted:" + fed_hostname
+        restricted_accessible_room = "#restricted_accessible:" + fed_hostname
+        world_readable_room = "#world_readable:" + fed_hostname
+        joined_room = self.helper.create_room_as(self.user, tok=self.token)
+
+        # Poke an invite over federation into the database.
+        fed_handler = self.hs.get_federation_handler()
+        event = make_event_from_dict(
+            {
+                "room_id": invited_room,
+                "event_id": "!abcd:" + fed_hostname,
+                "type": EventTypes.Member,
+                "sender": "@remote:" + fed_hostname,
+                "state_key": self.user,
+                "content": {"membership": Membership.INVITE},
+                "prev_events": [],
+                "auth_events": [],
+                "depth": 1,
+                "origin_server_ts": 1234,
+            }
+        )
+        self.get_success(
+            fed_handler.on_invite_request(fed_hostname, event, RoomVersions.V6)
+        )
+
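+        # Fake the remote room summary so the federation response includes one
+        # room per join rule / history visibility combination, plus the subspace.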
+        async def summarize_remote_room(
+            _self, room, suggested_only, max_children, exclude_rooms
+        ):
+            # Note that these entries are brief, but should contain enough info.
+            rooms = [
+                {
+                    "room_id": public_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.PUBLIC,
+                },
+                {
+                    "room_id": knock_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.KNOCK,
+                },
+                {
+                    "room_id": not_invited_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.INVITE,
+                },
+                {
+                    "room_id": invited_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.INVITE,
+                },
+                {
+                    "room_id": restricted_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.MSC3083_RESTRICTED,
+                    "allowed_spaces": [],
+                },
+                {
+                    "room_id": restricted_accessible_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.MSC3083_RESTRICTED,
+                    "allowed_spaces": [self.room],
+                },
+                {
+                    "room_id": world_readable_room,
+                    "world_readable": True,
+                    "join_rules": JoinRules.INVITE,
+                },
+                {
+                    "room_id": joined_room,
+                    "world_readable": False,
+                    "join_rules": JoinRules.INVITE,
+                },
+            ]
+
+            # Place each room in the sub-space.
+            event_content = {"via": [fed_hostname]}
+            events = [
+                {
+                    "room_id": subspace,
+                    "state_key": room["room_id"],
+                    "content": event_content,
+                }
+                for room in rooms
+            ]
+
+            # Also include the subspace.
+            rooms.insert(
+                0,
+                {
+                    "room_id": subspace,
+                    "world_readable": True,
+                },
+            )
+            return rooms, events
+
+        # Add a room to the space which is on another server.
+        self._add_child(self.space, subspace, self.token)
+
+        with mock.patch(
+            "synapse.handlers.space_summary.SpaceSummaryHandler._summarize_remote_room",
+            new=summarize_remote_room,
+        ):
+            result = self.get_success(
+                self.handler.get_space_summary(self.user, self.space)
+            )
+
+        self._assert_rooms(
+            result,
+            [
+                self.space,
+                self.room,
+                subspace,
+                public_room,
+                knock_room,
+                invited_room,
+                restricted_accessible_room,
+                world_readable_room,
+                joined_room,
+            ],
+        )
+        self._assert_events(
+            result,
+            [
+                (self.space, self.room),
+                (self.space, subspace),
+                (subspace, public_room),
+                (subspace, knock_room),
+                (subspace, not_invited_room),
+                (subspace, invited_room),
+                (subspace, restricted_room),
+                (subspace, restricted_accessible_room),
+                (subspace, world_readable_room),
+                (subspace, joined_room),
+            ],
+        )
diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py
index c9d4fd9336..e4059acda3 100644
--- a/tests/handlers/test_stats.py
+++ b/tests/handlers/test_stats.py
@@ -88,16 +88,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
     def _get_current_stats(self, stats_type, stat_id):
         table, id_col = stats.TYPE_TO_TABLE[stats_type]
 
-        cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
-            stats.PER_SLICE_FIELDS[stats_type]
-        )
-
-        end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000)
+        cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type])
 
         return self.get_success(
             self.store.db_pool.simple_select_one(
-                table + "_historical",
-                {id_col: stat_id, end_ts: end_ts},
+                table + "_current",
+                {id_col: stat_id},
                 cols,
                 allow_none=True,
             )
@@ -156,115 +152,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.assertEqual(len(r), 1)
         self.assertEqual(r[0]["topic"], "foo")
 
-    def test_initial_earliest_token(self):
-        """
-        Ingestion via notify_new_event will ignore tokens that the background
-        update have already processed.
-        """
-
-        self.reactor.advance(86401)
-
-        self.hs.config.stats_enabled = False
-        self.handler.stats_enabled = False
-
-        u1 = self.register_user("u1", "pass")
-        u1_token = self.login("u1", "pass")
-
-        u2 = self.register_user("u2", "pass")
-        u2_token = self.login("u2", "pass")
-
-        u3 = self.register_user("u3", "pass")
-        u3_token = self.login("u3", "pass")
-
-        room_1 = self.helper.create_room_as(u1, tok=u1_token)
-        self.helper.send_state(
-            room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token
-        )
-
-        # Begin the ingestion by creating the temp tables. This will also store
-        # the position that the deltas should begin at, once they take over.
-        self.hs.config.stats_enabled = True
-        self.handler.stats_enabled = True
-        self.store.db_pool.updates._all_done = False
-        self.get_success(
-            self.store.db_pool.simple_update_one(
-                table="stats_incremental_position",
-                keyvalues={},
-                updatevalues={"stream_id": 0},
-            )
-        )
-
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                {"update_name": "populate_stats_prepare", "progress_json": "{}"},
-            )
-        )
-
-        while not self.get_success(
-            self.store.db_pool.updates.has_completed_background_updates()
-        ):
-            self.get_success(
-                self.store.db_pool.updates.do_next_background_update(100), by=0.1
-            )
-
-        # Now, before the table is actually ingested, add some more events.
-        self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token)
-        self.helper.join(room=room_1, user=u2, tok=u2_token)
-
-        # orig_delta_processor = self.store.
-
-        # Now do the initial ingestion.
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                {"update_name": "populate_stats_process_rooms", "progress_json": "{}"},
-            )
-        )
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                {
-                    "update_name": "populate_stats_cleanup",
-                    "progress_json": "{}",
-                    "depends_on": "populate_stats_process_rooms",
-                },
-            )
-        )
-
-        self.store.db_pool.updates._all_done = False
-        while not self.get_success(
-            self.store.db_pool.updates.has_completed_background_updates()
-        ):
-            self.get_success(
-                self.store.db_pool.updates.do_next_background_update(100), by=0.1
-            )
-
-        self.reactor.advance(86401)
-
-        # Now add some more events, triggering ingestion. Because of the stream
-        # position being set to before the events sent in the middle, a simpler
-        # implementation would reprocess those events, and say there were four
-        # users, not three.
-        self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token)
-        self.helper.join(room=room_1, user=u3, tok=u3_token)
-
-        # self.handler.notify_new_event()
-
-        # We need to let the delta processor advance
-
-        self.reactor.advance(10 * 60)
-
-        # Get the slices! There should be two -- day 1, and day 2.
-        r = self.get_success(self.store.get_statistics_for_subject("room", room_1, 0))
-
-        self.assertEqual(len(r), 2)
-
-        # The oldest has 2 joined members
-        self.assertEqual(r[-1]["joined_members"], 2)
-
-        # The newest has 3
-        self.assertEqual(r[0]["joined_members"], 3)
-
     def test_create_user(self):
         """
         When we create a user, it should have statistics already ready.
@@ -296,22 +183,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.assertIsNotNone(r1stats)
         self.assertIsNotNone(r2stats)
 
-        # contains the default things you'd expect in a fresh room
-        self.assertEqual(
-            r1stats["total_events"],
-            EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM,
-            "Wrong number of total_events in new room's stats!"
-            " You may need to update this if more state events are added to"
-            " the room creation process.",
-        )
-        self.assertEqual(
-            r2stats["total_events"],
-            EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM,
-            "Wrong number of total_events in new room's stats!"
-            " You may need to update this if more state events are added to"
-            " the room creation process.",
-        )
-
         self.assertEqual(
             r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
         )
@@ -327,24 +198,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.assertEqual(r2stats["invited_members"], 0)
         self.assertEqual(r2stats["banned_members"], 0)
 
-    def test_send_message_increments_total_events(self):
-        """
-        When we send a message, it increments total_events.
-        """
-
-        self._perform_background_initial_update()
-
-        u1 = self.register_user("u1", "pass")
-        u1token = self.login("u1", "pass")
-        r1 = self.helper.create_room_as(u1, tok=u1token)
-        r1stats_ante = self._get_current_stats("room", r1)
-
-        self.helper.send(r1, "hiss", tok=u1token)
-
-        r1stats_post = self._get_current_stats("room", r1)
-
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
-
     def test_updating_profile_information_does_not_increase_joined_members_count(self):
         """
         Check that the joined_members count does not increase when a user changes their
@@ -378,7 +231,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_send_state_event_nonoverwriting(self):
         """
-        When we send a non-overwriting state event, it increments total_events AND current_state_events
+        When we send a non-overwriting state event, it increments current_state_events
         """
 
         self._perform_background_initial_update()
@@ -399,44 +252,14 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             1,
         )
 
-    def test_send_state_event_overwriting(self):
-        """
-        When we send an overwriting state event, it increments total_events ONLY
-        """
-
-        self._perform_background_initial_update()
-
-        u1 = self.register_user("u1", "pass")
-        u1token = self.login("u1", "pass")
-        r1 = self.helper.create_room_as(u1, tok=u1token)
-
-        self.helper.send_state(
-            r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby"
-        )
-
-        r1stats_ante = self._get_current_stats("room", r1)
-
-        self.helper.send_state(
-            r1, "cat.hissing", {"value": False}, tok=u1token, state_key="tabby"
-        )
-
-        r1stats_post = self._get_current_stats("room", r1)
-
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
-        self.assertEqual(
-            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
-            0,
-        )
-
     def test_join_first_time(self):
         """
-        When a user joins a room for the first time, total_events, current_state_events and
+        When a user joins a room for the first time, current_state_events and
         joined_members should increase by exactly 1.
         """
 
@@ -455,7 +278,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             1,
@@ -466,7 +288,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_join_after_leave(self):
         """
-        When a user joins a room after being previously left, total_events and
+        When a user joins a room after having previously left,
         joined_members should increase by exactly 1.
         current_state_events should not increase.
         left_members should decrease by exactly 1.
@@ -490,7 +312,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -504,7 +325,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_invited(self):
         """
-        When a user invites another user, current_state_events, total_events and
+        When a user invites another user, current_state_events and
         invited_members should increase by exactly 1.
         """
 
@@ -522,7 +343,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             1,
@@ -533,7 +353,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_join_after_invite(self):
         """
-        When a user joins a room after being invited, total_events and
+        When a user joins a room after being invited,
         joined_members should increase by exactly 1.
         current_state_events should not increase.
         invited_members should decrease by exactly 1.
@@ -556,7 +376,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -570,7 +389,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_left(self):
         """
-        When a user leaves a room after joining, total_events and
+        When a user leaves a room after joining,
         left_members should increase by exactly 1.
         current_state_events should not increase.
         joined_members should decrease by exactly 1.
@@ -593,7 +412,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
@@ -607,7 +425,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
     def test_banned(self):
         """
-        When a user is banned from a room after joining, total_events and
+        When a user is banned from a room after joining,
         left_members should increase by exactly 1.
         current_state_events should not increase.
         banned_members should decrease by exactly 1.
@@ -630,7 +448,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         r1stats_post = self._get_current_stats("room", r1)
 
-        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             0,
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index f58afbc244..fa3cff598e 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -38,6 +38,9 @@ U_ONION = UserID.from_string("@onion:farm")
 # Test room id
 ROOM_ID = "a-room"
 
+# Room we're not in
+OTHER_ROOM_ID = "another-room"
+
 
 def _expect_edu_transaction(edu_type, content, origin="test"):
     return {
@@ -115,6 +118,11 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
 
         hs.get_auth().check_user_in_room = check_user_in_room
 
+        async def check_host_in_room(room_id, server_name):
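+            # Stub the event-auth check: pretend this server is only in ROOM_ID,
+            # so typing EDUs for any other room are ignored.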
+            return room_id == ROOM_ID
+
+        hs.get_event_auth_handler().check_host_in_room = check_host_in_room
+
         def get_joined_hosts_for_room(room_id):
             return {member.domain for member in self.room_members}
 
@@ -244,6 +252,35 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
             ],
         )
 
+    def test_started_typing_remote_recv_not_in_room(self):
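+        """A remote typing EDU for a room this server is not in should be ignored."""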
+        self.room_members = [U_APPLE, U_ONION]
+
+        self.assertEquals(self.event_source.get_current_key(), 0)
+
+        channel = self.make_request(
+            "PUT",
+            "/_matrix/federation/v1/send/1000000",
+            _make_edu_transaction_json(
+                "m.typing",
+                content={
+                    "room_id": OTHER_ROOM_ID,
+                    "user_id": U_ONION.to_string(),
+                    "typing": True,
+                },
+            ),
+            federation_auth_origin=b"farm",
+        )
+        self.assertEqual(channel.code, 200)
+
+        self.on_new_event.assert_not_called()
+
+        self.assertEquals(self.event_source.get_current_key(), 0)
+        events = self.get_success(
+            self.event_source.get_new_events(room_ids=[OTHER_ROOM_ID], from_key=0)
+        )
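+        # No typing events were produced and the stream position has not advanced.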
+        self.assertEquals(events[0], [])
+        self.assertEquals(events[1], 0)
+
     @override_config({"send_federation": True})
     def test_stopped_typing(self):
         self.room_members = [U_APPLE, U_BANANA, U_ONION]
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index e45980316b..a37bce08c3 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -273,7 +273,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.assertEqual(response.code, 200)
 
         # Send the body
-        request.write('{ "a": 1 }'.encode("ascii"))
+        request.write(b'{ "a": 1 }')
         request.finish()
 
         self.reactor.pump((0.1,))
diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py
index ed9a884d76..d9a8b077d3 100644
--- a/tests/http/test_fedclient.py
+++ b/tests/http/test_fedclient.py
@@ -102,7 +102,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertNoResult(test_d)
 
         # Send it the HTTP response
-        res_json = '{ "a": 1 }'.encode("ascii")
+        res_json = b'{ "a": 1 }'
         protocol.dataReceived(
             b"HTTP/1.1 200 OK\r\n"
             b"Server: Fake\r\n"
@@ -339,10 +339,8 @@ class FederationClientTests(HomeserverTestCase):
 
         # Send it the HTTP response
         client.dataReceived(
-            (
-                b"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n"
-                b"Server: Fake\r\n\r\n"
-            )
+            b"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n"
+            b"Server: Fake\r\n\r\n"
         )
 
         # Push by enough to time it out
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index fefc8099c9..437113929a 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -205,6 +205,41 @@ class MatrixFederationAgentTests(TestCase):
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
     def test_http_request_via_proxy(self):
+        """
+        Tests that requests can be made through a proxy.
+        """
+        self._do_http_request_via_proxy(auth_credentials=None)
+
+    @patch.dict(
+        os.environ,
+        {"http_proxy": "bob:pinkponies@proxy.com:8888", "no_proxy": "unused.com"},
+    )
+    def test_http_request_via_proxy_with_auth(self):
+        """
+        Tests that authenticated requests can be made through a proxy.
+        """
+        self._do_http_request_via_proxy(auth_credentials="bob:pinkponies")
+
+    @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
+    def test_https_request_via_proxy(self):
+        """Tests that TLS-encrypted requests can be made through a proxy"""
+        self._do_https_request_via_proxy(auth_credentials=None)
+
+    @patch.dict(
+        os.environ,
+        {"https_proxy": "bob:pinkponies@proxy.com", "no_proxy": "unused.com"},
+    )
+    def test_https_request_via_proxy_with_auth(self):
+        """Tests that authenticated, TLS-encrypted requests can be made through a proxy"""
+        self._do_https_request_via_proxy(auth_credentials="bob:pinkponies")
+
+    def _do_http_request_via_proxy(
+        self,
+        auth_credentials: Optional[str] = None,
+    ):
+        """
+        Tests that requests can be made through a proxy.
+        """
         agent = ProxyAgent(self.reactor, use_proxy=True)
 
         self.reactor.lookups["proxy.com"] = "1.2.3.5"
@@ -229,6 +264,23 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(len(http_server.requests), 1)
 
         request = http_server.requests[0]
+
+        # Check whether auth credentials have been supplied to the proxy
+        proxy_auth_header_values = request.requestHeaders.getRawHeaders(
+            b"Proxy-Authorization"
+        )
+
+        if auth_credentials is not None:
+            # Compute the correct header value for Proxy-Authorization
+            encoded_credentials = base64.b64encode(b"bob:pinkponies")
+            expected_header_value = b"Basic " + encoded_credentials
+
+            # Validate the header's value
+            self.assertIn(expected_header_value, proxy_auth_header_values)
+        else:
+            # Check that the Proxy-Authorization header has not been supplied to the proxy
+            self.assertIsNone(proxy_auth_header_values)
+
         self.assertEqual(request.method, b"GET")
         self.assertEqual(request.path, b"http://test.com")
         self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"])
@@ -241,19 +293,6 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
-    @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
-    def test_https_request_via_proxy(self):
-        """Tests that TLS-encrypted requests can be made through a proxy"""
-        self._do_https_request_via_proxy(auth_credentials=None)
-
-    @patch.dict(
-        os.environ,
-        {"https_proxy": "bob:pinkponies@proxy.com", "no_proxy": "unused.com"},
-    )
-    def test_https_request_via_proxy_with_auth(self):
-        """Tests that authenticated, TLS-encrypted requests can be made through a proxy"""
-        self._do_https_request_via_proxy(auth_credentials="bob:pinkponies")
-
     def _do_https_request_via_proxy(
         self,
         auth_credentials: Optional[str] = None,
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 2c68b9a13c..81d9e2f484 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -100,9 +100,9 @@ class ModuleApiTestCase(HomeserverTestCase):
             "content": content,
             "sender": user_id,
         }
-        event = self.get_success(
+        event: EventBase = self.get_success(
             self.module_api.create_and_send_event_into_room(event_dict)
-        )  # type: EventBase
+        )
         self.assertEqual(event.sender, user_id)
         self.assertEqual(event.type, "m.room.message")
         self.assertEqual(event.room_id, room_id)
@@ -136,9 +136,9 @@ class ModuleApiTestCase(HomeserverTestCase):
             "sender": user_id,
             "state_key": "",
         }
-        event = self.get_success(
+        event: EventBase = self.get_success(
             self.module_api.create_and_send_event_into_room(event_dict)
-        )  # type: EventBase
+        )
         self.assertEqual(event.sender, user_id)
         self.assertEqual(event.type, "m.room.power_levels")
         self.assertEqual(event.room_id, room_id)
@@ -281,7 +281,7 @@ class ModuleApiTestCase(HomeserverTestCase):
         )
         for call in calls:
             call_args = call[0]
-            federation_transaction = call_args[0]  # type: Transaction
+            federation_transaction: Transaction = call_args[0]
 
             # Get the sent EDUs in this transaction
             edus = federation_transaction.get_dict()["edus"]
@@ -390,7 +390,7 @@ def _test_sending_local_online_presence_to_local_user(
     )
     test_case.assertEqual(len(presence_updates), 1)
 
-    presence_update = presence_updates[0]  # type: UserPresenceState
+    presence_update: UserPresenceState = presence_updates[0]
     test_case.assertEqual(presence_update.user_id, test_case.presence_sender_id)
     test_case.assertEqual(presence_update.state, "online")
 
@@ -443,7 +443,7 @@ def _test_sending_local_online_presence_to_local_user(
     )
     test_case.assertEqual(len(presence_updates), 1)
 
-    presence_update = presence_updates[0]  # type: UserPresenceState
+    presence_update: UserPresenceState = presence_updates[0]
     test_case.assertEqual(presence_update.user_id, test_case.presence_sender_id)
     test_case.assertEqual(presence_update.state, "online")
 
@@ -454,7 +454,7 @@ def _test_sending_local_online_presence_to_local_user(
     )
     test_case.assertEqual(len(presence_updates), 1)
 
-    presence_update = presence_updates[0]  # type: UserPresenceState
+    presence_update: UserPresenceState = presence_updates[0]
     test_case.assertEqual(presence_update.user_id, test_case.presence_sender_id)
     test_case.assertEqual(presence_update.state, "online")
 
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 802c3ec724..2f3a870dc5 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -53,9 +53,9 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         # build a replication server
         server_factory = ReplicationStreamProtocolFactory(hs)
         self.streamer = hs.get_replication_streamer()
-        self.server = server_factory.buildProtocol(
+        self.server: ServerReplicationStreamProtocol = server_factory.buildProtocol(
             None
-        )  # type: ServerReplicationStreamProtocol
+        )
 
         # Make a new HomeServer object for the worker
         self.reactor.lookups["testserv"] = "1.2.3.4"
@@ -195,7 +195,7 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         fetching updates for given stream.
         """
 
-        path = request.path  # type: bytes  # type: ignore
+        path: bytes = request.path  # type: ignore
         self.assertRegex(
             path,
             br"^/_synapse/replication/get_repl_stream_updates/%s/[^/]+$"
@@ -212,7 +212,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
     unlike `BaseStreamTestCase`.
     """
 
-    servlets = []  # type: List[Callable[[HomeServer, JsonResource], None]]
+    servlets: List[Callable[[HomeServer, JsonResource], None]] = []
 
     def setUp(self):
         super().setUp()
@@ -448,7 +448,7 @@ class TestReplicationDataHandler(ReplicationDataHandler):
         super().__init__(hs)
 
         # list of received (stream_name, token, row) tuples
-        self.received_rdata_rows = []  # type: List[Tuple[str, int, Any]]
+        self.received_rdata_rows: List[Tuple[str, int, Any]] = []
 
     async def on_rdata(self, stream_name, instance_name, token, rows):
         await super().on_rdata(stream_name, instance_name, token, rows)
@@ -484,7 +484,7 @@ class FakeRedisPubSubServer:
 class FakeRedisPubSubProtocol(Protocol):
     """A connection from a client talking to the fake Redis server."""
 
-    transport = None  # type: Optional[FakeTransport]
+    transport: Optional[FakeTransport] = None
 
     def __init__(self, server: FakeRedisPubSubServer):
         self._server = server
@@ -554,12 +554,12 @@ class FakeRedisPubSubProtocol(Protocol):
         if obj is None:
             return "$-1\r\n"
         if isinstance(obj, str):
-            return "${len}\r\n{str}\r\n".format(len=len(obj), str=obj)
+            return f"${len(obj)}\r\n{obj}\r\n"
         if isinstance(obj, int):
-            return ":{val}\r\n".format(val=obj)
+            return f":{obj}\r\n"
         if isinstance(obj, (list, tuple)):
             items = "".join(self.encode(a) for a in obj)
-            return "*{len}\r\n{items}".format(len=len(obj), items=items)
+            return f"*{len(obj)}\r\n{items}"
 
         raise Exception("Unrecognized type for encoding redis: %r: %r", type(obj), obj)
 
diff --git a/tests/replication/tcp/streams/test_events.py b/tests/replication/tcp/streams/test_events.py
index f51fa0a79e..666008425a 100644
--- a/tests/replication/tcp/streams/test_events.py
+++ b/tests/replication/tcp/streams/test_events.py
@@ -135,9 +135,9 @@ class EventsStreamTestCase(BaseStreamTestCase):
         )
 
         # this is the point in the DAG where we make a fork
-        fork_point = self.get_success(
+        fork_point: List[str] = self.get_success(
             self.hs.get_datastore().get_latest_event_ids_in_room(self.room_id)
-        )  # type: List[str]
+        )
 
         events = [
             self._inject_state_event(sender=OTHER_USER)
@@ -238,7 +238,7 @@ class EventsStreamTestCase(BaseStreamTestCase):
         self.assertEqual(row.data.event_id, pl_event.event_id)
 
         # the state rows are unsorted
-        state_rows = []  # type: List[EventsStreamCurrentStateRow]
+        state_rows: List[EventsStreamCurrentStateRow] = []
         for stream_name, _, row in received_rows:
             self.assertEqual("events", stream_name)
             self.assertIsInstance(row, EventsStreamRow)
@@ -290,11 +290,11 @@ class EventsStreamTestCase(BaseStreamTestCase):
         )
 
         # this is the point in the DAG where we make a fork
-        fork_point = self.get_success(
+        fork_point: List[str] = self.get_success(
             self.hs.get_datastore().get_latest_event_ids_in_room(self.room_id)
-        )  # type: List[str]
+        )
 
-        events = []  # type: List[EventBase]
+        events: List[EventBase] = []
         for user in user_ids:
             events.extend(
                 self._inject_state_event(sender=user) for _ in range(STATES_PER_USER)
@@ -355,7 +355,7 @@ class EventsStreamTestCase(BaseStreamTestCase):
             self.assertEqual(row.data.event_id, pl_events[i].event_id)
 
             # the state rows are unsorted
-            state_rows = []  # type: List[EventsStreamCurrentStateRow]
+            state_rows: List[EventsStreamCurrentStateRow] = []
             for _ in range(STATES_PER_USER + 1):
                 stream_name, token, row = received_rows.pop(0)
                 self.assertEqual("events", stream_name)
diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py
index 7f5d932f0b..38e292c1ab 100644
--- a/tests/replication/tcp/streams/test_receipts.py
+++ b/tests/replication/tcp/streams/test_receipts.py
@@ -43,7 +43,7 @@ class ReceiptsStreamTestCase(BaseStreamTestCase):
         stream_name, _, token, rdata_rows = self.test_handler.on_rdata.call_args[0]
         self.assertEqual(stream_name, "receipts")
         self.assertEqual(1, len(rdata_rows))
-        row = rdata_rows[0]  # type: ReceiptsStream.ReceiptsStreamRow
+        row: ReceiptsStream.ReceiptsStreamRow = rdata_rows[0]
         self.assertEqual("!room:blue", row.room_id)
         self.assertEqual("m.read", row.receipt_type)
         self.assertEqual(USER_ID, row.user_id)
@@ -75,7 +75,7 @@ class ReceiptsStreamTestCase(BaseStreamTestCase):
         self.assertEqual(token, 3)
         self.assertEqual(1, len(rdata_rows))
 
-        row = rdata_rows[0]  # type: ReceiptsStream.ReceiptsStreamRow
+        row: ReceiptsStream.ReceiptsStreamRow = rdata_rows[0]
         self.assertEqual("!room2:blue", row.room_id)
         self.assertEqual("m.read", row.receipt_type)
         self.assertEqual(USER_ID, row.user_id)
diff --git a/tests/replication/tcp/streams/test_typing.py b/tests/replication/tcp/streams/test_typing.py
index ecd360c2d0..3ff5afc6e5 100644
--- a/tests/replication/tcp/streams/test_typing.py
+++ b/tests/replication/tcp/streams/test_typing.py
@@ -47,7 +47,7 @@ class TypingStreamTestCase(BaseStreamTestCase):
         stream_name, _, token, rdata_rows = self.test_handler.on_rdata.call_args[0]
         self.assertEqual(stream_name, "typing")
         self.assertEqual(1, len(rdata_rows))
-        row = rdata_rows[0]  # type: TypingStream.TypingStreamRow
+        row: TypingStream.TypingStreamRow = rdata_rows[0]
         self.assertEqual(ROOM_ID, row.room_id)
         self.assertEqual([USER_ID], row.user_ids)
 
@@ -102,7 +102,7 @@ class TypingStreamTestCase(BaseStreamTestCase):
         stream_name, _, token, rdata_rows = self.test_handler.on_rdata.call_args[0]
         self.assertEqual(stream_name, "typing")
         self.assertEqual(1, len(rdata_rows))
-        row = rdata_rows[0]  # type: TypingStream.TypingStreamRow
+        row: TypingStream.TypingStreamRow = rdata_rows[0]
         self.assertEqual(ROOM_ID, row.room_id)
         self.assertEqual([USER_ID], row.user_ids)
 
diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py
index 76e6644353..ffa425328f 100644
--- a/tests/replication/test_multi_media_repo.py
+++ b/tests/replication/test_multi_media_repo.py
@@ -31,7 +31,7 @@ from tests.server import FakeChannel, FakeSite, FakeTransport, make_request
 
 logger = logging.getLogger(__name__)
 
-test_server_connection_factory = None  # type: Optional[TestServerTLSConnectionFactory]
+test_server_connection_factory: Optional[TestServerTLSConnectionFactory] = None
 
 
 class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
@@ -70,7 +70,7 @@ class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
             self.reactor,
             FakeSite(resource),
             "GET",
-            "/{}/{}".format(target, media_id),
+            f"/{target}/{media_id}",
             shorthand=False,
             access_token=self.access_token,
             await_result=False,
@@ -113,7 +113,7 @@ class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(request.method, b"GET")
         self.assertEqual(
             request.path,
-            "/_matrix/media/r0/download/{}/{}".format(target, media_id).encode("utf-8"),
+            f"/_matrix/media/r0/download/{target}/{media_id}".encode("utf-8"),
         )
         self.assertEqual(
             request.requestHeaders.getRawHeaders(b"host"), [target.encode("utf-8")]
diff --git a/tests/replication/test_sharded_event_persister.py b/tests/replication/test_sharded_event_persister.py
index 5eca5c165d..f3615af97e 100644
--- a/tests/replication/test_sharded_event_persister.py
+++ b/tests/replication/test_sharded_event_persister.py
@@ -211,7 +211,7 @@ class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase):
             self.reactor,
             sync_hs_site,
             "GET",
-            "/sync?since={}".format(next_batch),
+            f"/sync?since={next_batch}",
             access_token=access_token,
         )
 
@@ -241,7 +241,7 @@ class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase):
             self.reactor,
             sync_hs_site,
             "GET",
-            "/sync?since={}".format(vector_clock_token),
+            f"/sync?since={vector_clock_token}",
             access_token=access_token,
         )
 
@@ -266,7 +266,7 @@ class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase):
             self.reactor,
             sync_hs_site,
             "GET",
-            "/sync?since={}".format(next_batch),
+            f"/sync?since={next_batch}",
             access_token=access_token,
         )
 
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py
index 2f7090e554..a7c6e595b9 100644
--- a/tests/rest/admin/test_admin.py
+++ b/tests/rest/admin/test_admin.py
@@ -66,7 +66,7 @@ class DeleteGroupTestCase(unittest.HomeserverTestCase):
         # Create a new group
         channel = self.make_request(
             "POST",
-            "/create_group".encode("ascii"),
+            b"/create_group",
             access_token=self.admin_user_tok,
             content={"localpart": "test"},
         )
@@ -129,9 +129,7 @@ class DeleteGroupTestCase(unittest.HomeserverTestCase):
 
     def _get_groups_user_is_in(self, access_token):
         """Returns the list of groups the user is in (given their access token)"""
-        channel = self.make_request(
-            "GET", "/joined_groups".encode("ascii"), access_token=access_token
-        )
+        channel = self.make_request("GET", b"/joined_groups", access_token=access_token)
 
         self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
 
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index ee071c2477..17ec8bfd3b 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -535,7 +535,7 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase):
                 )
             )
 
-            self.assertEqual(count, 0, msg="Rows not purged in {}".format(table))
+            self.assertEqual(count, 0, msg=f"Rows not purged in {table}")
 
     def _assert_peek(self, room_id, expect_code):
         """Assert that the admin user can (or cannot) peek into the room."""
@@ -599,7 +599,7 @@ class PurgeRoomTestCase(unittest.HomeserverTestCase):
                 )
             )
 
-            self.assertEqual(count, 0, msg="Rows not purged in {}".format(table))
+            self.assertEqual(count, 0, msg=f"Rows not purged in {table}")
 
 
 class RoomTestCase(unittest.HomeserverTestCase):
@@ -1280,7 +1280,7 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase):
         self.public_room_id = self.helper.create_room_as(
             self.creator, tok=self.creator_tok, is_public=True
         )
-        self.url = "/_synapse/admin/v1/join/{}".format(self.public_room_id)
+        self.url = f"/_synapse/admin/v1/join/{self.public_room_id}"
 
     def test_requester_is_no_admin(self):
         """
@@ -1420,7 +1420,7 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase):
         private_room_id = self.helper.create_room_as(
             self.creator, tok=self.creator_tok, is_public=False
         )
-        url = "/_synapse/admin/v1/join/{}".format(private_room_id)
+        url = f"/_synapse/admin/v1/join/{private_room_id}"
         body = json.dumps({"user_id": self.second_user_id})
 
         channel = self.make_request(
@@ -1463,7 +1463,7 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase):
 
         # Join user to room.
 
-        url = "/_synapse/admin/v1/join/{}".format(private_room_id)
+        url = f"/_synapse/admin/v1/join/{private_room_id}"
         body = json.dumps({"user_id": self.second_user_id})
 
         channel = self.make_request(
@@ -1493,7 +1493,7 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase):
         private_room_id = self.helper.create_room_as(
             self.admin_user, tok=self.admin_user_tok, is_public=False
         )
-        url = "/_synapse/admin/v1/join/{}".format(private_room_id)
+        url = f"/_synapse/admin/v1/join/{private_room_id}"
         body = json.dumps({"user_id": self.second_user_id})
 
         channel = self.make_request(
@@ -1633,7 +1633,7 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request(
             "POST",
-            "/_synapse/admin/v1/rooms/{}/make_room_admin".format(room_id),
+            f"/_synapse/admin/v1/rooms/{room_id}/make_room_admin",
             content={},
             access_token=self.admin_user_tok,
         )
@@ -1660,7 +1660,7 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request(
             "POST",
-            "/_synapse/admin/v1/rooms/{}/make_room_admin".format(room_id),
+            f"/_synapse/admin/v1/rooms/{room_id}/make_room_admin",
             content={},
             access_token=self.admin_user_tok,
         )
@@ -1686,7 +1686,7 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request(
             "POST",
-            "/_synapse/admin/v1/rooms/{}/make_room_admin".format(room_id),
+            f"/_synapse/admin/v1/rooms/{room_id}/make_room_admin",
             content={"user_id": self.second_user_id},
             access_token=self.admin_user_tok,
         )
@@ -1720,7 +1720,7 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request(
             "POST",
-            "/_synapse/admin/v1/rooms/{}/make_room_admin".format(room_id),
+            f"/_synapse/admin/v1/rooms/{room_id}/make_room_admin",
             content={},
             access_token=self.admin_user_tok,
         )
@@ -1753,7 +1753,6 @@ PURGE_TABLES = [
     "room_memberships",
     "room_stats_state",
     "room_stats_current",
-    "room_stats_historical",
     "room_stats_earliest_token",
     "rooms",
     "stream_ordering_to_exterm",
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index d599a4c984..4fccce34fd 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -939,7 +939,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         """
         channel = self.make_request("POST", self.url, b"{}")
 
-        self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(401, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
 
     def test_requester_is_not_admin(self):
@@ -950,7 +950,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request("POST", url, access_token=self.other_user_token)
 
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual("You are not a server admin", channel.json_body["error"])
 
         channel = self.make_request(
@@ -960,7 +960,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             content=b"{}",
         )
 
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual("You are not a server admin", channel.json_body["error"])
 
     def test_user_does_not_exist(self):
@@ -990,7 +990,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(400, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.BAD_JSON, channel.json_body["errcode"])
 
     def test_user_is_not_local(self):
@@ -1006,7 +1006,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
     def test_deactivate_user_erase_true(self):
         """
-        Test deactivating an user and set `erase` to `true`
+        Test deactivating a user and setting `erase` to `true`
         """
 
         # Get user
@@ -1016,24 +1016,22 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual(False, channel.json_body["deactivated"])
         self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"])
         self.assertEqual("mxc://servername/mediaid", channel.json_body["avatar_url"])
         self.assertEqual("User1", channel.json_body["displayname"])
 
-        # Deactivate user
-        body = json.dumps({"erase": True})
-
+        # Deactivate and erase user
         channel = self.make_request(
             "POST",
             self.url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"erase": True},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
 
         # Get user
         channel = self.make_request(
@@ -1042,7 +1040,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual(True, channel.json_body["deactivated"])
         self.assertEqual(0, len(channel.json_body["threepids"]))
@@ -1053,7 +1051,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
     def test_deactivate_user_erase_false(self):
         """
-        Test deactivating an user and set `erase` to `false`
+        Test deactivating a user and setting `erase` to `false`
         """
 
         # Get user
@@ -1063,7 +1061,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual(False, channel.json_body["deactivated"])
         self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"])
@@ -1071,13 +1069,11 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         self.assertEqual("User1", channel.json_body["displayname"])
 
         # Deactivate user
-        body = json.dumps({"erase": False})
-
         channel = self.make_request(
             "POST",
             self.url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"erase": False},
         )
 
         self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
@@ -1089,7 +1085,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual(True, channel.json_body["deactivated"])
         self.assertEqual(0, len(channel.json_body["threepids"]))
@@ -1098,6 +1094,60 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
         self._is_erased("@user:test", False)
 
+    def test_deactivate_user_erase_true_no_profile(self):
+        """
+        Test deactivating a user and setting `erase` to `true`
+        when the user has no profile information (stored in the database table `profiles`).
+        """
+
+        # Users normally have an entry in `profiles`, but occasionally they are created without one.
+        # To test deactivation for users without a profile, we delete the profile information for our user.
+        self.get_success(
+            self.store.db_pool.simple_delete_one(
+                table="profiles", keyvalues={"user_id": "user"}
+            )
+        )
+
+        # Get user
+        channel = self.make_request(
+            "GET",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual("@user:test", channel.json_body["name"])
+        self.assertEqual(False, channel.json_body["deactivated"])
+        self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"])
+        self.assertIsNone(channel.json_body["avatar_url"])
+        self.assertIsNone(channel.json_body["displayname"])
+
+        # Deactivate and erase user
+        channel = self.make_request(
+            "POST",
+            self.url,
+            access_token=self.admin_user_tok,
+            content={"erase": True},
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+        # Get user
+        channel = self.make_request(
+            "GET",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual("@user:test", channel.json_body["name"])
+        self.assertEqual(True, channel.json_body["deactivated"])
+        self.assertEqual(0, len(channel.json_body["threepids"]))
+        self.assertIsNone(channel.json_body["avatar_url"])
+        self.assertIsNone(channel.json_body["displayname"])
+
+        self._is_erased("@user:test", True)
+
     def _is_erased(self, user_id: str, expect: bool) -> None:
         """Assert that the user is erased or not"""
         d = self.store.is_user_erased(user_id)
@@ -1150,7 +1200,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.other_user_token,
         )
 
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual("You are not a server admin", channel.json_body["error"])
 
         channel = self.make_request(
@@ -1160,7 +1210,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content=b"{}",
         )
 
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual("You are not a server admin", channel.json_body["error"])
 
     def test_user_does_not_exist(self):
@@ -1177,6 +1227,58 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertEqual(404, channel.code, msg=channel.json_body)
         self.assertEqual("M_NOT_FOUND", channel.json_body["errcode"])
 
+    def test_get_user(self):
+        """
+        Test a simple get of a user.
+        """
+        channel = self.make_request(
+            "GET",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual("@user:test", channel.json_body["name"])
+        self.assertEqual("User", channel.json_body["displayname"])
+        self._check_fields(channel.json_body)
+
+    def test_get_user_with_sso(self):
+        """
+        Test getting a user with SSO details.
+        """
+        self.get_success(
+            self.store.record_user_external_id(
+                "auth_provider1", "external_id1", self.other_user
+            )
+        )
+        self.get_success(
+            self.store.record_user_external_id(
+                "auth_provider2", "external_id2", self.other_user
+            )
+        )
+
+        channel = self.make_request(
+            "GET",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual("@user:test", channel.json_body["name"])
+        self.assertEqual(
+            "external_id1", channel.json_body["external_ids"][0]["external_id"]
+        )
+        self.assertEqual(
+            "auth_provider1", channel.json_body["external_ids"][0]["auth_provider"]
+        )
+        self.assertEqual(
+            "external_id2", channel.json_body["external_ids"][1]["external_id"]
+        )
+        self.assertEqual(
+            "auth_provider2", channel.json_body["external_ids"][1]["auth_provider"]
+        )
+        self._check_fields(channel.json_body)
+
     def test_create_server_admin(self):
         """
         Check that a new admin user is created successfully.
@@ -1184,30 +1286,29 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user (server admin)
-        body = json.dumps(
-            {
-                "password": "abc123",
-                "admin": True,
-                "displayname": "Bob's name",
-                "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
-                "avatar_url": "mxc://fibble/wibble",
-            }
-        )
+        body = {
+            "password": "abc123",
+            "admin": True,
+            "displayname": "Bob's name",
+            "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
+            "avatar_url": "mxc://fibble/wibble",
+        }
 
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content=body,
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("Bob's name", channel.json_body["displayname"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"])
         self.assertTrue(channel.json_body["admin"])
         self.assertEqual("mxc://fibble/wibble", channel.json_body["avatar_url"])
+        self._check_fields(channel.json_body)
 
         # Get user
         channel = self.make_request(
@@ -1216,7 +1317,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("Bob's name", channel.json_body["displayname"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
@@ -1225,6 +1326,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertFalse(channel.json_body["is_guest"])
         self.assertFalse(channel.json_body["deactivated"])
         self.assertEqual("mxc://fibble/wibble", channel.json_body["avatar_url"])
+        self._check_fields(channel.json_body)
 
     def test_create_user(self):
         """
@@ -1233,30 +1335,29 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user
-        body = json.dumps(
-            {
-                "password": "abc123",
-                "admin": False,
-                "displayname": "Bob's name",
-                "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
-                "avatar_url": "mxc://fibble/wibble",
-            }
-        )
+        body = {
+            "password": "abc123",
+            "admin": False,
+            "displayname": "Bob's name",
+            "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
+            "avatar_url": "mxc://fibble/wibble",
+        }
 
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content=body,
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("Bob's name", channel.json_body["displayname"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"])
         self.assertFalse(channel.json_body["admin"])
         self.assertEqual("mxc://fibble/wibble", channel.json_body["avatar_url"])
+        self._check_fields(channel.json_body)
 
         # Get user
         channel = self.make_request(
@@ -1265,7 +1366,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("Bob's name", channel.json_body["displayname"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
@@ -1275,6 +1376,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertFalse(channel.json_body["deactivated"])
         self.assertFalse(channel.json_body["shadow_banned"])
         self.assertEqual("mxc://fibble/wibble", channel.json_body["avatar_url"])
+        self._check_fields(channel.json_body)
 
     @override_config(
         {"limit_usage_by_mau": True, "max_mau_value": 2, "mau_trial_days": 0}
@@ -1311,16 +1413,14 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user
-        body = json.dumps({"password": "abc123", "admin": False})
-
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"password": "abc123", "admin": False},
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["admin"])
 
@@ -1350,17 +1450,15 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user
-        body = json.dumps({"password": "abc123", "admin": False})
-
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"password": "abc123", "admin": False},
         )
 
         # Admin user is not blocked by mau anymore
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["admin"])
 
@@ -1382,21 +1480,19 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user
-        body = json.dumps(
-            {
-                "password": "abc123",
-                "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
-            }
-        )
+        body = {
+            "password": "abc123",
+            "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
+        }
 
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content=body,
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"])
@@ -1426,21 +1522,19 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         url = "/_synapse/admin/v2/users/@bob:test"
 
         # Create user
-        body = json.dumps(
-            {
-                "password": "abc123",
-                "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
-            }
-        )
+        body = {
+            "password": "abc123",
+            "threepids": [{"medium": "email", "address": "bob@bob.bob"}],
+        }
 
         channel = self.make_request(
             "PUT",
             url,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content=body,
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob@bob.bob", channel.json_body["threepids"][0]["address"])
@@ -1457,16 +1551,15 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         """
 
         # Change password
-        body = json.dumps({"password": "hahaha"})
-
         channel = self.make_request(
             "PUT",
             self.url_other_user,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"password": "hahaha"},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self._check_fields(channel.json_body)
 
     def test_set_displayname(self):
         """
@@ -1474,16 +1567,14 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         """
 
         # Modify user
-        body = json.dumps({"displayname": "foobar"})
-
         channel = self.make_request(
             "PUT",
             self.url_other_user,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"displayname": "foobar"},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual("foobar", channel.json_body["displayname"])
 
@@ -1494,7 +1585,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual("foobar", channel.json_body["displayname"])
 
@@ -1504,18 +1595,14 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         """
 
         # Delete old and add new threepid to user
-        body = json.dumps(
-            {"threepids": [{"medium": "email", "address": "bob3@bob.bob"}]}
-        )
-
         channel = self.make_request(
             "PUT",
             self.url_other_user,
             access_token=self.admin_user_tok,
-            content=body.encode(encoding="utf_8"),
+            content={"threepids": [{"medium": "email", "address": "bob3@bob.bob"}]},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob3@bob.bob", channel.json_body["threepids"][0]["address"])
@@ -1527,7 +1614,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
         self.assertEqual("bob3@bob.bob", channel.json_body["threepids"][0]["address"])
@@ -1552,7 +1639,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["deactivated"])
         self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"])
@@ -1567,7 +1654,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"deactivated": True},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["deactivated"])
         self.assertIsNone(channel.json_body["password_hash"])
@@ -1583,7 +1670,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["deactivated"])
         self.assertIsNone(channel.json_body["password_hash"])
@@ -1610,7 +1697,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"deactivated": True},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["deactivated"])
 
@@ -1626,7 +1713,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"displayname": "Foobar"},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["deactivated"])
         self.assertEqual("Foobar", channel.json_body["displayname"])
@@ -1650,7 +1737,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False},
         )
-        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(400, channel.code, msg=channel.json_body)
 
         # Reactivate the user.
         channel = self.make_request(
@@ -1659,7 +1746,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False, "password": "foo"},
         )
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["deactivated"])
         self.assertIsNotNone(channel.json_body["password_hash"])
@@ -1681,7 +1768,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False, "password": "foo"},
         )
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
 
         # Reactivate the user without a password.
@@ -1691,7 +1778,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False},
         )
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["deactivated"])
         self.assertIsNone(channel.json_body["password_hash"])
@@ -1713,7 +1800,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False, "password": "foo"},
         )
-        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(403, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
 
         # Reactivate the user without a password.
@@ -1723,7 +1810,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": False},
         )
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertFalse(channel.json_body["deactivated"])
         self.assertIsNone(channel.json_body["password_hash"])
@@ -1742,7 +1829,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"admin": True},
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["admin"])
 
@@ -1753,7 +1840,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@user:test", channel.json_body["name"])
         self.assertTrue(channel.json_body["admin"])
 
@@ -1772,7 +1859,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"password": "abc123"},
         )
 
-        self.assertEqual(201, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(201, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("bob", channel.json_body["displayname"])
 
@@ -1783,7 +1870,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("bob", channel.json_body["displayname"])
         self.assertEqual(0, channel.json_body["deactivated"])
@@ -1796,7 +1883,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             content={"password": "abc123", "deactivated": "false"},
         )
 
-        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(400, channel.code, msg=channel.json_body)
 
         # Check user is not deactivated
         channel = self.make_request(
@@ -1805,7 +1892,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
         )
 
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("@bob:test", channel.json_body["name"])
         self.assertEqual("bob", channel.json_body["displayname"])
 
@@ -1830,7 +1917,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
             access_token=self.admin_user_tok,
             content={"deactivated": True},
         )
-        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertTrue(channel.json_body["deactivated"])
         self.assertIsNone(channel.json_body["password_hash"])
         self._is_erased(user_id, False)
@@ -1838,6 +1925,25 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertIsNone(self.get_success(d))
         self._is_erased(user_id, True)
 
+    def _check_fields(self, content: JsonDict):
+        """Checks that the expected user attributes are present in content
+
+        Args:
+            content: Content dictionary to check
+        """
+        self.assertIn("displayname", content)
+        self.assertIn("threepids", content)
+        self.assertIn("avatar_url", content)
+        self.assertIn("admin", content)
+        self.assertIn("deactivated", content)
+        self.assertIn("shadow_banned", content)
+        self.assertIn("password_hash", content)
+        self.assertIn("creation_ts", content)
+        self.assertIn("appservice_id", content)
+        self.assertIn("consent_server_notice_sent", content)
+        self.assertIn("consent_version", content)
+        self.assertIn("external_ids", content)
+
 
 class UserMembershipRestTestCase(unittest.HomeserverTestCase):
 
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index e1fe72fc5d..c5e1c5458b 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -233,11 +233,11 @@ class ThirdPartyRulesTestCase(unittest.HomeserverTestCase):
             "content": content,
             "sender": self.user_id,
         }
-        event = self.get_success(
+        event: EventBase = self.get_success(
             current_rules_module().module_api.create_and_send_event_into_room(
                 event_dict
             )
-        )  # type: EventBase
+        )
 
         self.assertEquals(event.sender, self.user_id)
         self.assertEquals(event.room_id, self.room_id)
diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py
index 605b952316..7eba69642a 100644
--- a/tests/rest/client/v1/test_login.py
+++ b/tests/rest/client/v1/test_login.py
@@ -453,7 +453,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 200, channel.result)
 
         # stick the flows results in a dict by type
-        flow_results = {}  # type: Dict[str, Any]
+        flow_results: Dict[str, Any] = {}
         for f in channel.json_body["flows"]:
             flow_type = f["type"]
             self.assertNotIn(
@@ -501,7 +501,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
         p.close()
 
         # there should be a link for each href
-        returned_idps = []  # type: List[str]
+        returned_idps: List[str] = []
         for link in p.links:
             path, query = link.split("?", 1)
             self.assertEqual(path, "pick_idp")
@@ -582,7 +582,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
         # ... and should have set a cookie including the redirect url
         cookie_headers = channel.headers.getRawHeaders("Set-Cookie")
         assert cookie_headers
-        cookies = {}  # type: Dict[str, str]
+        cookies: Dict[str, str] = {}
         for h in cookie_headers:
             key, value = h.split(";")[0].split("=", maxsplit=1)
             cookies[key] = value
@@ -874,9 +874,7 @@ class JWTTestCase(unittest.HomeserverTestCase):
 
     def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_secret) -> str:
         # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str.
-        result = jwt.encode(
-            payload, secret, self.jwt_algorithm
-        )  # type: Union[str, bytes]
+        result: Union[str, bytes] = jwt.encode(payload, secret, self.jwt_algorithm)
         if isinstance(result, bytes):
             return result.decode("ascii")
         return result
@@ -1084,7 +1082,7 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase):
 
     def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str:
         # PyJWT 2.0.0 changed the return type of jwt.encode from bytes to str.
-        result = jwt.encode(payload, secret, "RS256")  # type: Union[bytes,str]
+        result: Union[bytes, str] = jwt.encode(payload, secret, "RS256")
         if isinstance(result, bytes):
             return result.decode("ascii")
         return result
@@ -1272,7 +1270,7 @@ class UsernamePickerTestCase(HomeserverTestCase):
         self.assertEqual(picker_url, "/_synapse/client/pick_username/account_details")
 
         # ... with a username_mapping_session cookie
-        cookies = {}  # type: Dict[str,str]
+        cookies: Dict[str, str] = {}
         channel.extract_cookies(cookies)
         self.assertIn("username_mapping_session", cookies)
         session_id = cookies["username_mapping_session"]
diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py
index e94566ffd7..3df070c936 100644
--- a/tests/rest/client/v1/test_rooms.py
+++ b/tests/rest/client/v1/test_rooms.py
@@ -1206,7 +1206,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/join".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/join",
             content={"reason": reason},
             access_token=self.second_tok,
         )
@@ -1220,7 +1220,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/leave".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/leave",
             content={"reason": reason},
             access_token=self.second_tok,
         )
@@ -1234,7 +1234,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/kick".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/kick",
             content={"reason": reason, "user_id": self.second_user_id},
             access_token=self.second_tok,
         )
@@ -1248,7 +1248,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/ban".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/ban",
             content={"reason": reason, "user_id": self.second_user_id},
             access_token=self.creator_tok,
         )
@@ -1260,7 +1260,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/unban".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/unban",
             content={"reason": reason, "user_id": self.second_user_id},
             access_token=self.creator_tok,
         )
@@ -1272,7 +1272,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/invite".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/invite",
             content={"reason": reason, "user_id": self.second_user_id},
             access_token=self.creator_tok,
         )
@@ -1291,7 +1291,7 @@ class RoomMembershipReasonTestCase(unittest.HomeserverTestCase):
         reason = "hello"
         channel = self.make_request(
             "POST",
-            "/_matrix/client/r0/rooms/{}/leave".format(self.room_id),
+            f"/_matrix/client/r0/rooms/{self.room_id}/leave",
             content={"reason": reason},
             access_token=self.second_tok,
         )
diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py
index 69798e95c3..fc2d35596e 100644
--- a/tests/rest/client/v1/utils.py
+++ b/tests/rest/client/v1/utils.py
@@ -19,7 +19,7 @@ import json
 import re
 import time
 import urllib.parse
-from typing import Any, Dict, Mapping, MutableMapping, Optional
+from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional, Tuple, Union
 from unittest.mock import patch
 
 import attr
@@ -53,6 +53,9 @@ class RestHelper:
         tok: str = None,
         expect_code: int = 200,
         extra_content: Optional[Dict] = None,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
     ) -> str:
         """
         Create a room.
@@ -87,6 +90,7 @@ class RestHelper:
             "POST",
             path,
             json.dumps(content).encode("utf8"),
+            custom_headers=custom_headers,
         )
 
         assert channel.result["code"] == b"%d" % expect_code, channel.result
@@ -175,14 +179,30 @@ class RestHelper:
 
         self.auth_user_id = temp_id
 
-    def send(self, room_id, body=None, txn_id=None, tok=None, expect_code=200):
+    def send(
+        self,
+        room_id,
+        body=None,
+        txn_id=None,
+        tok=None,
+        expect_code=200,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
+    ):
         if body is None:
             body = "body_text_here"
 
         content = {"msgtype": "m.text", "body": body}
 
         return self.send_event(
-            room_id, "m.room.message", content, txn_id, tok, expect_code
+            room_id,
+            "m.room.message",
+            content,
+            txn_id,
+            tok,
+            expect_code,
+            custom_headers=custom_headers,
         )
 
     def send_event(
@@ -193,6 +213,9 @@ class RestHelper:
         txn_id=None,
         tok=None,
         expect_code=200,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
     ):
         if txn_id is None:
             txn_id = "m%s" % (str(time.time()))
@@ -207,6 +230,7 @@ class RestHelper:
             "PUT",
             path,
             json.dumps(content or {}).encode("utf8"),
+            custom_headers=custom_headers,
         )
 
         assert (
diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py
index 856aa8682f..2e2f94742e 100644
--- a/tests/rest/client/v2_alpha/test_relations.py
+++ b/tests/rest/client/v2_alpha/test_relations.py
@@ -273,7 +273,7 @@ class RelationsTestCase(unittest.HomeserverTestCase):
 
         prev_token = None
         found_event_ids = []
-        encoded_key = urllib.parse.quote_plus("👍".encode("utf-8"))
+        encoded_key = urllib.parse.quote_plus("👍".encode())
         for _ in range(20):
             from_token = ""
             if prev_token:
diff --git a/tests/rest/client/v2_alpha/test_report_event.py b/tests/rest/client/v2_alpha/test_report_event.py
index 1ec6b05e5b..a76a6fef1e 100644
--- a/tests/rest/client/v2_alpha/test_report_event.py
+++ b/tests/rest/client/v2_alpha/test_report_event.py
@@ -41,7 +41,7 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
         self.helper.join(self.room_id, user=self.admin_user, tok=self.admin_user_tok)
         resp = self.helper.send(self.room_id, tok=self.admin_user_tok)
         self.event_id = resp["event_id"]
-        self.report_path = "rooms/{}/report/{}".format(self.room_id, self.event_id)
+        self.report_path = f"rooms/{self.room_id}/report/{self.event_id}"
 
     def test_reason_str_and_score_int(self):
         data = {"reason": "this makes me sad", "score": -100}
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 95e7075841..2d6b49692e 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -310,7 +310,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         correctly decode it as the UTF-8 string, and use filename* in the
         response.
         """
-        filename = parse.quote("\u2603".encode("utf8")).encode("ascii")
+        filename = parse.quote("\u2603".encode()).encode("ascii")
         channel = self._req(
             b"inline; filename*=utf-8''" + filename + self.test_image.extension
         )
diff --git a/tests/server.py b/tests/server.py
index f32d8dc375..6fddd3b305 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -52,7 +52,7 @@ class FakeChannel:
     _reactor = attr.ib()
     result = attr.ib(type=dict, default=attr.Factory(dict))
     _ip = attr.ib(type=str, default="127.0.0.1")
-    _producer = None  # type: Optional[Union[IPullProducer, IPushProducer]]
+    _producer: Optional[Union[IPullProducer, IPushProducer]] = None
 
     @property
     def json_body(self):
@@ -316,8 +316,10 @@ class ThreadedMemoryReactorClock(MemoryReactorClock):
 
         self._tcp_callbacks = {}
         self._udp = []
-        lookups = self.lookups = {}  # type: Dict[str, str]
-        self._thread_callbacks = deque()  # type: Deque[Callable[[], None]]
+        self.lookups: Dict[str, str] = {}
+        self._thread_callbacks: Deque[Callable[[], None]] = deque()
+
+        lookups = self.lookups
 
         @implementer(IResolverSimple)
         class FakeResolver:
diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py
index 069db0edc4..0da42b5ac5 100644
--- a/tests/storage/test_background_update.py
+++ b/tests/storage/test_background_update.py
@@ -7,9 +7,7 @@ from tests import unittest
 
 class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
     def prepare(self, reactor, clock, homeserver):
-        self.updates = (
-            self.hs.get_datastore().db_pool.updates
-        )  # type: BackgroundUpdater
+        self.updates: BackgroundUpdater = self.hs.get_datastore().db_pool.updates
         # the base test class should have run the real bg updates for us
         self.assertTrue(
             self.get_success(self.updates.has_completed_background_updates())
diff --git a/tests/storage/test_directory.py b/tests/storage/test_directory.py
index 41bef62ca8..43628ce44f 100644
--- a/tests/storage/test_directory.py
+++ b/tests/storage/test_directory.py
@@ -59,5 +59,5 @@ class DirectoryStoreTestCase(HomeserverTestCase):
         self.assertEqual(self.room.to_string(), room_id)
 
         self.assertIsNone(
-            (self.get_success(self.store.get_association_from_room_alias(self.alias)))
+            self.get_success(self.store.get_association_from_room_alias(self.alias))
         )
diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py
index 792b1c44c1..7486078284 100644
--- a/tests/storage/test_id_generators.py
+++ b/tests/storage/test_id_generators.py
@@ -27,7 +27,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
     def prepare(self, reactor, clock, hs):
         self.store = hs.get_datastore()
-        self.db_pool = self.store.db_pool  # type: DatabasePool
+        self.db_pool: DatabasePool = self.store.db_pool
 
         self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db))
 
@@ -460,7 +460,7 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
     def prepare(self, reactor, clock, hs):
         self.store = hs.get_datastore()
-        self.db_pool = self.store.db_pool  # type: DatabasePool
+        self.db_pool: DatabasePool = self.store.db_pool
 
         self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db))
 
@@ -586,7 +586,7 @@ class MultiTableMultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
     def prepare(self, reactor, clock, hs):
         self.store = hs.get_datastore()
-        self.db_pool = self.store.db_pool  # type: DatabasePool
+        self.db_pool: DatabasePool = self.store.db_pool
 
         self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db))
 
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index 8a446da848..a1ba99ff14 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -45,11 +45,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertIsNone(
-            (
-                self.get_success(
-                    self.store.get_profile_displayname(self.u_frank.localpart)
-                )
-            )
+            self.get_success(self.store.get_profile_displayname(self.u_frank.localpart))
         )
 
     def test_avatar_url(self):
@@ -76,9 +72,5 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertIsNone(
-            (
-                self.get_success(
-                    self.store.get_profile_avatar_url(self.u_frank.localpart)
-                )
-            )
+            self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart))
         )
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index 54c5b470c7..e5574063f1 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -75,7 +75,7 @@ class PurgeTests(HomeserverTestCase):
         token = self.get_success(
             self.store.get_topological_token_for_event(last["event_id"])
         )
-        event = "t{}-{}".format(token.topological + 1, token.stream + 1)
+        event = f"t{token.topological + 1}-{token.stream + 1}"
 
         # Purge everything before this topological token
         f = self.get_failure(
diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py
index 70257bf210..31ce7f6252 100644
--- a/tests/storage/test_room.py
+++ b/tests/storage/test_room.py
@@ -49,7 +49,7 @@ class RoomStoreTestCase(HomeserverTestCase):
         )
 
     def test_get_room_unknown_room(self):
-        self.assertIsNone((self.get_success(self.store.get_room("!uknown:test"))))
+        self.assertIsNone(self.get_success(self.store.get_room("!uknown:test")))
 
     def test_get_room_with_stats(self):
         self.assertDictContainsSubset(
diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py
index 88888319cc..f73306ecc4 100644
--- a/tests/test_event_auth.py
+++ b/tests/test_event_auth.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 
 import unittest
+from typing import Optional
 
 from synapse import event_auth
 from synapse.api.errors import AuthError
 from synapse.api.room_versions import RoomVersions
-from synapse.events import make_event_from_dict
-from synapse.types import get_domain_from_id
+from synapse.events import EventBase, make_event_from_dict
+from synapse.types import JsonDict, get_domain_from_id
 
 
 class EventAuthTestCase(unittest.TestCase):
@@ -432,7 +433,7 @@ class EventAuthTestCase(unittest.TestCase):
 TEST_ROOM_ID = "!test:room"
 
 
-def _create_event(user_id):
+def _create_event(user_id: str) -> EventBase:
     return make_event_from_dict(
         {
             "room_id": TEST_ROOM_ID,
@@ -444,7 +445,9 @@ def _create_event(user_id):
     )
 
 
-def _member_event(user_id, membership, sender=None):
+def _member_event(
+    user_id: str, membership: str, sender: Optional[str] = None
+) -> EventBase:
     return make_event_from_dict(
         {
             "room_id": TEST_ROOM_ID,
@@ -458,11 +461,11 @@ def _member_event(user_id, membership, sender=None):
     )
 
 
-def _join_event(user_id):
+def _join_event(user_id: str) -> EventBase:
     return _member_event(user_id, "join")
 
 
-def _power_levels_event(sender, content):
+def _power_levels_event(sender: str, content: JsonDict) -> EventBase:
     return make_event_from_dict(
         {
             "room_id": TEST_ROOM_ID,
@@ -475,7 +478,7 @@ def _power_levels_event(sender, content):
     )
 
 
-def _alias_event(sender, **kwargs):
+def _alias_event(sender: str, **kwargs) -> EventBase:
     data = {
         "room_id": TEST_ROOM_ID,
         "event_id": _get_event_id(),
@@ -488,7 +491,7 @@ def _alias_event(sender, **kwargs):
     return make_event_from_dict(data)
 
 
-def _random_state_event(sender):
+def _random_state_event(sender: str) -> EventBase:
     return make_event_from_dict(
         {
             "room_id": TEST_ROOM_ID,
@@ -501,7 +504,7 @@ def _random_state_event(sender):
     )
 
 
-def _join_rules_event(sender, join_rule):
+def _join_rules_event(sender: str, join_rule: str) -> EventBase:
     return make_event_from_dict(
         {
             "room_id": TEST_ROOM_ID,
@@ -519,7 +522,7 @@ def _join_rules_event(sender, join_rule):
 event_count = 0
 
 
-def _get_event_id():
+def _get_event_id() -> str:
     global event_count
     c = event_count
     event_count += 1
diff --git a/tests/test_state.py b/tests/test_state.py
index 62f7095873..e5488df1ac 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -168,6 +168,7 @@ class StateTestCase(unittest.TestCase):
                 "get_state_handler",
                 "get_clock",
                 "get_state_resolution_handler",
+                "get_account_validity_handler",
                 "hostname",
             ]
         )
@@ -199,7 +200,7 @@ class StateTestCase(unittest.TestCase):
 
         self.store.register_events(graph.walk())
 
-        context_store = {}  # type: dict[str, EventContext]
+        context_store: dict[str, EventContext] = {}
 
         for event in graph.walk():
             context = yield defer.ensureDeferred(
diff --git a/tests/test_types.py b/tests/test_types.py
index d7881021d3..0d0c00d97a 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -103,6 +103,4 @@ class MapUsernameTestCase(unittest.TestCase):
     def testNonAscii(self):
         # this should work with either a unicode or a bytes
         self.assertEqual(map_username_to_mxid_localpart("tĂȘst"), "t=c3=aast")
-        self.assertEqual(
-            map_username_to_mxid_localpart("tĂȘst".encode("utf-8")), "t=c3=aast"
-        )
+        self.assertEqual(map_username_to_mxid_localpart("tĂȘst".encode()), "t=c3=aast")
diff --git a/tests/test_utils/html_parsers.py b/tests/test_utils/html_parsers.py
index 1fbb38f4be..e878af5f12 100644
--- a/tests/test_utils/html_parsers.py
+++ b/tests/test_utils/html_parsers.py
@@ -23,13 +23,13 @@ class TestHtmlParser(HTMLParser):
         super().__init__()
 
         # a list of links found in the doc
-        self.links = []  # type: List[str]
+        self.links: List[str] = []
 
         # the values of any hidden <input>s: map from name to value
-        self.hiddens = {}  # type: Dict[str, Optional[str]]
+        self.hiddens: Dict[str, Optional[str]] = {}
 
         # the values of any radio buttons: map from name to list of values
-        self.radios = {}  # type: Dict[str, List[Optional[str]]]
+        self.radios: Dict[str, List[Optional[str]]] = {}
 
     def handle_starttag(
         self, tag: str, attrs: Iterable[Tuple[str, Optional[str]]]
diff --git a/tests/unittest.py b/tests/unittest.py
index 74db7c08f1..3eec9c4d5b 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -140,7 +140,7 @@ class TestCase(unittest.TestCase):
             try:
                 self.assertEquals(attrs[key], getattr(obj, key))
             except AssertionError as e:
-                raise (type(e))("Assert error for '.{}':".format(key)) from e
+                raise (type(e))(f"Assert error for '.{key}':") from e
 
     def assert_dict(self, required, actual):
         """Does a partial assert of a dict.
@@ -520,7 +520,7 @@ class HomeserverTestCase(TestCase):
         if not isinstance(deferred, Deferred):
             return d
 
-        results = []  # type: list
+        results: list = []
         deferred.addBoth(results.append)
 
         self.pump(by=by)
@@ -594,7 +594,15 @@ class HomeserverTestCase(TestCase):
         user_id = channel.json_body["user_id"]
         return user_id
 
-    def login(self, username, password, device_id=None):
+    def login(
+        self,
+        username,
+        password,
+        device_id=None,
+        custom_headers: Optional[
+            Iterable[Tuple[Union[bytes, str], Union[bytes, str]]]
+        ] = None,
+    ):
         """
         Log in a user, and get an access token. Requires the Login API be
         registered.
@@ -605,7 +613,10 @@ class HomeserverTestCase(TestCase):
             body["device_id"] = device_id
 
         channel = self.make_request(
-            "POST", "/_matrix/client/r0/login", json.dumps(body).encode("utf8")
+            "POST",
+            "/_matrix/client/r0/login",
+            json.dumps(body).encode("utf8"),
+            custom_headers=custom_headers,
         )
         self.assertEqual(channel.code, 200, channel.result)
 
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 0277998cbe..39947a166b 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -174,7 +174,7 @@ class DescriptorTestCase(unittest.TestCase):
                 return self.result
 
         obj = Cls()
-        callbacks = set()  # type: Set[str]
+        callbacks: Set[str] = set()
 
         # set off an asynchronous request
         obj.result = origin_d = defer.Deferred()
diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py
index e712eb42ea..3c0ddd4f18 100644
--- a/tests/util/test_itertools.py
+++ b/tests/util/test_itertools.py
@@ -44,7 +44,7 @@ class ChunkSeqTests(TestCase):
         )
 
     def test_empty_input(self):
-        parts = chunk_seq([], 5)  # type: Iterable[Sequence]
+        parts: Iterable[Sequence] = chunk_seq([], 5)
 
         self.assertEqual(
             list(parts),
@@ -56,13 +56,13 @@ class SortTopologically(TestCase):
     def test_empty(self):
         "Test that an empty graph works correctly"
 
-        graph = {}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {}
         self.assertEqual(list(sorted_topologically([], graph)), [])
 
     def test_handle_empty_graph(self):
         "Test that a graph where a node doesn't have an entry is treated as empty"
 
-        graph = {}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {}
 
         # For disconnected nodes the output is simply sorted.
         self.assertEqual(list(sorted_topologically([1, 2], graph)), [1, 2])
@@ -70,7 +70,7 @@ class SortTopologically(TestCase):
     def test_disconnected(self):
         "Test that a graph with no edges work"
 
-        graph = {1: [], 2: []}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: []}
 
         # For disconnected nodes the output is simply sorted.
         self.assertEqual(list(sorted_topologically([1, 2], graph)), [1, 2])
@@ -78,19 +78,19 @@ class SortTopologically(TestCase):
     def test_linear(self):
         "Test that a simple `4 -> 3 -> 2 -> 1` graph works"
 
-        graph = {1: [], 2: [1], 3: [2], 4: [3]}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3]}
 
         self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4])
 
     def test_subset(self):
         "Test that only sorting a subset of the graph works"
-        graph = {1: [], 2: [1], 3: [2], 4: [3]}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3]}
 
         self.assertEqual(list(sorted_topologically([4, 3], graph)), [3, 4])
 
     def test_fork(self):
         "Test that a forked graph works"
-        graph = {1: [], 2: [1], 3: [1], 4: [2, 3]}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [1], 4: [2, 3]}
 
         # Valid orderings are `[1, 3, 2, 4]` or `[1, 2, 3, 4]`, but we should
         # always get the same one.
@@ -98,12 +98,12 @@ class SortTopologically(TestCase):
 
     def test_duplicates(self):
         "Test that a graph with duplicate edges work"
-        graph = {1: [], 2: [1, 1], 3: [2, 2], 4: [3]}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: [1, 1], 3: [2, 2], 4: [3]}
 
         self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4])
 
     def test_multiple_paths(self):
         "Test that a graph with multiple paths between two nodes work"
-        graph = {1: [], 2: [1], 3: [2], 4: [3, 2, 1]}  # type: Dict[int, List[int]]
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3, 2, 1]}
 
         self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4])
diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py
index 377904e72e..6578f3411e 100644
--- a/tests/util/test_lrucache.py
+++ b/tests/util/test_lrucache.py
@@ -15,7 +15,7 @@
 
 from unittest.mock import Mock
 
-from synapse.util.caches.lrucache import LruCache
+from synapse.util.caches.lrucache import LruCache, setup_expire_lru_cache_entries
 from synapse.util.caches.treecache import TreeCache
 
 from tests import unittest
@@ -260,3 +260,47 @@ class LruCacheSizedTestCase(unittest.HomeserverTestCase):
         self.assertEquals(cache["key3"], [3])
         self.assertEquals(cache["key4"], [4])
         self.assertEquals(cache["key5"], [5, 6])
+
+
+class TimeEvictionTestCase(unittest.HomeserverTestCase):
+    """Test that time based eviction works correctly."""
+
+    def default_config(self):
+        config = super().default_config()
+
+        config.setdefault("caches", {})["expiry_time"] = "30m"
+
+        return config
+
+    def test_evict(self):
+        setup_expire_lru_cache_entries(self.hs)
+
+        cache = LruCache(5, clock=self.hs.get_clock())
+
+        # Check that we evict entries we haven't accessed for 30 minutes.
+        cache["key1"] = 1
+        cache["key2"] = 2
+
+        self.reactor.advance(20 * 60)
+
+        self.assertEqual(cache.get("key1"), 1)
+
+        self.reactor.advance(20 * 60)
+
+        # We have only touched `key1` in the last 30m, so we expect that to
+        # still be in the cache while `key2` should have been evicted.
+        self.assertEqual(cache.get("key1"), 1)
+        self.assertEqual(cache.get("key2"), None)
+
+        # Check that re-adding an expired key works correctly.
+        cache["key2"] = 3
+        self.assertEqual(cache.get("key2"), 3)
+
+        self.reactor.advance(20 * 60)
+
+        self.assertEqual(cache.get("key2"), 3)
+
+        self.reactor.advance(20 * 60)
+
+        self.assertEqual(cache.get("key1"), None)
+        self.assertEqual(cache.get("key2"), 3)