-rw-r--r--  CHANGES.md  59
-rw-r--r--  INSTALL.md  4
-rw-r--r--  changelog.d/3484.misc  1
-rw-r--r--  changelog.d/4338.feature  1
-rw-r--r--  changelog.d/5043.feature  1
-rw-r--r--  changelog.d/5171.misc  1
-rw-r--r--  changelog.d/5181.feature  1
-rw-r--r--  changelog.d/5183.misc  1
-rw-r--r--  changelog.d/5184.misc  1
-rw-r--r--  changelog.d/5185.misc  1
-rw-r--r--  changelog.d/5187.bugfix  1
-rw-r--r--  changelog.d/5190.feature  1
-rw-r--r--  changelog.d/5196.feature  1
-rw-r--r--  changelog.d/5200.bugfix  1
-rw-r--r--  changelog.d/5204.feature  1
-rw-r--r--  changelog.d/5216.misc  1
-rw-r--r--  changelog.d/5220.feature  1
-rw-r--r--  changelog.d/5223.feature  1
-rw-r--r--  changelog.d/5227.misc  1
-rw-r--r--  changelog.d/5230.misc  1
-rw-r--r--  changelog.d/5232.misc  1
-rw-r--r--  changelog.d/5233.bugfix  1
-rw-r--r--  changelog.d/5234.misc  1
-rw-r--r--  changelog.d/5235.misc  1
-rw-r--r--  changelog.d/5236.misc  1
-rw-r--r--  changelog.d/5237.misc  1
-rw-r--r--  changelog.d/5244.misc  1
-rw-r--r--  changelog.d/5249.feature  1
-rw-r--r--  changelog.d/5250.misc  1
-rw-r--r--  changelog.d/5251.bugfix  1
-rw-r--r--  changelog.d/5256.bugfix  1
-rw-r--r--  changelog.d/5257.bugfix  1
-rw-r--r--  changelog.d/5258.bugfix  1
-rw-r--r--  changelog.d/5260.feature  1
-rw-r--r--  changelog.d/5274.bugfix  1
-rw-r--r--  changelog.d/5275.bugfix  1
-rw-r--r--  changelog.d/5277.bugfix  1
-rw-r--r--  changelog.d/5278.bugfix  1
-rw-r--r--  changelog.d/5282.doc  1
-rw-r--r--  changelog.d/5283.misc  1
-rw-r--r--  changelog.d/5286.feature  1
-rw-r--r--  changelog.d/5287.misc  1
-rw-r--r--  changelog.d/5288.misc  1
-rw-r--r--  changelog.d/5291.bugfix  1
-rw-r--r--  changelog.d/5293.bugfix  1
-rw-r--r--  changelog.d/5294.bugfix  1
-rw-r--r--  changelog.d/5296.misc  1
-rw-r--r--  changelog.d/5300.bugfix  1
-rw-r--r--  changelog.d/5303.misc  1
-rw-r--r--  debian/changelog  12
-rw-r--r--  debian/test/.gitignore  2
-rw-r--r--  debian/test/provision.sh  23
-rw-r--r--  debian/test/stretch/Vagrantfile  13
-rw-r--r--  debian/test/xenial/Vagrantfile  10
-rw-r--r--  docker/README.md  2
-rw-r--r--  docs/CAPTCHA_SETUP.rst  1
-rw-r--r--  docs/admin_api/user_admin_api.rst  2
-rw-r--r--  docs/postgres.rst  45
-rw-r--r--  docs/sample_config.yaml  32
-rw-r--r--  docs/user_directory.md  10
-rw-r--r--  synapse/__init__.py  2
-rw-r--r--  synapse/api/constants.py  12
-rw-r--r--  synapse/api/errors.py  16
-rw-r--r--  synapse/api/room_versions.py  17
-rw-r--r--  synapse/api/urls.py  1
-rw-r--r--  synapse/app/_base.py  20
-rw-r--r--  synapse/app/client_reader.py  2
-rw-r--r--  synapse/config/homeserver.py  42
-rw-r--r--  synapse/config/server.py  38
-rw-r--r--  synapse/config/stats.py  60
-rw-r--r--  synapse/config/user_directory.py  6
-rw-r--r--  synapse/crypto/keyring.py  709
-rw-r--r--  synapse/events/__init__.py  31
-rw-r--r--  synapse/events/builder.py  13
-rw-r--r--  synapse/events/utils.py  59
-rw-r--r--  synapse/federation/federation_server.py  14
-rw-r--r--  synapse/federation/transport/server.py  31
-rw-r--r--  synapse/handlers/events.py  3
-rw-r--r--  synapse/handlers/federation.py  308
-rw-r--r--  synapse/handlers/message.py  19
-rw-r--r--  synapse/handlers/presence.py  99
-rw-r--r--  synapse/handlers/register.py  11
-rw-r--r--  synapse/handlers/room.py  9
-rw-r--r--  synapse/handlers/room_member.py  2
-rw-r--r--  synapse/handlers/stats.py  325
-rw-r--r--  synapse/http/matrixfederationclient.py  4
-rw-r--r--  synapse/http/servlet.py  2
-rw-r--r--  synapse/python_dependencies.py  50
-rw-r--r--  synapse/replication/slave/storage/events.py  14
-rw-r--r--  synapse/replication/tcp/streams/_base.py  1
-rw-r--r--  synapse/replication/tcp/streams/events.py  11
-rw-r--r--  synapse/rest/__init__.py  2
-rw-r--r--  synapse/rest/admin/__init__.py  12
-rw-r--r--  synapse/rest/client/v1/login.py  4
-rw-r--r--  synapse/rest/client/v1/logout.py  27
-rw-r--r--  synapse/rest/client/v1/room.py  2
-rw-r--r--  synapse/rest/client/v2_alpha/capabilities.py  5
-rw-r--r--  synapse/rest/client/v2_alpha/register.py  22
-rw-r--r--  synapse/rest/client/v2_alpha/relations.py  338
-rw-r--r--  synapse/rest/client/v2_alpha/sync.py  3
-rw-r--r--  synapse/rest/key/v2/remote_key_resource.py  6
-rw-r--r--  synapse/rest/media/v1/media_repository.py  9
-rw-r--r--  synapse/rest/media/v1/thumbnail_resource.py  4
-rw-r--r--  synapse/rest/media/v1/thumbnailer.py  35
-rw-r--r--  synapse/server.py  6
-rw-r--r--  synapse/storage/__init__.py  6
-rw-r--r--  synapse/storage/_base.py  12
-rw-r--r--  synapse/storage/events.py  288
-rw-r--r--  synapse/storage/events_bg_updates.py  401
-rw-r--r--  synapse/storage/events_worker.py  79
-rw-r--r--  synapse/storage/keys.py  90
-rw-r--r--  synapse/storage/relations.py  476
-rw-r--r--  synapse/storage/roommember.py  21
-rw-r--r--  synapse/storage/schema/delta/54/add_validity_to_server_keys.sql  23
-rw-r--r--  synapse/storage/schema/delta/54/delete_forward_extremities.sql  23
-rw-r--r--  synapse/storage/schema/delta/54/relations.sql  27
-rw-r--r--  synapse/storage/schema/delta/54/stats.sql  80
-rw-r--r--  synapse/storage/schema/delta/54/stats2.sql  28
-rw-r--r--  synapse/storage/state_deltas.py  12
-rw-r--r--  synapse/storage/stats.py  452
-rw-r--r--  synapse/storage/stream.py  194
-rw-r--r--  synapse/util/logcontext.py  22
-rw-r--r--  tests/crypto/test_keyring.py  156
-rw-r--r--  tests/federation/test_complexity.py  90
-rw-r--r--  tests/handlers/test_register.py  7
-rw-r--r--  tests/handlers/test_stats.py  251
-rw-r--r--  tests/rest/client/v1/utils.py  17
-rw-r--r--  tests/rest/client/v2_alpha/test_auth.py  9
-rw-r--r--  tests/rest/client/v2_alpha/test_capabilities.py  7
-rw-r--r--  tests/rest/client/v2_alpha/test_relations.py  564
-rw-r--r--  tests/storage/test_cleanup_extrems.py  248
-rw-r--r--  tests/storage/test_keys.py  70
-rw-r--r--  tests/test_terms_auth.py  2
-rw-r--r--  tox.ini  2
134 files changed, 5318 insertions, 1007 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 1e9c3cf953..0ffdf1aaef 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,62 @@
+Synapse 0.99.5.2 (2019-05-30)
+=============================
+
+Bugfixes
+--------
+
+- Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. ([\#5274](https://github.com/matrix-org/synapse/issues/5274), [\#5278](https://github.com/matrix-org/synapse/issues/5278), [\#5291](https://github.com/matrix-org/synapse/issues/5291))
+
+
+Synapse 0.99.5.1 (2019-05-22)
+=============================
+
+0.99.5.1 supersedes 0.99.5 due to malformed debian changelog - no functional changes.
+
+Synapse 0.99.5 (2019-05-22)
+===========================
+
+No significant changes.
+
+
+Synapse 0.99.5rc1 (2019-05-21)
+==============================
+
+Features
+--------
+
+- Add ability to blacklist IP ranges for the federation client. ([\#5043](https://github.com/matrix-org/synapse/issues/5043))
+- Ratelimiting configuration for clients sending messages and the federation server has been altered to match login ratelimiting. The old configuration names will continue working. Check the sample config for details of the new names. ([\#5181](https://github.com/matrix-org/synapse/issues/5181))
+- Drop support for the undocumented /_matrix/client/v2_alpha API prefix. ([\#5190](https://github.com/matrix-org/synapse/issues/5190))
+- Add an option to disable per-room profiles. ([\#5196](https://github.com/matrix-org/synapse/issues/5196))
+- Stick an expiration date to any registered user missing one at startup if account validity is enabled. ([\#5204](https://github.com/matrix-org/synapse/issues/5204))
+- Add experimental support for relations (aka reactions and edits). ([\#5209](https://github.com/matrix-org/synapse/issues/5209), [\#5211](https://github.com/matrix-org/synapse/issues/5211), [\#5203](https://github.com/matrix-org/synapse/issues/5203), [\#5212](https://github.com/matrix-org/synapse/issues/5212))
+- Add a room version 4 which uses a new event ID format, as per [MSC2002](https://github.com/matrix-org/matrix-doc/pull/2002). ([\#5210](https://github.com/matrix-org/synapse/issues/5210), [\#5217](https://github.com/matrix-org/synapse/issues/5217))
+
+
+Bugfixes
+--------
+
+- Fix image orientation when generating thumbnails (needs pillow>=4.3.0). Contributed by Pau Rodriguez-Estivill. ([\#5039](https://github.com/matrix-org/synapse/issues/5039))
+- Exclude soft-failed events from forward-extremity candidates: fixes "No forward extremities left!" error. ([\#5146](https://github.com/matrix-org/synapse/issues/5146))
+- Re-order stages in registration flows such that msisdn and email verification are done last. ([\#5174](https://github.com/matrix-org/synapse/issues/5174))
+- Fix 3pid guest invites. ([\#5177](https://github.com/matrix-org/synapse/issues/5177))
+- Fix a bug where the register endpoint would fail with M_THREEPID_IN_USE instead of returning an account previously registered in the same session. ([\#5187](https://github.com/matrix-org/synapse/issues/5187))
+- Prevent registration for user ids that are too long to fit into a state key. Contributed by Reid Anderson. ([\#5198](https://github.com/matrix-org/synapse/issues/5198))
+- Fix incompatibility between ACME support and Python 3.5.2. ([\#5218](https://github.com/matrix-org/synapse/issues/5218))
+- Fix error handling for rooms whose versions are unknown. ([\#5219](https://github.com/matrix-org/synapse/issues/5219))
+
+
+Internal Changes
+----------------
+
+- Make /sync attempt to return device updates for both joined and invited users. Note that this doesn't currently work correctly due to other bugs. ([\#3484](https://github.com/matrix-org/synapse/issues/3484))
+- Update tests to consistently be configured via the same code that is used when loading from configuration files. ([\#5171](https://github.com/matrix-org/synapse/issues/5171), [\#5185](https://github.com/matrix-org/synapse/issues/5185))
+- Allow client event serialization to be async. ([\#5183](https://github.com/matrix-org/synapse/issues/5183))
+- Expose DataStore._get_events as get_events_as_list. ([\#5184](https://github.com/matrix-org/synapse/issues/5184))
+- Make generating SQL bounds for pagination generic. ([\#5191](https://github.com/matrix-org/synapse/issues/5191))
+- Stop telling people to install the optional dependencies by default. ([\#5197](https://github.com/matrix-org/synapse/issues/5197))
+
+
 Synapse 0.99.4 (2019-05-15)
 ===========================
 
diff --git a/INSTALL.md b/INSTALL.md
index b88d826f6c..1934593148 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -35,7 +35,7 @@ virtualenv -p python3 ~/synapse/env
 source ~/synapse/env/bin/activate
 pip install --upgrade pip
 pip install --upgrade setuptools
-pip install matrix-synapse[all]
+pip install matrix-synapse
 ```
 
 This will download Synapse from [PyPI](https://pypi.org/project/matrix-synapse)
@@ -48,7 +48,7 @@ update flag:
 
 ```
 source ~/synapse/env/bin/activate
-pip install -U matrix-synapse[all]
+pip install -U matrix-synapse
 ```
 
 Before you can start Synapse, you will need to generate a configuration
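
Since `pip install matrix-synapse` no longer pulls in the optional extras by default, a short check like the sketch below can confirm which optional features a virtualenv supports. The module-to-feature mapping here is illustrative, not Synapse's full extras list.

```
# Sketch: probe a couple of optional dependencies that the `[all]` extra
# used to install. The mapping is illustrative, not exhaustive.
import importlib

OPTIONAL_MODULES = {
    "psycopg2": "PostgreSQL support",
    "lxml": "URL previews",
}

for module, feature in OPTIONAL_MODULES.items():
    try:
        importlib.import_module(module)
        print("%s installed (%s available)" % (module, feature))
    except ImportError:
        print("%s missing (%s unavailable)" % (module, feature))
```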
diff --git a/changelog.d/3484.misc b/changelog.d/3484.misc
deleted file mode 100644
index 3645849844..0000000000
--- a/changelog.d/3484.misc
+++ /dev/null
@@ -1 +0,0 @@
-Make /sync attempt to return device updates for both joined and invited users. Note that this doesn't currently work correctly due to other bugs.
diff --git a/changelog.d/4338.feature b/changelog.d/4338.feature
new file mode 100644
index 0000000000..01285e965c
--- /dev/null
+++ b/changelog.d/4338.feature
@@ -0,0 +1 @@
+Synapse now more efficiently collates room statistics.
diff --git a/changelog.d/5043.feature b/changelog.d/5043.feature
deleted file mode 100644
index 0f1e0ee30e..0000000000
--- a/changelog.d/5043.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add ability to blacklist IP ranges for the federation client.
diff --git a/changelog.d/5171.misc b/changelog.d/5171.misc
deleted file mode 100644
index d148b03b51..0000000000
--- a/changelog.d/5171.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update tests to consistently be configured via the same code that is used when loading from configuration files.
diff --git a/changelog.d/5181.feature b/changelog.d/5181.feature
deleted file mode 100644
index 5ce13aa2ea..0000000000
--- a/changelog.d/5181.feature
+++ /dev/null
@@ -1 +0,0 @@
-Ratelimiting configuration for clients sending messages and the federation server has been altered to match login ratelimiting. The old configuration names will continue working. Check the sample config for details of the new names.
diff --git a/changelog.d/5183.misc b/changelog.d/5183.misc
deleted file mode 100644
index a8970f29eb..0000000000
--- a/changelog.d/5183.misc
+++ /dev/null
@@ -1 +0,0 @@
-Allow client event serialization to be async.
diff --git a/changelog.d/5184.misc b/changelog.d/5184.misc
deleted file mode 100644
index 1588bdef6c..0000000000
--- a/changelog.d/5184.misc
+++ /dev/null
@@ -1 +0,0 @@
-Expose DataStore._get_events as get_events_as_list.
diff --git a/changelog.d/5185.misc b/changelog.d/5185.misc
deleted file mode 100644
index d148b03b51..0000000000
--- a/changelog.d/5185.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update tests to consistently be configured via the same code that is used when loading from configuration files.
diff --git a/changelog.d/5187.bugfix b/changelog.d/5187.bugfix
deleted file mode 100644
index df176cf5b2..0000000000
--- a/changelog.d/5187.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug where the register endpoint would fail with M_THREEPID_IN_USE instead of returning an account previously registered in the same session.
diff --git a/changelog.d/5190.feature b/changelog.d/5190.feature
deleted file mode 100644
index 34904aa7a8..0000000000
--- a/changelog.d/5190.feature
+++ /dev/null
@@ -1 +0,0 @@
-Drop support for the undocumented /_matrix/client/v2_alpha API prefix.
diff --git a/changelog.d/5196.feature b/changelog.d/5196.feature
deleted file mode 100644
index 1ffb928f62..0000000000
--- a/changelog.d/5196.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add an option to disable per-room profiles.
diff --git a/changelog.d/5200.bugfix b/changelog.d/5200.bugfix
new file mode 100644
index 0000000000..f346c7b0cc
--- /dev/null
+++ b/changelog.d/5200.bugfix
@@ -0,0 +1 @@
+Fix worker registration bug caused by ClientReaderSlavedStore being unable to see get_profileinfo.
diff --git a/changelog.d/5204.feature b/changelog.d/5204.feature
deleted file mode 100644
index 2a7212ca18..0000000000
--- a/changelog.d/5204.feature
+++ /dev/null
@@ -1 +0,0 @@
-Stick an expiration date to any registered user missing one at startup if account validity is enabled.
diff --git a/changelog.d/5216.misc b/changelog.d/5216.misc
new file mode 100644
index 0000000000..dbfa29475f
--- /dev/null
+++ b/changelog.d/5216.misc
@@ -0,0 +1 @@
+Synapse will now serve the experimental "room complexity" API endpoint.
diff --git a/changelog.d/5220.feature b/changelog.d/5220.feature
new file mode 100644
index 0000000000..747098c166
--- /dev/null
+++ b/changelog.d/5220.feature
@@ -0,0 +1 @@
+Add experimental support for relations (aka reactions and edits).
diff --git a/changelog.d/5223.feature b/changelog.d/5223.feature
new file mode 100644
index 0000000000..cfdf1ad41b
--- /dev/null
+++ b/changelog.d/5223.feature
@@ -0,0 +1 @@
+Ability to configure default room version.
diff --git a/changelog.d/5227.misc b/changelog.d/5227.misc
new file mode 100644
index 0000000000..32bd7b6009
--- /dev/null
+++ b/changelog.d/5227.misc
@@ -0,0 +1 @@
+Simplifications and comments in do_auth.
diff --git a/changelog.d/5230.misc b/changelog.d/5230.misc
new file mode 100644
index 0000000000..c681bc9748
--- /dev/null
+++ b/changelog.d/5230.misc
@@ -0,0 +1 @@
+Remove urllib3 pin as requests 2.22.0 has been released supporting urllib3 1.25.2.
diff --git a/changelog.d/5232.misc b/changelog.d/5232.misc
new file mode 100644
index 0000000000..1cdc71f095
--- /dev/null
+++ b/changelog.d/5232.misc
@@ -0,0 +1 @@
+Run black on synapse.crypto.keyring.
diff --git a/changelog.d/5233.bugfix b/changelog.d/5233.bugfix
new file mode 100644
index 0000000000..d71b962160
--- /dev/null
+++ b/changelog.d/5233.bugfix
@@ -0,0 +1 @@
+Fix appservice timestamp massaging.
diff --git a/changelog.d/5234.misc b/changelog.d/5234.misc
new file mode 100644
index 0000000000..43fbd6f67c
--- /dev/null
+++ b/changelog.d/5234.misc
@@ -0,0 +1 @@
+Rewrite store_server_verify_key to store several keys at once.
diff --git a/changelog.d/5235.misc b/changelog.d/5235.misc
new file mode 100644
index 0000000000..2296ad2a4f
--- /dev/null
+++ b/changelog.d/5235.misc
@@ -0,0 +1 @@
+Remove unused VerifyKey.expired and .time_added fields.
diff --git a/changelog.d/5236.misc b/changelog.d/5236.misc
new file mode 100644
index 0000000000..cb4417a9f4
--- /dev/null
+++ b/changelog.d/5236.misc
@@ -0,0 +1 @@
+Simplify Keyring.process_v2_response.
\ No newline at end of file
diff --git a/changelog.d/5237.misc b/changelog.d/5237.misc
new file mode 100644
index 0000000000..f4fe3b821b
--- /dev/null
+++ b/changelog.d/5237.misc
@@ -0,0 +1 @@
+Store key validity time in the storage layer.
diff --git a/changelog.d/5244.misc b/changelog.d/5244.misc
new file mode 100644
index 0000000000..9cc1fb869d
--- /dev/null
+++ b/changelog.d/5244.misc
@@ -0,0 +1 @@
+Refactor synapse.crypto.keyring to use a KeyFetcher interface.
diff --git a/changelog.d/5249.feature b/changelog.d/5249.feature
new file mode 100644
index 0000000000..cfdf1ad41b
--- /dev/null
+++ b/changelog.d/5249.feature
@@ -0,0 +1 @@
+Ability to configure default room version.
diff --git a/changelog.d/5250.misc b/changelog.d/5250.misc
new file mode 100644
index 0000000000..575a299a82
--- /dev/null
+++ b/changelog.d/5250.misc
@@ -0,0 +1 @@
+Simplification to Keyring.wait_for_previous_lookups.
diff --git a/changelog.d/5251.bugfix b/changelog.d/5251.bugfix
new file mode 100644
index 0000000000..9a053204b6
--- /dev/null
+++ b/changelog.d/5251.bugfix
@@ -0,0 +1 @@
+Ensure that server_keys fetched via a notary server are correctly signed.
\ No newline at end of file
diff --git a/changelog.d/5256.bugfix b/changelog.d/5256.bugfix
new file mode 100644
index 0000000000..86316ab5dd
--- /dev/null
+++ b/changelog.d/5256.bugfix
@@ -0,0 +1 @@
+Show the correct error when logging out and access token is missing.
diff --git a/changelog.d/5257.bugfix b/changelog.d/5257.bugfix
new file mode 100644
index 0000000000..8334af9b99
--- /dev/null
+++ b/changelog.d/5257.bugfix
@@ -0,0 +1 @@
+Fix error code when there is an invalid parameter on /_matrix/client/r0/publicRooms
diff --git a/changelog.d/5258.bugfix b/changelog.d/5258.bugfix
new file mode 100644
index 0000000000..fb5d44aedb
--- /dev/null
+++ b/changelog.d/5258.bugfix
@@ -0,0 +1 @@
+Fix error when downloading thumbnail with missing width/height parameter.
diff --git a/changelog.d/5260.feature b/changelog.d/5260.feature
new file mode 100644
index 0000000000..01285e965c
--- /dev/null
+++ b/changelog.d/5260.feature
@@ -0,0 +1 @@
+Synapse now more efficiently collates room statistics.
diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix
new file mode 100644
index 0000000000..9e14d20289
--- /dev/null
+++ b/changelog.d/5274.bugfix
@@ -0,0 +1 @@
+Fix bug where we leaked extremities when we soft failed events, leading to performance degradation.
diff --git a/changelog.d/5275.bugfix b/changelog.d/5275.bugfix
new file mode 100644
index 0000000000..45a554642a
--- /dev/null
+++ b/changelog.d/5275.bugfix
@@ -0,0 +1 @@
+Fix "db txn 'update_presence' from sentinel context" log messages.
diff --git a/changelog.d/5277.bugfix b/changelog.d/5277.bugfix
new file mode 100644
index 0000000000..371aa2e7fb
--- /dev/null
+++ b/changelog.d/5277.bugfix
@@ -0,0 +1 @@
+Fix dropped logcontexts during high outbound traffic.
diff --git a/changelog.d/5278.bugfix b/changelog.d/5278.bugfix
new file mode 100644
index 0000000000..9e14d20289
--- /dev/null
+++ b/changelog.d/5278.bugfix
@@ -0,0 +1 @@
+Fix bug where we leaked extremities when we soft failed events, leading to performance degradation.
diff --git a/changelog.d/5282.doc b/changelog.d/5282.doc
new file mode 100644
index 0000000000..350e15bc03
--- /dev/null
+++ b/changelog.d/5282.doc
@@ -0,0 +1 @@
+Fix docs on resetting the user directory.
diff --git a/changelog.d/5283.misc b/changelog.d/5283.misc
new file mode 100644
index 0000000000..002721e566
--- /dev/null
+++ b/changelog.d/5283.misc
@@ -0,0 +1 @@
+Specify the type of reCAPTCHA key to use.
diff --git a/changelog.d/5286.feature b/changelog.d/5286.feature
new file mode 100644
index 0000000000..81860279a3
--- /dev/null
+++ b/changelog.d/5286.feature
@@ -0,0 +1 @@
+CAS login will now hit the r0 API, not the deprecated v1 one.
diff --git a/changelog.d/5287.misc b/changelog.d/5287.misc
new file mode 100644
index 0000000000..1286f1dd08
--- /dev/null
+++ b/changelog.d/5287.misc
@@ -0,0 +1 @@
+Remove spurious debug from MatrixFederationHttpClient.get_json.
diff --git a/changelog.d/5288.misc b/changelog.d/5288.misc
new file mode 100644
index 0000000000..fbf049ba6a
--- /dev/null
+++ b/changelog.d/5288.misc
@@ -0,0 +1 @@
+Improve logging for logcontext leaks.
diff --git a/changelog.d/5291.bugfix b/changelog.d/5291.bugfix
new file mode 100644
index 0000000000..9e14d20289
--- /dev/null
+++ b/changelog.d/5291.bugfix
@@ -0,0 +1 @@
+Fix bug where we leaked extremities when we soft failed events, leading to performance degradation.
diff --git a/changelog.d/5293.bugfix b/changelog.d/5293.bugfix
new file mode 100644
index 0000000000..aa519a8433
--- /dev/null
+++ b/changelog.d/5293.bugfix
@@ -0,0 +1 @@
+Fix a bug where it is not possible to get events in the federation format with the request `GET /_matrix/client/r0/rooms/{roomId}/messages`.
diff --git a/changelog.d/5294.bugfix b/changelog.d/5294.bugfix
new file mode 100644
index 0000000000..5924bda319
--- /dev/null
+++ b/changelog.d/5294.bugfix
@@ -0,0 +1 @@
+Fix performance problems with the rooms stats background update.
diff --git a/changelog.d/5296.misc b/changelog.d/5296.misc
new file mode 100644
index 0000000000..a038a6f7f6
--- /dev/null
+++ b/changelog.d/5296.misc
@@ -0,0 +1 @@
+Refactor keyring.VerifyKeyRequest to use attr.s.
diff --git a/changelog.d/5300.bugfix b/changelog.d/5300.bugfix
new file mode 100644
index 0000000000..049e93cd5a
--- /dev/null
+++ b/changelog.d/5300.bugfix
@@ -0,0 +1 @@
+Fix noisy 'no key for server' logs.
diff --git a/changelog.d/5303.misc b/changelog.d/5303.misc
new file mode 100644
index 0000000000..f6a7f1f8e3
--- /dev/null
+++ b/changelog.d/5303.misc
@@ -0,0 +1 @@
+Clarify that the admin change password API logs the user out.
diff --git a/debian/changelog b/debian/changelog
index 35cf8ffb20..6a1a72c0e3 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,15 @@
+matrix-synapse-py3 (0.99.5.2) stable; urgency=medium
+
+  * New synapse release 0.99.5.2.
+
+ -- Synapse Packaging team <packages@matrix.org>  Thu, 30 May 2019 16:28:07 +0100
+
+matrix-synapse-py3 (0.99.5.1) stable; urgency=medium
+
+  * New synapse release 0.99.5.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Wed, 22 May 2019 16:22:24 +0000
+
 matrix-synapse-py3 (0.99.4) stable; urgency=medium
 
   [ Christoph Müller ]
diff --git a/debian/test/.gitignore b/debian/test/.gitignore
new file mode 100644
index 0000000000..95eda73fcc
--- /dev/null
+++ b/debian/test/.gitignore
@@ -0,0 +1,2 @@
+.vagrant
+*.log
diff --git a/debian/test/provision.sh b/debian/test/provision.sh
new file mode 100644
index 0000000000..a5c7f59712
--- /dev/null
+++ b/debian/test/provision.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# provisioning script for vagrant boxes for testing the matrix-synapse debs.
+#
+# Will install the most recent matrix-synapse-py3 deb for this platform from
+# the /debs directory.
+
+set -e
+
+apt-get update
+apt-get install -y lsb-release
+
+deb=`ls /debs/matrix-synapse-py3_*+$(lsb_release -cs)*.deb | sort | tail -n1`
+
+debconf-set-selections <<EOF
+matrix-synapse matrix-synapse/report-stats boolean false
+matrix-synapse matrix-synapse/server-name string localhost:18448
+EOF
+
+dpkg -i "$deb"
+
+sed -i -e '/port: 8...$/{s/8448/18448/; s/8008/18008/}' -e '$aregistration_shared_secret: secret' /etc/matrix-synapse/homeserver.yaml
+systemctl restart matrix-synapse
diff --git a/debian/test/stretch/Vagrantfile b/debian/test/stretch/Vagrantfile
new file mode 100644
index 0000000000..d8eff6fe11
--- /dev/null
+++ b/debian/test/stretch/Vagrantfile
@@ -0,0 +1,13 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+ver = `cd ../../..; dpkg-parsechangelog -S Version`.strip()
+
+Vagrant.configure("2") do |config|
+  config.vm.box = "debian/stretch64"
+
+  config.vm.synced_folder ".", "/vagrant", disabled: true
+  config.vm.synced_folder "../../../../debs", "/debs", type: "nfs"
+
+  config.vm.provision "shell", path: "../provision.sh"
+end
diff --git a/debian/test/xenial/Vagrantfile b/debian/test/xenial/Vagrantfile
new file mode 100644
index 0000000000..189236da17
--- /dev/null
+++ b/debian/test/xenial/Vagrantfile
@@ -0,0 +1,10 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant.configure("2") do |config|
+  config.vm.box = "ubuntu/xenial64"
+
+  config.vm.synced_folder ".", "/vagrant", disabled: true
+  config.vm.synced_folder "../../../../debs", "/debs"
+  config.vm.provision "shell", path: "../provision.sh"
+end
diff --git a/docker/README.md b/docker/README.md
index b27a692d5b..df5d0151e2 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -161,7 +161,7 @@ specify values for `SYNAPSE_CONFIG_PATH`, `SYNAPSE_SERVER_NAME` and
 example:
 
 ```
-docker run -it --rm
+docker run -it --rm \
     --mount type=volume,src=synapse-data,dst=/data \
     -e SYNAPSE_CONFIG_PATH=/data/homeserver.yaml \
     -e SYNAPSE_SERVER_NAME=my.matrix.host \
diff --git a/docs/CAPTCHA_SETUP.rst b/docs/CAPTCHA_SETUP.rst
index 19a204d9ce..0c22ee4ff6 100644
--- a/docs/CAPTCHA_SETUP.rst
+++ b/docs/CAPTCHA_SETUP.rst
@@ -7,6 +7,7 @@ Requires a public/private key pair from:
 
 https://developers.google.com/recaptcha/
 
+Must be a reCAPTCHA v2 key using the "I'm not a robot" Checkbox option
 
 Setting ReCaptcha Keys
 ----------------------
diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst
index 8aca4f158d..213359d0c0 100644
--- a/docs/admin_api/user_admin_api.rst
+++ b/docs/admin_api/user_admin_api.rst
@@ -69,7 +69,7 @@ An empty body may be passed for backwards compatibility.
 Reset password
 ==============
 
-Changes the password of another user.
+Changes the password of another user. This will automatically log the user out of all their devices.
 
 The api is::
 
diff --git a/docs/postgres.rst b/docs/postgres.rst
index f7ebbed0c3..e81e10403f 100644
--- a/docs/postgres.rst
+++ b/docs/postgres.rst
@@ -3,6 +3,28 @@ Using Postgres
 
 Postgres version 9.4 or later is known to work.
 
+Install postgres client libraries
+=================================
+
+Synapse will require the python postgres client library in order to connect to
+a postgres database.
+
+* If you are using the `matrix.org debian/ubuntu
+  packages <../INSTALL.md#matrixorg-packages>`_,
+  the necessary libraries will already be installed.
+
+* For other pre-built packages, please consult the documentation from the
+  relevant package.
+
+* If you installed synapse `in a virtualenv 
+  <../INSTALL.md#installing-from-source>`_, you can install the library with::
+
+      ~/synapse/env/bin/pip install matrix-synapse[postgres]
+
+  (substituting the path to your virtualenv for ``~/synapse/env``, if you used a
+  different path). You will require the postgres development files. These are in
+  the ``libpq-dev`` package on Debian-derived distributions.
+
 Set up database
 ===============
 
@@ -26,29 +48,6 @@ encoding use, e.g.::
 This would create an appropriate database named ``synapse`` owned by the
 ``synapse_user`` user (which must already exist).
 
-Set up client in Debian/Ubuntu
-===========================
-
-Postgres support depends on the postgres python connector ``psycopg2``. In the
-virtual env::
-
-    sudo apt-get install libpq-dev
-    pip install psycopg2
-
-Set up client in RHEL/CentOs 7
-==============================
-
-Make sure you have the appropriate version of postgres-devel installed. For a
-postgres 9.4, use the postgres 9.4 packages from
-[here](https://wiki.postgresql.org/wiki/YUM_Installation).
-
-As with Debian/Ubuntu, postgres support depends on the postgres python connector
-``psycopg2``. In the virtual env::
-
-    sudo yum install postgresql-devel libpqxx-devel.x86_64
-    export PATH=/usr/pgsql-9.4/bin/:$PATH
-    pip install psycopg2
-
 Tuning Postgres
 ===============
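
The distro-specific sections removed above boiled down to making `psycopg2` importable; a quick way to confirm the client library works from the virtualenv is a check along these lines (the connection details are placeholders for your own database, not real defaults):

```
# Sketch: verify that psycopg2 can reach the Synapse database.
import psycopg2

conn = psycopg2.connect(
    dbname="synapse",
    user="synapse_user",
    password="secretpassword",
    host="localhost",
)
with conn.cursor() as cur:
    cur.execute("SHOW server_version;")
    print(cur.fetchone()[0])  # Postgres 9.4 or later is known to work
conn.close()
```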
 
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 9536681068..493ea9ee9e 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -83,6 +83,16 @@ pid_file: DATADIR/homeserver.pid
 #
 #restrict_public_rooms_to_local_users: true
 
+# The default room version for newly created rooms.
+#
+# Known room versions are listed here:
+# https://matrix.org/docs/spec/#complete-list-of-room-versions
+#
+# For example, for room version 1, default_room_version should be set
+# to "1".
+#
+#default_room_version: "1"
+
 # The GC threshold parameters to pass to `gc.set_threshold`, if defined
 #
 #gc_thresholds: [700, 10, 10]
@@ -1095,9 +1105,9 @@ password_config:
 #
 # 'search_all_users' defines whether to search all users visible to your HS
 # when searching the user directory, rather than limiting to users visible
-# in public rooms.  Defaults to false.  If you set it True, you'll have to run
-# UPDATE user_directory_stream_pos SET stream_id = NULL;
-# on your database to tell it to rebuild the user_directory search indexes.
+# in public rooms.  Defaults to false.  If you set it True, you'll have to
+# rebuild the user_directory search indexes, see
+# https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
 #
 #user_directory:
 #  enabled: true
@@ -1155,6 +1165,22 @@ password_config:
 #
 
 
+
+# Local statistics collection. Used in populating the room directory.
+#
+# 'bucket_size' controls how large each statistics timeslice is. It can
+# be defined in a human readable short form -- e.g. "1d", "1y".
+#
+# 'retention' controls how long historical statistics will be kept for.
+# It can be defined in a human readable short form -- e.g. "1d", "1y".
+#
+#
+#stats:
+#   enabled: true
+#   bucket_size: 1d
+#   retention: 1y
+
+
 # Server Notices room configuration
 #
 # Uncomment this section to enable a room which can be used to send notices
diff --git a/docs/user_directory.md b/docs/user_directory.md
index 4c8ee44f37..e64aa453cc 100644
--- a/docs/user_directory.md
+++ b/docs/user_directory.md
@@ -7,11 +7,7 @@ who are present in a publicly viewable room present on the server.
 
 The directory info is stored in various tables, which can (typically after
 DB corruption) get stale or out of sync.  If this happens, for now the
-quickest solution to fix it is:
-
-```
-UPDATE user_directory_stream_pos SET stream_id = NULL;
-```
-
-and restart the synapse, which should then start a background task to
+solution to fix it is to execute the SQL here
+https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/delta/53/user_dir_populate.sql
+and then restart synapse. This should then start a background task to
 flush the current tables and regenerate the directory.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index bf9e810da6..d0e8d7c21b 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -27,4 +27,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "0.99.4"
+__version__ = "0.99.5.2"
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 8547a63535..ee129c8689 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -23,6 +23,9 @@ MAX_DEPTH = 2**63 - 1
 # the maximum length for a room alias is 255 characters
 MAX_ALIAS_LENGTH = 255
 
+# the maximum length for a user id is 255 characters
+MAX_USERID_LENGTH = 255
+
 
 class Membership(object):
 
@@ -76,6 +79,7 @@ class EventTypes(object):
 
     RoomHistoryVisibility = "m.room.history_visibility"
     CanonicalAlias = "m.room.canonical_alias"
+    Encryption = "m.room.encryption"
     RoomAvatar = "m.room.avatar"
     RoomEncryption = "m.room.encryption"
     GuestAccess = "m.room.guest_access"
@@ -116,3 +120,11 @@ class UserTypes(object):
     """
     SUPPORT = "support"
     ALL_USER_TYPES = (SUPPORT,)
+
+
+class RelationTypes(object):
+    """The types of relations known to this server.
+    """
+    ANNOTATION = "m.annotation"
+    REPLACE = "m.replace"
+    REFERENCE = "m.reference"
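
The new `RelationTypes` constants back the experimental relations support (MSC1849). As a rough sketch of what a relation looks like on the wire, a reaction is an event whose content carries an `m.relates_to` block pointing at its parent; the parent event ID below is made up for illustration.

```
# Hedged sketch of an MSC1849-style annotation ("reaction").
reaction_event = {
    "type": "m.reaction",
    "content": {
        "m.relates_to": {
            "rel_type": "m.annotation",   # RelationTypes.ANNOTATION
            "event_id": "$parent_event_id",  # invented for illustration
            "key": "👍",
        }
    },
}
```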
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index ff89259dec..e91697049c 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -328,9 +328,23 @@ class RoomKeysVersionError(SynapseError):
         self.current_version = current_version
 
 
+class UnsupportedRoomVersionError(SynapseError):
+    """The client's request to create a room used a room version that the server does
+    not support."""
+    def __init__(self):
+        super(UnsupportedRoomVersionError, self).__init__(
+            code=400,
+            msg="Homeserver does not support this room version",
+            errcode=Codes.UNSUPPORTED_ROOM_VERSION,
+        )
+
+
 class IncompatibleRoomVersionError(SynapseError):
-    """A server is trying to join a room whose version it does not support."""
+    """A server is trying to join a room whose version it does not support.
 
+    Unlike UnsupportedRoomVersionError, it is specific to the case of the make_join
+    failing.
+    """
     def __init__(self, room_version):
         super(IncompatibleRoomVersionError, self).__init__(
             code=400,
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index e77abe1040..4085bd10b9 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -19,13 +19,15 @@ class EventFormatVersions(object):
     """This is an internal enum for tracking the version of the event format,
     independently from the room version.
     """
-    V1 = 1   # $id:server format
-    V2 = 2   # MSC1659-style $hash format: introduced for room v3
+    V1 = 1   # $id:server event id format
+    V2 = 2   # MSC1659-style $hash event id format: introduced for room v3
+    V3 = 3   # MSC1884-style $hash format: introduced for room v4
 
 
 KNOWN_EVENT_FORMAT_VERSIONS = {
     EventFormatVersions.V1,
     EventFormatVersions.V2,
+    EventFormatVersions.V3,
 }
 
 
@@ -75,10 +77,12 @@ class RoomVersions(object):
         EventFormatVersions.V2,
         StateResolutionVersions.V2,
     )
-
-
-# the version we will give rooms which are created on this server
-DEFAULT_ROOM_VERSION = RoomVersions.V1
+    V4 = RoomVersion(
+        "4",
+        RoomDisposition.STABLE,
+        EventFormatVersions.V3,
+        StateResolutionVersions.V2,
+    )
 
 
 KNOWN_ROOM_VERSIONS = {
@@ -87,5 +91,6 @@ KNOWN_ROOM_VERSIONS = {
         RoomVersions.V2,
         RoomVersions.V3,
         RoomVersions.STATE_V2_TEST,
+        RoomVersions.V4,
     )
 }   # type: dict[str, RoomVersion]
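
Room version 4 differs from version 3 only in its event ID format: per MSC1884, event IDs are derived from the event's reference hash using URL-safe rather than standard base64. A loose sketch of the idea follows; real reference hashes are computed over the canonical JSON of the redacted event, which this sketch glosses over.

```
import base64
import hashlib

def sketch_event_id(canonical_redacted_event_bytes, url_safe=True):
    # Illustration only: the input stands in for the canonical redacted event.
    digest = hashlib.sha256(canonical_redacted_event_bytes).digest()
    encode = base64.urlsafe_b64encode if url_safe else base64.b64encode
    return "$" + encode(digest).decode("ascii").rstrip("=")

print(sketch_event_id(b'{"type": "m.room.message"}'))         # room v4 style
print(sketch_event_id(b'{"type": "m.room.message"}', False))  # room v3 style
```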
diff --git a/synapse/api/urls.py b/synapse/api/urls.py
index 3c6bddff7a..e16c386a14 100644
--- a/synapse/api/urls.py
+++ b/synapse/api/urls.py
@@ -26,6 +26,7 @@ CLIENT_API_PREFIX = "/_matrix/client"
 FEDERATION_PREFIX = "/_matrix/federation"
 FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1"
 FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2"
+FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable"
 STATIC_PREFIX = "/_matrix/static"
 WEB_CLIENT_PREFIX = "/_matrix/client"
 CONTENT_REPO_PREFIX = "/_matrix/content"
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 08199a5e8d..8cc990399f 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -344,15 +344,21 @@ class _LimitedHostnameResolver(object):
 
     def resolveHostName(self, resolutionReceiver, hostName, portNumber=0,
                         addressTypes=None, transportSemantics='TCP'):
-        # Note this is happening deep within the reactor, so we don't need to
-        # worry about log contexts.
-
         # We need this function to return `resolutionReceiver` so we do all the
         # actual logic involving deferreds in a separate function.
-        self._resolve(
-            resolutionReceiver, hostName, portNumber,
-            addressTypes, transportSemantics,
-        )
+
+        # even though this is happening within the depths of twisted, we need to drop
+        # our logcontext before starting _resolve, otherwise: (a) _resolve will drop
+        # the logcontext if it returns an incomplete deferred; (b) _resolve will
+        # call the resolutionReceiver *with* a logcontext, which it won't be expecting.
+        with PreserveLoggingContext():
+            self._resolve(
+                resolutionReceiver,
+                hostName,
+                portNumber,
+                addressTypes,
+                transportSemantics,
+            )
 
         return resolutionReceiver
 
diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py
index 864f1eac48..a16e037f32 100644
--- a/synapse/app/client_reader.py
+++ b/synapse/app/client_reader.py
@@ -38,6 +38,7 @@ from synapse.replication.slave.storage.devices import SlavedDeviceStore
 from synapse.replication.slave.storage.directory import DirectoryStore
 from synapse.replication.slave.storage.events import SlavedEventStore
 from synapse.replication.slave.storage.keys import SlavedKeyStore
+from synapse.replication.slave.storage.profile import SlavedProfileStore
 from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore
 from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
 from synapse.replication.slave.storage.registration import SlavedRegistrationStore
@@ -81,6 +82,7 @@ class ClientReaderSlavedStore(
     SlavedApplicationServiceStore,
     SlavedRegistrationStore,
     SlavedTransactionStore,
+    SlavedProfileStore,
     SlavedClientIpStore,
     BaseSlavedStore,
 ):
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 727fdc54d8..5c4fc8ff21 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from .api import ApiConfig
 from .appservice import AppServiceConfig
 from .captcha import CaptchaConfig
@@ -36,20 +37,41 @@ from .saml2_config import SAML2Config
 from .server import ServerConfig
 from .server_notices_config import ServerNoticesConfig
 from .spam_checker import SpamCheckerConfig
+from .stats import StatsConfig
 from .tls import TlsConfig
 from .user_directory import UserDirectoryConfig
 from .voip import VoipConfig
 from .workers import WorkerConfig
 
 
-class HomeServerConfig(ServerConfig, TlsConfig, DatabaseConfig, LoggingConfig,
-                       RatelimitConfig, ContentRepositoryConfig, CaptchaConfig,
-                       VoipConfig, RegistrationConfig, MetricsConfig, ApiConfig,
-                       AppServiceConfig, KeyConfig, SAML2Config, CasConfig,
-                       JWTConfig, PasswordConfig, EmailConfig,
-                       WorkerConfig, PasswordAuthProviderConfig, PushConfig,
-                       SpamCheckerConfig, GroupsConfig, UserDirectoryConfig,
-                       ConsentConfig,
-                       ServerNoticesConfig, RoomDirectoryConfig,
-                       ):
+class HomeServerConfig(
+    ServerConfig,
+    TlsConfig,
+    DatabaseConfig,
+    LoggingConfig,
+    RatelimitConfig,
+    ContentRepositoryConfig,
+    CaptchaConfig,
+    VoipConfig,
+    RegistrationConfig,
+    MetricsConfig,
+    ApiConfig,
+    AppServiceConfig,
+    KeyConfig,
+    SAML2Config,
+    CasConfig,
+    JWTConfig,
+    PasswordConfig,
+    EmailConfig,
+    WorkerConfig,
+    PasswordAuthProviderConfig,
+    PushConfig,
+    SpamCheckerConfig,
+    GroupsConfig,
+    UserDirectoryConfig,
+    ConsentConfig,
+    StatsConfig,
+    ServerNoticesConfig,
+    RoomDirectoryConfig,
+):
     pass
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 1b8968608e..e763e19e15 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -20,6 +20,7 @@ import os.path
 
 from netaddr import IPSet
 
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.python_dependencies import DependencyException, check_requirements
 
@@ -35,6 +36,8 @@ logger = logging.Logger(__name__)
 # in the list.
 DEFAULT_BIND_ADDRESSES = ['::', '0.0.0.0']
 
+DEFAULT_ROOM_VERSION = "1"
+
 
 class ServerConfig(Config):
 
@@ -88,6 +91,22 @@ class ServerConfig(Config):
             "restrict_public_rooms_to_local_users", False,
         )
 
+        default_room_version = config.get(
+            "default_room_version", DEFAULT_ROOM_VERSION,
+        )
+
+        # Ensure room version is a str
+        default_room_version = str(default_room_version)
+
+        if default_room_version not in KNOWN_ROOM_VERSIONS:
+            raise ConfigError(
+                "Unknown default_room_version: %s, known room versions: %s" %
+                (default_room_version, list(KNOWN_ROOM_VERSIONS.keys()))
+            )
+
+        # Get the actual room version object rather than just the identifier
+        self.default_room_version = KNOWN_ROOM_VERSIONS[default_room_version]
+
         # whether to enable search. If disabled, new entries will not be inserted
         # into the search tables and they will not be indexed. Users will receive
         # errors when attempting to search for messages.
@@ -101,6 +120,11 @@ class ServerConfig(Config):
             "block_non_admin_invites", False,
         )
 
+        # Whether to enable experimental MSC1849 (aka relations) support
+        self.experimental_msc1849_support_enabled = config.get(
+            "experimental_msc1849_support_enabled", False,
+        )
+
         # Options to control access by tracking MAU
         self.limit_usage_by_mau = config.get("limit_usage_by_mau", False)
         self.max_mau_value = 0
@@ -305,6 +329,10 @@ class ServerConfig(Config):
             unsecure_port = 8008
 
         pid_file = os.path.join(data_dir_path, "homeserver.pid")
+
+        # Bring DEFAULT_ROOM_VERSION into the local-scope for use in the
+        # default config string
+        default_room_version = DEFAULT_ROOM_VERSION
         return """\
         ## Server ##
 
@@ -379,6 +407,16 @@ class ServerConfig(Config):
         #
         #restrict_public_rooms_to_local_users: true
 
+        # The default room version for newly created rooms.
+        #
+        # Known room versions are listed here:
+        # https://matrix.org/docs/spec/#complete-list-of-room-versions
+        #
+        # For example, for room version 1, default_room_version should be set
+        # to "1".
+        #
+        #default_room_version: "%(default_room_version)s"
+
         # The GC threshold parameters to pass to `gc.set_threshold`, if defined
         #
         #gc_thresholds: [700, 10, 10]
diff --git a/synapse/config/stats.py b/synapse/config/stats.py
new file mode 100644
index 0000000000..80fc1b9dd0
--- /dev/null
+++ b/synapse/config/stats.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import sys
+
+from ._base import Config
+
+
+class StatsConfig(Config):
+    """Stats Configuration
+    Configuration for the behaviour of synapse's stats engine
+    """
+
+    def read_config(self, config):
+        self.stats_enabled = True
+        self.stats_bucket_size = 86400
+        self.stats_retention = sys.maxsize
+        stats_config = config.get("stats", None)
+        if stats_config:
+            self.stats_enabled = stats_config.get("enabled", self.stats_enabled)
+            self.stats_bucket_size = (
+                self.parse_duration(stats_config.get("bucket_size", "1d")) / 1000
+            )
+            self.stats_retention = (
+                self.parse_duration(
+                    stats_config.get("retention", "%ds" % (sys.maxsize,))
+                )
+                / 1000
+            )
+
+    def default_config(self, config_dir_path, server_name, **kwargs):
+        return """
+        # Local statistics collection. Used in populating the room directory.
+        #
+        # 'bucket_size' controls how large each statistics timeslice is. It can
+        # be defined in a human readable short form -- e.g. "1d", "1y".
+        #
+        # 'retention' controls how long historical statistics will be kept for.
+        # It can be defined in a human readable short form -- e.g. "1d", "1y".
+        #
+        #
+        #stats:
+        #   enabled: true
+        #   bucket_size: 1d
+        #   retention: 1y
+        """
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index 142754a7dc..023997ccde 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -43,9 +43,9 @@ class UserDirectoryConfig(Config):
         #
         # 'search_all_users' defines whether to search all users visible to your HS
         # when searching the user directory, rather than limiting to users visible
-        # in public rooms.  Defaults to false.  If you set it True, you'll have to run
-        # UPDATE user_directory_stream_pos SET stream_id = NULL;
-        # on your database to tell it to rebuild the user_directory search indexes.
+        # in public rooms.  Defaults to false.  If you set it True, you'll have to
+        # rebuild the user_directory search indexes, see
+        # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
         #
         #user_directory:
         #  enabled: true
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index d8ba870cca..5756478ad7 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -15,12 +15,12 @@
 # limitations under the License.
 
 import logging
-from collections import namedtuple
 
+import six
 from six import raise_from
 from six.moves import urllib
 
-import nacl.signing
+import attr
 from signedjson.key import (
     decode_verify_key_bytes,
     encode_verify_key_base64,
@@ -43,6 +43,7 @@ from synapse.api.errors import (
     RequestSendFailed,
     SynapseError,
 )
+from synapse.storage.keys import FetchKeyResult
 from synapse.util import logcontext, unwrapFirstError
 from synapse.util.logcontext import (
     LoggingContext,
@@ -56,22 +57,32 @@ from synapse.util.retryutils import NotRetryingDestination
 logger = logging.getLogger(__name__)
 
 
-VerifyKeyRequest = namedtuple("VerifyRequest", (
-    "server_name", "key_ids", "json_object", "deferred"
-))
-"""
-A request for a verify key to verify a JSON object.
+@attr.s(slots=True, cmp=False)
+class VerifyKeyRequest(object):
+    """
+    A request for a verify key to verify a JSON object.
+
+    Attributes:
+        server_name(str): The name of the server to verify against.
+
+        key_ids(set[str]): The set of key_ids to that could be used to verify the
+            JSON object
+
+        json_object(dict): The JSON object to verify.
 
-Attributes:
-    server_name(str): The name of the server to verify against.
-    key_ids(set(str)): The set of key_ids to that could be used to verify the
-        JSON object
-    json_object(dict): The JSON object to verify.
-    deferred(Deferred[str, str, nacl.signing.VerifyKey]):
-        A deferred (server_name, key_id, verify_key) tuple that resolves when
-        a verify key has been fetched. The deferreds' callbacks are run with no
-        logcontext.
-"""
+        deferred(Deferred[str, str, nacl.signing.VerifyKey]):
+            A deferred (server_name, key_id, verify_key) tuple that resolves when
+            a verify key has been fetched. The deferreds' callbacks are run with no
+            logcontext.
+
+            If we are unable to find a key which satisfies the request, the deferred
+            errbacks with an M_UNAUTHORIZED SynapseError.
+    """
+
+    server_name = attr.ib()
+    key_ids = attr.ib()
+    json_object = attr.ib()
+    deferred = attr.ib()
 
 
 class KeyLookupError(ValueError):
@@ -80,12 +91,13 @@ class KeyLookupError(ValueError):
 
 class Keyring(object):
     def __init__(self, hs):
-        self.store = hs.get_datastore()
         self.clock = hs.get_clock()
-        self.client = hs.get_http_client()
-        self.config = hs.get_config()
-        self.perspective_servers = self.config.perspectives
-        self.hs = hs
+
+        self._key_fetchers = (
+            StoreKeyFetcher(hs),
+            PerspectivesKeyFetcher(hs),
+            ServerKeyFetcher(hs),
+        )
 
         # map from server name to Deferred. Has an entry for each server with
         # an ongoing key download; the Deferred completes once the download
@@ -96,9 +108,7 @@ class Keyring(object):
 
     def verify_json_for_server(self, server_name, json_object):
         return logcontext.make_deferred_yieldable(
-            self.verify_json_objects_for_server(
-                [(server_name, json_object)]
-            )[0]
+            self.verify_json_objects_for_server([(server_name, json_object)])[0]
         )
 
     def verify_json_objects_for_server(self, server_and_json):
@@ -130,18 +140,15 @@ class Keyring(object):
             if not key_ids:
                 return defer.fail(
                     SynapseError(
-                        400,
-                        "Not signed by %s" % (server_name,),
-                        Codes.UNAUTHORIZED,
+                        400, "Not signed by %s" % (server_name,), Codes.UNAUTHORIZED
                     )
                 )
 
-            logger.debug("Verifying for %s with key_ids %s",
-                         server_name, key_ids)
+            logger.debug("Verifying for %s with key_ids %s", server_name, key_ids)
 
             # add the key request to the queue, but don't start it off yet.
             verify_request = VerifyKeyRequest(
-                server_name, key_ids, json_object, defer.Deferred(),
+                server_name, key_ids, json_object, defer.Deferred()
             )
             verify_requests.append(verify_request)
 
@@ -179,16 +186,12 @@ class Keyring(object):
             # any other lookups until we have finished.
             # The deferreds are called with no logcontext.
             server_to_deferred = {
-                rq.server_name: defer.Deferred()
-                for rq in verify_requests
+                rq.server_name: defer.Deferred() for rq in verify_requests
             }
 
             # We want to wait for any previous lookups to complete before
             # proceeding.
-            yield self.wait_for_previous_lookups(
-                [rq.server_name for rq in verify_requests],
-                server_to_deferred,
-            )
+            yield self.wait_for_previous_lookups(server_to_deferred)
 
             # Actually start fetching keys.
             self._get_server_verify_keys(verify_requests)
@@ -216,19 +219,16 @@ class Keyring(object):
                 return res
 
             for verify_request in verify_requests:
-                verify_request.deferred.addBoth(
-                    remove_deferreds, verify_request,
-                )
+                verify_request.deferred.addBoth(remove_deferreds, verify_request)
         except Exception:
             logger.exception("Error starting key lookups")
 
     @defer.inlineCallbacks
-    def wait_for_previous_lookups(self, server_names, server_to_deferred):
+    def wait_for_previous_lookups(self, server_to_deferred):
         """Waits for any previous key lookups for the given servers to finish.
 
         Args:
-            server_names (list): list of server_names we want to lookup
-            server_to_deferred (dict): server_name to deferred which gets
+            server_to_deferred (dict[str, Deferred]): server_name to deferred which gets
                 resolved once we've finished looking up keys for that server.
                 The Deferreds should be regular twisted ones which call their
                 callbacks with no logcontext.
@@ -241,14 +241,15 @@ class Keyring(object):
         while True:
             wait_on = [
                 (server_name, self.key_downloads[server_name])
-                for server_name in server_names
+                for server_name in server_to_deferred.keys()
                 if server_name in self.key_downloads
             ]
             if not wait_on:
                 break
             logger.info(
                 "Waiting for existing lookups for %s to complete [loop %i]",
-                [w[0] for w in wait_on], loop_count,
+                [w[0] for w in wait_on],
+                loop_count,
             )
             with PreserveLoggingContext():
                 yield defer.DeferredList((w[1] for w in wait_on))
@@ -279,13 +280,6 @@ class Keyring(object):
             verify_requests (list[VerifyKeyRequest]): list of verify requests
         """
 
-        # These are functions that produce keys given a list of key ids
-        key_fetch_fns = (
-            self.get_keys_from_store,  # First try the local store
-            self.get_keys_from_perspectives,  # Then try via perspectives
-            self.get_keys_from_server,  # Then try directly
-        )
-
         @defer.inlineCallbacks
         def do_iterations():
             with Measure(self.clock, "get_server_verify_keys"):
@@ -296,8 +290,8 @@ class Keyring(object):
                         verify_request.key_ids
                     )
 
-                for fn in key_fetch_fns:
-                    results = yield fn(missing_keys.items())
+                for f in self._key_fetchers:
+                    results = yield f.get_keys(missing_keys.items())
 
                     # We now need to figure out which verify requests we have keys
                     # for and which we don't
@@ -315,11 +309,15 @@ class Keyring(object):
                         # complete this VerifyKeyRequest.
                         result_keys = results.get(server_name, {})
                         for key_id in verify_request.key_ids:
-                            key = result_keys.get(key_id)
-                            if key:
+                            fetch_key_result = result_keys.get(key_id)
+                            if fetch_key_result:
                                 with PreserveLoggingContext():
                                     verify_request.deferred.callback(
-                                        (server_name, key_id, key)
+                                        (
+                                            server_name,
+                                            key_id,
+                                            fetch_key_result.verify_key,
+                                        )
                                     )
                                 break
                         else:
@@ -335,13 +333,14 @@ class Keyring(object):
 
                 with PreserveLoggingContext():
                     for verify_request in requests_missing_keys:
-                        verify_request.deferred.errback(SynapseError(
-                            401,
-                            "No key for %s with id %s" % (
-                                verify_request.server_name, verify_request.key_ids,
-                            ),
-                            Codes.UNAUTHORIZED,
-                        ))
+                        verify_request.deferred.errback(
+                            SynapseError(
+                                401,
+                                "No key for %s with id %s"
+                                % (verify_request.server_name, verify_request.key_ids),
+                                Codes.UNAUTHORIZED,
+                            )
+                        )
 
         def on_err(err):
             with PreserveLoggingContext():
@@ -351,17 +350,31 @@ class Keyring(object):
 
         run_in_background(do_iterations).addErrback(on_err)
 
-    @defer.inlineCallbacks
-    def get_keys_from_store(self, server_name_and_key_ids):
+
+class KeyFetcher(object):
+    def get_keys(self, server_name_and_key_ids):
         """
         Args:
-            server_name_and_key_ids (iterable(Tuple[str, iterable[str]]):
+            server_name_and_key_ids (iterable[Tuple[str, iterable[str]]]):
                 list of (server_name, iterable[key_id]) tuples to fetch keys for
+                Note that the iterables may be iterated more than once.
 
         Returns:
-            Deferred: resolves to dict[str, dict[str, VerifyKey|None]]: map from
-                server_name -> key_id -> VerifyKey
+            Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult|None]]]:
+                map from server_name -> key_id -> FetchKeyResult
         """
+        raise NotImplementedError
+
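# [Editor's note] Toy illustration of the get_keys() contract documented above:
# the argument is an iterable of (server_name, key_ids) pairs, and the deferred
# resolves to a two-level dict keyed by server_name and then key_id.
EXAMPLE_GET_KEYS_ARG = [("example.org", ["ed25519:abc123"])]
# resolves to: {"example.org": {"ed25519:abc123": <FetchKeyResult or None>}}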
+
+class StoreKeyFetcher(KeyFetcher):
+    """KeyFetcher impl which fetches keys from our data store"""
+
+    def __init__(self, hs):
+        self.store = hs.get_datastore()
+
+    @defer.inlineCallbacks
+    def get_keys(self, server_name_and_key_ids):
+        """see KeyFetcher.get_keys"""
         keys_to_fetch = (
             (server_name, key_id)
             for server_name, key_ids in server_name_and_key_ids
@@ -373,8 +386,135 @@ class Keyring(object):
             keys.setdefault(server_name, {})[key_id] = key
         defer.returnValue(keys)
 
+
+class BaseV2KeyFetcher(object):
+    def __init__(self, hs):
+        self.store = hs.get_datastore()
+        self.config = hs.get_config()
+
+    @defer.inlineCallbacks
+    def process_v2_response(
+        self, from_server, response_json, time_added_ms, requested_ids=[]
+    ):
+        """Parse a 'Server Keys' structure from the result of a /key request
+
+        This is used to parse either the entirety of the response from
+        GET /_matrix/key/v2/server, or a single entry from the list returned by
+        POST /_matrix/key/v2/query.
+
+        Checks that each signature in the response that claims to come from the origin
+        server is valid, and that there is at least one such signature.
+
+        Stores the json in server_keys_json so that it can be used for future responses
+        to /_matrix/key/v2/query.
+
+        Args:
+            from_server (str): the name of the server producing this result: either
+                the origin server for a /_matrix/key/v2/server request, or the notary
+                for a /_matrix/key/v2/query.
+
+            response_json (dict): the json-decoded Server Keys response object
+
+            time_added_ms (int): the timestamp to record in server_keys_json
+
+            requested_ids (iterable[str]): a list of the key IDs that were requested.
+                We will store the json for these key ids as well as any that are
+                actually in the response
+
+        Returns:
+            Deferred[dict[str, FetchKeyResult]]: map from key_id to result object
+        """
+        ts_valid_until_ms = response_json[u"valid_until_ts"]
+
+        # start by extracting the keys from the response, since they may be required
+        # to validate the signature on the response.
+        verify_keys = {}
+        for key_id, key_data in response_json["verify_keys"].items():
+            if is_signing_algorithm_supported(key_id):
+                key_base64 = key_data["key"]
+                key_bytes = decode_base64(key_base64)
+                verify_key = decode_verify_key_bytes(key_id, key_bytes)
+                verify_keys[key_id] = FetchKeyResult(
+                    verify_key=verify_key, valid_until_ts=ts_valid_until_ms
+                )
+
+        server_name = response_json["server_name"]
+        verified = False
+        for key_id in response_json["signatures"].get(server_name, {}):
+            # each of the keys used for the signature must be present in the response
+            # json.
+            key = verify_keys.get(key_id)
+            if not key:
+                raise KeyLookupError(
+                    "Key response is signed by key id %s:%s but that key is not "
+                    "present in the response" % (server_name, key_id)
+                )
+
+            verify_signed_json(response_json, server_name, key.verify_key)
+            verified = True
+
+        if not verified:
+            raise KeyLookupError(
+                "Key response for %s is not signed by the origin server"
+                % (server_name,)
+            )
+
+        for key_id, key_data in response_json["old_verify_keys"].items():
+            if is_signing_algorithm_supported(key_id):
+                key_base64 = key_data["key"]
+                key_bytes = decode_base64(key_base64)
+                verify_key = decode_verify_key_bytes(key_id, key_bytes)
+                verify_keys[key_id] = FetchKeyResult(
+                    verify_key=verify_key, valid_until_ts=key_data["expired_ts"]
+                )
+
+        # re-sign the json with our own key, so that it is ready if we are asked to
+        # give it out as a notary server
+        signed_key_json = sign_json(
+            response_json, self.config.server_name, self.config.signing_key[0]
+        )
+
+        signed_key_json_bytes = encode_canonical_json(signed_key_json)
+
+        # for reasons I don't quite understand, we store this json for the key ids we
+        # requested, as well as those we got.
+        updated_key_ids = set(requested_ids)
+        updated_key_ids.update(verify_keys)
+
+        yield logcontext.make_deferred_yieldable(
+            defer.gatherResults(
+                [
+                    run_in_background(
+                        self.store.store_server_keys_json,
+                        server_name=server_name,
+                        key_id=key_id,
+                        from_server=from_server,
+                        ts_now_ms=time_added_ms,
+                        ts_expires_ms=ts_valid_until_ms,
+                        key_json_bytes=signed_key_json_bytes,
+                    )
+                    for key_id in updated_key_ids
+                ],
+                consumeErrors=True,
+            ).addErrback(unwrapFirstError)
+        )
+
+        defer.returnValue(verify_keys)
+
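# [Editor's note] Toy example of the 'Server Keys' structure that
# process_v2_response parses; the layout follows the docstring above, and the
# base64 strings are placeholders rather than real keys or signatures.
EXAMPLE_SERVER_KEYS_RESPONSE = {
    "server_name": "example.org",
    "valid_until_ts": 1559000000000,
    "verify_keys": {"ed25519:abc123": {"key": "<unpadded-base64-public-key>"}},
    "old_verify_keys": {
        "ed25519:old001": {
            "key": "<unpadded-base64-public-key>",
            "expired_ts": 1530000000000,
        }
    },
    "signatures": {"example.org": {"ed25519:abc123": "<unpadded-base64-signature>"}},
}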
+
+class PerspectivesKeyFetcher(BaseV2KeyFetcher):
+    """KeyFetcher impl which fetches keys from the "perspectives" servers"""
+
+    def __init__(self, hs):
+        super(PerspectivesKeyFetcher, self).__init__(hs)
+        self.clock = hs.get_clock()
+        self.client = hs.get_http_client()
+        self.perspective_servers = self.config.perspectives
+
     @defer.inlineCallbacks
-    def get_keys_from_perspectives(self, server_name_and_key_ids):
+    def get_keys(self, server_name_and_key_ids):
+        """see KeyFetcher.get_keys"""
+
         @defer.inlineCallbacks
         def get_key(perspective_name, perspective_keys):
             try:
@@ -383,25 +523,26 @@ class Keyring(object):
                 )
                 defer.returnValue(result)
             except KeyLookupError as e:
-                logger.warning(
-                    "Key lookup failed from %r: %s", perspective_name, e,
-                )
+                logger.warning("Key lookup failed from %r: %s", perspective_name, e)
             except Exception as e:
                 logger.exception(
                     "Unable to get key from %r: %s %s",
                     perspective_name,
-                    type(e).__name__, str(e),
+                    type(e).__name__,
+                    str(e),
                 )
 
             defer.returnValue({})
 
-        results = yield logcontext.make_deferred_yieldable(defer.gatherResults(
-            [
-                run_in_background(get_key, p_name, p_keys)
-                for p_name, p_keys in self.perspective_servers.items()
-            ],
-            consumeErrors=True,
-        ).addErrback(unwrapFirstError))
+        results = yield logcontext.make_deferred_yieldable(
+            defer.gatherResults(
+                [
+                    run_in_background(get_key, p_name, p_keys)
+                    for p_name, p_keys in self.perspective_servers.items()
+                ],
+                consumeErrors=True,
+            ).addErrback(unwrapFirstError)
+        )
 
         union_of_keys = {}
         for result in results:
@@ -411,33 +552,30 @@ class Keyring(object):
         defer.returnValue(union_of_keys)
 
     @defer.inlineCallbacks
-    def get_keys_from_server(self, server_name_and_key_ids):
-        results = yield logcontext.make_deferred_yieldable(defer.gatherResults(
-            [
-                run_in_background(
-                    self.get_server_verify_key_v2_direct,
-                    server_name,
-                    key_ids,
-                )
-                for server_name, key_ids in server_name_and_key_ids
-            ],
-            consumeErrors=True,
-        ).addErrback(unwrapFirstError))
-
-        merged = {}
-        for result in results:
-            merged.update(result)
+    def get_server_verify_key_v2_indirect(
+        self, server_names_and_key_ids, perspective_name, perspective_keys
+    ):
+        """
+        Args:
+            server_names_and_key_ids (iterable[Tuple[str, iterable[str]]]):
+                list of (server_name, iterable[key_id]) tuples to fetch keys for
+            perspective_name (str): name of the notary server to query for the keys
+            perspective_keys (dict[str, VerifyKey]): map of key_id->key for the
+                notary server
 
-        defer.returnValue({
-            server_name: keys
-            for server_name, keys in merged.items()
-            if keys
-        })
+        Returns:
+            Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult]]]: map
+                from server_name -> key_id -> FetchKeyResult
 
-    @defer.inlineCallbacks
-    def get_server_verify_key_v2_indirect(self, server_names_and_key_ids,
-                                          perspective_name,
-                                          perspective_keys):
+        Raises:
+            KeyLookupError if there was an error processing the entire response from
+                the server
+        """
+        logger.info(
+            "Requesting keys %s from notary server %s",
+            server_names_and_key_ids,
+            perspective_name,
+        )
         # TODO(mark): Set the minimum_valid_until_ts to that needed by
         # the events being validated or the current time if validating
         # an incoming request.
@@ -448,9 +586,7 @@ class Keyring(object):
                 data={
                     u"server_keys": {
                         server_name: {
-                            key_id: {
-                                u"minimum_valid_until_ts": 0
-                            } for key_id in key_ids
+                            key_id: {u"minimum_valid_until_ts": 0} for key_id in key_ids
                         }
                         for server_name, key_ids in server_names_and_key_ids
                     }
@@ -458,240 +594,174 @@ class Keyring(object):
                 long_retries=True,
             )
         except (NotRetryingDestination, RequestSendFailed) as e:
-            raise_from(
-                KeyLookupError("Failed to connect to remote server"), e,
-            )
+            raise_from(KeyLookupError("Failed to connect to remote server"), e)
         except HttpResponseException as e:
-            raise_from(
-                KeyLookupError("Remote server returned an error"), e,
-            )
+            raise_from(KeyLookupError("Remote server returned an error"), e)
 
         keys = {}
+        added_keys = []
 
-        responses = query_response["server_keys"]
+        time_now_ms = self.clock.time_msec()
 
-        for response in responses:
-            if (u"signatures" not in response
-                    or perspective_name not in response[u"signatures"]):
+        for response in query_response["server_keys"]:
+            # do this first, so that we can give useful errors thereafter
+            server_name = response.get("server_name")
+            if not isinstance(server_name, six.string_types):
                 raise KeyLookupError(
-                    "Key response not signed by perspective server"
-                    " %r" % (perspective_name,)
+                    "Malformed response from key notary server %s: invalid server_name"
+                    % (perspective_name,)
                 )
 
-            verified = False
-            for key_id in response[u"signatures"][perspective_name]:
-                if key_id in perspective_keys:
-                    verify_signed_json(
-                        response,
-                        perspective_name,
-                        perspective_keys[key_id]
-                    )
-                    verified = True
-
-            if not verified:
-                logging.info(
-                    "Response from perspective server %r not signed with a"
-                    " known key, signed with: %r, known keys: %r",
+            try:
+                processed_response = yield self._process_perspectives_response(
                     perspective_name,
-                    list(response[u"signatures"][perspective_name]),
-                    list(perspective_keys)
+                    perspective_keys,
+                    response,
+                    time_added_ms=time_now_ms,
                 )
-                raise KeyLookupError(
-                    "Response not signed with a known key for perspective"
-                    " server %r" % (perspective_name,)
+            except KeyLookupError as e:
+                logger.warning(
+                    "Error processing response from key notary server %s for origin "
+                    "server %s: %s",
+                    perspective_name,
+                    server_name,
+                    e,
                 )
+                # we continue to process the rest of the response
+                continue
 
-            processed_response = yield self.process_v2_response(
-                perspective_name, response
+            added_keys.extend(
+                (server_name, key_id, key) for key_id, key in processed_response.items()
             )
-            server_name = response["server_name"]
-
             keys.setdefault(server_name, {}).update(processed_response)
 
-        yield logcontext.make_deferred_yieldable(defer.gatherResults(
-            [
-                run_in_background(
-                    self.store_keys,
-                    server_name=server_name,
-                    from_server=perspective_name,
-                    verify_keys=response_keys,
-                )
-                for server_name, response_keys in keys.items()
-            ],
-            consumeErrors=True
-        ).addErrback(unwrapFirstError))
+        yield self.store.store_server_verify_keys(
+            perspective_name, time_now_ms, added_keys
+        )
 
         defer.returnValue(keys)
 
+    def _process_perspectives_response(
+        self, perspective_name, perspective_keys, response, time_added_ms
+    ):
+        """Parse a 'Server Keys' structure from the result of a /key/query request
+
+        Checks that the entry is correctly signed by the perspectives server, and then
+        passes over to process_v2_response
+
+        Args:
+            perspective_name (str): the name of the notary server that produced this
+                result
+
+            perspective_keys (dict[str, VerifyKey]): map of key_id->key for the
+                notary server
+
+            response (dict): the json-decoded Server Keys response object
+
+            time_added_ms (int): the timestamp to record in server_keys_json
+
+        Returns:
+            Deferred[dict[str, FetchKeyResult]]: map from key_id to result object
+        """
+        if (
+            u"signatures" not in response
+            or perspective_name not in response[u"signatures"]
+        ):
+            raise KeyLookupError("Response not signed by the notary server")
+
+        verified = False
+        for key_id in response[u"signatures"][perspective_name]:
+            if key_id in perspective_keys:
+                verify_signed_json(response, perspective_name, perspective_keys[key_id])
+                verified = True
+
+        if not verified:
+            raise KeyLookupError(
+                "Response not signed with a known key: signed with: %r, known keys: %r"
+                % (
+                    list(response[u"signatures"][perspective_name].keys()),
+                    list(perspective_keys.keys()),
+                )
+            )
+
+        return self.process_v2_response(
+            perspective_name, response, time_added_ms=time_added_ms
+        )
+
+
+class ServerKeyFetcher(BaseV2KeyFetcher):
+    """KeyFetcher impl which fetches keys from the origin servers"""
+
+    def __init__(self, hs):
+        super(ServerKeyFetcher, self).__init__(hs)
+        self.clock = hs.get_clock()
+        self.client = hs.get_http_client()
+
+    @defer.inlineCallbacks
+    def get_keys(self, server_name_and_key_ids):
+        """see KeyFetcher.get_keys"""
+        results = yield logcontext.make_deferred_yieldable(
+            defer.gatherResults(
+                [
+                    run_in_background(
+                        self.get_server_verify_key_v2_direct, server_name, key_ids
+                    )
+                    for server_name, key_ids in server_name_and_key_ids
+                ],
+                consumeErrors=True,
+            ).addErrback(unwrapFirstError)
+        )
+
+        merged = {}
+        for result in results:
+            merged.update(result)
+
+        defer.returnValue(
+            {server_name: keys for server_name, keys in merged.items() if keys}
+        )
+
     @defer.inlineCallbacks
     def get_server_verify_key_v2_direct(self, server_name, key_ids):
-        keys = {}  # type: dict[str, nacl.signing.VerifyKey]
+        keys = {}  # type: dict[str, FetchKeyResult]
 
         for requested_key_id in key_ids:
             if requested_key_id in keys:
                 continue
 
+            time_now_ms = self.clock.time_msec()
             try:
                 response = yield self.client.get_json(
                     destination=server_name,
-                    path="/_matrix/key/v2/server/" + urllib.parse.quote(requested_key_id),
+                    path="/_matrix/key/v2/server/"
+                    + urllib.parse.quote(requested_key_id),
                     ignore_backoff=True,
                 )
             except (NotRetryingDestination, RequestSendFailed) as e:
-                raise_from(
-                    KeyLookupError("Failed to connect to remote server"), e,
-                )
+                raise_from(KeyLookupError("Failed to connect to remote server"), e)
             except HttpResponseException as e:
-                raise_from(
-                    KeyLookupError("Remote server returned an error"), e,
-                )
-
-            if (u"signatures" not in response
-                    or server_name not in response[u"signatures"]):
-                raise KeyLookupError("Key response not signed by remote server")
+                raise_from(KeyLookupError("Remote server returned an error"), e)
 
             if response["server_name"] != server_name:
-                raise KeyLookupError("Expected a response for server %r not %r" % (
-                    server_name, response["server_name"]
-                ))
+                raise KeyLookupError(
+                    "Expected a response for server %r not %r"
+                    % (server_name, response["server_name"])
+                )
 
             response_keys = yield self.process_v2_response(
                 from_server=server_name,
                 requested_ids=[requested_key_id],
                 response_json=response,
+                time_added_ms=time_now_ms,
+            )
+            yield self.store.store_server_verify_keys(
+                server_name,
+                time_now_ms,
+                ((server_name, key_id, key) for key_id, key in response_keys.items()),
             )
-
             keys.update(response_keys)
 
-        yield self.store_keys(
-            server_name=server_name,
-            from_server=server_name,
-            verify_keys=keys,
-        )
         defer.returnValue({server_name: keys})
 
-    @defer.inlineCallbacks
-    def process_v2_response(
-        self, from_server, response_json, requested_ids=[],
-    ):
-        """Parse a 'Server Keys' structure from the result of a /key request
-
-        This is used to parse either the entirety of the response from
-        GET /_matrix/key/v2/server, or a single entry from the list returned by
-        POST /_matrix/key/v2/query.
-
-        Checks that each signature in the response that claims to come from the origin
-        server is valid. (Does not check that there actually is such a signature, for
-        some reason.)
-
-        Stores the json in server_keys_json so that it can be used for future responses
-        to /_matrix/key/v2/query.
-
-        Args:
-            from_server (str): the name of the server producing this result: either
-                the origin server for a /_matrix/key/v2/server request, or the notary
-                for a /_matrix/key/v2/query.
-
-            response_json (dict): the json-decoded Server Keys response object
-
-            requested_ids (iterable[str]): a list of the key IDs that were requested.
-                We will store the json for these key ids as well as any that are
-                actually in the response
-
-        Returns:
-            Deferred[dict[str, nacl.signing.VerifyKey]]:
-                map from key_id to key object
-        """
-        time_now_ms = self.clock.time_msec()
-        response_keys = {}
-        verify_keys = {}
-        for key_id, key_data in response_json["verify_keys"].items():
-            if is_signing_algorithm_supported(key_id):
-                key_base64 = key_data["key"]
-                key_bytes = decode_base64(key_base64)
-                verify_key = decode_verify_key_bytes(key_id, key_bytes)
-                verify_key.time_added = time_now_ms
-                verify_keys[key_id] = verify_key
-
-        old_verify_keys = {}
-        for key_id, key_data in response_json["old_verify_keys"].items():
-            if is_signing_algorithm_supported(key_id):
-                key_base64 = key_data["key"]
-                key_bytes = decode_base64(key_base64)
-                verify_key = decode_verify_key_bytes(key_id, key_bytes)
-                verify_key.expired = key_data["expired_ts"]
-                verify_key.time_added = time_now_ms
-                old_verify_keys[key_id] = verify_key
-
-        server_name = response_json["server_name"]
-        for key_id in response_json["signatures"].get(server_name, {}):
-            if key_id not in response_json["verify_keys"]:
-                raise KeyLookupError(
-                    "Key response must include verification keys for all"
-                    " signatures"
-                )
-            if key_id in verify_keys:
-                verify_signed_json(
-                    response_json,
-                    server_name,
-                    verify_keys[key_id]
-                )
-
-        signed_key_json = sign_json(
-            response_json,
-            self.config.server_name,
-            self.config.signing_key[0],
-        )
-
-        signed_key_json_bytes = encode_canonical_json(signed_key_json)
-        ts_valid_until_ms = signed_key_json[u"valid_until_ts"]
-
-        updated_key_ids = set(requested_ids)
-        updated_key_ids.update(verify_keys)
-        updated_key_ids.update(old_verify_keys)
-
-        response_keys.update(verify_keys)
-        response_keys.update(old_verify_keys)
-
-        yield logcontext.make_deferred_yieldable(defer.gatherResults(
-            [
-                run_in_background(
-                    self.store.store_server_keys_json,
-                    server_name=server_name,
-                    key_id=key_id,
-                    from_server=from_server,
-                    ts_now_ms=time_now_ms,
-                    ts_expires_ms=ts_valid_until_ms,
-                    key_json_bytes=signed_key_json_bytes,
-                )
-                for key_id in updated_key_ids
-            ],
-            consumeErrors=True,
-        ).addErrback(unwrapFirstError))
-
-        defer.returnValue(response_keys)
-
-    def store_keys(self, server_name, from_server, verify_keys):
-        """Store a collection of verify keys for a given server
-        Args:
-            server_name(str): The name of the server the keys are for.
-            from_server(str): The server the keys were downloaded from.
-            verify_keys(dict): A mapping of key_id to VerifyKey.
-        Returns:
-            A deferred that completes when the keys are stored.
-        """
-        # TODO(markjh): Store whether the keys have expired.
-        return logcontext.make_deferred_yieldable(defer.gatherResults(
-            [
-                run_in_background(
-                    self.store.store_server_verify_key,
-                    server_name, server_name, key.time_added, key
-                )
-                for key_id, key in verify_keys.items()
-            ],
-            consumeErrors=True,
-        ).addErrback(unwrapFirstError))
-
 
 @defer.inlineCallbacks
 def _handle_key_deferred(verify_request):
@@ -707,48 +777,29 @@ def _handle_key_deferred(verify_request):
         SynapseError if there was a problem performing the verification
     """
     server_name = verify_request.server_name
-    try:
-        with PreserveLoggingContext():
-            _, key_id, verify_key = yield verify_request.deferred
-    except KeyLookupError as e:
-        logger.warn(
-            "Failed to download keys for %s: %s %s",
-            server_name, type(e).__name__, str(e),
-        )
-        raise SynapseError(
-            502,
-            "Error downloading keys for %s" % (server_name,),
-            Codes.UNAUTHORIZED,
-        )
-    except Exception as e:
-        logger.exception(
-            "Got Exception when downloading keys for %s: %s %s",
-            server_name, type(e).__name__, str(e),
-        )
-        raise SynapseError(
-            401,
-            "No key for %s with id %s" % (server_name, verify_request.key_ids),
-            Codes.UNAUTHORIZED,
-        )
+    with PreserveLoggingContext():
+        _, key_id, verify_key = yield verify_request.deferred
 
     json_object = verify_request.json_object
 
-    logger.debug("Got key %s %s:%s for server %s, verifying" % (
-        key_id, verify_key.alg, verify_key.version, server_name,
-    ))
+    logger.debug(
+        "Got key %s %s:%s for server %s, verifying"
+        % (key_id, verify_key.alg, verify_key.version, server_name)
+    )
     try:
         verify_signed_json(json_object, server_name, verify_key)
     except SignatureVerifyException as e:
         logger.debug(
             "Error verifying signature for %s:%s:%s with key %s: %s",
-            server_name, verify_key.alg, verify_key.version,
+            server_name,
+            verify_key.alg,
+            verify_key.version,
             encode_verify_key_base64(verify_key),
             str(e),
         )
         raise SynapseError(
             401,
-            "Invalid signature for server %s with key %s:%s: %s" % (
-                server_name, verify_key.alg, verify_key.version, str(e),
-            ),
+            "Invalid signature for server %s with key %s:%s: %s"
+            % (server_name, verify_key.alg, verify_key.version, str(e)),
             Codes.UNAUTHORIZED,
         )
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 12056d5be2..1edd19cc13 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -21,6 +21,7 @@ import six
 
 from unpaddedbase64 import encode_base64
 
+from synapse.api.errors import UnsupportedRoomVersionError
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, EventFormatVersions
 from synapse.util.caches import intern_dict
 from synapse.util.frozenutils import freeze
@@ -335,13 +336,32 @@ class FrozenEventV2(EventBase):
         return self.__repr__()
 
     def __repr__(self):
-        return "<FrozenEventV2 event_id='%s', type='%s', state_key='%s'>" % (
+        return "<%s event_id='%s', type='%s', state_key='%s'>" % (
+            self.__class__.__name__,
             self.event_id,
             self.get("type", None),
             self.get("state_key", None),
         )
 
 
+class FrozenEventV3(FrozenEventV2):
+    """FrozenEventV3, which differs from FrozenEventV2 only in the event_id format"""
+    format_version = EventFormatVersions.V3  # All events of this type are V3
+
+    @property
+    def event_id(self):
+        # We have to import this here as otherwise we get an import loop which
+        # is hard to break.
+        from synapse.crypto.event_signing import compute_event_reference_hash
+
+        if self._event_id:
+            return self._event_id
+        self._event_id = "$" + encode_base64(
+            compute_event_reference_hash(self)[1], urlsafe=True
+        )
+        return self._event_id
+
+
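# [Editor's note] Self-contained sketch of the room-v3 event_id scheme used by
# FrozenEventV3 above: "$" plus the URL-safe, unpadded base64 of the event's
# reference hash (a sha256 digest in practice). The digest below is illustrative.
import hashlib

from unpaddedbase64 import encode_base64

def example_v3_event_id(digest_bytes):
    return "$" + encode_base64(digest_bytes, urlsafe=True)

# example_v3_event_id(hashlib.sha256(b"example").digest())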
 def room_version_to_event_format(room_version):
     """Converts a room version string to the event format
 
@@ -350,12 +370,15 @@ def room_version_to_event_format(room_version):
 
     Returns:
         int
+
+    Raises:
+        UnsupportedRoomVersionError if the room version is unknown
     """
     v = KNOWN_ROOM_VERSIONS.get(room_version)
 
     if not v:
-        # We should have already checked version, so this should not happen
-        raise RuntimeError("Unrecognized room version %s" % (room_version,))
+        # this can happen if support is withdrawn for a room version
+        raise UnsupportedRoomVersionError()
 
     return v.event_format
 
@@ -376,6 +399,8 @@ def event_type_from_format_version(format_version):
         return FrozenEvent
     elif format_version == EventFormatVersions.V2:
         return FrozenEventV2
+    elif format_version == EventFormatVersions.V3:
+        return FrozenEventV3
     else:
         raise Exception(
             "No event format %r" % (format_version,)
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index fba27177c7..546b6f4982 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -18,6 +18,7 @@ import attr
 from twisted.internet import defer
 
 from synapse.api.constants import MAX_DEPTH
+from synapse.api.errors import UnsupportedRoomVersionError
 from synapse.api.room_versions import (
     KNOWN_EVENT_FORMAT_VERSIONS,
     KNOWN_ROOM_VERSIONS,
@@ -75,6 +76,7 @@ class EventBuilder(object):
     # someone tries to get them when they don't exist.
     _state_key = attr.ib(default=None)
     _redacts = attr.ib(default=None)
+    _origin_server_ts = attr.ib(default=None)
 
     internal_metadata = attr.ib(default=attr.Factory(lambda: _EventInternalMetadata({})))
 
@@ -141,6 +143,9 @@ class EventBuilder(object):
         if self._redacts is not None:
             event_dict["redacts"] = self._redacts
 
+        if self._origin_server_ts is not None:
+            event_dict["origin_server_ts"] = self._origin_server_ts
+
         defer.returnValue(
             create_local_event_from_event_dict(
                 clock=self._clock,
@@ -178,9 +183,8 @@ class EventBuilderFactory(object):
         """
         v = KNOWN_ROOM_VERSIONS.get(room_version)
         if not v:
-            raise Exception(
-                "No event format defined for version %r" % (room_version,)
-            )
+            # this can happen if support is withdrawn for a room version
+            raise UnsupportedRoomVersionError()
         return self.for_room_version(v, key_values)
 
     def for_room_version(self, room_version, key_values):
@@ -209,6 +213,7 @@ class EventBuilderFactory(object):
             content=key_values.get("content", {}),
             unsigned=key_values.get("unsigned", {}),
             redacts=key_values.get("redacts", None),
+            origin_server_ts=key_values.get("origin_server_ts", None),
         )
 
 
@@ -245,7 +250,7 @@ def create_local_event_from_event_dict(clock, hostname, signing_key,
         event_dict["event_id"] = _create_event_id(clock, hostname)
 
     event_dict["origin"] = hostname
-    event_dict["origin_server_ts"] = time_now
+    event_dict.setdefault("origin_server_ts", time_now)
 
     event_dict.setdefault("unsigned", {})
     age = event_dict["unsigned"].pop("age", 0)
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index a5454556cc..e2d4384de1 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -21,7 +21,7 @@ from frozendict import frozendict
 
 from twisted.internet import defer
 
-from synapse.api.constants import EventTypes
+from synapse.api.constants import EventTypes, RelationTypes
 from synapse.util.async_helpers import yieldable_gather_results
 
 from . import EventBase
@@ -324,21 +324,70 @@ class EventClientSerializer(object):
     """
 
     def __init__(self, hs):
-        pass
+        self.store = hs.get_datastore()
+        self.experimental_msc1849_support_enabled = (
+            hs.config.experimental_msc1849_support_enabled
+        )
 
-    def serialize_event(self, event, time_now, **kwargs):
+    @defer.inlineCallbacks
+    def serialize_event(self, event, time_now, bundle_aggregations=True, **kwargs):
         """Serializes a single event.
 
         Args:
             event (EventBase)
             time_now (int): The current time in milliseconds
+            bundle_aggregations (bool): Whether to bundle in related events
             **kwargs: Arguments to pass to `serialize_event`
 
         Returns:
             Deferred[dict]: The serialized event
         """
-        event = serialize_event(event, time_now, **kwargs)
-        return defer.succeed(event)
+        # To handle the case of presence events and the like
+        if not isinstance(event, EventBase):
+            defer.returnValue(event)
+
+        event_id = event.event_id
+        serialized_event = serialize_event(event, time_now, **kwargs)
+
+        # If MSC1849 is enabled then we need to check whether there are any
+        # relations to bundle in with the event
+        if self.experimental_msc1849_support_enabled and bundle_aggregations:
+            annotations = yield self.store.get_aggregation_groups_for_event(
+                event_id,
+            )
+            references = yield self.store.get_relations_for_event(
+                event_id, RelationTypes.REFERENCE, direction="f",
+            )
+
+            if annotations.chunk:
+                r = serialized_event["unsigned"].setdefault("m.relations", {})
+                r[RelationTypes.ANNOTATION] = annotations.to_dict()
+
+            if references.chunk:
+                r = serialized_event["unsigned"].setdefault("m.relations", {})
+                r[RelationTypes.REFERENCE] = references.to_dict()
+
+            edit = None
+            if event.type == EventTypes.Message:
+                edit = yield self.store.get_applicable_edit(event_id)
+
+            if edit:
+                # If there is an edit, replace the content, preserving existing
+                # relations.
+
+                relations = event.content.get("m.relates_to")
+                serialized_event["content"] = edit.content.get("m.new_content", {})
+                if relations:
+                    serialized_event["content"]["m.relates_to"] = relations
+                else:
+                    serialized_event["content"].pop("m.relates_to", None)
+
+                r = serialized_event["unsigned"].setdefault("m.relations", {})
+                r[RelationTypes.REPLACE] = {
+                    "event_id": edit.event_id,
+                }
+
+        defer.returnValue(serialized_event)
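# [Editor's note] Illustrative shape of the bundled aggregations that
# serialize_event now attaches under unsigned["m.relations"]. The values are toy
# data, and the exact rel_type strings are whichever constants RelationTypes
# defines (annotation, reference, replace).
EXAMPLE_BUNDLED_RELATIONS = {
    "m.annotation": {"chunk": [{"type": "m.reaction", "key": "👍", "count": 2}]},
    "m.reference": {"chunk": [{"event_id": "$some-referencing-event"}]},
    "m.replace": {"event_id": "$the-most-recent-edit"},
}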
 
     def serialize_events(self, events, time_now, **kwargs):
         """Serializes multiple events.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index df60828dba..4c28c1dc3c 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -33,6 +33,7 @@ from synapse.api.errors import (
     IncompatibleRoomVersionError,
     NotFoundError,
     SynapseError,
+    UnsupportedRoomVersionError,
 )
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.crypto.event_signing import compute_event_signature
@@ -198,11 +199,22 @@ class FederationServer(FederationBase):
 
             try:
                 room_version = yield self.store.get_room_version(room_id)
-                format_ver = room_version_to_event_format(room_version)
             except NotFoundError:
                 logger.info("Ignoring PDU for unknown room_id: %s", room_id)
                 continue
 
+            try:
+                format_ver = room_version_to_event_format(room_version)
+            except UnsupportedRoomVersionError:
+                # this can happen if support for a given room version is withdrawn,
+                # in which case we may still receive events for rooms of that version.
+                logger.info(
+                    "Ignoring PDU for room %s with unknown version %s",
+                    room_id,
+                    room_version,
+                )
+                continue
+
             event = event_from_pdu_json(p, format_ver)
             pdus_by_room.setdefault(room_id, []).append(event)
 
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 385eda2dca..d0efc4e0d3 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -23,7 +23,11 @@ from twisted.internet import defer
 import synapse
 from synapse.api.errors import Codes, FederationDeniedError, SynapseError
 from synapse.api.room_versions import RoomVersions
-from synapse.api.urls import FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX
+from synapse.api.urls import (
+    FEDERATION_UNSTABLE_PREFIX,
+    FEDERATION_V1_PREFIX,
+    FEDERATION_V2_PREFIX,
+)
 from synapse.http.endpoint import parse_and_validate_server_name
 from synapse.http.server import JsonResource
 from synapse.http.servlet import (
@@ -1304,6 +1308,30 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet):
         defer.returnValue((200, new_content))
 
 
+class RoomComplexityServlet(BaseFederationServlet):
+    """
+    Indicates to other servers how complex (and therefore how
+    resource-intensive) a given public room known to this server is.
+    """
+    PATH = "/rooms/(?P<room_id>[^/]*)/complexity"
+    PREFIX = FEDERATION_UNSTABLE_PREFIX
+
+    @defer.inlineCallbacks
+    def on_GET(self, origin, content, query, room_id):
+
+        store = self.handler.hs.get_datastore()
+
+        is_public = yield store.is_room_world_readable_or_publicly_joinable(
+            room_id
+        )
+
+        if not is_public:
+            raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM)
+
+        complexity = yield store.get_room_complexity(room_id)
+        defer.returnValue((200, complexity))
+
+
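# [Editor's note] The servlet above is mounted under FEDERATION_UNSTABLE_PREFIX,
# so the full (unstable) path is presumably
#     GET /_matrix/federation/unstable/rooms/{roomId}/complexity
# and the response body is whatever get_room_complexity returns; the "v1" field
# name below is an assumption, shown only to illustrate the shape.
EXAMPLE_COMPLEXITY_RESPONSE = {"v1": 12.5}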
 FEDERATION_SERVLET_CLASSES = (
     FederationSendServlet,
     FederationEventServlet,
@@ -1327,6 +1355,7 @@ FEDERATION_SERVLET_CLASSES = (
     FederationThirdPartyInviteExchangeServlet,
     On3pidBindServlet,
     FederationVersionServlet,
+    RoomComplexityServlet,
 )
 
 OPENID_SERVLET_CLASSES = (
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index 6003ad9cca..eb525070cf 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -122,6 +122,9 @@ class EventStreamHandler(BaseHandler):
 
             chunks = yield self._event_serializer.serialize_events(
                 events, time_now, as_client_event=as_client_event,
+                # We don't bundle "live" events, as otherwise clients
+                # will end up double counting annotations.
+                bundle_aggregations=False,
             )
 
             chunk = {
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 0684778882..cf4fad7de0 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1916,6 +1916,11 @@ class FederationHandler(BaseHandler):
                     event.room_id, latest_event_ids=extrem_ids,
                 )
 
+            logger.debug(
+                "Doing soft-fail check for %s: state %s",
+                event.event_id, current_state_ids,
+            )
+
             # Now check if event pass auth against said current state
             auth_types = auth_types_for_event(event)
             current_state_ids = [
@@ -1932,7 +1937,7 @@ class FederationHandler(BaseHandler):
                 self.auth.check(room_version, event, auth_events=current_auth_events)
             except AuthError as e:
                 logger.warn(
-                    "Failed current state auth resolution for %r because %s",
+                    "Soft-failing %r because %s",
                     event, e,
                 )
                 event.internal_metadata.soft_failed = True
@@ -2008,15 +2013,44 @@ class FederationHandler(BaseHandler):
 
         Args:
             origin (str):
-            event (synapse.events.FrozenEvent):
+            event (synapse.events.EventBase):
             context (synapse.events.snapshot.EventContext):
-            auth_events (dict[(str, str)->str]):
+            auth_events (dict[(str, str)->synapse.events.EventBase]):
+                Map from (event_type, state_key) to event
+
+                What we expect the event's auth_events to be, based on the event's
+                position in the dag. I think? maybe??
+
+                Also NB that this function adds entries to it.
+        Returns:
+            defer.Deferred[None]
+        """
+        room_version = yield self.store.get_room_version(event.room_id)
+
+        yield self._update_auth_events_and_context_for_auth(
+            origin, event, context, auth_events
+        )
+        try:
+            self.auth.check(room_version, event, auth_events=auth_events)
+        except AuthError as e:
+            logger.warn("Failed auth resolution for %r because %s", event, e)
+            raise e
+
+    @defer.inlineCallbacks
+    def _update_auth_events_and_context_for_auth(
+        self, origin, event, context, auth_events
+    ):
+        """Helper for do_auth. See there for docs.
+
+        Args:
+            origin (str):
+            event (synapse.events.EventBase):
+            context (synapse.events.snapshot.EventContext):
+            auth_events (dict[(str, str)->synapse.events.EventBase]):
 
         Returns:
             defer.Deferred[None]
         """
-        # Check if we have all the auth events.
-        current_state = set(e.event_id for e in auth_events.values())
         event_auth_events = set(event.auth_event_ids())
 
         if event.is_state():
@@ -2024,11 +2058,21 @@ class FederationHandler(BaseHandler):
         else:
             event_key = None
 
-        if event_auth_events - current_state:
+        # if the event's auth_events refers to events which are not in our
+        # calculated auth_events, we need to fetch those events from somewhere.
+        #
+        # we start by fetching them from the store, and then try calling /event_auth/.
+        missing_auth = event_auth_events.difference(
+            e.event_id for e in auth_events.values()
+        )
+
+        if missing_auth:
             # TODO: can we use store.have_seen_events here instead?
             have_events = yield self.store.get_seen_events_with_rejections(
-                event_auth_events - current_state
+                missing_auth
             )
+            logger.debug("Got events %s from store", have_events)
+            missing_auth.difference_update(have_events.keys())
         else:
             have_events = {}
 
@@ -2037,13 +2081,12 @@ class FederationHandler(BaseHandler):
             for e in auth_events.values()
         })
 
-        seen_events = set(have_events.keys())
-
-        missing_auth = event_auth_events - seen_events - current_state
-
         if missing_auth:
-            logger.info("Missing auth: %s", missing_auth)
             # If we don't have all the auth events, we need to get them.
+            logger.info(
+                "auth_events contains unknown events: %s",
+                missing_auth,
+            )
             try:
                 remote_auth_chain = yield self.federation_client.get_event_auth(
                     origin, event.room_id, event.event_id
@@ -2084,145 +2127,168 @@ class FederationHandler(BaseHandler):
                 have_events = yield self.store.get_seen_events_with_rejections(
                     event.auth_event_ids()
                 )
-                seen_events = set(have_events.keys())
             except Exception:
                 # FIXME:
                 logger.exception("Failed to get auth chain")
 
+        if event.internal_metadata.is_outlier():
+            logger.info("Skipping auth_event fetch for outlier")
+            return
+
         # FIXME: Assumes we have and stored all the state for all the
         # prev_events
-        current_state = set(e.event_id for e in auth_events.values())
-        different_auth = event_auth_events - current_state
+        different_auth = event_auth_events.difference(
+            e.event_id for e in auth_events.values()
+        )
 
-        room_version = yield self.store.get_room_version(event.room_id)
+        if not different_auth:
+            return
 
-        if different_auth and not event.internal_metadata.is_outlier():
-            # Do auth conflict res.
-            logger.info("Different auth: %s", different_auth)
-
-            different_events = yield logcontext.make_deferred_yieldable(
-                defer.gatherResults([
-                    logcontext.run_in_background(
-                        self.store.get_event,
-                        d,
-                        allow_none=True,
-                        allow_rejected=False,
-                    )
-                    for d in different_auth
-                    if d in have_events and not have_events[d]
-                ], consumeErrors=True)
-            ).addErrback(unwrapFirstError)
-
-            if different_events:
-                local_view = dict(auth_events)
-                remote_view = dict(auth_events)
-                remote_view.update({
-                    (d.type, d.state_key): d for d in different_events if d
-                })
+        logger.info(
+            "auth_events refers to events which are not in our calculated auth "
+            "chain: %s",
+            different_auth,
+        )
 
-                new_state = yield self.state_handler.resolve_events(
-                    room_version,
-                    [list(local_view.values()), list(remote_view.values())],
-                    event
+        room_version = yield self.store.get_room_version(event.room_id)
+
+        different_events = yield logcontext.make_deferred_yieldable(
+            defer.gatherResults([
+                logcontext.run_in_background(
+                    self.store.get_event,
+                    d,
+                    allow_none=True,
+                    allow_rejected=False,
                 )
+                for d in different_auth
+                if d in have_events and not have_events[d]
+            ], consumeErrors=True)
+        ).addErrback(unwrapFirstError)
+
+        if different_events:
+            local_view = dict(auth_events)
+            remote_view = dict(auth_events)
+            remote_view.update({
+                (d.type, d.state_key): d for d in different_events if d
+            })
 
-                auth_events.update(new_state)
+            new_state = yield self.state_handler.resolve_events(
+                room_version,
+                [list(local_view.values()), list(remote_view.values())],
+                event
+            )
 
-                current_state = set(e.event_id for e in auth_events.values())
-                different_auth = event_auth_events - current_state
+            logger.info(
+                "After state res: updating auth_events with new state %s",
+                {
+                    (d.type, d.state_key): d.event_id for d in new_state.values()
+                    if auth_events.get((d.type, d.state_key)) != d
+                },
+            )
 
-                yield self._update_context_for_auth_events(
-                    event, context, auth_events, event_key,
-                )
+            auth_events.update(new_state)
 
-        if different_auth and not event.internal_metadata.is_outlier():
-            logger.info("Different auth after resolution: %s", different_auth)
+            different_auth = event_auth_events.difference(
+                e.event_id for e in auth_events.values()
+            )
 
-            # Only do auth resolution if we have something new to say.
-            # We can't rove an auth failure.
-            do_resolution = False
+            yield self._update_context_for_auth_events(
+                event, context, auth_events, event_key,
+            )
 
-            provable = [
-                RejectedReason.NOT_ANCESTOR, RejectedReason.NOT_ANCESTOR,
-            ]
+        if not different_auth:
+            # we're done
+            return
 
-            for e_id in different_auth:
-                if e_id in have_events:
-                    if have_events[e_id] in provable:
-                        do_resolution = True
-                        break
+        logger.info(
+            "auth_events still refers to events which are not in the calculated auth "
+            "chain after state resolution: %s",
+            different_auth,
+        )
 
-            if do_resolution:
-                prev_state_ids = yield context.get_prev_state_ids(self.store)
-                # 1. Get what we think is the auth chain.
-                auth_ids = yield self.auth.compute_auth_events(
-                    event, prev_state_ids
-                )
-                local_auth_chain = yield self.store.get_auth_chain(
-                    auth_ids, include_given=True
-                )
+        # Only do auth resolution if we have something new to say.
+        # We can't prove an auth failure.
+        do_resolution = False
 
-                try:
-                    # 2. Get remote difference.
-                    result = yield self.federation_client.query_auth(
-                        origin,
-                        event.room_id,
-                        event.event_id,
-                        local_auth_chain,
-                    )
+        for e_id in different_auth:
+            if e_id in have_events:
+                if have_events[e_id] == RejectedReason.NOT_ANCESTOR:
+                    do_resolution = True
+                    break
 
-                    seen_remotes = yield self.store.have_seen_events(
-                        [e.event_id for e in result["auth_chain"]]
-                    )
+        if not do_resolution:
+            logger.info(
+                "Skipping auth resolution due to lack of provable rejection reasons"
+            )
+            return
 
-                    # 3. Process any remote auth chain events we haven't seen.
-                    for ev in result["auth_chain"]:
-                        if ev.event_id in seen_remotes:
-                            continue
+        logger.info("Doing auth resolution")
 
-                        if ev.event_id == event.event_id:
-                            continue
+        prev_state_ids = yield context.get_prev_state_ids(self.store)
 
-                        try:
-                            auth_ids = ev.auth_event_ids()
-                            auth = {
-                                (e.type, e.state_key): e
-                                for e in result["auth_chain"]
-                                if e.event_id in auth_ids
-                                or event.type == EventTypes.Create
-                            }
-                            ev.internal_metadata.outlier = True
+        # 1. Get what we think is the auth chain.
+        auth_ids = yield self.auth.compute_auth_events(
+            event, prev_state_ids
+        )
+        local_auth_chain = yield self.store.get_auth_chain(
+            auth_ids, include_given=True
+        )
 
-                            logger.debug(
-                                "do_auth %s different_auth: %s",
-                                event.event_id, e.event_id
-                            )
+        try:
+            # 2. Get remote difference.
+            result = yield self.federation_client.query_auth(
+                origin,
+                event.room_id,
+                event.event_id,
+                local_auth_chain,
+            )
 
-                            yield self._handle_new_event(
-                                origin, ev, auth_events=auth
-                            )
+            seen_remotes = yield self.store.have_seen_events(
+                [e.event_id for e in result["auth_chain"]]
+            )
 
-                            if ev.event_id in event_auth_events:
-                                auth_events[(ev.type, ev.state_key)] = ev
-                        except AuthError:
-                            pass
+            # 3. Process any remote auth chain events we haven't seen.
+            for ev in result["auth_chain"]:
+                if ev.event_id in seen_remotes:
+                    continue
 
-                except Exception:
-                    # FIXME:
-                    logger.exception("Failed to query auth chain")
+                if ev.event_id == event.event_id:
+                    continue
 
-                # 4. Look at rejects and their proofs.
-                # TODO.
+                try:
+                    auth_ids = ev.auth_event_ids()
+                    auth = {
+                        (e.type, e.state_key): e
+                        for e in result["auth_chain"]
+                        if e.event_id in auth_ids
+                        or event.type == EventTypes.Create
+                    }
+                    ev.internal_metadata.outlier = True
+
+                    logger.debug(
+                        "do_auth %s different_auth: %s",
+                        event.event_id, e.event_id
+                    )
 
-                yield self._update_context_for_auth_events(
-                    event, context, auth_events, event_key,
-                )
+                    yield self._handle_new_event(
+                        origin, ev, auth_events=auth
+                    )
 
-        try:
-            self.auth.check(room_version, event, auth_events=auth_events)
-        except AuthError as e:
-            logger.warn("Failed auth resolution for %r because %s", event, e)
-            raise e
+                    if ev.event_id in event_auth_events:
+                        auth_events[(ev.type, ev.state_key)] = ev
+                except AuthError:
+                    pass
+
+        except Exception:
+            # FIXME:
+            logger.exception("Failed to query auth chain")
+
+        # 4. Look at rejects and their proofs.
+        # TODO.
+
+        yield self._update_context_for_auth_events(
+            event, context, auth_events, event_key,
+        )
 
     @defer.inlineCallbacks
     def _update_context_for_auth_events(self, event, context, auth_events,
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 7b2c33a922..0b02469ceb 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -22,7 +22,7 @@ from canonicaljson import encode_canonical_json, json
 from twisted.internet import defer
 from twisted.internet.defer import succeed
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EventTypes, Membership, RelationTypes
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -166,6 +166,9 @@ class MessageHandler(object):
         now = self.clock.time_msec()
         events = yield self._event_serializer.serialize_events(
             room_state.values(), now,
+            # We don't bother bundling aggregations in when asked for state
+            # events, as clients won't use them.
+            bundle_aggregations=False,
         )
         defer.returnValue(events)
 
@@ -601,6 +604,20 @@ class EventCreationHandler(object):
 
         self.validator.validate_new(event)
 
+        # If this event is an annotation then we check that the sender
+        # can't annotate the same way twice (e.g. stops users from liking an
+        # event multiple times).
+        relation = event.content.get("m.relates_to", {})
+        if relation.get("rel_type") == RelationTypes.ANNOTATION:
+            relates_to = relation["event_id"]
+            aggregation_key = relation["key"]
+
+            already_exists = yield self.store.has_user_annotated_event(
+                relates_to, event.type, aggregation_key, event.sender,
+            )
+            if already_exists:
+                raise SynapseError(400, "Can't send same reaction twice")
+
         logger.debug(
             "Created event %s",
             event.event_id,
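
Editor's note: the dedup check added to EventCreationHandler above keys on the related event, the event type, the aggregation key and the sender. A minimal sketch of that behaviour, outside the patch, with a plain in-memory set standing in for store.has_user_annotated_event (all names and values here are illustrative):

    # Illustrative stand-in for the storage-layer dedup used above.
    sent_annotations = set()  # (relates_to, event_type, key, sender) tuples

    def has_user_annotated_event(relates_to, event_type, aggregation_key, sender):
        return (relates_to, event_type, aggregation_key, sender) in sent_annotations

    event = {
        "type": "m.reaction",
        "sender": "@alice:example.com",
        "content": {
            "m.relates_to": {
                "rel_type": "m.annotation",
                "event_id": "$parent_event",
                "key": "👍",
            },
        },
    }

    relation = event["content"].get("m.relates_to", {})
    if relation.get("rel_type") == "m.annotation":
        dedup_key = (
            relation["event_id"], event["type"], relation["key"], event["sender"],
        )
        if has_user_annotated_event(*dedup_key):
            raise ValueError("Can't send same reaction twice")  # 400 in the handler
        sent_annotations.add(dedup_key)
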
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 59d53f1050..6209858bbb 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -182,17 +182,27 @@ class PresenceHandler(object):
         # Start a LoopingCall in 30s that fires every 5s.
         # The initial delay is to allow disconnected clients a chance to
         # reconnect before we treat them as offline.
+        def run_timeout_handler():
+            return run_as_background_process(
+                "handle_presence_timeouts", self._handle_timeouts
+            )
+
         self.clock.call_later(
             30,
             self.clock.looping_call,
-            self._handle_timeouts,
+            run_timeout_handler,
             5000,
         )
 
+        def run_persister():
+            return run_as_background_process(
+                "persist_presence_changes", self._persist_unpersisted_changes
+            )
+
         self.clock.call_later(
             60,
             self.clock.looping_call,
-            self._persist_unpersisted_changes,
+            run_persister,
             60 * 1000,
         )
 
@@ -229,6 +239,7 @@ class PresenceHandler(object):
         )
 
         if self.unpersisted_users_changes:
+
             yield self.store.update_presence([
                 self.user_to_current_state[user_id]
                 for user_id in self.unpersisted_users_changes
@@ -240,30 +251,18 @@ class PresenceHandler(object):
         """We periodically persist the unpersisted changes, as otherwise they
         may stack up and slow down shutdown times.
         """
-        logger.info(
-            "Performing _persist_unpersisted_changes. Persisting %d unpersisted changes",
-            len(self.unpersisted_users_changes)
-        )
-
         unpersisted = self.unpersisted_users_changes
         self.unpersisted_users_changes = set()
 
         if unpersisted:
+            logger.info(
+                "Persisting %d upersisted presence updates", len(unpersisted)
+            )
             yield self.store.update_presence([
                 self.user_to_current_state[user_id]
                 for user_id in unpersisted
             ])
 
-        logger.info("Finished _persist_unpersisted_changes")
-
-    @defer.inlineCallbacks
-    def _update_states_and_catch_exception(self, new_states):
-        try:
-            res = yield self._update_states(new_states)
-            defer.returnValue(res)
-        except Exception:
-            logger.exception("Error updating presence")
-
     @defer.inlineCallbacks
     def _update_states(self, new_states):
         """Updates presence of users. Sets the appropriate timeouts. Pokes
@@ -338,45 +337,41 @@ class PresenceHandler(object):
         logger.info("Handling presence timeouts")
         now = self.clock.time_msec()
 
-        try:
-            with Measure(self.clock, "presence_handle_timeouts"):
-                # Fetch the list of users that *may* have timed out. Things may have
-                # changed since the timeout was set, so we won't necessarily have to
-                # take any action.
-                users_to_check = set(self.wheel_timer.fetch(now))
-
-                # Check whether the lists of syncing processes from an external
-                # process have expired.
-                expired_process_ids = [
-                    process_id for process_id, last_update
-                    in self.external_process_last_updated_ms.items()
-                    if now - last_update > EXTERNAL_PROCESS_EXPIRY
-                ]
-                for process_id in expired_process_ids:
-                    users_to_check.update(
-                        self.external_process_last_updated_ms.pop(process_id, ())
-                    )
-                    self.external_process_last_update.pop(process_id)
+        # Fetch the list of users that *may* have timed out. Things may have
+        # changed since the timeout was set, so we won't necessarily have to
+        # take any action.
+        users_to_check = set(self.wheel_timer.fetch(now))
+
+        # Check whether the lists of syncing processes from an external
+        # process have expired.
+        expired_process_ids = [
+            process_id for process_id, last_update
+            in self.external_process_last_updated_ms.items()
+            if now - last_update > EXTERNAL_PROCESS_EXPIRY
+        ]
+        for process_id in expired_process_ids:
+            users_to_check.update(
+                self.external_process_last_updated_ms.pop(process_id, ())
+            )
+            self.external_process_last_update.pop(process_id)
 
-                states = [
-                    self.user_to_current_state.get(
-                        user_id, UserPresenceState.default(user_id)
-                    )
-                    for user_id in users_to_check
-                ]
+        states = [
+            self.user_to_current_state.get(
+                user_id, UserPresenceState.default(user_id)
+            )
+            for user_id in users_to_check
+        ]
 
-                timers_fired_counter.inc(len(states))
+        timers_fired_counter.inc(len(states))
 
-                changes = handle_timeouts(
-                    states,
-                    is_mine_fn=self.is_mine_id,
-                    syncing_user_ids=self.get_currently_syncing_users(),
-                    now=now,
-                )
+        changes = handle_timeouts(
+            states,
+            is_mine_fn=self.is_mine_id,
+            syncing_user_ids=self.get_currently_syncing_users(),
+            now=now,
+        )
 
-            run_in_background(self._update_states_and_catch_exception, changes)
-        except Exception:
-            logger.exception("Exception in _handle_timeouts loop")
+        return self._update_states(changes)
 
     @defer.inlineCallbacks
     def bump_presence_active_time(self, user):
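
Editor's note: the presence changes above replace the direct looping_call targets (and the old catch-all wrapper and try/except) with run_as_background_process closures, which provide their own logcontext, metrics and exception logging. A rough sketch of the wiring pattern, assuming a Synapse-style clock where looping_call takes an interval in milliseconds; the helper name is illustrative and not part of the patch:

    from synapse.metrics.background_process_metrics import run_as_background_process

    def looping_background_call(clock, desc, func, interval_ms, initial_delay_sec):
        """Run `func` every `interval_ms` as a named background process,
        starting after `initial_delay_sec` seconds."""
        def wrapper():
            # run_as_background_process gives each firing its own logcontext and
            # metrics, and logs any exception instead of silently dropping it.
            return run_as_background_process(desc, func)

        clock.call_later(initial_delay_sec, clock.looping_call, wrapper, interval_ms)

    # e.g. the two loops wired up in PresenceHandler.__init__ above:
    #   looping_background_call(clock, "handle_presence_timeouts",
    #                           self._handle_timeouts, 5000, 30)
    #   looping_background_call(clock, "persist_presence_changes",
    #                           self._persist_unpersisted_changes, 60 * 1000, 60)
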
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index a51d11a257..e83ee24f10 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -19,7 +19,7 @@ import logging
 from twisted.internet import defer
 
 from synapse import types
-from synapse.api.constants import LoginType
+from synapse.api.constants import MAX_USERID_LENGTH, LoginType
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -123,6 +123,15 @@ class RegistrationHandler(BaseHandler):
 
         self.check_user_id_not_appservice_exclusive(user_id)
 
+        if len(user_id) > MAX_USERID_LENGTH:
+            raise SynapseError(
+                400,
+                "User ID may not be longer than %s characters" % (
+                    MAX_USERID_LENGTH,
+                ),
+                Codes.INVALID_USERNAME
+            )
+
         users = yield self.store.get_users_by_id_case_insensitive(user_id)
         if users:
             if not guest_access_token:
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index e37ae96899..4a17911a87 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -27,7 +27,7 @@ from twisted.internet import defer
 
 from synapse.api.constants import EventTypes, JoinRules, RoomCreationPreset
 from synapse.api.errors import AuthError, Codes, NotFoundError, StoreError, SynapseError
-from synapse.api.room_versions import DEFAULT_ROOM_VERSION, KNOWN_ROOM_VERSIONS
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.storage.state import StateFilter
 from synapse.types import RoomAlias, RoomID, RoomStreamToken, StreamToken, UserID
 from synapse.util import stringutils
@@ -70,6 +70,7 @@ class RoomCreationHandler(BaseHandler):
         self.spam_checker = hs.get_spam_checker()
         self.event_creation_handler = hs.get_event_creation_handler()
         self.room_member_handler = hs.get_room_member_handler()
+        self.config = hs.config
 
         # linearizer to stop two upgrades happening at once
         self._upgrade_linearizer = Linearizer("room_upgrade_linearizer")
@@ -475,7 +476,11 @@ class RoomCreationHandler(BaseHandler):
         if ratelimit:
             yield self.ratelimit(requester)
 
-        room_version = config.get("room_version", DEFAULT_ROOM_VERSION.identifier)
+        room_version = config.get(
+            "room_version",
+            self.config.default_room_version.identifier,
+        )
+
         if not isinstance(room_version, string_types):
             raise SynapseError(
                 400,
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index ffc588d454..93ac986c86 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -944,7 +944,7 @@ class RoomMemberHandler(object):
         }
 
         if self.config.invite_3pid_guest:
-            guest_access_token, guest_user_id = yield self.get_or_register_3pid_guest(
+            guest_user_id, guest_access_token = yield self.get_or_register_3pid_guest(
                 requester=requester,
                 medium=medium,
                 address=address,
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
new file mode 100644
index 0000000000..0e92b405ba
--- /dev/null
+++ b/synapse/handlers/stats.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, JoinRules, Membership
+from synapse.handlers.state_deltas import StateDeltasHandler
+from synapse.metrics import event_processing_positions
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.types import UserID
+from synapse.util.metrics import Measure
+
+logger = logging.getLogger(__name__)
+
+
+class StatsHandler(StateDeltasHandler):
+    """Handles keeping the *_stats tables updated with a simple time-series of
+    information about the users, rooms and media on the server, such that admins
+    have some idea of who is consuming their resources.
+
+    Heavily derived from UserDirectoryHandler
+    """
+
+    def __init__(self, hs):
+        super(StatsHandler, self).__init__(hs)
+        self.hs = hs
+        self.store = hs.get_datastore()
+        self.state = hs.get_state_handler()
+        self.server_name = hs.hostname
+        self.clock = hs.get_clock()
+        self.notifier = hs.get_notifier()
+        self.is_mine_id = hs.is_mine_id
+        self.stats_bucket_size = hs.config.stats_bucket_size
+
+        # The current position in the current_state_delta stream
+        self.pos = None
+
+        # Guard to ensure we only process deltas one at a time
+        self._is_processing = False
+
+        if hs.config.stats_enabled:
+            self.notifier.add_replication_callback(self.notify_new_event)
+
+            # We kick this off so that we don't have to wait for a change before
+            # we start populating stats
+            self.clock.call_later(0, self.notify_new_event)
+
+    def notify_new_event(self):
+        """Called when there may be more deltas to process
+        """
+        if not self.hs.config.stats_enabled:
+            return
+
+        if self._is_processing:
+            return
+
+        @defer.inlineCallbacks
+        def process():
+            try:
+                yield self._unsafe_process()
+            finally:
+                self._is_processing = False
+
+        self._is_processing = True
+        run_as_background_process("stats.notify_new_event", process)
+
+    @defer.inlineCallbacks
+    def _unsafe_process(self):
+        # If self.pos is None then it means we haven't fetched it from the DB yet
+        if self.pos is None:
+            self.pos = yield self.store.get_stats_stream_pos()
+
+        # If still None then the initial background update hasn't happened yet
+        if self.pos is None:
+            defer.returnValue(None)
+
+        # Loop round handling deltas until we're up to date
+        while True:
+            with Measure(self.clock, "stats_delta"):
+                deltas = yield self.store.get_current_state_deltas(self.pos)
+                if not deltas:
+                    return
+
+                logger.info("Handling %d state deltas", len(deltas))
+                yield self._handle_deltas(deltas)
+
+                self.pos = deltas[-1]["stream_id"]
+                yield self.store.update_stats_stream_pos(self.pos)
+
+                event_processing_positions.labels("stats").set(self.pos)
+
+    @defer.inlineCallbacks
+    def _handle_deltas(self, deltas):
+        """
+        Called with the state deltas to process
+        """
+        for delta in deltas:
+            typ = delta["type"]
+            state_key = delta["state_key"]
+            room_id = delta["room_id"]
+            event_id = delta["event_id"]
+            stream_id = delta["stream_id"]
+            prev_event_id = delta["prev_event_id"]
+
+            logger.debug("Handling: %r %r, %s", typ, state_key, event_id)
+
+            token = yield self.store.get_earliest_token_for_room_stats(room_id)
+
+            # If the earliest token to begin from is larger than our current
+            # stream ID, skip processing this delta.
+            if token is not None and token >= stream_id:
+                logger.debug(
+                    "Ignoring: %s as earlier than this room's initial ingestion event",
+                    event_id,
+                )
+                continue
+
+            if event_id is None and prev_event_id is None:
+                # Neither a new event nor a previous one: nothing to update.
+                continue
+
+            event_content = {}
+
+            if event_id is not None:
+                event_content = (yield self.store.get_event(event_id)).content or {}
+
+            # quantise time to the nearest bucket
+            now = yield self.store.get_received_ts(event_id)
+            now = (now // 1000 // self.stats_bucket_size) * self.stats_bucket_size
+
+            if typ == EventTypes.Member:
+                # we could use _get_key_change here but it's a bit inefficient
+                # given we're not testing for a specific result; might as well
+                # just grab the prev_membership and membership strings and
+                # compare them.
+                prev_event_content = {}
+                if prev_event_id is not None:
+                    prev_event_content = (
+                        yield self.store.get_event(prev_event_id)
+                    ).content
+
+                membership = event_content.get("membership", Membership.LEAVE)
+                prev_membership = prev_event_content.get("membership", Membership.LEAVE)
+
+                if prev_membership == membership:
+                    continue
+
+                if prev_membership == Membership.JOIN:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "joined_members", -1
+                    )
+                elif prev_membership == Membership.INVITE:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "invited_members", -1
+                    )
+                elif prev_membership == Membership.LEAVE:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "left_members", -1
+                    )
+                elif prev_membership == Membership.BAN:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "banned_members", -1
+                    )
+                else:
+                    err = "%s is not a valid prev_membership" % (repr(prev_membership),)
+                    logger.error(err)
+                    raise ValueError(err)
+
+                if membership == Membership.JOIN:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "joined_members", +1
+                    )
+                elif membership == Membership.INVITE:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "invited_members", +1
+                    )
+                elif membership == Membership.LEAVE:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "left_members", +1
+                    )
+                elif membership == Membership.BAN:
+                    yield self.store.update_stats_delta(
+                        now, "room", room_id, "banned_members", +1
+                    )
+                else:
+                    err = "%s is not a valid membership" % (repr(membership),)
+                    logger.error(err)
+                    raise ValueError(err)
+
+                user_id = state_key
+                if self.is_mine_id(user_id):
+                    # update user_stats as it's one of our users
+                    public = yield self._is_public_room(room_id)
+
+                    if membership == Membership.LEAVE:
+                        yield self.store.update_stats_delta(
+                            now,
+                            "user",
+                            user_id,
+                            "public_rooms" if public else "private_rooms",
+                            -1,
+                        )
+                    elif membership == Membership.JOIN:
+                        yield self.store.update_stats_delta(
+                            now,
+                            "user",
+                            user_id,
+                            "public_rooms" if public else "private_rooms",
+                            +1,
+                        )
+
+            elif typ == EventTypes.Create:
+                # Newly created room. Add it with all blank portions.
+                yield self.store.update_room_state(
+                    room_id,
+                    {
+                        "join_rules": None,
+                        "history_visibility": None,
+                        "encryption": None,
+                        "name": None,
+                        "topic": None,
+                        "avatar": None,
+                        "canonical_alias": None,
+                    },
+                )
+
+            elif typ == EventTypes.JoinRules:
+                yield self.store.update_room_state(
+                    room_id, {"join_rules": event_content.get("join_rule")}
+                )
+
+                is_public = yield self._get_key_change(
+                    prev_event_id, event_id, "join_rule", JoinRules.PUBLIC
+                )
+                if is_public is not None:
+                    yield self.update_public_room_stats(now, room_id, is_public)
+
+            elif typ == EventTypes.RoomHistoryVisibility:
+                yield self.store.update_room_state(
+                    room_id,
+                    {"history_visibility": event_content.get("history_visibility")},
+                )
+
+                is_public = yield self._get_key_change(
+                    prev_event_id, event_id, "history_visibility", "world_readable"
+                )
+                if is_public is not None:
+                    yield self.update_public_room_stats(now, room_id, is_public)
+
+            elif typ == EventTypes.Encryption:
+                yield self.store.update_room_state(
+                    room_id, {"encryption": event_content.get("algorithm")}
+                )
+            elif typ == EventTypes.Name:
+                yield self.store.update_room_state(
+                    room_id, {"name": event_content.get("name")}
+                )
+            elif typ == EventTypes.Topic:
+                yield self.store.update_room_state(
+                    room_id, {"topic": event_content.get("topic")}
+                )
+            elif typ == EventTypes.RoomAvatar:
+                yield self.store.update_room_state(
+                    room_id, {"avatar": event_content.get("url")}
+                )
+            elif typ == EventTypes.CanonicalAlias:
+                yield self.store.update_room_state(
+                    room_id, {"canonical_alias": event_content.get("alias")}
+                )
+
+    @defer.inlineCallbacks
+    def update_public_room_stats(self, ts, room_id, is_public):
+        """
+        Increment/decrement a user's number of public rooms when a room they are
+        in changes to/from public visibility.
+
+        Args:
+            ts (int): Timestamp in seconds
+            room_id (str)
+            is_public (bool)
+        """
+        # For now, blindly iterate over all local users in the room so that
+        # we can handle the whole problem of copying buckets over as needed
+        user_ids = yield self.store.get_users_in_room(room_id)
+
+        for user_id in user_ids:
+            if self.hs.is_mine(UserID.from_string(user_id)):
+                yield self.store.update_stats_delta(
+                    ts, "user", user_id, "public_rooms", +1 if is_public else -1
+                )
+                yield self.store.update_stats_delta(
+                    ts, "user", user_id, "private_rooms", -1 if is_public else +1
+                )
+
+    @defer.inlineCallbacks
+    def _is_public_room(self, room_id):
+        join_rules = yield self.state.get_current_state(room_id, EventTypes.JoinRules)
+        history_visibility = yield self.state.get_current_state(
+            room_id, EventTypes.RoomHistoryVisibility
+        )
+
+        if (join_rules and join_rules.content.get("join_rule") == JoinRules.PUBLIC) or (
+            (
+                history_visibility
+                and history_visibility.content.get("history_visibility")
+                == "world_readable"
+            )
+        ):
+            defer.returnValue(True)
+        else:
+            defer.returnValue(False)
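
Editor's note: StatsHandler._handle_deltas above quantises each event's received timestamp down to the start of a stats bucket before applying deltas. A worked example of that arithmetic, assuming a bucket size of 86400 seconds (one day); the real value comes from hs.config.stats_bucket_size:

    stats_bucket_size = 86400        # seconds; assumed value for illustration
    received_ts_ms = 1556297520123   # ms timestamp, as returned by get_received_ts

    # Same expression as in _handle_deltas: ms -> s, then floor to a bucket edge.
    now = (received_ts_ms // 1000 // stats_bucket_size) * stats_bucket_size
    print(now)  # 1556236800, i.e. 00:00 UTC on the day the event was received
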
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 7eefc7b1fc..8197619a78 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -711,10 +711,6 @@ class MatrixFederationHttpClient(object):
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
         """
-        logger.debug("get_json args: %s", args)
-
-        logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail)
-
         request = MatrixFederationRequest(
             method="GET",
             destination=destination,
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 528125e737..197c652850 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -55,7 +55,7 @@ def parse_integer_from_args(args, name, default=None, required=False):
             return int(args[name][0])
         except Exception:
             message = "Query parameter %r must be an integer" % (name,)
-            raise SynapseError(400, message)
+            raise SynapseError(400, message, errcode=Codes.INVALID_PARAM)
     else:
         if required:
             message = "Missing integer query parameter %r" % (name,)
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 2708f5e820..f64baa4d58 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -16,7 +16,12 @@
 
 import logging
 
-from pkg_resources import DistributionNotFound, VersionConflict, get_distribution
+from pkg_resources import (
+    DistributionNotFound,
+    Requirement,
+    VersionConflict,
+    get_provider,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -53,7 +58,7 @@ REQUIREMENTS = [
     "pyasn1-modules>=0.0.7",
     "daemonize>=2.3.1",
     "bcrypt>=3.1.0",
-    "pillow>=3.1.2",
+    "pillow>=4.3.0",
     "sortedcontainers>=1.4.4",
     "psutil>=2.0.0",
     "pymacaroons>=0.13.0",
@@ -69,14 +74,6 @@ REQUIREMENTS = [
     "attrs>=17.4.0",
 
     "netaddr>=0.7.18",
-
-    # requests is a transitive dep of treq, and urlib3 is a transitive dep
-    # of requests, as well as of sentry-sdk.
-    #
-    # As of requests 2.21, requests does not yet support urllib3 1.25.
-    # (If we do not pin it here, pip will give us the latest urllib3
-    # due to the dep via sentry-sdk.)
-    "urllib3<1.25",
 ]
 
 CONDITIONAL_REQUIREMENTS = {
@@ -91,7 +88,13 @@ CONDITIONAL_REQUIREMENTS = {
 
     # ACME support is required to provision TLS certificates from authorities
     # that use the protocol, such as Let's Encrypt.
-    "acme": ["txacme>=0.9.2"],
+    "acme": [
+        "txacme>=0.9.2",
+
+        # txacme depends on eliot. Eliot 1.8.0 is incompatible with
+        # python 3.5.2, as per https://github.com/itamarst/eliot/issues/418
+        'eliot<1.8.0;python_version<"3.5.3"',
+    ],
 
     "saml2": ["pysaml2>=4.5.0"],
     "systemd": ["systemd-python>=231"],
@@ -125,10 +128,10 @@ class DependencyException(Exception):
     @property
     def dependencies(self):
         for i in self.args[0]:
-            yield '"' + i + '"'
+            yield "'" + i + "'"
 
 
-def check_requirements(for_feature=None, _get_distribution=get_distribution):
+def check_requirements(for_feature=None):
     deps_needed = []
     errors = []
 
@@ -139,7 +142,7 @@ def check_requirements(for_feature=None, _get_distribution=get_distribution):
 
     for dependency in reqs:
         try:
-            _get_distribution(dependency)
+            _check_requirement(dependency)
         except VersionConflict as e:
             deps_needed.append(dependency)
             errors.append(
@@ -157,7 +160,7 @@ def check_requirements(for_feature=None, _get_distribution=get_distribution):
 
         for dependency in OPTS:
             try:
-                _get_distribution(dependency)
+                _check_requirement(dependency)
             except VersionConflict as e:
                 deps_needed.append(dependency)
                 errors.append(
@@ -175,6 +178,23 @@ def check_requirements(for_feature=None, _get_distribution=get_distribution):
         raise DependencyException(deps_needed)
 
 
+def _check_requirement(dependency_string):
+    """Parses a dependency string, and checks if the specified requirement is installed
+
+    Raises:
+        VersionConflict if the requirement is installed, but with the wrong version
+        DistributionNotFound if nothing is found to provide the requirement
+    """
+    req = Requirement.parse(dependency_string)
+
+    # first check if the markers specify that this requirement needs installing
+    if req.marker is not None and not req.marker.evaluate():
+        # not required for this environment
+        return
+
+    get_provider(req)
+
+
 if __name__ == "__main__":
     import sys
 
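
Editor's note: the new _check_requirement helper is what makes the environment-marker pin on eliot above work, because markers are evaluated before any installed-version check. A small sketch of that behaviour using the same pkg_resources API:

    from pkg_resources import (
        DistributionNotFound,
        Requirement,
        VersionConflict,
        get_provider,
    )

    dependency = 'eliot<1.8.0;python_version<"3.5.3"'
    req = Requirement.parse(dependency)

    if req.marker is not None and not req.marker.evaluate():
        # e.g. on Python 3.7 the marker is false, so the pin is skipped entirely.
        print("%s: not required in this environment" % (dependency,))
    else:
        try:
            get_provider(req)   # raises if missing or at the wrong version
        except (DistributionNotFound, VersionConflict) as e:
            print("dependency problem: %s" % (e,))
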
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index b457c5563f..a3952506c1 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -23,6 +23,7 @@ from synapse.replication.tcp.streams.events import (
 from synapse.storage.event_federation import EventFederationWorkerStore
 from synapse.storage.event_push_actions import EventPushActionsWorkerStore
 from synapse.storage.events_worker import EventsWorkerStore
+from synapse.storage.relations import RelationsWorkerStore
 from synapse.storage.roommember import RoomMemberWorkerStore
 from synapse.storage.signatures import SignatureWorkerStore
 from synapse.storage.state import StateGroupWorkerStore
@@ -52,6 +53,7 @@ class SlavedEventStore(EventFederationWorkerStore,
                        EventsWorkerStore,
                        SignatureWorkerStore,
                        UserErasureWorkerStore,
+                       RelationsWorkerStore,
                        BaseSlavedStore):
 
     def __init__(self, db_conn, hs):
@@ -89,7 +91,7 @@ class SlavedEventStore(EventFederationWorkerStore,
             for row in rows:
                 self.invalidate_caches_for_event(
                     -token, row.event_id, row.room_id, row.type, row.state_key,
-                    row.redacts,
+                    row.redacts, row.relates_to,
                     backfilled=True,
                 )
         return super(SlavedEventStore, self).process_replication_rows(
@@ -102,7 +104,7 @@ class SlavedEventStore(EventFederationWorkerStore,
         if row.type == EventsStreamEventRow.TypeId:
             self.invalidate_caches_for_event(
                 token, data.event_id, data.room_id, data.type, data.state_key,
-                data.redacts,
+                data.redacts, data.relates_to,
                 backfilled=False,
             )
         elif row.type == EventsStreamCurrentStateRow.TypeId:
@@ -114,7 +116,8 @@ class SlavedEventStore(EventFederationWorkerStore,
             raise Exception("Unknown events stream row type %s" % (row.type, ))
 
     def invalidate_caches_for_event(self, stream_ordering, event_id, room_id,
-                                    etype, state_key, redacts, backfilled):
+                                    etype, state_key, redacts, relates_to,
+                                    backfilled):
         self._invalidate_get_event_cache(event_id)
 
         self.get_latest_event_ids_in_room.invalidate((room_id,))
@@ -136,3 +139,8 @@ class SlavedEventStore(EventFederationWorkerStore,
                 state_key, stream_ordering
             )
             self.get_invited_rooms_for_user.invalidate((state_key,))
+
+        if relates_to:
+            self.get_relations_for_event.invalidate_many((relates_to,))
+            self.get_aggregation_groups_for_event.invalidate_many((relates_to,))
+            self.get_applicable_edit.invalidate((relates_to,))
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index 8971a6a22e..b6ce7a7bee 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -32,6 +32,7 @@ BackfillStreamRow = namedtuple("BackfillStreamRow", (
     "type",  # str
     "state_key",  # str, optional
     "redacts",  # str, optional
+    "relates_to",  # str, optional
 ))
 PresenceStreamRow = namedtuple("PresenceStreamRow", (
     "user_id",  # str
diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py
index e0f6e29248..f1290d022a 100644
--- a/synapse/replication/tcp/streams/events.py
+++ b/synapse/replication/tcp/streams/events.py
@@ -80,11 +80,12 @@ class BaseEventsStreamRow(object):
 class EventsStreamEventRow(BaseEventsStreamRow):
     TypeId = "ev"
 
-    event_id = attr.ib()   # str
-    room_id = attr.ib()    # str
-    type = attr.ib()       # str
-    state_key = attr.ib()  # str, optional
-    redacts = attr.ib()    # str, optional
+    event_id = attr.ib()    # str
+    room_id = attr.ib()     # str
+    type = attr.ib()        # str
+    state_key = attr.ib()   # str, optional
+    redacts = attr.ib()     # str, optional
+    relates_to = attr.ib()  # str, optional
 
 
 @attr.s(slots=True, frozen=True)
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 3a24d31d1b..e6110ad9b1 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -44,6 +44,7 @@ from synapse.rest.client.v2_alpha import (
     read_marker,
     receipts,
     register,
+    relations,
     report_event,
     room_keys,
     room_upgrade_rest_servlet,
@@ -115,6 +116,7 @@ class ClientRestResource(JsonResource):
         room_upgrade_rest_servlet.register_servlets(hs, client_resource)
         capabilities.register_servlets(hs, client_resource)
         account_validity.register_servlets(hs, client_resource)
+        relations.register_servlets(hs, client_resource)
 
         # moving to /_synapse/admin
         synapse.rest.admin.register_servlets_for_client_rest_resource(
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 744d85594f..d6c4dcdb18 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -822,10 +822,16 @@ class AdminRestResource(JsonResource):
 
     def __init__(self, hs):
         JsonResource.__init__(self, hs, canonical_json=False)
+        register_servlets(hs, self)
 
-        register_servlets_for_client_rest_resource(hs, self)
-        SendServerNoticeServlet(hs).register(self)
-        VersionServlet(hs).register(self)
+
+def register_servlets(hs, http_server):
+    """
+    Register all the admin servlets.
+    """
+    register_servlets_for_client_rest_resource(hs, http_server)
+    SendServerNoticeServlet(hs).register(http_server)
+    VersionServlet(hs).register(http_server)
 
 
 def register_servlets_for_client_rest_resource(hs, http_server):
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 5180e9eaf1..029039c162 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -386,7 +386,7 @@ class CasRedirectServlet(RestServlet):
             b"redirectUrl": args[b"redirectUrl"][0]
         }).encode('ascii')
         hs_redirect_url = (self.cas_service_url +
-                           b"/_matrix/client/api/v1/login/cas/ticket")
+                           b"/_matrix/client/r0/login/cas/ticket")
         service_param = urllib.parse.urlencode({
             b"service": b"%s?%s" % (hs_redirect_url, client_redirect_url_param)
         }).encode('ascii')
@@ -395,7 +395,7 @@ class CasRedirectServlet(RestServlet):
 
 
 class CasTicketServlet(ClientV1RestServlet):
-    PATTERNS = client_path_patterns("/login/cas/ticket", releases=())
+    PATTERNS = client_path_patterns("/login/cas/ticket")
 
     def __init__(self, hs):
         super(CasTicketServlet, self).__init__(hs)
diff --git a/synapse/rest/client/v1/logout.py b/synapse/rest/client/v1/logout.py
index 430c692336..ba20e75033 100644
--- a/synapse/rest/client/v1/logout.py
+++ b/synapse/rest/client/v1/logout.py
@@ -17,8 +17,6 @@ import logging
 
 from twisted.internet import defer
 
-from synapse.api.errors import AuthError
-
 from .base import ClientV1RestServlet, client_path_patterns
 
 logger = logging.getLogger(__name__)
@@ -38,23 +36,16 @@ class LogoutRestServlet(ClientV1RestServlet):
 
     @defer.inlineCallbacks
     def on_POST(self, request):
-        try:
-            requester = yield self.auth.get_user_by_req(request)
-        except AuthError:
-            # this implies the access token has already been deleted.
-            defer.returnValue((401, {
-                "errcode": "M_UNKNOWN_TOKEN",
-                "error": "Access Token unknown or expired"
-            }))
+        requester = yield self.auth.get_user_by_req(request)
+
+        if requester.device_id is None:
+            # the access token wasn't associated with a device.
+            # Just delete the access token
+            access_token = self._auth.get_access_token_from_request(request)
+            yield self._auth_handler.delete_access_token(access_token)
         else:
-            if requester.device_id is None:
-                # the acccess token wasn't associated with a device.
-                # Just delete the access token
-                access_token = self._auth.get_access_token_from_request(request)
-                yield self._auth_handler.delete_access_token(access_token)
-            else:
-                yield self._device_handler.delete_device(
-                    requester.user.to_string(), requester.device_id)
+            yield self._device_handler.delete_device(
+                requester.user.to_string(), requester.device_id)
 
         defer.returnValue((200, {}))
 
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 255a85c588..b92c6a9a9c 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -475,6 +475,8 @@ class RoomMessageListRestServlet(ClientV1RestServlet):
         if filter_bytes:
             filter_json = urlparse.unquote(filter_bytes.decode("UTF-8"))
             event_filter = Filter(json.loads(filter_json))
+            if event_filter.filter_json.get("event_format", "client") == "federation":
+                as_client_event = False
         else:
             event_filter = None
         msgs = yield self.pagination_handler.get_messages(
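
Editor's note: the /messages change above switches off client-format serialisation when the supplied filter asks for "event_format": "federation". A sketch of building such a request's query string (the endpoint is shown in a comment; parameter names follow the standard /messages API):

    import json
    import urllib.parse

    event_filter = {"event_format": "federation"}
    query = urllib.parse.urlencode({
        "dir": "b",
        "limit": "10",
        "filter": json.dumps(event_filter),
    })
    # GET /_matrix/client/r0/rooms/{roomId}/messages?<query>
    # With "event_format": "federation" in the filter, the servlet above sets
    # as_client_event = False and returns events in the federation format.
    print(query)
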
diff --git a/synapse/rest/client/v2_alpha/capabilities.py b/synapse/rest/client/v2_alpha/capabilities.py
index a868d06098..2b4892330c 100644
--- a/synapse/rest/client/v2_alpha/capabilities.py
+++ b/synapse/rest/client/v2_alpha/capabilities.py
@@ -16,7 +16,7 @@ import logging
 
 from twisted.internet import defer
 
-from synapse.api.room_versions import DEFAULT_ROOM_VERSION, KNOWN_ROOM_VERSIONS
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.http.servlet import RestServlet
 
 from ._base import client_v2_patterns
@@ -36,6 +36,7 @@ class CapabilitiesRestServlet(RestServlet):
         """
         super(CapabilitiesRestServlet, self).__init__()
         self.hs = hs
+        self.config = hs.config
         self.auth = hs.get_auth()
         self.store = hs.get_datastore()
 
@@ -48,7 +49,7 @@ class CapabilitiesRestServlet(RestServlet):
         response = {
             "capabilities": {
                 "m.room_versions": {
-                    "default": DEFAULT_ROOM_VERSION.identifier,
+                    "default": self.config.default_room_version.identifier,
                     "available": {
                         v.identifier: v.disposition
                         for v in KNOWN_ROOM_VERSIONS.values()
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index fa0cedb8d4..042f636135 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -348,18 +348,22 @@ class RegisterRestServlet(RestServlet):
         if self.hs.config.enable_registration_captcha:
             # only support 3PIDless registration if no 3PIDs are required
             if not require_email and not require_msisdn:
-                flows.extend([[LoginType.RECAPTCHA]])
+                # Also add a dummy flow here, otherwise if a client completes
+                # recaptcha first we'll assume they were going for this flow
+                # and complete the request, when they could have been trying to
+                # complete one of the flows with email/msisdn auth.
+                flows.extend([[LoginType.RECAPTCHA, LoginType.DUMMY]])
             # only support the email-only flow if we don't require MSISDN 3PIDs
             if not require_msisdn:
-                flows.extend([[LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA]])
+                flows.extend([[LoginType.RECAPTCHA, LoginType.EMAIL_IDENTITY]])
 
             if show_msisdn:
                 # only support the MSISDN-only flow if we don't require email 3PIDs
                 if not require_email:
-                    flows.extend([[LoginType.MSISDN, LoginType.RECAPTCHA]])
+                    flows.extend([[LoginType.RECAPTCHA, LoginType.MSISDN]])
                 # always let users provide both MSISDN & email
                 flows.extend([
-                    [LoginType.MSISDN, LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA],
+                    [LoginType.RECAPTCHA, LoginType.MSISDN, LoginType.EMAIL_IDENTITY],
                 ])
         else:
             # only support 3PIDless registration if no 3PIDs are required
@@ -382,7 +386,15 @@ class RegisterRestServlet(RestServlet):
         if self.hs.config.user_consent_at_registration:
             new_flows = []
             for flow in flows:
-                flow.append(LoginType.TERMS)
+                inserted = False
+                # m.login.terms should go near the end but before msisdn or email auth
+                for i, stage in enumerate(flow):
+                    if stage == LoginType.EMAIL_IDENTITY or stage == LoginType.MSISDN:
+                        flow.insert(i, LoginType.TERMS)
+                        inserted = True
+                        break
+                if not inserted:
+                    flow.append(LoginType.TERMS)
             flows.extend(new_flows)
 
         auth_result, params, session_id = yield self.auth_handler.check_auth(
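
Editor's note: with the reordering above, recaptcha now comes first in every captcha-gated flow, and m.login.terms is inserted just before any email/msisdn stage (or appended when there is none). A sketch of the insertion loop applied to the flows produced for a captcha-enabled server showing msisdn, with consent enabled; plain strings stand in for the LoginType constants:

    RECAPTCHA, DUMMY = "m.login.recaptcha", "m.login.dummy"
    EMAIL, MSISDN, TERMS = "m.login.email.identity", "m.login.msisdn", "m.login.terms"

    flows = [
        [RECAPTCHA, DUMMY],
        [RECAPTCHA, EMAIL],
        [RECAPTCHA, MSISDN],
        [RECAPTCHA, MSISDN, EMAIL],
    ]

    for flow in flows:
        for i, stage in enumerate(flow):
            if stage in (EMAIL, MSISDN):
                flow.insert(i, TERMS)
                break
        else:
            flow.append(TERMS)

    # flows is now:
    #   [recaptcha, dummy, terms]
    #   [recaptcha, terms, email.identity]
    #   [recaptcha, terms, msisdn]
    #   [recaptcha, terms, msisdn, email.identity]
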
diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py
new file mode 100644
index 0000000000..41e0a44936
--- /dev/null
+++ b/synapse/rest/client/v2_alpha/relations.py
@@ -0,0 +1,338 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This class implements the proposed relation APIs from MSC 1849.
+
+Since the MSC has not been approved all APIs here are unstable and may change at
+any time to reflect changes in the MSC.
+"""
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, RelationTypes
+from synapse.api.errors import SynapseError
+from synapse.http.servlet import (
+    RestServlet,
+    parse_integer,
+    parse_json_object_from_request,
+    parse_string,
+)
+from synapse.rest.client.transactions import HttpTransactionCache
+from synapse.storage.relations import AggregationPaginationToken, RelationPaginationToken
+
+from ._base import client_v2_patterns
+
+logger = logging.getLogger(__name__)
+
+
+class RelationSendServlet(RestServlet):
+    """Helper API for sending events that have relation data.
+
+    Example API shape to send a 👍 reaction to a room:
+
+        POST /rooms/!foo/send_relation/$bar/m.annotation/m.reaction?key=%F0%9F%91%8D
+        {}
+
+        {
+            "event_id": "$foobar"
+        }
+    """
+
+    PATTERN = (
+        "/rooms/(?P<room_id>[^/]*)/send_relation"
+        "/(?P<parent_id>[^/]*)/(?P<relation_type>[^/]*)/(?P<event_type>[^/]*)"
+    )
+
+    def __init__(self, hs):
+        super(RelationSendServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.event_creation_handler = hs.get_event_creation_handler()
+        self.txns = HttpTransactionCache(hs)
+
+    def register(self, http_server):
+        http_server.register_paths(
+            "POST",
+            client_v2_patterns(self.PATTERN + "$", releases=()),
+            self.on_PUT_or_POST,
+        )
+        http_server.register_paths(
+            "PUT",
+            client_v2_patterns(self.PATTERN + "/(?P<txn_id>[^/]*)$", releases=()),
+            self.on_PUT,
+        )
+
+    def on_PUT(self, request, *args, **kwargs):
+        return self.txns.fetch_or_execute_request(
+            request, self.on_PUT_or_POST, request, *args, **kwargs
+        )
+
+    @defer.inlineCallbacks
+    def on_PUT_or_POST(
+        self, request, room_id, parent_id, relation_type, event_type, txn_id=None
+    ):
+        requester = yield self.auth.get_user_by_req(request, allow_guest=True)
+
+        if event_type == EventTypes.Member:
+            # Adding relations to a membership event is meaningless, so we just
+            # deny it at the CS API rather than trying to handle it correctly.
+            raise SynapseError(400, "Cannot send member events with relations")
+
+        content = parse_json_object_from_request(request)
+
+        aggregation_key = parse_string(request, "key", encoding="utf-8")
+
+        content["m.relates_to"] = {
+            "event_id": parent_id,
+            "key": aggregation_key,
+            "rel_type": relation_type,
+        }
+
+        event_dict = {
+            "type": event_type,
+            "content": content,
+            "room_id": room_id,
+            "sender": requester.user.to_string(),
+        }
+
+        event = yield self.event_creation_handler.create_and_send_nonmember_event(
+            requester, event_dict=event_dict, txn_id=txn_id
+        )
+
+        defer.returnValue((200, {"event_id": event.event_id}))
+
+
+class RelationPaginationServlet(RestServlet):
+    """API to paginate relations on an event by topological ordering, optionally
+    filtered by relation type and event type.
+    """
+
+    PATTERNS = client_v2_patterns(
+        "/rooms/(?P<room_id>[^/]*)/relations/(?P<parent_id>[^/]*)"
+        "(/(?P<relation_type>[^/]*)(/(?P<event_type>[^/]*))?)?$",
+        releases=(),
+    )
+
+    def __init__(self, hs):
+        super(RelationPaginationServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
+        self.clock = hs.get_clock()
+        self._event_serializer = hs.get_event_client_serializer()
+        self.event_handler = hs.get_event_handler()
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, room_id, parent_id, relation_type=None, event_type=None):
+        requester = yield self.auth.get_user_by_req(request, allow_guest=True)
+
+        yield self.auth.check_in_room_or_world_readable(
+            room_id, requester.user.to_string()
+        )
+
+        # This checks that a) the event exists and b) the user is allowed to
+        # view it.
+        yield self.event_handler.get_event(requester.user, room_id, parent_id)
+
+        limit = parse_integer(request, "limit", default=5)
+        from_token = parse_string(request, "from")
+        to_token = parse_string(request, "to")
+
+        if from_token:
+            from_token = RelationPaginationToken.from_string(from_token)
+
+        if to_token:
+            to_token = RelationPaginationToken.from_string(to_token)
+
+        result = yield self.store.get_relations_for_event(
+            event_id=parent_id,
+            relation_type=relation_type,
+            event_type=event_type,
+            limit=limit,
+            from_token=from_token,
+            to_token=to_token,
+        )
+
+        events = yield self.store.get_events_as_list(
+            [c["event_id"] for c in result.chunk]
+        )
+
+        now = self.clock.time_msec()
+        events = yield self._event_serializer.serialize_events(events, now)
+
+        return_value = result.to_dict()
+        return_value["chunk"] = events
+
+        defer.returnValue((200, return_value))
+
+
+class RelationAggregationPaginationServlet(RestServlet):
+    """API to paginate aggregation groups of relations, e.g. paginate the
+    types and counts of the reactions on the events.
+
+    Example request and response:
+
+        GET /rooms/{room_id}/aggregations/{parent_id}
+
+        {
+            chunk: [
+                {
+                    "type": "m.reaction",
+                    "key": "👍",
+                    "count": 3
+                }
+            ]
+        }
+    """
+
+    PATTERNS = client_v2_patterns(
+        "/rooms/(?P<room_id>[^/]*)/aggregations/(?P<parent_id>[^/]*)"
+        "(/(?P<relation_type>[^/]*)(/(?P<event_type>[^/]*))?)?$",
+        releases=(),
+    )
+
+    def __init__(self, hs):
+        super(RelationAggregationPaginationServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
+        self.event_handler = hs.get_event_handler()
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, room_id, parent_id, relation_type=None, event_type=None):
+        requester = yield self.auth.get_user_by_req(request, allow_guest=True)
+
+        yield self.auth.check_in_room_or_world_readable(
+            room_id, requester.user.to_string()
+        )
+
+        # This checks that a) the event exists and b) the user is allowed to
+        # view it.
+        yield self.event_handler.get_event(requester.user, room_id, parent_id)
+
+        if relation_type not in (RelationTypes.ANNOTATION, None):
+            raise SynapseError(400, "Relation type must be 'annotation'")
+
+        limit = parse_integer(request, "limit", default=5)
+        from_token = parse_string(request, "from")
+        to_token = parse_string(request, "to")
+
+        if from_token:
+            from_token = AggregationPaginationToken.from_string(from_token)
+
+        if to_token:
+            to_token = AggregationPaginationToken.from_string(to_token)
+
+        res = yield self.store.get_aggregation_groups_for_event(
+            event_id=parent_id,
+            event_type=event_type,
+            limit=limit,
+            from_token=from_token,
+            to_token=to_token,
+        )
+
+        defer.returnValue((200, res.to_dict()))
+
+
+class RelationAggregationGroupPaginationServlet(RestServlet):
+    """API to paginate within an aggregation group of relations, e.g. paginate
+    all the 👍 reactions on an event.
+
+    Example request and response:
+
+        GET /rooms/{room_id}/aggregations/{parent_id}/m.annotation/m.reaction/👍
+
+        {
+            chunk: [
+                {
+                    "type": "m.reaction",
+                    "content": {
+                        "m.relates_to": {
+                            "rel_type": "m.annotation",
+                            "key": "👍"
+                        }
+                    }
+                },
+                ...
+            ]
+        }
+    """
+
+    PATTERNS = client_v2_patterns(
+        "/rooms/(?P<room_id>[^/]*)/aggregations/(?P<parent_id>[^/]*)"
+        "/(?P<relation_type>[^/]*)/(?P<event_type>[^/]*)/(?P<key>[^/]*)$",
+        releases=(),
+    )
+
+    def __init__(self, hs):
+        super(RelationAggregationGroupPaginationServlet, self).__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
+        self.clock = hs.get_clock()
+        self._event_serializer = hs.get_event_client_serializer()
+        self.event_handler = hs.get_event_handler()
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, room_id, parent_id, relation_type, event_type, key):
+        requester = yield self.auth.get_user_by_req(request, allow_guest=True)
+
+        yield self.auth.check_in_room_or_world_readable(
+            room_id, requester.user.to_string()
+        )
+
+        # This checks that a) the event exists and b) the user is allowed to
+        # view it.
+        yield self.event_handler.get_event(requester.user, room_id, parent_id)
+
+        if relation_type != RelationTypes.ANNOTATION:
+            raise SynapseError(400, "Relation type must be 'annotation'")
+
+        limit = parse_integer(request, "limit", default=5)
+        from_token = parse_string(request, "from")
+        to_token = parse_string(request, "to")
+
+        if from_token:
+            from_token = RelationPaginationToken.from_string(from_token)
+
+        if to_token:
+            to_token = RelationPaginationToken.from_string(to_token)
+
+        result = yield self.store.get_relations_for_event(
+            event_id=parent_id,
+            relation_type=relation_type,
+            event_type=event_type,
+            aggregation_key=key,
+            limit=limit,
+            from_token=from_token,
+            to_token=to_token,
+        )
+
+        events = yield self.store.get_events_as_list(
+            [c["event_id"] for c in result.chunk]
+        )
+
+        now = self.clock.time_msec()
+        events = yield self._event_serializer.serialize_events(events, now)
+
+        return_value = result.to_dict()
+        return_value["chunk"] = events
+
+        defer.returnValue((200, return_value))
+
+
+def register_servlets(hs, http_server):
+    RelationSendServlet(hs).register(http_server)
+    RelationPaginationServlet(hs).register(http_server)
+    RelationAggregationPaginationServlet(hs).register(http_server)
+    RelationAggregationGroupPaginationServlet(hs).register(http_server)
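
Editor's note: a sketch of exercising the relation-send endpoint added above from a script, assuming a local homeserver and a valid access token (the hostname, token, room and event IDs are placeholders):

    import json
    import urllib.parse
    import urllib.request

    homeserver = "http://localhost:8008"        # assumed local homeserver
    access_token = "ACCESS_TOKEN"               # placeholder
    room_id = "!foo:example.com"
    parent_id = "$bar"

    # releases=() on the servlet patterns means the API is only exposed under
    # the unstable prefix.
    path = "/_matrix/client/unstable/rooms/%s/send_relation/%s/m.annotation/m.reaction" % (
        urllib.parse.quote(room_id, safe=""),
        urllib.parse.quote(parent_id, safe=""),
    )
    url = "%s%s?key=%s&access_token=%s" % (
        homeserver, path, urllib.parse.quote("👍", safe=""), access_token,
    )

    req = urllib.request.Request(url, data=b"{}", method="POST")
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp))                  # -> {"event_id": "$..."}
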
diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py
index c701e534e7..d3025025e3 100644
--- a/synapse/rest/client/v2_alpha/sync.py
+++ b/synapse/rest/client/v2_alpha/sync.py
@@ -358,6 +358,9 @@ class SyncRestServlet(RestServlet):
         def serialize(events):
             return self._event_serializer.serialize_events(
                 events, time_now=time_now,
+                # We don't bundle "live" events, as otherwise clients
+                # will end up double counting annotations.
+                bundle_aggregations=False,
                 token_id=token_id,
                 event_format=event_formatter,
                 only_event_fields=only_fields,
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index eb8782aa6e..21c3c807b9 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -20,7 +20,7 @@ from twisted.web.resource import Resource
 from twisted.web.server import NOT_DONE_YET
 
 from synapse.api.errors import Codes, SynapseError
-from synapse.crypto.keyring import KeyLookupError
+from synapse.crypto.keyring import KeyLookupError, ServerKeyFetcher
 from synapse.http.server import respond_with_json_bytes, wrap_json_request_handler
 from synapse.http.servlet import parse_integer, parse_json_object_from_request
 
@@ -89,7 +89,7 @@ class RemoteKey(Resource):
     isLeaf = True
 
     def __init__(self, hs):
-        self.keyring = hs.get_keyring()
+        self.fetcher = ServerKeyFetcher(hs)
         self.store = hs.get_datastore()
         self.clock = hs.get_clock()
         self.federation_domain_whitelist = hs.config.federation_domain_whitelist
@@ -217,7 +217,7 @@ class RemoteKey(Resource):
         if cache_misses and query_remote_on_cache_miss:
             for server_name, key_ids in cache_misses.items():
                 try:
-                    yield self.keyring.get_server_verify_key_v2_direct(
+                    yield self.fetcher.get_server_verify_key_v2_direct(
                         server_name, key_ids
                     )
                 except KeyLookupError as e:
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index bdffa97805..8569677355 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -444,6 +444,9 @@ class MediaRepository(object):
             )
             return
 
+        if thumbnailer.transpose_method is not None:
+            m_width, m_height = thumbnailer.transpose()
+
         if t_method == "crop":
             t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
         elif t_method == "scale":
@@ -578,6 +581,12 @@ class MediaRepository(object):
             )
             return
 
+        if thumbnailer.transpose_method is not None:
+            m_width, m_height = yield logcontext.defer_to_thread(
+                self.hs.get_reactor(),
+                thumbnailer.transpose
+            )
+
         # We deduplicate the thumbnail sizes by ignoring the cropped versions if
         # they have the same dimensions of a scaled one.
         thumbnails = {}
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 5305e9175f..35a750923b 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -56,8 +56,8 @@ class ThumbnailResource(Resource):
     def _async_render_GET(self, request):
         set_cors_headers(request)
         server_name, media_id, _ = parse_media_id(request)
-        width = parse_integer(request, "width")
-        height = parse_integer(request, "height")
+        width = parse_integer(request, "width", required=True)
+        height = parse_integer(request, "height", required=True)
         method = parse_string(request, "method", "scale")
         m_type = parse_string(request, "type", "image/png")
 
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index a4b26c2587..3efd0d80fc 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -20,6 +20,17 @@ import PIL.Image as Image
 
 logger = logging.getLogger(__name__)
 
+EXIF_ORIENTATION_TAG = 0x0112
+EXIF_TRANSPOSE_MAPPINGS = {
+    2: Image.FLIP_LEFT_RIGHT,
+    3: Image.ROTATE_180,
+    4: Image.FLIP_TOP_BOTTOM,
+    5: Image.TRANSPOSE,
+    6: Image.ROTATE_270,
+    7: Image.TRANSVERSE,
+    8: Image.ROTATE_90
+}
+
 
 class Thumbnailer(object):
 
@@ -31,6 +42,30 @@ class Thumbnailer(object):
     def __init__(self, input_path):
         self.image = Image.open(input_path)
         self.width, self.height = self.image.size
+        self.transpose_method = None
+        try:
+            # We don't use ImageOps.exif_transpose since it crashes with big EXIF
+            image_exif = self.image._getexif()
+            if image_exif is not None:
+                image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
+                self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
+        except Exception as e:
+            # A lot of parsing errors can happen when parsing EXIF
+            logger.info("Error parsing image EXIF information: %s", e)
+
+    def transpose(self):
+        """Transpose the image using its EXIF Orientation tag
+
+        Returns:
+            Tuple[int, int]: (width, height) containing the new image size in pixels.
+        """
+        if self.transpose_method is not None:
+            self.image = self.image.transpose(self.transpose_method)
+            self.width, self.height = self.image.size
+            self.transpose_method = None
+            # We don't need EXIF any more
+            self.image.info["exif"] = None
+        return self.image.size
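
For reference, the orientation handling added here can be exercised standalone: read the EXIF Orientation tag (0x0112) and apply the matching PIL transpose operation. This is a sketch assuming Pillow and a placeholder "portrait.jpg" input, not part of the change itself:

# Open an image and undo any EXIF rotation, mirroring the mapping above.
import PIL.Image as Image

EXIF_ORIENTATION_TAG = 0x0112
EXIF_TRANSPOSE_MAPPINGS = {
    2: Image.FLIP_LEFT_RIGHT,
    3: Image.ROTATE_180,
    4: Image.FLIP_TOP_BOTTOM,
    5: Image.TRANSPOSE,
    6: Image.ROTATE_270,
    7: Image.TRANSVERSE,
    8: Image.ROTATE_90,
}


def open_upright(path):
    image = Image.open(path)
    try:
        exif = image._getexif()  # may be None, or raise on malformed EXIF
    except Exception:
        exif = None
    method = EXIF_TRANSPOSE_MAPPINGS.get((exif or {}).get(EXIF_ORIENTATION_TAG))
    if method is not None:
        image = image.transpose(method)
    return image, image.size


img, size = open_upright("portrait.jpg")
print("upright size:", size)

Pillow's ImageOps.exif_transpose does the same job, but as the comment above notes it can fall over on images with oversized EXIF blocks, hence the manual mapping.
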
 
     def aspect(self, max_width, max_height):
         """Calculate the largest size that preserves aspect ratio which
diff --git a/synapse/server.py b/synapse/server.py
index 80d40b9272..9229a68a8d 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -72,6 +72,7 @@ from synapse.handlers.room_list import RoomListHandler
 from synapse.handlers.room_member import RoomMemberMasterHandler
 from synapse.handlers.room_member_worker import RoomMemberWorkerHandler
 from synapse.handlers.set_password import SetPasswordHandler
+from synapse.handlers.stats import StatsHandler
 from synapse.handlers.sync import SyncHandler
 from synapse.handlers.typing import TypingHandler
 from synapse.handlers.user_directory import UserDirectoryHandler
@@ -139,6 +140,7 @@ class HomeServer(object):
         'acme_handler',
         'auth_handler',
         'device_handler',
+        'stats_handler',
         'e2e_keys_handler',
         'e2e_room_keys_handler',
         'event_handler',
@@ -191,6 +193,7 @@ class HomeServer(object):
 
     REQUIRED_ON_MASTER_STARTUP = [
         "user_directory_handler",
+        "stats_handler"
     ]
 
     # This is overridden in derived application classes
@@ -474,6 +477,9 @@ class HomeServer(object):
     def build_secrets(self):
         return Secrets()
 
+    def build_stats_handler(self):
+        return StatsHandler(self)
+
     def build_spam_checker(self):
         return SpamChecker(self)
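
The stats handler is wired in through the same lazy-builder convention as the rest of the HANDLERS list: declare the name, provide a build_stats_handler method, and the homeserver object constructs and caches the handler on first use. A toy illustration of that pattern (hypothetical, not Synapse's actual implementation, which generates the get_* accessors for you):

class MiniHomeServer:
    """Toy sketch of the build_*/get_* convention; not Synapse code."""

    def __init__(self):
        self._built = {}

    def _get(self, name):
        # Build the dependency on first access and cache it afterwards.
        if name not in self._built:
            self._built[name] = getattr(self, "build_%s" % name)()
        return self._built[name]

    def build_stats_handler(self):
        return object()  # stand-in for StatsHandler(self)

    def get_stats_handler(self):
        return self._get("stats_handler")


hs = MiniHomeServer()
assert hs.get_stats_handler() is hs.get_stats_handler()  # built once, cached
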
 
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index c432041b4e..71316f7d09 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -36,6 +36,7 @@ from .engines import PostgresEngine
 from .event_federation import EventFederationStore
 from .event_push_actions import EventPushActionsStore
 from .events import EventsStore
+from .events_bg_updates import EventsBackgroundUpdatesStore
 from .filtering import FilteringStore
 from .group_server import GroupServerStore
 from .keys import KeyStore
@@ -49,11 +50,13 @@ from .pusher import PusherStore
 from .receipts import ReceiptsStore
 from .registration import RegistrationStore
 from .rejections import RejectionsStore
+from .relations import RelationsStore
 from .room import RoomStore
 from .roommember import RoomMemberStore
 from .search import SearchStore
 from .signatures import SignatureStore
 from .state import StateStore
+from .stats import StatsStore
 from .stream import StreamStore
 from .tags import TagsStore
 from .transactions import TransactionStore
@@ -64,6 +67,7 @@ logger = logging.getLogger(__name__)
 
 
 class DataStore(
+    EventsBackgroundUpdatesStore,
     RoomMemberStore,
     RoomStore,
     RegistrationStore,
@@ -99,6 +103,8 @@ class DataStore(
     GroupServerStore,
     UserErasureStore,
     MonthlyActiveUsersStore,
+    StatsStore,
+    RelationsStore,
 ):
     def __init__(self, db_conn, hs):
         self.hs = hs
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 086318a530..52891bb9eb 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -1279,7 +1279,8 @@ class SQLBaseStore(object):
             " AND ".join("%s = ?" % (k,) for k in keyvalues),
         )
 
-        return txn.execute(sql, list(keyvalues.values()))
+        txn.execute(sql, list(keyvalues.values()))
+        return txn.rowcount
 
     def _simple_delete_many(self, table, column, iterable, keyvalues, desc):
         return self.runInteraction(
@@ -1298,9 +1299,12 @@ class SQLBaseStore(object):
             column : column name to test for inclusion against `iterable`
             iterable : list
             keyvalues : dict of column names and values to select the rows with
+
+        Returns:
+            int: Number of rows deleted
         """
         if not iterable:
-            return
+            return 0
 
         sql = "DELETE FROM %s" % table
 
@@ -1315,7 +1319,9 @@ class SQLBaseStore(object):
 
         if clauses:
             sql = "%s WHERE %s" % (sql, " AND ".join(clauses))
-        return txn.execute(sql, values)
+        txn.execute(sql, values)
+
+        return txn.rowcount
 
     def _get_cache_dict(
         self, db_conn, table, entity_column, stream_column, max_value, limit=100000
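
Returning txn.rowcount means callers of the _simple_delete_* helpers can now see how many rows were actually removed (the extremities clean-up later in this changeset logs exactly that). rowcount after a DELETE is standard DB-API behaviour, as a quick sqlite3 check with a throwaway in-memory table shows:

# cursor.rowcount reports how many rows the DELETE removed.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE extremities (event_id TEXT, room_id TEXT)")
conn.executemany(
    "INSERT INTO extremities VALUES (?, ?)",
    [("$a", "!room"), ("$b", "!room"), ("$c", "!other")],
)

cur = conn.execute("DELETE FROM extremities WHERE room_id = ?", ("!room",))
print(cur.rowcount)  # 2
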
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 7a7f841c6c..f9162be9b9 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
+# Copyright 2018-2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -219,41 +220,11 @@ class EventsStore(
     EventsWorkerStore,
     BackgroundUpdateStore,
 ):
-    EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
-    EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
 
     def __init__(self, db_conn, hs):
         super(EventsStore, self).__init__(db_conn, hs)
-        self.register_background_update_handler(
-            self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts
-        )
-        self.register_background_update_handler(
-            self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME,
-            self._background_reindex_fields_sender,
-        )
-
-        self.register_background_index_update(
-            "event_contains_url_index",
-            index_name="event_contains_url_index",
-            table="events",
-            columns=["room_id", "topological_ordering", "stream_ordering"],
-            where_clause="contains_url = true AND outlier = false",
-        )
-
-        # an event_id index on event_search is useful for the purge_history
-        # api. Plus it means we get to enforce some integrity with a UNIQUE
-        # clause
-        self.register_background_index_update(
-            "event_search_event_id_idx",
-            index_name="event_search_event_id_idx",
-            table="event_search",
-            columns=["event_id"],
-            unique=True,
-            psql_only=True,
-        )
 
         self._event_persist_queue = _EventPeristenceQueue()
-
         self._state_resolution_handler = hs.get_state_resolution_handler()
 
     @defer.inlineCallbacks
@@ -554,10 +525,18 @@ class EventsStore(
             e_id for event in new_events for e_id in event.prev_event_ids()
         )
 
-        # Finally, remove any events which are prev_events of any existing events.
+        # Remove any events which are prev_events of any existing events.
         existing_prevs = yield self._get_events_which_are_prevs(result)
         result.difference_update(existing_prevs)
 
+        # Finally, handle the case where the new events have soft-failed prev
+        # events. If they do, we need to remove them and their prev events,
+        # otherwise we end up with dangling extremities.
+        existing_prevs = yield self._get_prevs_before_rejected(
+            e_id for event in new_events for e_id in event.prev_event_ids()
+        )
+        result.difference_update(existing_prevs)
+
         defer.returnValue(result)
 
     @defer.inlineCallbacks
@@ -573,12 +552,13 @@ class EventsStore(
         """
         results = []
 
-        def _get_events(txn, batch):
+        def _get_events_which_are_prevs_txn(txn, batch):
             sql = """
-            SELECT prev_event_id
+            SELECT prev_event_id, internal_metadata
             FROM event_edges
                 INNER JOIN events USING (event_id)
                 LEFT JOIN rejections USING (event_id)
+                LEFT JOIN event_json USING (event_id)
             WHERE
                 prev_event_id IN (%s)
                 AND NOT events.outlier
@@ -588,14 +568,86 @@ class EventsStore(
             )
 
             txn.execute(sql, batch)
-            results.extend(r[0] for r in txn)
+            results.extend(
+                r[0]
+                for r in txn
+                if not json.loads(r[1]).get("soft_failed")
+            )
 
         for chunk in batch_iter(event_ids, 100):
-            yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk)
+            yield self.runInteraction(
+                "_get_events_which_are_prevs",
+                _get_events_which_are_prevs_txn,
+                chunk,
+            )
 
         defer.returnValue(results)
 
     @defer.inlineCallbacks
+    def _get_prevs_before_rejected(self, event_ids):
+        """Get soft-failed ancestors to remove from the extremities.
+
+        Given a set of events, find all those that have been soft-failed or
+        rejected. Returns those soft-failed/rejected events and their prev
+        events (whether soft-failed/rejected or not), recursing up the
+        prev-event graph until it finds no more soft-failed/rejected events.
+
+        This is used to find extremities that are ancestors of new events but
+        are separated from them by soft-failed events.
+
+        Args:
+            event_ids (Iterable[str]): Events to find prev events for. Note
+                that these must have already been persisted.
+
+        Returns:
+            Deferred[set[str]]
+        """
+
+        # The set of event_ids to return. This includes all soft-failed events
+        # and their prev events.
+        existing_prevs = set()
+
+        def _get_prevs_before_rejected_txn(txn, batch):
+            to_recursively_check = batch
+
+            while to_recursively_check:
+                sql = """
+                SELECT
+                    event_id, prev_event_id, internal_metadata,
+                    rejections.event_id IS NOT NULL
+                FROM event_edges
+                    INNER JOIN events USING (event_id)
+                    LEFT JOIN rejections USING (event_id)
+                    LEFT JOIN event_json USING (event_id)
+                WHERE
+                    event_id IN (%s)
+                    AND NOT events.outlier
+                """ % (
+                    ",".join("?" for _ in to_recursively_check),
+                )
+
+                txn.execute(sql, to_recursively_check)
+                to_recursively_check = []
+
+                for event_id, prev_event_id, metadata, rejected in txn:
+                    if prev_event_id in existing_prevs:
+                        continue
+
+                    soft_failed = json.loads(metadata).get("soft_failed")
+                    if soft_failed or rejected:
+                        to_recursively_check.append(prev_event_id)
+                        existing_prevs.add(prev_event_id)
+
+        for chunk in batch_iter(event_ids, 100):
+            yield self.runInteraction(
+                "_get_prevs_before_rejected",
+                _get_prevs_before_rejected_txn,
+                chunk,
+            )
+
+        defer.returnValue(existing_prevs)
+
+    @defer.inlineCallbacks
     def _get_new_state_after_events(
         self, room_id, events_context, old_latest_event_ids, new_latest_event_ids
     ):
@@ -1325,6 +1377,9 @@ class EventsStore(
                     txn, event.room_id, event.redacts
                 )
 
+                # Remove from relations table.
+                self._handle_redaction(txn, event.redacts)
+
         # Update the event_forward_extremities, event_backward_extremities and
         # event_edges tables.
         self._handle_mult_prev_events(
@@ -1351,6 +1406,8 @@ class EventsStore(
                 # Insert into the event_search table.
                 self._store_guest_access_txn(txn, event)
 
+            self._handle_event_relations(txn, event)
+
         # Insert into the room_memberships table.
         self._store_room_members_txn(
             txn,
@@ -1493,153 +1550,6 @@ class EventsStore(
         ret = yield self.runInteraction("count_daily_active_rooms", _count)
         defer.returnValue(ret)
 
-    @defer.inlineCallbacks
-    def _background_reindex_fields_sender(self, progress, batch_size):
-        target_min_stream_id = progress["target_min_stream_id_inclusive"]
-        max_stream_id = progress["max_stream_id_exclusive"]
-        rows_inserted = progress.get("rows_inserted", 0)
-
-        INSERT_CLUMP_SIZE = 1000
-
-        def reindex_txn(txn):
-            sql = (
-                "SELECT stream_ordering, event_id, json FROM events"
-                " INNER JOIN event_json USING (event_id)"
-                " WHERE ? <= stream_ordering AND stream_ordering < ?"
-                " ORDER BY stream_ordering DESC"
-                " LIMIT ?"
-            )
-
-            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
-
-            rows = txn.fetchall()
-            if not rows:
-                return 0
-
-            min_stream_id = rows[-1][0]
-
-            update_rows = []
-            for row in rows:
-                try:
-                    event_id = row[1]
-                    event_json = json.loads(row[2])
-                    sender = event_json["sender"]
-                    content = event_json["content"]
-
-                    contains_url = "url" in content
-                    if contains_url:
-                        contains_url &= isinstance(content["url"], text_type)
-                except (KeyError, AttributeError):
-                    # If the event is missing a necessary field then
-                    # skip over it.
-                    continue
-
-                update_rows.append((sender, contains_url, event_id))
-
-            sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
-
-            for index in range(0, len(update_rows), INSERT_CLUMP_SIZE):
-                clump = update_rows[index : index + INSERT_CLUMP_SIZE]
-                txn.executemany(sql, clump)
-
-            progress = {
-                "target_min_stream_id_inclusive": target_min_stream_id,
-                "max_stream_id_exclusive": min_stream_id,
-                "rows_inserted": rows_inserted + len(rows),
-            }
-
-            self._background_update_progress_txn(
-                txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress
-            )
-
-            return len(rows)
-
-        result = yield self.runInteraction(
-            self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn
-        )
-
-        if not result:
-            yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME)
-
-        defer.returnValue(result)
-
-    @defer.inlineCallbacks
-    def _background_reindex_origin_server_ts(self, progress, batch_size):
-        target_min_stream_id = progress["target_min_stream_id_inclusive"]
-        max_stream_id = progress["max_stream_id_exclusive"]
-        rows_inserted = progress.get("rows_inserted", 0)
-
-        INSERT_CLUMP_SIZE = 1000
-
-        def reindex_search_txn(txn):
-            sql = (
-                "SELECT stream_ordering, event_id FROM events"
-                " WHERE ? <= stream_ordering AND stream_ordering < ?"
-                " ORDER BY stream_ordering DESC"
-                " LIMIT ?"
-            )
-
-            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
-
-            rows = txn.fetchall()
-            if not rows:
-                return 0
-
-            min_stream_id = rows[-1][0]
-            event_ids = [row[1] for row in rows]
-
-            rows_to_update = []
-
-            chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)]
-            for chunk in chunks:
-                ev_rows = self._simple_select_many_txn(
-                    txn,
-                    table="event_json",
-                    column="event_id",
-                    iterable=chunk,
-                    retcols=["event_id", "json"],
-                    keyvalues={},
-                )
-
-                for row in ev_rows:
-                    event_id = row["event_id"]
-                    event_json = json.loads(row["json"])
-                    try:
-                        origin_server_ts = event_json["origin_server_ts"]
-                    except (KeyError, AttributeError):
-                        # If the event is missing a necessary field then
-                        # skip over it.
-                        continue
-
-                    rows_to_update.append((origin_server_ts, event_id))
-
-            sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
-
-            for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE):
-                clump = rows_to_update[index : index + INSERT_CLUMP_SIZE]
-                txn.executemany(sql, clump)
-
-            progress = {
-                "target_min_stream_id_inclusive": target_min_stream_id,
-                "max_stream_id_exclusive": min_stream_id,
-                "rows_inserted": rows_inserted + len(rows_to_update),
-            }
-
-            self._background_update_progress_txn(
-                txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress
-            )
-
-            return len(rows_to_update)
-
-        result = yield self.runInteraction(
-            self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn
-        )
-
-        if not result:
-            yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME)
-
-        defer.returnValue(result)
-
     def get_current_backfill_token(self):
         """The current minimum token that backfilled events have reached"""
         return -self._backfill_id_gen.get_current_token()
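
The new _get_prevs_before_rejected above walks up the prev-event graph: starting from the new events' prev events, any event that is soft-failed or rejected contributes its own prev events to the result, and those are checked in turn until an accepted event stops the recursion. The same walk over a small in-memory graph, with made-up event IDs (a sketch of the logic, not the storage code):

# Hypothetical stand-ins for the event_edges / rejections / internal_metadata
# lookups the SQL above performs.
PREV_EVENTS = {
    "$new": ["$soft1"],
    "$soft1": ["$soft2"],
    "$soft2": ["$ok"],
    "$ok": [],
}
SOFT_FAILED_OR_REJECTED = {"$soft1", "$soft2"}


def prevs_before_rejected(event_ids):
    existing_prevs = set()
    to_check = set(event_ids)
    while to_check:
        event_id = to_check.pop()
        if event_id not in SOFT_FAILED_OR_REJECTED:
            continue  # accepted event: stop recursing along this branch
        for prev_id in PREV_EVENTS.get(event_id, []):
            if prev_id not in existing_prevs:
                existing_prevs.add(prev_id)
                to_check.add(prev_id)
    return existing_prevs


print(sorted(prevs_before_rejected(PREV_EVENTS["$new"])))  # ['$ok', '$soft2']
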
@@ -1655,10 +1565,11 @@ class EventsStore(
         def get_all_new_forward_event_rows(txn):
             sql = (
                 "SELECT e.stream_ordering, e.event_id, e.room_id, e.type,"
-                " state_key, redacts"
+                " state_key, redacts, relates_to_id"
                 " FROM events AS e"
                 " LEFT JOIN redactions USING (event_id)"
                 " LEFT JOIN state_events USING (event_id)"
+                " LEFT JOIN event_relations USING (event_id)"
                 " WHERE ? < stream_ordering AND stream_ordering <= ?"
                 " ORDER BY stream_ordering ASC"
                 " LIMIT ?"
@@ -1673,11 +1584,12 @@ class EventsStore(
 
             sql = (
                 "SELECT event_stream_ordering, e.event_id, e.room_id, e.type,"
-                " state_key, redacts"
+                " state_key, redacts, relates_to_id"
                 " FROM events AS e"
                 " INNER JOIN ex_outlier_stream USING (event_id)"
                 " LEFT JOIN redactions USING (event_id)"
                 " LEFT JOIN state_events USING (event_id)"
+                " LEFT JOIN event_relations USING (event_id)"
                 " WHERE ? < event_stream_ordering"
                 " AND event_stream_ordering <= ?"
                 " ORDER BY event_stream_ordering DESC"
@@ -1698,10 +1610,11 @@ class EventsStore(
         def get_all_new_backfill_event_rows(txn):
             sql = (
                 "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type,"
-                " state_key, redacts"
+                " state_key, redacts, relates_to_id"
                 " FROM events AS e"
                 " LEFT JOIN redactions USING (event_id)"
                 " LEFT JOIN state_events USING (event_id)"
+                " LEFT JOIN event_relations USING (event_id)"
                 " WHERE ? > stream_ordering AND stream_ordering >= ?"
                 " ORDER BY stream_ordering ASC"
                 " LIMIT ?"
@@ -1716,11 +1629,12 @@ class EventsStore(
 
             sql = (
                 "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type,"
-                " state_key, redacts"
+                " state_key, redacts, relates_to_id"
                 " FROM events AS e"
                 " INNER JOIN ex_outlier_stream USING (event_id)"
                 " LEFT JOIN redactions USING (event_id)"
                 " LEFT JOIN state_events USING (event_id)"
+                " LEFT JOIN event_relations USING (event_id)"
                 " WHERE ? > event_stream_ordering"
                 " AND event_stream_ordering >= ?"
                 " ORDER BY event_stream_ordering DESC"
diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py
new file mode 100644
index 0000000000..75c1935bf3
--- /dev/null
+++ b/synapse/storage/events_bg_updates.py
@@ -0,0 +1,401 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from six import text_type
+
+from canonicaljson import json
+
+from twisted.internet import defer
+
+from synapse.storage.background_updates import BackgroundUpdateStore
+
+logger = logging.getLogger(__name__)
+
+
+class EventsBackgroundUpdatesStore(BackgroundUpdateStore):
+
+    EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
+    EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
+    DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities"
+
+    def __init__(self, db_conn, hs):
+        super(EventsBackgroundUpdatesStore, self).__init__(db_conn, hs)
+
+        self.register_background_update_handler(
+            self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts
+        )
+        self.register_background_update_handler(
+            self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME,
+            self._background_reindex_fields_sender,
+        )
+
+        self.register_background_index_update(
+            "event_contains_url_index",
+            index_name="event_contains_url_index",
+            table="events",
+            columns=["room_id", "topological_ordering", "stream_ordering"],
+            where_clause="contains_url = true AND outlier = false",
+        )
+
+        # an event_id index on event_search is useful for the purge_history
+        # api. Plus it means we get to enforce some integrity with a UNIQUE
+        # clause
+        self.register_background_index_update(
+            "event_search_event_id_idx",
+            index_name="event_search_event_id_idx",
+            table="event_search",
+            columns=["event_id"],
+            unique=True,
+            psql_only=True,
+        )
+
+        self.register_background_update_handler(
+            self.DELETE_SOFT_FAILED_EXTREMITIES,
+            self._cleanup_extremities_bg_update,
+        )
+
+    @defer.inlineCallbacks
+    def _background_reindex_fields_sender(self, progress, batch_size):
+        target_min_stream_id = progress["target_min_stream_id_inclusive"]
+        max_stream_id = progress["max_stream_id_exclusive"]
+        rows_inserted = progress.get("rows_inserted", 0)
+
+        INSERT_CLUMP_SIZE = 1000
+
+        def reindex_txn(txn):
+            sql = (
+                "SELECT stream_ordering, event_id, json FROM events"
+                " INNER JOIN event_json USING (event_id)"
+                " WHERE ? <= stream_ordering AND stream_ordering < ?"
+                " ORDER BY stream_ordering DESC"
+                " LIMIT ?"
+            )
+
+            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            min_stream_id = rows[-1][0]
+
+            update_rows = []
+            for row in rows:
+                try:
+                    event_id = row[1]
+                    event_json = json.loads(row[2])
+                    sender = event_json["sender"]
+                    content = event_json["content"]
+
+                    contains_url = "url" in content
+                    if contains_url:
+                        contains_url &= isinstance(content["url"], text_type)
+                except (KeyError, AttributeError):
+                    # If the event is missing a necessary field then
+                    # skip over it.
+                    continue
+
+                update_rows.append((sender, contains_url, event_id))
+
+            sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
+
+            for index in range(0, len(update_rows), INSERT_CLUMP_SIZE):
+                clump = update_rows[index : index + INSERT_CLUMP_SIZE]
+                txn.executemany(sql, clump)
+
+            progress = {
+                "target_min_stream_id_inclusive": target_min_stream_id,
+                "max_stream_id_exclusive": min_stream_id,
+                "rows_inserted": rows_inserted + len(rows),
+            }
+
+            self._background_update_progress_txn(
+                txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress
+            )
+
+            return len(rows)
+
+        result = yield self.runInteraction(
+            self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn
+        )
+
+        if not result:
+            yield self._end_background_update(self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME)
+
+        defer.returnValue(result)
+
+    @defer.inlineCallbacks
+    def _background_reindex_origin_server_ts(self, progress, batch_size):
+        target_min_stream_id = progress["target_min_stream_id_inclusive"]
+        max_stream_id = progress["max_stream_id_exclusive"]
+        rows_inserted = progress.get("rows_inserted", 0)
+
+        INSERT_CLUMP_SIZE = 1000
+
+        def reindex_search_txn(txn):
+            sql = (
+                "SELECT stream_ordering, event_id FROM events"
+                " WHERE ? <= stream_ordering AND stream_ordering < ?"
+                " ORDER BY stream_ordering DESC"
+                " LIMIT ?"
+            )
+
+            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            min_stream_id = rows[-1][0]
+            event_ids = [row[1] for row in rows]
+
+            rows_to_update = []
+
+            chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)]
+            for chunk in chunks:
+                ev_rows = self._simple_select_many_txn(
+                    txn,
+                    table="event_json",
+                    column="event_id",
+                    iterable=chunk,
+                    retcols=["event_id", "json"],
+                    keyvalues={},
+                )
+
+                for row in ev_rows:
+                    event_id = row["event_id"]
+                    event_json = json.loads(row["json"])
+                    try:
+                        origin_server_ts = event_json["origin_server_ts"]
+                    except (KeyError, AttributeError):
+                        # If the event is missing a necessary field then
+                        # skip over it.
+                        continue
+
+                    rows_to_update.append((origin_server_ts, event_id))
+
+            sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
+
+            for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE):
+                clump = rows_to_update[index : index + INSERT_CLUMP_SIZE]
+                txn.executemany(sql, clump)
+
+            progress = {
+                "target_min_stream_id_inclusive": target_min_stream_id,
+                "max_stream_id_exclusive": min_stream_id,
+                "rows_inserted": rows_inserted + len(rows_to_update),
+            }
+
+            self._background_update_progress_txn(
+                txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress
+            )
+
+            return len(rows_to_update)
+
+        result = yield self.runInteraction(
+            self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn
+        )
+
+        if not result:
+            yield self._end_background_update(self.EVENT_ORIGIN_SERVER_TS_NAME)
+
+        defer.returnValue(result)
+
+    @defer.inlineCallbacks
+    def _cleanup_extremities_bg_update(self, progress, batch_size):
+        """Background update to clean out extremities that should have been
+        deleted previously.
+
+        Mainly used to deal with the aftermath of #5269.
+        """
+
+        # This works by first copying all existing forward extremities into the
+        # `_extremities_to_check` table at startup, and then checking, for
+        # each event in that table, whether it has any descendants that are
+        # not soft-failed/rejected. If it does, we delete that event from the
+        # forward extremities table.
+        #
+        # For efficiency, we do this in batches by recursively pulling out all
+        # descendants of a batch until we find the non-soft-failed/rejected
+        # events, i.e. the set of descendants whose chain of prev events back
+        # to the batch of extremities is all soft-failed or rejected.
+        # Typically, we won't find any such events as extremities will rarely
+        # have any descendants, but if they do then we should delete those
+        # extremities.
+
+        def _cleanup_extremities_bg_update_txn(txn):
+            # The set of extremity event IDs that we're checking this round
+            original_set = set()
+
+            # A dict[str, set[str]] mapping each event ID to its prev events.
+            graph = {}
+
+            # The set of descendants of the original set that are neither
+            # rejected nor soft-failed. Ancestors of these events should be
+            # removed from the forward extremities table.
+            non_rejected_leaves = set()
+
+            # Set of event IDs that have been soft failed, and for which we
+            # should check if they have descendants which haven't been soft
+            # failed.
+            soft_failed_events_to_lookup = set()
+
+            # First, we get `batch_size` events from the table, pulling out
+            # their successor events, if any, and the successor events'
+            # rejection status.
+            txn.execute(
+                """SELECT prev_event_id, event_id, internal_metadata,
+                    rejections.event_id IS NOT NULL, events.outlier
+                FROM (
+                    SELECT event_id AS prev_event_id
+                    FROM _extremities_to_check
+                    LIMIT ?
+                ) AS f
+                LEFT JOIN event_edges USING (prev_event_id)
+                LEFT JOIN events USING (event_id)
+                LEFT JOIN event_json USING (event_id)
+                LEFT JOIN rejections USING (event_id)
+                """, (batch_size,)
+            )
+
+            for prev_event_id, event_id, metadata, rejected, outlier in txn:
+                original_set.add(prev_event_id)
+
+                if not event_id or outlier:
+                    # Common case where the forward extremity doesn't have any
+                    # descendants.
+                    continue
+
+                graph.setdefault(event_id, set()).add(prev_event_id)
+
+                soft_failed = False
+                if metadata:
+                    soft_failed = json.loads(metadata).get("soft_failed")
+
+                if soft_failed or rejected:
+                    soft_failed_events_to_lookup.add(event_id)
+                else:
+                    non_rejected_leaves.add(event_id)
+
+            # Now we recursively check all the soft-failed descendants we
+            # found above in the same way, until we have nothing left to
+            # check.
+            while soft_failed_events_to_lookup:
+                # We only want to do 100 at a time, so we split the given list
+                # in two.
+                batch = list(soft_failed_events_to_lookup)
+                to_check, to_defer = batch[:100], batch[100:]
+                soft_failed_events_to_lookup = set(to_defer)
+
+                sql = """SELECT prev_event_id, event_id, internal_metadata,
+                    rejections.event_id IS NOT NULL
+                    FROM event_edges
+                    INNER JOIN events USING (event_id)
+                    INNER JOIN event_json USING (event_id)
+                    LEFT JOIN rejections USING (event_id)
+                    WHERE
+                        prev_event_id IN (%s)
+                        AND NOT events.outlier
+                """ % (
+                    ",".join("?" for _ in to_check),
+                )
+                txn.execute(sql, to_check)
+
+                for prev_event_id, event_id, metadata, rejected in txn:
+                    if event_id in graph:
+                        # Already handled this event previously, but we still
+                        # want to record the edge.
+                        graph[event_id].add(prev_event_id)
+                        continue
+
+                    graph[event_id] = {prev_event_id}
+
+                    soft_failed = json.loads(metadata).get("soft_failed")
+                    if soft_failed or rejected:
+                        soft_failed_events_to_lookup.add(event_id)
+                    else:
+                        non_rejected_leaves.add(event_id)
+
+            # We have a set of non-soft-failed descendants, so we recurse up
+            # the graph to find all ancestors and add them to the set of event
+            # IDs that we can delete from the forward extremities table.
+            to_delete = set()
+            while non_rejected_leaves:
+                event_id = non_rejected_leaves.pop()
+                prev_event_ids = graph.get(event_id, set())
+                non_rejected_leaves.update(prev_event_ids)
+                to_delete.update(prev_event_ids)
+
+            to_delete.intersection_update(original_set)
+
+            deleted = self._simple_delete_many_txn(
+                txn=txn,
+                table="event_forward_extremities",
+                column="event_id",
+                iterable=to_delete,
+                keyvalues={},
+            )
+
+            logger.info(
+                "Deleted %d forward extremities of %d checked, to clean up #5269",
+                deleted,
+                len(original_set),
+            )
+
+            if deleted:
+                # We now need to invalidate the caches of these rooms
+                rows = self._simple_select_many_txn(
+                    txn,
+                    table="events",
+                    column="event_id",
+                    iterable=to_delete,
+                    keyvalues={},
+                    retcols=("room_id",)
+                )
+                room_ids = set(row["room_id"] for row in rows)
+                for room_id in room_ids:
+                    txn.call_after(
+                        self.get_latest_event_ids_in_room.invalidate,
+                        (room_id,)
+                    )
+
+            self._simple_delete_many_txn(
+                txn=txn,
+                table="_extremities_to_check",
+                column="event_id",
+                iterable=original_set,
+                keyvalues={},
+            )
+
+            return len(original_set)
+
+        num_handled = yield self.runInteraction(
+            "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn,
+        )
+
+        if not num_handled:
+            yield self._end_background_update(self.DELETE_SOFT_FAILED_EXTREMITIES)
+
+            def _drop_table_txn(txn):
+                txn.execute("DROP TABLE _extremities_to_check")
+
+            yield self.runInteraction(
+                "_cleanup_extremities_bg_update_drop_table",
+                _drop_table_txn,
+            )
+
+        defer.returnValue(num_handled)
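
The comments at the top of _cleanup_extremities_bg_update describe the walk in prose; the sketch below runs the same two-phase walk over an in-memory graph instead of SQL. The successor map, soft-failure set and event IDs are all hypothetical:

SUCCESSORS = {            # event_id -> events that list it as a prev event
    "$stale": ["$soft"],  # an old extremity with a soft-failed descendant
    "$soft": ["$accepted"],
    "$lonely": [],        # an extremity with no descendants: keep it
}
SOFT_FAILED_OR_REJECTED = {"$soft"}


def extremities_to_delete(extremities):
    graph = {}               # descendant -> set of its known prev events
    accepted_leaves = set()  # descendants that are not soft-failed/rejected
    to_expand = list(extremities)
    expanded = set()

    # Phase 1: walk downwards, only continuing through soft-failed/rejected
    # events (the extremities themselves are always expanded once).
    while to_expand:
        event_id = to_expand.pop()
        if event_id in expanded:
            continue
        expanded.add(event_id)
        for child in SUCCESSORS.get(event_id, []):
            graph.setdefault(child, set()).add(event_id)
            if child in SOFT_FAILED_OR_REJECTED:
                to_expand.append(child)
            else:
                accepted_leaves.add(child)

    # Phase 2: walk back up from the accepted descendants, collecting every
    # ancestor; any original extremity among them has a live descendant and
    # can be dropped from the forward extremities table.
    to_delete = set()
    while accepted_leaves:
        event_id = accepted_leaves.pop()
        prevs = graph.get(event_id, set())
        accepted_leaves.update(prevs)
        to_delete.update(prevs)

    return to_delete & set(extremities)


print(extremities_to_delete(["$stale", "$lonely"]))  # {'$stale'}
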
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index adc6cf26b5..1782428048 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import division
+
 import itertools
 import logging
 from collections import namedtuple
@@ -610,4 +612,79 @@ class EventsWorkerStore(SQLBaseStore):
 
             return res
 
-        return self.runInteraction("get_rejection_reasons", f)
+        return self.runInteraction("get_seen_events_with_rejections", f)
+
+    def _get_total_state_event_counts_txn(self, txn, room_id):
+        """
+        See get_total_state_event_counts.
+        """
+        # We join against the events table as that has an index on room_id
+        sql = """
+            SELECT COUNT(*) FROM state_events
+            INNER JOIN events USING (room_id, event_id)
+            WHERE room_id=?
+        """
+        txn.execute(sql, (room_id,))
+        row = txn.fetchone()
+        return row[0] if row else 0
+
+    def get_total_state_event_counts(self, room_id):
+        """
+        Gets the total number of state events in a room.
+
+        Args:
+            room_id (str)
+
+        Returns:
+            Deferred[int]
+        """
+        return self.runInteraction(
+            "get_total_state_event_counts",
+            self._get_total_state_event_counts_txn, room_id
+        )
+
+    def _get_current_state_event_counts_txn(self, txn, room_id):
+        """
+        See get_current_state_event_counts.
+        """
+        sql = "SELECT COUNT(*) FROM current_state_events WHERE room_id=?"
+        txn.execute(sql, (room_id,))
+        row = txn.fetchone()
+        return row[0] if row else 0
+
+    def get_current_state_event_counts(self, room_id):
+        """
+        Gets the current number of state events in a room.
+
+        Args:
+            room_id (str)
+
+        Returns:
+            Deferred[int]
+        """
+        return self.runInteraction(
+            "get_current_state_event_counts",
+            self._get_current_state_event_counts_txn, room_id
+        )
+
+    @defer.inlineCallbacks
+    def get_room_complexity(self, room_id):
+        """
+        Get a rough approximation of the complexity of the room. This is used by
+        remote servers to decide whether they wish to join the room or not.
+        A higher complexity value indicates that being in the room will
+        consume more resources.
+
+        Args:
+            room_id (str)
+
+        Returns:
+            Deferred[dict[str, int]]: map from complexity version to complexity.
+        """
+        state_events = yield self.get_current_state_event_counts(room_id)
+
+        # Call this one "v1", so we can introduce new ones as we want to develop
+        # it.
+        complexity_v1 = round(state_events / 500, 2)
+
+        defer.returnValue({"v1": complexity_v1})
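
The "v1" complexity figure is simply the current state-event count divided by 500, rounded to two decimal places:

def room_complexity_v1(current_state_events):
    # One "complexity unit" per 500 current state events, as above.
    return {"v1": round(current_state_events / 500, 2)}


print(room_complexity_v1(1234))  # {'v1': 2.47}
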
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index 7036541792..5300720dbb 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -19,6 +19,7 @@ import logging
 
 import six
 
+import attr
 from signedjson.key import decode_verify_key_bytes
 
 from synapse.util import batch_iter
@@ -36,6 +37,12 @@ else:
     db_binary_type = memoryview
 
 
+@attr.s(slots=True, frozen=True)
+class FetchKeyResult(object):
+    verify_key = attr.ib()  # VerifyKey: the key itself
+    valid_until_ts = attr.ib()  # int: how long we can use this key for
+
+
 class KeyStore(SQLBaseStore):
     """Persistence for signature verification keys
     """
@@ -54,8 +61,8 @@ class KeyStore(SQLBaseStore):
                 iterable of (server_name, key-id) tuples to fetch keys for
 
         Returns:
-            Deferred: resolves to dict[Tuple[str, str], VerifyKey|None]:
-                map from (server_name, key_id) -> VerifyKey, or None if the key is
+            Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]:
+                map from (server_name, key_id) -> FetchKeyResult, or None if the key is
                 unknown
         """
         keys = {}
@@ -65,17 +72,19 @@ class KeyStore(SQLBaseStore):
 
             # batch_iter always returns tuples so it's safe to do len(batch)
             sql = (
-                "SELECT server_name, key_id, verify_key FROM server_signature_keys "
-                "WHERE 1=0"
+                "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
+                "FROM server_signature_keys WHERE 1=0"
             ) + " OR (server_name=? AND key_id=?)" * len(batch)
 
             txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
 
             for row in txn:
-                server_name, key_id, key_bytes = row
-                keys[(server_name, key_id)] = decode_verify_key_bytes(
-                    key_id, bytes(key_bytes)
+                server_name, key_id, key_bytes, ts_valid_until_ms = row
+                res = FetchKeyResult(
+                    verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
+                    valid_until_ts=ts_valid_until_ms,
                 )
+                keys[(server_name, key_id)] = res
 
         def _txn(txn):
             for batch in batch_iter(server_name_and_key_ids, 50):
@@ -84,38 +93,53 @@ class KeyStore(SQLBaseStore):
 
         return self.runInteraction("get_server_verify_keys", _txn)
 
-    def store_server_verify_key(
-        self, server_name, from_server, time_now_ms, verify_key
-    ):
-        """Stores a NACL verification key for the given server.
+    def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys):
+        """Stores NACL verification keys for remote servers.
         Args:
-            server_name (str): The name of the server.
-            from_server (str): Where the verification key was looked up
-            time_now_ms (int): The time now in milliseconds
-            verify_key (nacl.signing.VerifyKey): The NACL verify key.
+            from_server (str): Where the verification keys were looked up
+            ts_added_ms (int): The time to record that the key was added
+            verify_keys (iterable[tuple[str, str, FetchKeyResult]]):
+                keys to be stored. Each entry is a triplet of
+                (server_name, key_id, key).
         """
-        key_id = "%s:%s" % (verify_key.alg, verify_key.version)
-
-        # XXX fix this to not need a lock (#3819)
-        def _txn(txn):
-            self._simple_upsert_txn(
-                txn,
-                table="server_signature_keys",
-                keyvalues={"server_name": server_name, "key_id": key_id},
-                values={
-                    "from_server": from_server,
-                    "ts_added_ms": time_now_ms,
-                    "verify_key": db_binary_type(verify_key.encode()),
-                },
+        key_values = []
+        value_values = []
+        invalidations = []
+        for server_name, key_id, fetch_result in verify_keys:
+            key_values.append((server_name, key_id))
+            value_values.append(
+                (
+                    from_server,
+                    ts_added_ms,
+                    fetch_result.valid_until_ts,
+                    db_binary_type(fetch_result.verify_key.encode()),
+                )
             )
             # invalidate takes a tuple corresponding to the params of
             # _get_server_verify_key. _get_server_verify_key only takes one
             # param, which is itself the 2-tuple (server_name, key_id).
-            txn.call_after(
-                self._get_server_verify_key.invalidate, ((server_name, key_id),)
-            )
-
-        return self.runInteraction("store_server_verify_key", _txn)
+            invalidations.append((server_name, key_id))
+
+        def _invalidate(res):
+            f = self._get_server_verify_key.invalidate
+            for i in invalidations:
+                f((i, ))
+            return res
+
+        return self.runInteraction(
+            "store_server_verify_keys",
+            self._simple_upsert_many_txn,
+            table="server_signature_keys",
+            key_names=("server_name", "key_id"),
+            key_values=key_values,
+            value_names=(
+                "from_server",
+                "ts_added_ms",
+                "ts_valid_until_ms",
+                "verify_key",
+            ),
+            value_values=value_values,
+        ).addCallback(_invalidate)
 
     def store_server_keys_json(
         self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes
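
store_server_verify_keys now collects every key into parallel key/value rows and performs a single batched upsert, rather than running one interaction per key. A small sketch of assembling those rows from FetchKeyResult-style objects; the server names, byte strings and timestamps are placeholders:

import attr


@attr.s(slots=True, frozen=True)
class FetchKeyResult(object):
    verify_key = attr.ib()      # here plain bytes, standing in for a VerifyKey
    valid_until_ts = attr.ib()  # int: how long we can use this key for


def build_upsert_rows(from_server, ts_added_ms, verify_keys):
    key_values = []
    value_values = []
    for server_name, key_id, fetch_result in verify_keys:
        key_values.append((server_name, key_id))
        value_values.append(
            (from_server, ts_added_ms, fetch_result.valid_until_ts,
             fetch_result.verify_key)
        )
    return key_values, value_values


keys = [
    ("remote.example.com", "ed25519:abc",
     FetchKeyResult(verify_key=b"\x01\x02", valid_until_ts=2000000000000)),
]
print(build_upsert_rows("notary.example.com", 1500000000000, keys))
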
diff --git a/synapse/storage/relations.py b/synapse/storage/relations.py
new file mode 100644
index 0000000000..4c83800cca
--- /dev/null
+++ b/synapse/storage/relations.py
@@ -0,0 +1,476 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+import attr
+
+from twisted.internet import defer
+
+from synapse.api.constants import RelationTypes
+from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.stream import generate_pagination_where_clause
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s
+class PaginationChunk(object):
+    """Returned by relation pagination APIs.
+
+    Attributes:
+        chunk (list): The rows returned by pagination
+        next_batch (Any|None): Token to fetch next set of results with, if
+            None then there are no more results.
+        prev_batch (Any|None): Token to fetch previous set of results with, if
+            None then there are no previous results.
+    """
+
+    chunk = attr.ib()
+    next_batch = attr.ib(default=None)
+    prev_batch = attr.ib(default=None)
+
+    def to_dict(self):
+        d = {"chunk": self.chunk}
+
+        if self.next_batch:
+            d["next_batch"] = self.next_batch.to_string()
+
+        if self.prev_batch:
+            d["prev_batch"] = self.prev_batch.to_string()
+
+        return d
+
+
+@attr.s(frozen=True, slots=True)
+class RelationPaginationToken(object):
+    """Pagination token for relation pagination API.
+
+    As the results are ordered by topological ordering, we can use the
+    `topological_ordering` and `stream_ordering` fields of the events at the
+    boundaries of the chunk as pagination tokens.
+
+    Attributes:
+        topological (int): The topological ordering of the boundary event
+        stream (int): The stream ordering of the boundary event.
+    """
+
+    topological = attr.ib()
+    stream = attr.ib()
+
+    @staticmethod
+    def from_string(string):
+        try:
+            t, s = string.split("-")
+            return RelationPaginationToken(int(t), int(s))
+        except ValueError:
+            raise SynapseError(400, "Invalid token")
+
+    def to_string(self):
+        return "%d-%d" % (self.topological, self.stream)
+
+    def as_tuple(self):
+        return attr.astuple(self)
+
+
+@attr.s(frozen=True, slots=True)
+class AggregationPaginationToken(object):
+    """Pagination token for relation aggregation pagination API.
+
+    As the results are ordered by count and then MAX(stream_ordering) of the
+    aggregation groups, we can just use them as our pagination token.
+
+    Attributes:
+        count (int): The count of relations in the boundary group.
+        stream (int): The MAX stream ordering in the boundary group.
+    """
+
+    count = attr.ib()
+    stream = attr.ib()
+
+    @staticmethod
+    def from_string(string):
+        try:
+            c, s = string.split("-")
+            return AggregationPaginationToken(int(c), int(s))
+        except ValueError:
+            raise SynapseError(400, "Invalid token")
+
+    def to_string(self):
+        return "%d-%d" % (self.count, self.stream)
+
+    def as_tuple(self):
+        return attr.astuple(self)
+
+
+class RelationsWorkerStore(SQLBaseStore):
+    @cached(tree=True)
+    def get_relations_for_event(
+        self,
+        event_id,
+        relation_type=None,
+        event_type=None,
+        aggregation_key=None,
+        limit=5,
+        direction="b",
+        from_token=None,
+        to_token=None,
+    ):
+        """Get a list of relations for an event, ordered by topological ordering.
+
+        Args:
+            event_id (str): Fetch events that relate to this event ID.
+            relation_type (str|None): Only fetch events with this relation
+                type, if given.
+            event_type (str|None): Only fetch events with this event type, if
+                given.
+            aggregation_key (str|None): Only fetch events with this aggregation
+                key, if given.
+            limit (int): Only fetch the most recent `limit` events.
+            direction (str): Whether to fetch the most recent first (`"b"`) or
+                the oldest first (`"f"`).
+            from_token (RelationPaginationToken|None): Fetch rows from the given
+                token, or from the start if None.
+            to_token (RelationPaginationToken|None): Fetch rows up to the given
+                token, or up to the end if None.
+
+        Returns:
+            Deferred[PaginationChunk]: List of event IDs that match relations
+            requested. The rows are of the form `{"event_id": "..."}`.
+        """
+
+        where_clause = ["relates_to_id = ?"]
+        where_args = [event_id]
+
+        if relation_type is not None:
+            where_clause.append("relation_type = ?")
+            where_args.append(relation_type)
+
+        if event_type is not None:
+            where_clause.append("type = ?")
+            where_args.append(event_type)
+
+        if aggregation_key:
+            where_clause.append("aggregation_key = ?")
+            where_args.append(aggregation_key)
+
+        pagination_clause = generate_pagination_where_clause(
+            direction=direction,
+            column_names=("topological_ordering", "stream_ordering"),
+            from_token=attr.astuple(from_token) if from_token else None,
+            to_token=attr.astuple(to_token) if to_token else None,
+            engine=self.database_engine,
+        )
+
+        if pagination_clause:
+            where_clause.append(pagination_clause)
+
+        if direction == "b":
+            order = "DESC"
+        else:
+            order = "ASC"
+
+        sql = """
+            SELECT event_id, topological_ordering, stream_ordering
+            FROM event_relations
+            INNER JOIN events USING (event_id)
+            WHERE %s
+            ORDER BY topological_ordering %s, stream_ordering %s
+            LIMIT ?
+        """ % (
+            " AND ".join(where_clause),
+            order,
+            order,
+        )
+
+        def _get_recent_references_for_event_txn(txn):
+            txn.execute(sql, where_args + [limit + 1])
+
+            last_topo_id = None
+            last_stream_id = None
+            events = []
+            for row in txn:
+                events.append({"event_id": row[0]})
+                last_topo_id = row[1]
+                last_stream_id = row[2]
+
+            next_batch = None
+            if len(events) > limit and last_topo_id and last_stream_id:
+                next_batch = RelationPaginationToken(last_topo_id, last_stream_id)
+
+            return PaginationChunk(
+                chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+            )
+
+        return self.runInteraction(
+            "get_recent_references_for_event", _get_recent_references_for_event_txn
+        )
+
+    @cached(tree=True)
+    def get_aggregation_groups_for_event(
+        self,
+        event_id,
+        event_type=None,
+        limit=5,
+        direction="b",
+        from_token=None,
+        to_token=None,
+    ):
+        """Get a list of annotations on the event, grouped by event type and
+        aggregation key, sorted by count.
+
+        This is used e.g. to get which reactions have happened on an event
+        and how many of each there are.
+
+        Args:
+            event_id (str): Fetch events that relate to this event ID.
+            event_type (str|None): Only fetch events with this event type, if
+                given.
+            limit (int): Only fetch the `limit` groups.
+            direction (str): Whether to fetch the highest count first (`"b"`) or
+                the lowest count first (`"f"`).
+            from_token (AggregationPaginationToken|None): Fetch rows from the
+                given token, or from the start if None.
+            to_token (AggregationPaginationToken|None): Fetch rows up to the
+                given token, or up to the end if None.
+
+
+        Returns:
+            Deferred[PaginationChunk]: List of groups of annotations that
+            match. Each row is a dict with `type`, `key` and `count` fields.
+        """
+
+        where_clause = ["relates_to_id = ?", "relation_type = ?"]
+        where_args = [event_id, RelationTypes.ANNOTATION]
+
+        if event_type:
+            where_clause.append("type = ?")
+            where_args.append(event_type)
+
+        having_clause = generate_pagination_where_clause(
+            direction=direction,
+            column_names=("COUNT(*)", "MAX(stream_ordering)"),
+            from_token=attr.astuple(from_token) if from_token else None,
+            to_token=attr.astuple(to_token) if to_token else None,
+            engine=self.database_engine,
+        )
+
+        if direction == "b":
+            order = "DESC"
+        else:
+            order = "ASC"
+
+        if having_clause:
+            having_clause = "HAVING " + having_clause
+        else:
+            having_clause = ""
+
+        sql = """
+            SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering)
+            FROM event_relations
+            INNER JOIN events USING (event_id)
+            WHERE {where_clause}
+            GROUP BY relation_type, type, aggregation_key
+            {having_clause}
+            ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order}
+            LIMIT ?
+        """.format(
+            where_clause=" AND ".join(where_clause),
+            order=order,
+            having_clause=having_clause,
+        )
+
+        def _get_aggregation_groups_for_event_txn(txn):
+            txn.execute(sql, where_args + [limit + 1])
+
+            next_batch = None
+            events = []
+            for row in txn:
+                events.append({"type": row[0], "key": row[1], "count": row[2]})
+                next_batch = AggregationPaginationToken(row[2], row[3])
+
+            if len(events) <= limit:
+                next_batch = None
+
+            return PaginationChunk(
+                chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+            )
+
+        return self.runInteraction(
+            "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
+        )
+
+    @cachedInlineCallbacks()
+    def get_applicable_edit(self, event_id):
+        """Get the most recent edit (if any) that has happened for the given
+        event.
+
+        Correctly handles checking whether edits were allowed to happen.
+
+        Args:
+            event_id (str): The original event ID
+
+        Returns:
+            Deferred[EventBase|None]: Returns the most recent edit, if any.
+        """
+
+        # We only allow edits for `m.room.message` events that have the same sender
+        # and event type. We can't assert these things during regular event auth so
+        # we have to do the checks post hoc.
+
+        # Fetches latest edit that has the same type and sender as the
+        # original, and is an `m.room.message`.
+        sql = """
+            SELECT edit.event_id FROM events AS edit
+            INNER JOIN event_relations USING (event_id)
+            INNER JOIN events AS original ON
+                original.event_id = relates_to_id
+                AND edit.type = original.type
+                AND edit.sender = original.sender
+            WHERE
+                relates_to_id = ?
+                AND relation_type = ?
+                AND edit.type = 'm.room.message'
+            ORDER BY edit.origin_server_ts DESC, edit.event_id DESC
+            LIMIT 1
+        """
+
+        def _get_applicable_edit_txn(txn):
+            txn.execute(sql, (event_id, RelationTypes.REPLACE))
+            row = txn.fetchone()
+            if row:
+                return row[0]
+
+        edit_id = yield self.runInteraction(
+            "get_applicable_edit", _get_applicable_edit_txn
+        )
+
+        if not edit_id:
+            return
+
+        edit_event = yield self.get_event(edit_id, allow_none=True)
+        defer.returnValue(edit_event)
+
+    def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender):
+        """Check if a user has already annotated an event with the same key
+        (e.g. already liked an event).
+
+        Args:
+            parent_id (str): The event being annotated
+            event_type (str): The event type of the annotation
+            aggregation_key (str): The aggregation key of the annotation
+            sender (str): The sender of the annotation
+
+        Returns:
+            Deferred[bool]
+        """
+
+        sql = """
+            SELECT 1 FROM event_relations
+            INNER JOIN events USING (event_id)
+            WHERE
+                relates_to_id = ?
+                AND relation_type = ?
+                AND type = ?
+                AND sender = ?
+                AND aggregation_key = ?
+            LIMIT 1;
+        """
+
+        def _get_if_user_has_annotated_event(txn):
+            txn.execute(
+                sql,
+                (
+                    parent_id,
+                    RelationTypes.ANNOTATION,
+                    event_type,
+                    sender,
+                    aggregation_key,
+                ),
+            )
+
+            return bool(txn.fetchone())
+
+        return self.runInteraction(
+            "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
+        )
+
+
+class RelationsStore(RelationsWorkerStore):
+    def _handle_event_relations(self, txn, event):
+        """Handles inserting relation data during peristence of events
+
+        Args:
+            txn
+            event (EventBase)
+        """
+        relation = event.content.get("m.relates_to")
+        if not relation:
+            # No relations
+            return
+
+        rel_type = relation.get("rel_type")
+        if rel_type not in (
+            RelationTypes.ANNOTATION,
+            RelationTypes.REFERENCE,
+            RelationTypes.REPLACE,
+        ):
+            # Unknown relation type
+            return
+
+        parent_id = relation.get("event_id")
+        if not parent_id:
+            # Invalid relation
+            return
+
+        aggregation_key = relation.get("key")
+
+        self._simple_insert_txn(
+            txn,
+            table="event_relations",
+            values={
+                "event_id": event.event_id,
+                "relates_to_id": parent_id,
+                "relation_type": rel_type,
+                "aggregation_key": aggregation_key,
+            },
+        )
+
+        txn.call_after(self.get_relations_for_event.invalidate_many, (parent_id,))
+        txn.call_after(
+            self.get_aggregation_groups_for_event.invalidate_many, (parent_id,)
+        )
+
+        if rel_type == RelationTypes.REPLACE:
+            txn.call_after(self.get_applicable_edit.invalidate, (parent_id,))
+
+    def _handle_redaction(self, txn, redacted_event_id):
+        """Handles receiving a redaction and checking whether we need to remove
+        any redacted relations from the database.
+
+        Args:
+            txn
+            redacted_event_id (str): The event that was redacted.
+        """
+
+        self._simple_delete_txn(
+            txn,
+            table="event_relations",
+            keyvalues={"event_id": redacted_event_id},
+        )
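Taken together, the two classes above form the storage side of event relations. As a rough, self-contained sketch of what `_handle_event_relations` extracts from an event's content (the event IDs and reaction key below are made up, and the helper is a simplification rather than the store method itself):

    # Mirrors the parsing logic of _handle_event_relations, minus the storage layer.
    KNOWN_RELATION_TYPES = ("m.annotation", "m.reference", "m.replace")

    def relation_row(event_id, content):
        """Return the event_relations row for an event, or None if it has no
        recognised relation."""
        relation = content.get("m.relates_to")
        if not relation:
            return None
        rel_type = relation.get("rel_type")
        parent_id = relation.get("event_id")
        if rel_type not in KNOWN_RELATION_TYPES or not parent_id:
            return None
        return {
            "event_id": event_id,
            "relates_to_id": parent_id,
            "relation_type": rel_type,
            "aggregation_key": relation.get("key"),
        }

    print(relation_row(
        "$reaction:example.com",
        {"m.relates_to": {"rel_type": "m.annotation", "event_id": "$msg:example.com", "key": "+1"}},
    ))
    # {'event_id': '$reaction:example.com', 'relates_to_id': '$msg:example.com',
    #  'relation_type': 'm.annotation', 'aggregation_key': '+1'}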
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 57df17bcc2..7617913326 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -142,6 +142,27 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return self.runInteraction("get_room_summary", _get_room_summary_txn)
 
+    def _get_user_counts_in_room_txn(self, txn, room_id):
+        """
+        Get the user counts in a room, broken down by membership.
+
+        Args:
+            txn
+            room_id (str)
+
+        Returns:
+            dict[str, int]: a mapping from membership string (e.g. "join") to
+            the number of users in the room with that membership.
+        """
+        sql = """
+        SELECT m.membership, count(*) FROM room_memberships as m
+            INNER JOIN current_state_events as c USING(event_id)
+            WHERE c.type = 'm.room.member' AND c.room_id = ?
+            GROUP BY m.membership
+        """
+
+        txn.execute(sql, (room_id,))
+        return {row[0]: row[1] for row in txn}
+
     @cached()
     def get_invited_rooms_for_user(self, user_id):
         """ Get all the rooms the user is invited to
diff --git a/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql b/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql
new file mode 100644
index 0000000000..c01aa9d2d9
--- /dev/null
+++ b/synapse/storage/schema/delta/54/add_validity_to_server_keys.sql
@@ -0,0 +1,23 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* The timestamp until which we can use this key, before we have to refresh it. */
+ALTER TABLE server_signature_keys ADD COLUMN ts_valid_until_ms BIGINT;
+
+UPDATE server_signature_keys SET ts_valid_until_ms = (
+    SELECT MAX(ts_valid_until_ms) FROM server_keys_json skj WHERE
+        skj.server_name = server_signature_keys.server_name AND
+        skj.key_id = server_signature_keys.key_id
+);
diff --git a/synapse/storage/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/schema/delta/54/delete_forward_extremities.sql
new file mode 100644
index 0000000000..b062ec840c
--- /dev/null
+++ b/synapse/storage/schema/delta/54/delete_forward_extremities.sql
@@ -0,0 +1,23 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Start a background job to cleanup extremities that were incorrectly added
+-- by bug #5269.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('delete_soft_failed_extremities', '{}');
+
+DROP TABLE IF EXISTS _extremities_to_check;  -- To make this delta schema file idempotent.
+CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities;
+CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id);
diff --git a/synapse/storage/schema/delta/54/relations.sql b/synapse/storage/schema/delta/54/relations.sql
new file mode 100644
index 0000000000..134862b870
--- /dev/null
+++ b/synapse/storage/schema/delta/54/relations.sql
@@ -0,0 +1,27 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Tracks related events, like reactions, replies, edits, etc. Note that things
+-- in this table are not necessarily "valid", e.g. it may contain edits from
+-- people who don't have power to edit other people's events.
+CREATE TABLE IF NOT EXISTS event_relations (
+    event_id TEXT NOT NULL,
+    relates_to_id TEXT NOT NULL,
+    relation_type TEXT NOT NULL,
+    aggregation_key TEXT
+);
+
+CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id);
+CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key);
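A small sqlite3 sketch (with invented events, and without the join onto `events` or the `COUNT(DISTINCT sender)` that the real `get_aggregation_groups_for_event` query uses) of how this schema supports grouping annotations by key:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript("""
        CREATE TABLE event_relations (
            event_id TEXT NOT NULL,
            relates_to_id TEXT NOT NULL,
            relation_type TEXT NOT NULL,
            aggregation_key TEXT
        );
        INSERT INTO event_relations VALUES
            ('$a', '$msg', 'm.annotation', '+1'),
            ('$b', '$msg', 'm.annotation', '+1'),
            ('$c', '$msg', 'm.annotation', '-1');
    """)
    rows = conn.execute("""
        SELECT relation_type, aggregation_key, COUNT(*) FROM event_relations
        WHERE relates_to_id = '$msg' AND relation_type = 'm.annotation'
        GROUP BY relation_type, aggregation_key
        ORDER BY COUNT(*) DESC
    """).fetchall()
    print(rows)  # [('m.annotation', '+1', 2), ('m.annotation', '-1', 1)]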
diff --git a/synapse/storage/schema/delta/54/stats.sql b/synapse/storage/schema/delta/54/stats.sql
new file mode 100644
index 0000000000..652e58308e
--- /dev/null
+++ b/synapse/storage/schema/delta/54/stats.sql
@@ -0,0 +1,80 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE stats_stream_pos (
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    stream_id BIGINT,
+    CHECK (Lock='X')
+);
+
+INSERT INTO stats_stream_pos (stream_id) VALUES (null);
+
+CREATE TABLE user_stats (
+    user_id TEXT NOT NULL,
+    ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+    public_rooms INT NOT NULL,
+    private_rooms INT NOT NULL
+);
+
+CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts);
+
+CREATE TABLE room_stats (
+    room_id TEXT NOT NULL,
+    ts BIGINT NOT NULL,
+    bucket_size INT NOT NULL,
+    current_state_events INT NOT NULL,
+    joined_members INT NOT NULL,
+    invited_members INT NOT NULL,
+    left_members INT NOT NULL,
+    banned_members INT NOT NULL,
+    state_events INT NOT NULL
+);
+
+CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts);
+
+-- cache of current room state; useful for the publicRooms list
+CREATE TABLE room_state (
+    room_id TEXT NOT NULL,
+    join_rules TEXT,
+    history_visibility TEXT,
+    encryption TEXT,
+    name TEXT,
+    topic TEXT,
+    avatar TEXT,
+    canonical_alias TEXT
+    -- get aliases straight from the right table
+);
+
+CREATE UNIQUE INDEX room_state_room ON room_state(room_id);
+
+CREATE TABLE room_stats_earliest_token (
+    room_id TEXT NOT NULL,
+    token BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id);
+
+-- Set up staging tables
+INSERT INTO background_updates (update_name, progress_json) VALUES
+    ('populate_stats_createtables', '{}');
+
+-- Run through each room and update stats
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_stats_process_rooms', '{}', 'populate_stats_createtables');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_stats_cleanup', '{}', 'populate_stats_process_rooms');
diff --git a/synapse/storage/schema/delta/54/stats2.sql b/synapse/storage/schema/delta/54/stats2.sql
new file mode 100644
index 0000000000..3b2d48447f
--- /dev/null
+++ b/synapse/storage/schema/delta/54/stats2.sql
@@ -0,0 +1,28 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This delta file gets run after the `54/stats.sql` delta.
+
+-- We want to add some indices to the temporary stats table, so we re-insert
+-- 'populate_stats_createtables' if we are still processing the rooms update.
+INSERT INTO background_updates (update_name, progress_json)
+    SELECT 'populate_stats_createtables', '{}'
+    WHERE
+        'populate_stats_process_rooms' IN (
+            SELECT update_name FROM background_updates
+        )
+        AND 'populate_stats_createtables' NOT IN (  -- don't insert if already exists
+            SELECT update_name FROM background_updates
+        );
diff --git a/synapse/storage/state_deltas.py b/synapse/storage/state_deltas.py
index 31a0279b18..5fdb442104 100644
--- a/synapse/storage/state_deltas.py
+++ b/synapse/storage/state_deltas.py
@@ -84,10 +84,16 @@ class StateDeltasStore(SQLBaseStore):
             "get_current_state_deltas", get_current_state_deltas_txn
         )
 
-    def get_max_stream_id_in_current_state_deltas(self):
-        return self._simple_select_one_onecol(
+    def _get_max_stream_id_in_current_state_deltas_txn(self, txn):
+        return self._simple_select_one_onecol_txn(
+            txn,
             table="current_state_delta_stream",
             keyvalues={},
             retcol="COALESCE(MAX(stream_id), -1)",
-            desc="get_max_stream_id_in_current_state_deltas",
+        )
+
+    def get_max_stream_id_in_current_state_deltas(self):
+        return self.runInteraction(
+            "get_max_stream_id_in_current_state_deltas",
+            self._get_max_stream_id_in_current_state_deltas_txn,
         )
diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py
new file mode 100644
index 0000000000..1c0b183a56
--- /dev/null
+++ b/synapse/storage/stats.py
@@ -0,0 +1,452 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018, 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.storage.prepare_database import get_statements
+from synapse.storage.state_deltas import StateDeltasStore
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+# these fields track absolutes (e.g. total number of rooms on the server)
+ABSOLUTE_STATS_FIELDS = {
+    "room": (
+        "current_state_events",
+        "joined_members",
+        "invited_members",
+        "left_members",
+        "banned_members",
+        "state_events",
+    ),
+    "user": ("public_rooms", "private_rooms"),
+}
+
+TYPE_TO_ROOM = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")}
+
+TEMP_TABLE = "_temp_populate_stats"
+
+
+class StatsStore(StateDeltasStore):
+    def __init__(self, db_conn, hs):
+        super(StatsStore, self).__init__(db_conn, hs)
+
+        self.server_name = hs.hostname
+        self.clock = self.hs.get_clock()
+        self.stats_enabled = hs.config.stats_enabled
+        self.stats_bucket_size = hs.config.stats_bucket_size
+
+        self.register_background_update_handler(
+            "populate_stats_createtables", self._populate_stats_createtables
+        )
+        self.register_background_update_handler(
+            "populate_stats_process_rooms", self._populate_stats_process_rooms
+        )
+        self.register_background_update_handler(
+            "populate_stats_cleanup", self._populate_stats_cleanup
+        )
+
+    @defer.inlineCallbacks
+    def _populate_stats_createtables(self, progress, batch_size):
+
+        if not self.stats_enabled:
+            yield self._end_background_update("populate_stats_createtables")
+            defer.returnValue(1)
+
+        # Get all the rooms that we want to process.
+        def _make_staging_area(txn):
+            # Create the temporary tables
+            stmts = get_statements("""
+                -- We just recreate the table; we'll be reinserting the
+                -- correct entries again later anyway.
+                DROP TABLE IF EXISTS {temp}_rooms;
+
+                CREATE TABLE IF NOT EXISTS {temp}_rooms(
+                    room_id TEXT NOT NULL,
+                    events BIGINT NOT NULL
+                );
+
+                CREATE INDEX {temp}_rooms_events
+                    ON {temp}_rooms(events);
+                CREATE INDEX {temp}_rooms_id
+                    ON {temp}_rooms(room_id);
+            """.format(temp=TEMP_TABLE).splitlines())
+
+            for statement in stmts:
+                txn.execute(statement)
+
+            sql = (
+                "CREATE TABLE IF NOT EXISTS "
+                + TEMP_TABLE
+                + "_position(position TEXT NOT NULL)"
+            )
+            txn.execute(sql)
+
+            # Get rooms we want to process from the database, only adding
+            # those that we haven't already processed (i.e. those not in
+            # room_stats_earliest_token).
+            sql = """
+                INSERT INTO %s_rooms (room_id, events)
+                SELECT c.room_id, count(*) FROM current_state_events AS c
+                LEFT JOIN room_stats_earliest_token AS t USING (room_id)
+                WHERE t.room_id IS NULL
+                GROUP BY c.room_id
+            """ % (TEMP_TABLE,)
+            txn.execute(sql)
+
+        new_pos = yield self.get_max_stream_id_in_current_state_deltas()
+        yield self.runInteraction("populate_stats_temp_build", _make_staging_area)
+        yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos})
+        self.get_earliest_token_for_room_stats.invalidate_all()
+
+        yield self._end_background_update("populate_stats_createtables")
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
+    def _populate_stats_cleanup(self, progress, batch_size):
+        """
+        Update the stats stream position, then clean up the temporary staging tables.
+        """
+        if not self.stats_enabled:
+            yield self._end_background_update("populate_stats_cleanup")
+            defer.returnValue(1)
+
+        position = yield self._simple_select_one_onecol(
+            TEMP_TABLE + "_position", None, "position"
+        )
+        yield self.update_stats_stream_pos(position)
+
+        def _delete_staging_area(txn):
+            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
+            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")
+
+        yield self.runInteraction("populate_stats_cleanup", _delete_staging_area)
+
+        yield self._end_background_update("populate_stats_cleanup")
+        defer.returnValue(1)
+
+    @defer.inlineCallbacks
+    def _populate_stats_process_rooms(self, progress, batch_size):
+
+        if not self.stats_enabled:
+            yield self._end_background_update("populate_stats_process_rooms")
+            defer.returnValue(1)
+
+        # If we have no progress recorded, we're starting afresh: delete everything.
+        if not progress:
+            yield self.delete_all_stats()
+
+        def _get_next_batch(txn):
+            # Only fetch 250 rooms, so we don't fetch too many at once, even
+            # if those 250 rooms have fewer than batch_size state events.
+            sql = """
+                SELECT room_id, events FROM %s_rooms
+                ORDER BY events DESC
+                LIMIT 250
+            """ % (
+                TEMP_TABLE,
+            )
+            txn.execute(sql)
+            rooms_to_work_on = txn.fetchall()
+
+            if not rooms_to_work_on:
+                return None
+
+            # Get how many are left to process, so we can give status on how
+            # far we are in processing
+            txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
+            progress["remaining"] = txn.fetchone()[0]
+
+            return rooms_to_work_on
+
+        rooms_to_work_on = yield self.runInteraction(
+            "populate_stats_temp_read", _get_next_batch
+        )
+
+        # No more rooms -- we're done with this background update.
+        if not rooms_to_work_on:
+            yield self._end_background_update("populate_stats_process_rooms")
+            defer.returnValue(1)
+
+        logger.info(
+            "Processing the next %d rooms of %d remaining",
+            len(rooms_to_work_on), progress["remaining"],
+        )
+
+        # Number of state events we've processed by going through each room
+        processed_event_count = 0
+
+        for room_id, event_count in rooms_to_work_on:
+
+            current_state_ids = yield self.get_current_state_ids(room_id)
+
+            join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
+            history_visibility_id = current_state_ids.get(
+                (EventTypes.RoomHistoryVisibility, "")
+            )
+            encryption_id = current_state_ids.get((EventTypes.RoomEncryption, ""))
+            name_id = current_state_ids.get((EventTypes.Name, ""))
+            topic_id = current_state_ids.get((EventTypes.Topic, ""))
+            avatar_id = current_state_ids.get((EventTypes.RoomAvatar, ""))
+            canonical_alias_id = current_state_ids.get((EventTypes.CanonicalAlias, ""))
+
+            state_events = yield self.get_events([
+                join_rules_id, history_visibility_id, encryption_id, name_id,
+                topic_id, avatar_id, canonical_alias_id,
+            ])
+
+            def _get_or_none(event_id, arg):
+                event = state_events.get(event_id)
+                if event:
+                    return event.content.get(arg)
+                return None
+
+            yield self.update_room_state(
+                room_id,
+                {
+                    "join_rules": _get_or_none(join_rules_id, "join_rule"),
+                    "history_visibility": _get_or_none(
+                        history_visibility_id, "history_visibility"
+                    ),
+                    "encryption": _get_or_none(encryption_id, "algorithm"),
+                    "name": _get_or_none(name_id, "name"),
+                    "topic": _get_or_none(topic_id, "topic"),
+                    "avatar": _get_or_none(avatar_id, "url"),
+                    "canonical_alias": _get_or_none(canonical_alias_id, "alias"),
+                },
+            )
+
+            now = self.hs.get_reactor().seconds()
+
+            # quantise the time to the start of its bucket
+            now = (now // self.stats_bucket_size) * self.stats_bucket_size
+
+            def _fetch_data(txn):
+
+                # Get the current token of the room
+                current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn)
+
+                current_state_events = len(current_state_ids)
+
+                membership_counts = self._get_user_counts_in_room_txn(txn, room_id)
+
+                total_state_events = self._get_total_state_event_counts_txn(
+                    txn, room_id
+                )
+
+                self._update_stats_txn(
+                    txn,
+                    "room",
+                    room_id,
+                    now,
+                    {
+                        "bucket_size": self.stats_bucket_size,
+                        "current_state_events": current_state_events,
+                        "joined_members": membership_counts.get(Membership.JOIN, 0),
+                        "invited_members": membership_counts.get(Membership.INVITE, 0),
+                        "left_members": membership_counts.get(Membership.LEAVE, 0),
+                        "banned_members": membership_counts.get(Membership.BAN, 0),
+                        "state_events": total_state_events,
+                    },
+                )
+                self._simple_insert_txn(
+                    txn,
+                    "room_stats_earliest_token",
+                    {"room_id": room_id, "token": current_token},
+                )
+
+                # We've finished a room. Delete it from the table.
+                self._simple_delete_one_txn(
+                    txn, TEMP_TABLE + "_rooms", {"room_id": room_id},
+                )
+
+            yield self.runInteraction("update_room_stats", _fetch_data)
+
+            # Update the remaining counter.
+            progress["remaining"] -= 1
+            yield self.runInteraction(
+                "populate_stats",
+                self._background_update_progress_txn,
+                "populate_stats_process_rooms",
+                progress,
+            )
+
+            processed_event_count += event_count
+
+            if processed_event_count > batch_size:
+                # Don't process any more rooms, we've hit our batch size.
+                defer.returnValue(processed_event_count)
+
+        defer.returnValue(processed_event_count)
+
+    def delete_all_stats(self):
+        """
+        Delete all statistics records.
+        """
+
+        def _delete_all_stats_txn(txn):
+            txn.execute("DELETE FROM room_state")
+            txn.execute("DELETE FROM room_stats")
+            txn.execute("DELETE FROM room_stats_earliest_token")
+            txn.execute("DELETE FROM user_stats")
+
+        return self.runInteraction("delete_all_stats", _delete_all_stats_txn)
+
+    def get_stats_stream_pos(self):
+        return self._simple_select_one_onecol(
+            table="stats_stream_pos",
+            keyvalues={},
+            retcol="stream_id",
+            desc="stats_stream_pos",
+        )
+
+    def update_stats_stream_pos(self, stream_id):
+        return self._simple_update_one(
+            table="stats_stream_pos",
+            keyvalues={},
+            updatevalues={"stream_id": stream_id},
+            desc="update_stats_stream_pos",
+        )
+
+    def update_room_state(self, room_id, fields):
+        """
+        Args:
+            room_id (str)
+            fields (dict[str:Any])
+        """
+        return self._simple_upsert(
+            table="room_state",
+            keyvalues={"room_id": room_id},
+            values=fields,
+            desc="update_room_state",
+        )
+
+    def get_deltas_for_room(self, room_id, start, size=100):
+        """
+        Get statistics deltas for a given room.
+
+        Args:
+            room_id (str)
+            start (int): Pagination start. Number of entries, not timestamp.
+            size (int): How many entries to return.
+
+        Returns:
+            Deferred[list[dict]], where the dict has the keys of
+            ABSOLUTE_STATS_FIELDS["room"] and "ts".
+        """
+        return self._simple_select_list_paginate(
+            "room_stats",
+            {"room_id": room_id},
+            "ts",
+            start,
+            size,
+            retcols=(list(ABSOLUTE_STATS_FIELDS["room"]) + ["ts"]),
+            order_direction="DESC",
+        )
+
+    def get_all_room_state(self):
+        return self._simple_select_list(
+            "room_state", None, retcols=("name", "topic", "canonical_alias")
+        )
+
+    @cached()
+    def get_earliest_token_for_room_stats(self, room_id):
+        """
+        Fetch the "earliest token". This is used by the room stats delta
+        processor to ignore deltas that have been processed between the
+        start of the background task and any particular room's stats
+        being calculated.
+
+        Returns:
+            Deferred[int]
+        """
+        return self._simple_select_one_onecol(
+            "room_stats_earliest_token",
+            {"room_id": room_id},
+            retcol="token",
+            allow_none=True,
+        )
+
+    def update_stats(self, stats_type, stats_id, ts, fields):
+        table, id_col = TYPE_TO_ROOM[stats_type]
+        return self._simple_upsert(
+            table=table,
+            keyvalues={id_col: stats_id, "ts": ts},
+            values=fields,
+            desc="update_stats",
+        )
+
+    def _update_stats_txn(self, txn, stats_type, stats_id, ts, fields):
+        table, id_col = TYPE_TO_ROOM[stats_type]
+        return self._simple_upsert_txn(
+            txn, table=table, keyvalues={id_col: stats_id, "ts": ts}, values=fields
+        )
+
+    def update_stats_delta(self, ts, stats_type, stats_id, field, value):
+        def _update_stats_delta(txn):
+            table, id_col = TYPE_TO_ROOM[stats_type]
+
+            sql = (
+                "SELECT * FROM %s"
+                " WHERE %s=? and ts=("
+                "  SELECT MAX(ts) FROM %s"
+                "  WHERE %s=?"
+                ")"
+            ) % (table, id_col, table, id_col)
+            txn.execute(sql, (stats_id, stats_id))
+            rows = self.cursor_to_dict(txn)
+            if len(rows) == 0:
+                # silently skip as we don't have anything to apply a delta to yet.
+                # this tries to minimise any race between the initial stats
+                # population and subsequent deltas arriving.
+                return
+
+            current_ts = ts
+            latest_ts = rows[0]["ts"]
+            if current_ts < latest_ts:
+                # This one is in the past, but we're just encountering it now.
+                # Mark it as part of the current bucket.
+                current_ts = latest_ts
+            elif ts != latest_ts:
+                # we have to copy our absolute counters over to the new entry.
+                values = {
+                    key: rows[0][key] for key in ABSOLUTE_STATS_FIELDS[stats_type]
+                }
+                values[id_col] = stats_id
+                values["ts"] = ts
+                values["bucket_size"] = self.stats_bucket_size
+
+                self._simple_insert_txn(txn, table=table, values=values)
+
+            # actually update the new value
+            if field in ABSOLUTE_STATS_FIELDS[stats_type]:
+                self._simple_update_txn(
+                    txn,
+                    table=table,
+                    keyvalues={id_col: stats_id, "ts": current_ts},
+                    updatevalues={field: value},
+                )
+            else:
+                sql = ("UPDATE %s SET %s=%s+? WHERE %s=? AND ts=?") % (
+                    table,
+                    field,
+                    field,
+                    id_col,
+                )
+                txn.execute(sql, (value, stats_id, current_ts))
+
+        return self.runInteraction("update_stats_delta", _update_stats_delta)
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index d105b6b17d..529ad4ea79 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -64,59 +64,135 @@ _EventDictReturn = namedtuple(
 )
 
 
-def lower_bound(token, engine, inclusive=False):
-    inclusive = "=" if inclusive else ""
-    if token.topological is None:
-        return "(%d <%s %s)" % (token.stream, inclusive, "stream_ordering")
-    else:
-        if isinstance(engine, PostgresEngine):
-            # Postgres doesn't optimise ``(x < a) OR (x=a AND y<b)`` as well
-            # as it optimises ``(x,y) < (a,b)`` on multicolumn indexes. So we
-            # use the later form when running against postgres.
-            return "((%d,%d) <%s (%s,%s))" % (
-                token.topological,
-                token.stream,
-                inclusive,
-                "topological_ordering",
-                "stream_ordering",
+def generate_pagination_where_clause(
+    direction, column_names, from_token, to_token, engine,
+):
+    """Creates an SQL expression to bound the columns by the pagination
+    tokens.
+
+    For example, an SQL expression like:
+
+        (6, 7) >= (topological_ordering, stream_ordering)
+        AND (5, 3) < (topological_ordering, stream_ordering)
+
+    would be generated for direction="b", from_token=(6, 7) and to_token=(5, 3).
+
+    Note that tokens are considered to be after the row they are in, e.g. if
+    a row A has a token T, then we consider A to be before T. This convention
+    is important when figuring out inequalities for the generated SQL, and
+    produces the following result:
+        - If paginating forwards then we exclude any rows matching the from
+          token, but include those that match the to token.
+        - If paginating backwards then we include any rows matching the from
+          token, but exclude those that match the to token.
+
+    Args:
+        direction (str): Whether we're paginating backwards("b") or
+            forwards ("f").
+        column_names (tuple[str, str]): The column names to bound. Must *not*
+            be user defined as these get inserted directly into the SQL
+            statement without escapes.
+        from_token (tuple[int, int]|None): The start point for the pagination.
+            This is an exclusive minimum bound if direction is "f", and an
+            inclusive maximum bound if direction is "b".
+        to_token (tuple[int, int]|None): The end point for the pagination.
+            This is an inclusive maximum bound if direction is "f", and an
+            exclusive minimum bound if direction is "b".
+        engine: The database engine to generate the clauses for
+
+    Returns:
+        str: The sql expression
+    """
+    assert direction in ("b", "f")
+
+    where_clause = []
+    if from_token:
+        where_clause.append(
+            _make_generic_sql_bound(
+                bound=">=" if direction == "b" else "<",
+                column_names=column_names,
+                values=from_token,
+                engine=engine,
             )
-        return "(%d < %s OR (%d = %s AND %d <%s %s))" % (
-            token.topological,
-            "topological_ordering",
-            token.topological,
-            "topological_ordering",
-            token.stream,
-            inclusive,
-            "stream_ordering",
-        )
-
-
-def upper_bound(token, engine, inclusive=True):
-    inclusive = "=" if inclusive else ""
-    if token.topological is None:
-        return "(%d >%s %s)" % (token.stream, inclusive, "stream_ordering")
-    else:
-        if isinstance(engine, PostgresEngine):
-            # Postgres doesn't optimise ``(x > a) OR (x=a AND y>b)`` as well
-            # as it optimises ``(x,y) > (a,b)`` on multicolumn indexes. So we
-            # use the later form when running against postgres.
-            return "((%d,%d) >%s (%s,%s))" % (
-                token.topological,
-                token.stream,
-                inclusive,
-                "topological_ordering",
-                "stream_ordering",
+        )
+
+    if to_token:
+        where_clause.append(
+            _make_generic_sql_bound(
+                bound="<" if direction == "b" else ">=",
+                column_names=column_names,
+                values=to_token,
+                engine=engine,
             )
-        return "(%d > %s OR (%d = %s AND %d >%s %s))" % (
-            token.topological,
-            "topological_ordering",
-            token.topological,
-            "topological_ordering",
-            token.stream,
-            inclusive,
-            "stream_ordering",
         )
 
+    return " AND ".join(where_clause)
+
+
+def _make_generic_sql_bound(bound, column_names, values, engine):
+    """Create an SQL expression that bounds the given column names by the
+    values, e.g. create the equivalent of `(1, 2) < (col1, col2)`.
+
+    Only works with two columns.
+
+    Older versions of SQLite don't support that syntax so we have to expand it
+    out manually.
+
+    Args:
+        bound (str): The comparison operator to use. One of ">", "<", ">=",
+            "<=", where the values are on the left and columns on the right.
+        column_names (tuple[str, str]): The column names. Must *not* be user defined
+            as these get inserted directly into the SQL statement without
+            escapes.
+        values (tuple[int|None, int]): The values to bound the columns by. If
+            the first value is None then only creates a bound on the second
+            column.
+        engine: The database engine to generate the SQL for
+
+    Returns:
+        str
+    """
+
+    assert bound in (">", "<", ">=", "<=")
+
+    name1, name2 = column_names
+    val1, val2 = values
+
+    if val1 is None:
+        val2 = int(val2)
+        return "(%d %s %s)" % (val2, bound, name2)
+
+    val1 = int(val1)
+    val2 = int(val2)
+
+    if isinstance(engine, PostgresEngine):
+        # Postgres doesn't optimise ``(x < a) OR (x=a AND y<b)`` as well
+        # as it optimises ``(x,y) < (a,b)`` on multicolumn indexes. So we
+        # use the latter form when running against postgres.
+        return "((%d,%d) %s (%s,%s))" % (
+            val1, val2,
+            bound,
+            name1, name2,
+        )
+
+    # We want to generate queries of e.g. the form:
+    #
+    #   (val1 < name1 OR (val1 = name1 AND val2 <= name2))
+    #
+    # which is equivalent to (val1, val2) < (name1, name2)
+
+    return """(
+        {val1:d} {strict_bound} {name1}
+        OR ({val1:d} = {name1} AND {val2:d} {bound} {name2})
+    )""".format(
+        name1=name1,
+        val1=val1,
+        name2=name2,
+        val2=val2,
+        strict_bound=bound[0],  # the first comparison must always be strict (no equality)
+        bound=bound,
+    )
+
 
 def filter_to_clause(event_filter):
     # NB: This may create SQL clauses that don't optimise well (and we don't
@@ -762,20 +838,16 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         args = [False, room_id]
         if direction == 'b':
             order = "DESC"
-            bounds = upper_bound(from_token, self.database_engine)
-            if to_token:
-                bounds = "%s AND %s" % (
-                    bounds,
-                    lower_bound(to_token, self.database_engine),
-                )
         else:
             order = "ASC"
-            bounds = lower_bound(from_token, self.database_engine)
-            if to_token:
-                bounds = "%s AND %s" % (
-                    bounds,
-                    upper_bound(to_token, self.database_engine),
-                )
+
+        bounds = generate_pagination_where_clause(
+            direction=direction,
+            column_names=("topological_ordering", "stream_ordering"),
+            from_token=from_token,
+            to_token=to_token,
+            engine=self.database_engine,
+        )
 
         filter_clause, filter_args = filter_to_clause(event_filter)
 
diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 311b49e18a..fe412355d8 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -226,6 +226,8 @@ class LoggingContext(object):
             self.request = request
 
     def __str__(self):
+        if self.request:
+            return str(self.request)
         return "%s@%x" % (self.name, id(self))
 
     @classmethod
@@ -274,12 +276,10 @@ class LoggingContext(object):
         current = self.set_current_context(self.previous_context)
         if current is not self:
             if current is self.sentinel:
-                logger.warn("Expected logging context %s has been lost", self)
+                logger.warning("Expected logging context %s was lost", self)
             else:
-                logger.warn(
-                    "Current logging context %s is not expected context %s",
-                    current,
-                    self
+                logger.warning(
+                    "Expected logging context %s but found %s", self, current
                 )
         self.previous_context = None
         self.alive = False
@@ -433,10 +433,14 @@ class PreserveLoggingContext(object):
         context = LoggingContext.set_current_context(self.current_context)
 
         if context != self.new_context:
-            logger.warn(
-                "Unexpected logging context: %s is not %s",
-                context, self.new_context,
-            )
+            if context is LoggingContext.sentinel:
+                logger.warning("Expected logging context %s was lost", self.new_context)
+            else:
+                logger.warning(
+                    "Expected logging context %s but found %s",
+                    self.new_context,
+                    context,
+                )
 
         if self.current_context is not LoggingContext.sentinel:
             if not self.current_context.alive:
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 3c79d4afe7..3933ad4347 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -24,7 +24,12 @@ from twisted.internet import defer
 
 from synapse.api.errors import SynapseError
 from synapse.crypto import keyring
-from synapse.crypto.keyring import KeyLookupError
+from synapse.crypto.keyring import (
+    KeyLookupError,
+    PerspectivesKeyFetcher,
+    ServerKeyFetcher,
+)
+from synapse.storage.keys import FetchKeyResult
 from synapse.util import logcontext
 from synapse.util.logcontext import LoggingContext
 
@@ -50,11 +55,11 @@ class MockPerspectiveServer(object):
                 key_id: {"key": signedjson.key.encode_verify_key_base64(verify_key)}
             },
         }
-        return self.get_signed_response(res)
+        self.sign_response(res)
+        return res
 
-    def get_signed_response(self, res):
+    def sign_response(self, res):
         signedjson.sign.sign_json(res, self.server_name, self.key)
-        return res
 
 
 class KeyringTestCase(unittest.HomeserverTestCase):
@@ -80,7 +85,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         # we run the lookup in a logcontext so that the patched inlineCallbacks can check
         # it is doing the right thing with logcontexts.
         wait_1_deferred = run_in_context(
-            kr.wait_for_previous_lookups, ["server1"], {"server1": lookup_1_deferred}
+            kr.wait_for_previous_lookups, {"server1": lookup_1_deferred}
         )
 
         # there were no previous lookups, so the deferred should be ready
@@ -89,7 +94,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         # set off another wait. It should block because the first lookup
         # hasn't yet completed.
         wait_2_deferred = run_in_context(
-            kr.wait_for_previous_lookups, ["server1"], {"server1": lookup_2_deferred}
+            kr.wait_for_previous_lookups, {"server1": lookup_2_deferred}
         )
 
         self.assertFalse(wait_2_deferred.called)
@@ -192,8 +197,18 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         kr = keyring.Keyring(self.hs)
 
         key1 = signedjson.key.generate_signing_key(1)
-        r = self.hs.datastore.store_server_verify_key(
-            "server9", "", time.time() * 1000, signedjson.key.get_verify_key(key1)
+        key1_id = "%s:%s" % (key1.alg, key1.version)
+
+        r = self.hs.datastore.store_server_verify_keys(
+            "server9",
+            time.time() * 1000,
+            [
+                (
+                    "server9",
+                    key1_id,
+                    FetchKeyResult(signedjson.key.get_verify_key(key1), 1000),
+                ),
+            ],
         )
         self.get_success(r)
         json1 = {}
@@ -207,16 +222,23 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         self.assertFalse(d.called)
         self.get_success(d)
 
+
+class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
+    def make_homeserver(self, reactor, clock):
+        self.http_client = Mock()
+        hs = self.setup_test_homeserver(handlers=None, http_client=self.http_client)
+        return hs
+
     def test_get_keys_from_server(self):
         # arbitrarily advance the clock a bit
         self.reactor.advance(100)
 
         SERVER_NAME = "server2"
-        kr = keyring.Keyring(self.hs)
+        fetcher = ServerKeyFetcher(self.hs)
         testkey = signedjson.key.generate_signing_key("ver1")
         testverifykey = signedjson.key.get_verify_key(testkey)
         testverifykey_id = "ed25519:ver1"
-        VALID_UNTIL_TS = 1000
+        VALID_UNTIL_TS = 200 * 1000
 
         # valid response
         response = {
@@ -239,11 +261,12 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         self.http_client.get_json.side_effect = get_json
 
         server_name_and_key_ids = [(SERVER_NAME, ("key1",))]
-        keys = self.get_success(kr.get_keys_from_server(server_name_and_key_ids))
+        keys = self.get_success(fetcher.get_keys(server_name_and_key_ids))
         k = keys[SERVER_NAME][testverifykey_id]
-        self.assertEqual(k, testverifykey)
-        self.assertEqual(k.alg, "ed25519")
-        self.assertEqual(k.version, "ver1")
+        self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS)
+        self.assertEqual(k.verify_key, testverifykey)
+        self.assertEqual(k.verify_key.alg, "ed25519")
+        self.assertEqual(k.verify_key.version, "ver1")
 
         # check that the perspectives store is correctly updated
         lookup_triplet = (SERVER_NAME, testverifykey_id, None)
@@ -266,15 +289,26 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         # change the server name: it should cause a rejection
         response["server_name"] = "OTHER_SERVER"
         self.get_failure(
-            kr.get_keys_from_server(server_name_and_key_ids), KeyLookupError
+            fetcher.get_keys(server_name_and_key_ids), KeyLookupError
         )
 
+
+class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
+    def make_homeserver(self, reactor, clock):
+        self.mock_perspective_server = MockPerspectiveServer()
+        self.http_client = Mock()
+        hs = self.setup_test_homeserver(handlers=None, http_client=self.http_client)
+        keys = self.mock_perspective_server.get_verify_keys()
+        hs.config.perspectives = {self.mock_perspective_server.server_name: keys}
+        return hs
+
     def test_get_keys_from_perspectives(self):
         # arbitrarily advance the clock a bit
         self.reactor.advance(100)
 
+        fetcher = PerspectivesKeyFetcher(self.hs)
+
         SERVER_NAME = "server2"
-        kr = keyring.Keyring(self.hs)
         testkey = signedjson.key.generate_signing_key("ver1")
         testverifykey = signedjson.key.get_verify_key(testkey)
         testverifykey_id = "ed25519:ver1"
@@ -292,9 +326,10 @@ class KeyringTestCase(unittest.HomeserverTestCase):
             },
         }
 
-        persp_resp = {
-            "server_keys": [self.mock_perspective_server.get_signed_response(response)]
-        }
+        # the response must be signed by both the origin server and the perspectives
+        # server.
+        signedjson.sign.sign_json(response, SERVER_NAME, testkey)
+        self.mock_perspective_server.sign_response(response)
 
         def post_json(destination, path, data, **kwargs):
             self.assertEqual(destination, self.mock_perspective_server.server_name)
@@ -303,17 +338,18 @@ class KeyringTestCase(unittest.HomeserverTestCase):
             # check that the request is for the expected key
             q = data["server_keys"]
             self.assertEqual(list(q[SERVER_NAME].keys()), ["key1"])
-            return persp_resp
+            return {"server_keys": [response]}
 
         self.http_client.post_json.side_effect = post_json
 
         server_name_and_key_ids = [(SERVER_NAME, ("key1",))]
-        keys = self.get_success(kr.get_keys_from_perspectives(server_name_and_key_ids))
+        keys = self.get_success(fetcher.get_keys(server_name_and_key_ids))
         self.assertIn(SERVER_NAME, keys)
         k = keys[SERVER_NAME][testverifykey_id]
-        self.assertEqual(k, testverifykey)
-        self.assertEqual(k.alg, "ed25519")
-        self.assertEqual(k.version, "ver1")
+        self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS)
+        self.assertEqual(k.verify_key, testverifykey)
+        self.assertEqual(k.verify_key.alg, "ed25519")
+        self.assertEqual(k.verify_key.version, "ver1")
 
         # check that the perspectives store is correctly updated
         lookup_triplet = (SERVER_NAME, testverifykey_id, None)
@@ -330,13 +366,81 @@ class KeyringTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(
             bytes(res["key_json"]),
-            canonicaljson.encode_canonical_json(persp_resp["server_keys"][0]),
+            canonicaljson.encode_canonical_json(response),
         )
 
+    def test_invalid_perspectives_responses(self):
+        """Check that invalid responses from the perspectives server are rejected"""
+        # arbitrarily advance the clock a bit
+        self.reactor.advance(100)
+
+        SERVER_NAME = "server2"
+        testkey = signedjson.key.generate_signing_key("ver1")
+        testverifykey = signedjson.key.get_verify_key(testkey)
+        testverifykey_id = "ed25519:ver1"
+        VALID_UNTIL_TS = 200 * 1000
+
+        def build_response():
+            # valid response
+            response = {
+                "server_name": SERVER_NAME,
+                "old_verify_keys": {},
+                "valid_until_ts": VALID_UNTIL_TS,
+                "verify_keys": {
+                    testverifykey_id: {
+                        "key": signedjson.key.encode_verify_key_base64(testverifykey)
+                    }
+                },
+            }
+
+            # the response must be signed by both the origin server and the perspectives
+            # server.
+            signedjson.sign.sign_json(response, SERVER_NAME, testkey)
+            self.mock_perspective_server.sign_response(response)
+            return response
+
+        def get_key_from_perspectives(response):
+            fetcher = PerspectivesKeyFetcher(self.hs)
+            server_name_and_key_ids = [(SERVER_NAME, ("key1",))]
+
+            def post_json(destination, path, data, **kwargs):
+                self.assertEqual(destination, self.mock_perspective_server.server_name)
+                self.assertEqual(path, "/_matrix/key/v2/query")
+                return {"server_keys": [response]}
+
+            self.http_client.post_json.side_effect = post_json
+
+            return self.get_success(
+                fetcher.get_keys(server_name_and_key_ids)
+            )
+
+        # start with a valid response so we can check we are testing the right thing
+        response = build_response()
+        keys = get_key_from_perspectives(response)
+        k = keys[SERVER_NAME][testverifykey_id]
+        self.assertEqual(k.verify_key, testverifykey)
+
+        # remove the perspectives server's signature
+        response = build_response()
+        del response["signatures"][self.mock_perspective_server.server_name]
+        self.http_client.post_json.return_value = {"server_keys": [response]}
+        keys = get_key_from_perspectives(response)
+        self.assertEqual(keys, {}, "Expected empty dict with missing persp server sig")
+
+        # remove the origin server's signature
+        response = build_response()
+        del response["signatures"][SERVER_NAME]
+        self.http_client.post_json.return_value = {"server_keys": [response]}
+        keys = get_key_from_perspectives(response)
+        self.assertEqual(keys, {}, "Expected empty dict with missing origin server sig")
+
 
 @defer.inlineCallbacks
 def run_in_context(f, *args, **kwargs):
-    with LoggingContext("testctx"):
+    with LoggingContext("testctx") as ctx:
+        # we set the "request" prop to make it easier to follow what's going on in the
+        # logs.
+        ctx.request = "testctx"
         rv = yield f(*args, **kwargs)
     defer.returnValue(rv)
 
diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py
new file mode 100644
index 0000000000..1e3e5aec66
--- /dev/null
+++ b/tests/federation/test_complexity.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 Matrix.org Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+
+from synapse.config.ratelimiting import FederationRateLimitConfig
+from synapse.federation.transport import server
+from synapse.rest import admin
+from synapse.rest.client.v1 import login, room
+from synapse.util.ratelimitutils import FederationRateLimiter
+
+from tests import unittest
+
+
+class RoomComplexityTests(unittest.HomeserverTestCase):
+
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def default_config(self, name='test'):
+        config = super(RoomComplexityTests, self).default_config(name=name)
+        config["limit_large_remote_room_joins"] = True
+        config["limit_large_remote_room_complexity"] = 0.05
+        return config
+
+    def prepare(self, reactor, clock, homeserver):
+        class Authenticator(object):
+            def authenticate_request(self, request, content):
+                return defer.succeed("otherserver.nottld")
+
+        ratelimiter = FederationRateLimiter(
+            clock,
+            FederationRateLimitConfig(
+                window_size=1,
+                sleep_limit=1,
+                sleep_msec=1,
+                reject_limit=1000,
+                concurrent_requests=1000,
+            ),
+        )
+        server.register_servlets(
+            homeserver, self.resource, Authenticator(), ratelimiter
+        )
+
+    def test_complexity_simple(self):
+
+        u1 = self.register_user("u1", "pass")
+        u1_token = self.login("u1", "pass")
+
+        room_1 = self.helper.create_room_as(u1, tok=u1_token)
+        self.helper.send_state(
+            room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token
+        )
+
+        # Get the room complexity
+        request, channel = self.make_request(
+            "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,)
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code)
+        complexity = channel.json_body["v1"]
+        self.assertTrue(complexity > 0, complexity)
+
+        # Artificially raise the complexity
+        store = self.hs.get_datastore()
+        store.get_current_state_event_counts = lambda x: defer.succeed(500 * 1.23)
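+        # The mocked count is chosen so that the reported v1 complexity comes
+        # out to exactly 1.23, which the assertion below checks.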
+
+        # Get the room complexity again -- make sure it's our artificial value
+        request, channel = self.make_request(
+            "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,)
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code)
+        complexity = channel.json_body["v1"]
+        self.assertEqual(complexity, 1.23)
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 1c253d0579..5ffba2ca7a 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -228,3 +228,10 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     def test_register_not_support_user(self):
         res = self.get_success(self.handler.register(localpart='user'))
         self.assertFalse(self.store.is_support_user(res[0]))
+
+    def test_invalid_user_id_length(self):
+        invalid_user_id = "x" * 256
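+        # a localpart this long pushes the full user ID over the maximum
+        # allowed length, so registration should be rejected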
+        self.get_failure(
+            self.handler.register(localpart=invalid_user_id), SynapseError
+        )
diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py
new file mode 100644
index 0000000000..249aba3d59
--- /dev/null
+++ b/tests/handlers/test_stats.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from mock import Mock
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.rest import admin
+from synapse.rest.client.v1 import login, room
+
+from tests import unittest
+
+
+class StatsRoomTests(unittest.HomeserverTestCase):
+
+    servlets = [
+        admin.register_servlets_for_client_rest_resource,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor, clock, hs):
+
+        self.store = hs.get_datastore()
+        self.handler = self.hs.get_stats_handler()
+
+    def _add_background_updates(self):
+        """
+        Add the background updates we need to run.
+        """
+        # Ugh, have to reset this flag
+        self.store._all_done = False
+
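+        # Register the three stats background updates in dependency order:
+        # createtables -> process_rooms -> cleanup.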
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {"update_name": "populate_stats_createtables", "progress_json": "{}"},
+            )
+        )
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {
+                    "update_name": "populate_stats_process_rooms",
+                    "progress_json": "{}",
+                    "depends_on": "populate_stats_createtables",
+                },
+            )
+        )
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {
+                    "update_name": "populate_stats_cleanup",
+                    "progress_json": "{}",
+                    "depends_on": "populate_stats_process_rooms",
+                },
+            )
+        )
+
+    def test_initial_room(self):
+        """
+        The background updates will build the table from scratch.
+        """
+        r = self.get_success(self.store.get_all_room_state())
+        self.assertEqual(len(r), 0)
+
+        # Disable stats
+        self.hs.config.stats_enabled = False
+        self.handler.stats_enabled = False
+
+        u1 = self.register_user("u1", "pass")
+        u1_token = self.login("u1", "pass")
+
+        room_1 = self.helper.create_room_as(u1, tok=u1_token)
+        self.helper.send_state(
+            room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token
+        )
+
+        # Stats disabled, shouldn't have done anything
+        r = self.get_success(self.store.get_all_room_state())
+        self.assertEqual(len(r), 0)
+
+        # Enable stats
+        self.hs.config.stats_enabled = True
+        self.handler.stats_enabled = True
+
+        # Do the initial population of the user directory via the background update
+        self._add_background_updates()
+
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        r = self.get_success(self.store.get_all_room_state())
+
+        self.assertEqual(len(r), 1)
+        self.assertEqual(r[0]["topic"], "foo")
+
+    def test_initial_earliest_token(self):
+        """
+        Ingestion via notify_new_event will ignore tokens that the background
+        update has already processed.
+        """
+        self.reactor.advance(86401)
+
+        self.hs.config.stats_enabled = False
+        self.handler.stats_enabled = False
+
+        u1 = self.register_user("u1", "pass")
+        u1_token = self.login("u1", "pass")
+
+        u2 = self.register_user("u2", "pass")
+        u2_token = self.login("u2", "pass")
+
+        u3 = self.register_user("u3", "pass")
+        u3_token = self.login("u3", "pass")
+
+        room_1 = self.helper.create_room_as(u1, tok=u1_token)
+        self.helper.send_state(
+            room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token
+        )
+
+        # Begin the ingestion by creating the temp tables. This will also store
+        # the position that the deltas should begin at, once they take over.
+        self.hs.config.stats_enabled = True
+        self.handler.stats_enabled = True
+        self.store._all_done = False
+        self.get_success(self.store.update_stats_stream_pos(None))
+
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {"update_name": "populate_stats_createtables", "progress_json": "{}"},
+            )
+        )
+
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        # Now, before the table is actually ingested, add some more events.
+        self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token)
+        self.helper.join(room=room_1, user=u2, tok=u2_token)
+
+        # Now do the initial ingestion.
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {"update_name": "populate_stats_process_rooms", "progress_json": "{}"},
+            )
+        )
+        self.get_success(
+            self.store._simple_insert(
+                "background_updates",
+                {
+                    "update_name": "populate_stats_cleanup",
+                    "progress_json": "{}",
+                    "depends_on": "populate_stats_process_rooms",
+                },
+            )
+        )
+
+        self.store._all_done = False
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+        self.reactor.advance(86401)
+
+        # Now add some more events, triggering ingestion. Because of the stream
+        # position being set to before the events sent in the middle, a simpler
+        # implementation would reprocess those events, and say there were four
+        # users, not three.
+        self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token)
+        self.helper.join(room=room_1, user=u3, tok=u3_token)
+
+        # Get the deltas! There should be two -- day 1, and day 2.
+        r = self.get_success(self.store.get_deltas_for_room(room_1, 0))
+
+        # The oldest has 2 joined members
+        self.assertEqual(r[-1]["joined_members"], 2)
+
+        # The newest has 3
+        self.assertEqual(r[0]["joined_members"], 3)
+
+    def test_incorrect_state_transition(self):
+        """
+        If the state transition is not one of (JOIN, INVITE, LEAVE, BAN) to
+        (JOIN, INVITE, LEAVE, BAN), an error is raised.
+        """
+        events = {
+            "a1": {"membership": Membership.LEAVE},
+            "a2": {"membership": "not a real thing"},
+        }
+
+        def get_event(event_id):
+            m = Mock()
+            m.content = events[event_id]
+            d = defer.Deferred()
+            self.reactor.callLater(0.0, d.callback, m)
+            return d
+
+        def get_received_ts(event_id):
+            return defer.succeed(1)
+
+        self.store.get_received_ts = get_received_ts
+        self.store.get_event = get_event
+
+        deltas = [
+            {
+                "type": EventTypes.Member,
+                "state_key": "some_user",
+                "room_id": "room",
+                "event_id": "a1",
+                "prev_event_id": "a2",
+                "stream_id": "bleb",
+            }
+        ]
+
+        f = self.get_failure(self.handler._handle_deltas(deltas), ValueError)
+        self.assertEqual(
+            f.value.args[0], "'not a real thing' is not a valid prev_membership"
+        )
+
+        # And the other way...
+        deltas = [
+            {
+                "type": EventTypes.Member,
+                "state_key": "some_user",
+                "room_id": "room",
+                "event_id": "a2",
+                "prev_event_id": "a1",
+                "stream_id": "bleb",
+            }
+        ]
+
+        f = self.get_failure(self.handler._handle_deltas(deltas), ValueError)
+        self.assertEqual(
+            f.value.args[0], "'not a real thing' is not a valid membership"
+        )
diff --git a/tests/rest/client/v1/utils.py b/tests/rest/client/v1/utils.py
index 05b0143c42..f7133fc12e 100644
--- a/tests/rest/client/v1/utils.py
+++ b/tests/rest/client/v1/utils.py
@@ -127,3 +127,20 @@ class RestHelper(object):
         )
 
         return channel.json_body
+
+    def send_state(self, room_id, event_type, body, tok, expect_code=200):
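+        """Send a state event into the room, asserting that the server
+        responds with `expect_code`, and return the parsed response body.
+        """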
+        path = "/_matrix/client/r0/rooms/%s/state/%s" % (room_id, event_type)
+        if tok:
+            path = path + "?access_token=%s" % tok
+
+        request, channel = make_request(
+            self.hs.get_reactor(), "PUT", path, json.dumps(body).encode('utf8')
+        )
+        render(request, self.resource, self.hs.get_reactor())
+
+        assert int(channel.result["code"]) == expect_code, (
+            "Expected: %d, got: %d, resp: %r"
+            % (expect_code, int(channel.result["code"]), channel.result["body"])
+        )
+
+        return channel.json_body
diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index ad7d476401..b9ef46e8fb 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -92,7 +92,14 @@ class FallbackAuthTests(unittest.HomeserverTestCase):
         self.assertEqual(len(self.recaptcha_attempts), 1)
         self.assertEqual(self.recaptcha_attempts[0][0]["response"], "a")
 
-        # Now we have fufilled the recaptcha fallback step, we can then send a
+        # also complete the dummy auth
+        request, channel = self.make_request(
+            "POST", "register", {"auth": {"session": session, "type": "m.login.dummy"}}
+        )
+        self.render(request)
+
+        # Now we should have fulfilled a complete auth flow, including
+        # the recaptcha fallback step, so we can send a
         # request to the register API with the session in the authdict.
         request, channel = self.make_request(
             "POST", "register", {"auth": {"session": session}}
diff --git a/tests/rest/client/v2_alpha/test_capabilities.py b/tests/rest/client/v2_alpha/test_capabilities.py
index f3ef977404..bce5b0cf4c 100644
--- a/tests/rest/client/v2_alpha/test_capabilities.py
+++ b/tests/rest/client/v2_alpha/test_capabilities.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import synapse.rest.admin
-from synapse.api.room_versions import DEFAULT_ROOM_VERSION, KNOWN_ROOM_VERSIONS
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.rest.client.v1 import login
 from synapse.rest.client.v2_alpha import capabilities
 
@@ -32,6 +32,7 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase):
         self.url = b"/_matrix/client/r0/capabilities"
         hs = self.setup_test_homeserver()
         self.store = hs.get_datastore()
+        self.config = hs.config
         return hs
 
     def test_check_auth_required(self):
@@ -51,8 +52,10 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 200)
         for room_version in capabilities['m.room_versions']['available'].keys():
             self.assertTrue(room_version in KNOWN_ROOM_VERSIONS, "" + room_version)
+
         self.assertEqual(
-            DEFAULT_ROOM_VERSION.identifier, capabilities['m.room_versions']['default']
+            self.config.default_room_version.identifier,
+            capabilities['m.room_versions']['default'],
         )
 
     def test_get_change_password_capabilities(self):
diff --git a/tests/rest/client/v2_alpha/test_relations.py b/tests/rest/client/v2_alpha/test_relations.py
new file mode 100644
index 0000000000..43b3049daa
--- /dev/null
+++ b/tests/rest/client/v2_alpha/test_relations.py
@@ -0,0 +1,564 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+import json
+
+import six
+
+from synapse.api.constants import EventTypes, RelationTypes
+from synapse.rest import admin
+from synapse.rest.client.v1 import login, room
+from synapse.rest.client.v2_alpha import register, relations
+
+from tests import unittest
+
+
+class RelationsTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        relations.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+        register.register_servlets,
+        admin.register_servlets_for_client_rest_resource,
+    ]
+    hijack_auth = False
+
+    def make_homeserver(self, reactor, clock):
+        # We need to enable msc1849 support for aggregations
+        config = self.default_config()
+        config["experimental_msc1849_support_enabled"] = True
+        return self.setup_test_homeserver(config=config)
+
+    def prepare(self, reactor, clock, hs):
+        self.user_id, self.user_token = self._create_user("alice")
+        self.user2_id, self.user2_token = self._create_user("bob")
+
+        self.room = self.helper.create_room_as(self.user_id, tok=self.user_token)
+        self.helper.join(self.room, user=self.user2_id, tok=self.user2_token)
+        res = self.helper.send(self.room, body="Hi!", tok=self.user_token)
+        self.parent_id = res["event_id"]
+
+    def test_send_relation(self):
+        """Tests that sending a relation using the new /send_relation works
+        creates the right shape of event.
+        """
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key=u"👍")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        event_id = channel.json_body["event_id"]
+
+        request, channel = self.make_request(
+            "GET",
+            "/rooms/%s/event/%s" % (self.room, event_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        self.assert_dict(
+            {
+                "type": "m.reaction",
+                "sender": self.user_id,
+                "content": {
+                    "m.relates_to": {
+                        "event_id": self.parent_id,
+                        "key": u"👍",
+                        "rel_type": RelationTypes.ANNOTATION,
+                    }
+                },
+            },
+            channel.json_body,
+        )
+
+    def test_deny_membership(self):
+        """Test that we deny relations on membership events
+        """
+        channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member)
+        self.assertEquals(400, channel.code, channel.json_body)
+
+    def test_deny_double_react(self):
+        """Test that we deny relations on membership events
+        """
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        self.assertEquals(400, channel.code, channel.json_body)
+
+    def test_basic_paginate_relations(self):
+        """Tests that calling pagination API corectly the latest relations.
+        """
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction")
+        self.assertEquals(200, channel.code, channel.json_body)
+        annotation_id = channel.json_body["event_id"]
+
+        request, channel = self.make_request(
+            "GET",
+            "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1"
+            % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        # We expect to get back a single pagination result, which is the full
+        # relation event we sent above.
+        self.assertEquals(len(channel.json_body["chunk"]), 1, channel.json_body)
+        self.assert_dict(
+            {"event_id": annotation_id, "sender": self.user_id, "type": "m.reaction"},
+            channel.json_body["chunk"][0],
+        )
+
+        # Make sure next_batch has something in it that looks like it could be a
+        # valid token.
+        self.assertIsInstance(
+            channel.json_body.get("next_batch"), six.string_types, channel.json_body
+        )
+
+    def test_repeated_paginate_relations(self):
+        """Test that if we paginate using a limit and tokens then we get the
+        expected events.
+        """
+
+        expected_event_ids = []
+        for _ in range(10):
+            channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction")
+            self.assertEquals(200, channel.code, channel.json_body)
+            expected_event_ids.append(channel.json_body["event_id"])
+
+        prev_token = None
+        found_event_ids = []
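+        # Page through the relations one at a time until the server stops
+        # returning a next_batch token.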
+        for _ in range(20):
+            from_token = ""
+            if prev_token:
+                from_token = "&from=" + prev_token
+
+            request, channel = self.make_request(
+                "GET",
+                "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1%s"
+                % (self.room, self.parent_id, from_token),
+                access_token=self.user_token,
+            )
+            self.render(request)
+            self.assertEquals(200, channel.code, channel.json_body)
+
+            found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"])
+            next_batch = channel.json_body.get("next_batch")
+
+            self.assertNotEquals(prev_token, next_batch)
+            prev_token = next_batch
+
+            if not prev_token:
+                break
+
+        # We paginated backwards, so reverse
+        found_event_ids.reverse()
+        self.assertEquals(found_event_ids, expected_event_ids)
+
+    def test_aggregation_pagination_groups(self):
+        """Test that we can paginate annotation groups correctly.
+        """
+
+        # We need to create ten separate users to send each reaction.
+        access_tokens = [self.user_token, self.user2_token]
+        idx = 0
+        while len(access_tokens) < 10:
+            user_id, token = self._create_user("test" + str(idx))
+            idx += 1
+
+            self.helper.join(self.room, user=user_id, tok=token)
+            access_tokens.append(token)
+
+        idx = 0
+        sent_groups = {u"👍": 10, u"a": 7, u"b": 5, u"c": 3, u"d": 2, u"e": 1}
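+        # Send one reaction per (key, count) pair, cycling through the users
+        # so that no user annotates the same key twice.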
+        for key in itertools.chain.from_iterable(
+            itertools.repeat(key, num) for key, num in sent_groups.items()
+        ):
+            channel = self._send_relation(
+                RelationTypes.ANNOTATION,
+                "m.reaction",
+                key=key,
+                access_token=access_tokens[idx],
+            )
+            self.assertEquals(200, channel.code, channel.json_body)
+
+            idx += 1
+            idx %= len(access_tokens)
+
+        prev_token = None
+        found_groups = {}
+        for _ in range(20):
+            from_token = ""
+            if prev_token:
+                from_token = "&from=" + prev_token
+
+            request, channel = self.make_request(
+                "GET",
+                "/_matrix/client/unstable/rooms/%s/aggregations/%s?limit=1%s"
+                % (self.room, self.parent_id, from_token),
+                access_token=self.user_token,
+            )
+            self.render(request)
+            self.assertEquals(200, channel.code, channel.json_body)
+
+            self.assertEqual(len(channel.json_body["chunk"]), 1, channel.json_body)
+
+            for groups in channel.json_body["chunk"]:
+                # We only expect reactions
+                self.assertEqual(groups["type"], "m.reaction", channel.json_body)
+
+                # We should only see each key once
+                self.assertNotIn(groups["key"], found_groups, channel.json_body)
+
+                found_groups[groups["key"]] = groups["count"]
+
+            next_batch = channel.json_body.get("next_batch")
+
+            self.assertNotEquals(prev_token, next_batch)
+            prev_token = next_batch
+
+            if not prev_token:
+                break
+
+        self.assertEquals(sent_groups, found_groups)
+
+    def test_aggregation_pagination_within_group(self):
+        """Test that we can paginate within an annotation group.
+        """
+
+        # We need to create ten separate users to send each reaction.
+        access_tokens = [self.user_token, self.user2_token]
+        idx = 0
+        while len(access_tokens) < 10:
+            user_id, token = self._create_user("test" + str(idx))
+            idx += 1
+
+            self.helper.join(self.room, user=user_id, tok=token)
+            access_tokens.append(token)
+
+        idx = 0
+        expected_event_ids = []
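+        # Send ten 👍 reactions, one per user, recording the event IDs we
+        # expect to see when paginating within that group.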
+        for _ in range(10):
+            channel = self._send_relation(
+                RelationTypes.ANNOTATION,
+                "m.reaction",
+                key=u"👍",
+                access_token=access_tokens[idx],
+            )
+            self.assertEquals(200, channel.code, channel.json_body)
+            expected_event_ids.append(channel.json_body["event_id"])
+
+            idx += 1
+
+        # Also send a reaction with a different key, to check it isn't included
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        prev_token = None
+        found_event_ids = []
+        encoded_key = six.moves.urllib.parse.quote_plus(u"👍".encode("utf-8"))
+        for _ in range(20):
+            from_token = ""
+            if prev_token:
+                from_token = "&from=" + prev_token
+
+            request, channel = self.make_request(
+                "GET",
+                "/_matrix/client/unstable/rooms/%s"
+                "/aggregations/%s/%s/m.reaction/%s?limit=1%s"
+                % (
+                    self.room,
+                    self.parent_id,
+                    RelationTypes.ANNOTATION,
+                    encoded_key,
+                    from_token,
+                ),
+                access_token=self.user_token,
+            )
+            self.render(request)
+            self.assertEquals(200, channel.code, channel.json_body)
+
+            self.assertEqual(len(channel.json_body["chunk"]), 1, channel.json_body)
+
+            found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"])
+
+            next_batch = channel.json_body.get("next_batch")
+
+            self.assertNotEquals(prev_token, next_batch)
+            prev_token = next_batch
+
+            if not prev_token:
+                break
+
+        # We paginated backwards, so reverse
+        found_event_ids.reverse()
+        self.assertEquals(found_event_ids, expected_event_ids)
+
+    def test_aggregation(self):
+        """Test that annotations get correctly aggregated.
+        """
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        request, channel = self.make_request(
+            "GET",
+            "/_matrix/client/unstable/rooms/%s/aggregations/%s"
+            % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        self.assertEquals(
+            channel.json_body,
+            {
+                "chunk": [
+                    {"type": "m.reaction", "key": "a", "count": 2},
+                    {"type": "m.reaction", "key": "b", "count": 1},
+                ]
+            },
+        )
+
+    def test_aggregation_redactions(self):
+        """Test that annotations get correctly aggregated after a redaction.
+        """
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        self.assertEquals(200, channel.code, channel.json_body)
+        to_redact_event_id = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        # Now let's redact one of the 'a' reactions
+        request, channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/rooms/%s/redact/%s" % (self.room, to_redact_event_id),
+            access_token=self.user_token,
+            content={},
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        request, channel = self.make_request(
+            "GET",
+            "/_matrix/client/unstable/rooms/%s/aggregations/%s"
+            % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        self.assertEquals(
+            channel.json_body,
+            {"chunk": [{"type": "m.reaction", "key": "a", "count": 1}]},
+        )
+
+    def test_aggregation_must_be_annotation(self):
+        """Test that aggregations must be annotations.
+        """
+
+        request, channel = self.make_request(
+            "GET",
+            "/_matrix/client/unstable/rooms/%s/aggregations/%s/%s?limit=1"
+            % (self.room, self.parent_id, RelationTypes.REPLACE),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(400, channel.code, channel.json_body)
+
+    def test_aggregation_get_event(self):
+        """Test that annotations and references get correctly bundled when
+        getting the parent event.
+        """
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b")
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test")
+        self.assertEquals(200, channel.code, channel.json_body)
+        reply_1 = channel.json_body["event_id"]
+
+        channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test")
+        self.assertEquals(200, channel.code, channel.json_body)
+        reply_2 = channel.json_body["event_id"]
+
+        request, channel = self.make_request(
+            "GET",
+            "/rooms/%s/event/%s" % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        self.assertEquals(
+            channel.json_body["unsigned"].get("m.relations"),
+            {
+                RelationTypes.ANNOTATION: {
+                    "chunk": [
+                        {"type": "m.reaction", "key": "a", "count": 2},
+                        {"type": "m.reaction", "key": "b", "count": 1},
+                    ]
+                },
+                RelationTypes.REFERENCE: {
+                    "chunk": [{"event_id": reply_1}, {"event_id": reply_2}]
+                },
+            },
+        )
+
+    def test_edit(self):
+        """Test that a simple edit works.
+        """
+
+        new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.message",
+            content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body},
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        edit_event_id = channel.json_body["event_id"]
+
+        request, channel = self.make_request(
+            "GET",
+            "/rooms/%s/event/%s" % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
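+        # Fetching the original event should now return the replacement
+        # content, with the edit exposed via the m.replace relation in
+        # unsigned.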
+        self.assertEquals(channel.json_body["content"], new_body)
+
+        self.assertEquals(
+            channel.json_body["unsigned"].get("m.relations"),
+            {RelationTypes.REPLACE: {"event_id": edit_event_id}},
+        )
+
+    def test_multi_edit(self):
+        """Test that multiple edits, including attempts by people who
+        shouldn't be allowed, are correctly handled.
+        """
+
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.message",
+            content={
+                "msgtype": "m.text",
+                "body": "Wibble",
+                "m.new_content": {"msgtype": "m.text", "body": "First edit"},
+            },
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.message",
+            content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body},
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        edit_event_id = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.message.WRONG_TYPE",
+            content={
+                "msgtype": "m.text",
+                "body": "Wibble",
+                "m.new_content": {"msgtype": "m.text", "body": "Edit, but wrong type"},
+            },
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        request, channel = self.make_request(
+            "GET",
+            "/rooms/%s/event/%s" % (self.room, self.parent_id),
+            access_token=self.user_token,
+        )
+        self.render(request)
+        self.assertEquals(200, channel.code, channel.json_body)
+
+        self.assertEquals(channel.json_body["content"], new_body)
+
+        self.assertEquals(
+            channel.json_body["unsigned"].get("m.relations"),
+            {RelationTypes.REPLACE: {"event_id": edit_event_id}},
+        )
+
+    def _send_relation(
+        self, relation_type, event_type, key=None, content={}, access_token=None
+    ):
+        """Helper function to send a relation pointing at `self.parent_id`
+
+        Args:
+            relation_type (str): One of `RelationTypes`
+            event_type (str): The type of the event to create
+            key (str|None): The aggregation key used for m.annotation relation
+                type.
+            content (dict|None): The content of the created event.
+            access_token (str|None): The access token used to send the relation,
+                defaults to `self.user_token`
+
+        Returns:
+            FakeChannel
+        """
+        if not access_token:
+            access_token = self.user_token
+
+        query = ""
+        if key:
+            query = "?key=" + six.moves.urllib.parse.quote_plus(key.encode("utf-8"))
+
+        request, channel = self.make_request(
+            "POST",
+            "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s"
+            % (self.room, self.parent_id, relation_type, event_type, query),
+            json.dumps(content).encode("utf-8"),
+            access_token=access_token,
+        )
+        self.render(request)
+        return channel
+
+    def _create_user(self, localpart):
+        user_id = self.register_user(localpart, "abc123")
+        access_token = self.login(localpart, "abc123")
+
+        return user_id, access_token
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
new file mode 100644
index 0000000000..6dda66ecd3
--- /dev/null
+++ b/tests/storage/test_cleanup_extrems.py
@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+
+from synapse.api.constants import EventTypes
+from synapse.storage import prepare_database
+from synapse.types import Requester, UserID
+
+from tests.unittest import HomeserverTestCase
+
+
+class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
+    """Test the background update to clean forward extremities table.
+    """
+
+    def prepare(self, reactor, clock, homeserver):
+        self.store = homeserver.get_datastore()
+        self.event_creator = homeserver.get_event_creation_handler()
+        self.room_creator = homeserver.get_room_creation_handler()
+
+        # Create a test user and room
+        self.user = UserID("alice", "test")
+        self.requester = Requester(self.user, None, False, None, None)
+        info = self.get_success(self.room_creator.create_room(self.requester, {}))
+        self.room_id = info["room_id"]
+
+    def create_and_send_event(self, soft_failed=False, prev_event_ids=None):
+        """Create and send an event.
+
+        Args:
+            soft_failed (bool): Whether to create a soft failed event or not
+            prev_event_ids (list[str]|None): Explicitly set the prev events,
+                or if None just use the default
+
+        Returns:
+            str: The new event's ID.
+        """
+        prev_events_and_hashes = None
+        if prev_event_ids:
+            prev_events_and_hashes = [[p, {}, 0] for p in prev_event_ids]
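+            # (each entry is assumed to be an (event_id, hashes, depth)-style
+            # triple, with placeholder hashes and depth)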
+
+        event, context = self.get_success(
+            self.event_creator.create_event(
+                self.requester,
+                {
+                    "type": EventTypes.Message,
+                    "room_id": self.room_id,
+                    "sender": self.user.to_string(),
+                    "content": {"body": "", "msgtype": "m.text"},
+                },
+                prev_events_and_hashes=prev_events_and_hashes,
+            )
+        )
+
+        if soft_failed:
+            event.internal_metadata.soft_failed = True
+
+        self.get_success(
+            self.event_creator.send_nonmember_event(self.requester, event, context)
+        )
+
+        return event.event_id
+
+    def add_extremity(self, event_id):
+        """Add the given event as an extremity to the room.
+        """
+        self.get_success(
+            self.store._simple_insert(
+                table="event_forward_extremities",
+                values={"room_id": self.room_id, "event_id": event_id},
+                desc="test_add_extremity",
+            )
+        )
+
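+        # Invalidate the cache so that the manually-inserted extremity is
+        # visible to get_latest_event_ids_in_room.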
+        self.store.get_latest_event_ids_in_room.invalidate((self.room_id,))
+
+    def run_background_update(self):
+        """Re run the background update to clean up the extremities.
+        """
+        # Make sure we don't clash with in progress updates.
+        self.assertTrue(self.store._all_done, "Background updates are still ongoing")
+
+        schema_path = os.path.join(
+            prepare_database.dir_path,
+            "schema",
+            "delta",
+            "54",
+            "delete_forward_extremities.sql",
+        )
+
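+        # Replaying this delta (re-)schedules the forward extremity cleanup as
+        # a background update, which we then drive to completion below.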
+        def run_delta_file(txn):
+            prepare_database.executescript(txn, schema_path)
+
+        self.get_success(
+            self.store.runInteraction("test_delete_forward_extremities", run_delta_file)
+        )
+
+        # Ugh, have to reset this flag
+        self.store._all_done = False
+
+        while not self.get_success(self.store.has_completed_background_updates()):
+            self.get_success(self.store.do_next_background_update(100), by=0.1)
+
+    def test_soft_failed_extremities_handled_correctly(self):
+        """Test that extremities are correctly calculated in the presence of
+        soft failed events.
+
+        Tests a graph like:
+
+            A <- SF1 <- SF2 <- B
+
+        Where SF* are soft failed.
+        """
+
+        # Create the room graph
+        event_id_1 = self.create_and_send_event()
+        event_id_2 = self.create_and_send_event(True, [event_id_1])
+        event_id_3 = self.create_and_send_event(True, [event_id_2])
+        event_id_4 = self.create_and_send_event(False, [event_id_3])
+
+        # Check the latest events are as expected
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+
+        self.assertEqual(latest_event_ids, [event_id_4])
+
+    def test_basic_cleanup(self):
+        """Test that extremities are correctly calculated in the presence of
+        soft failed events.
+
+        Tests a graph like:
+
+            A <- SF1 <- B
+
+        Where SF* are soft failed, and with extremities of A and B
+        """
+        # Create the room graph
+        event_id_a = self.create_and_send_event()
+        event_id_sf1 = self.create_and_send_event(True, [event_id_a])
+        event_id_b = self.create_and_send_event(False, [event_id_sf1])
+
+        # Add the new extremity and check the latest events are as expected
+        self.add_extremity(event_id_a)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b)))
+
+        # Run the background update and check it did the right thing
+        self.run_background_update()
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(latest_event_ids, [event_id_b])
+
+    def test_chain_of_fail_cleanup(self):
+        """Test that extremities are correctly calculated in the presence of
+        soft failed events.
+
+        Tests a graph like:
+
+            A <- SF1 <- SF2 <- B
+
+        Where SF* are soft failed, and with extremities of A and B
+        """
+        # Create the room graph
+        event_id_a = self.create_and_send_event()
+        event_id_sf1 = self.create_and_send_event(True, [event_id_a])
+        event_id_sf2 = self.create_and_send_event(True, [event_id_sf1])
+        event_id_b = self.create_and_send_event(False, [event_id_sf2])
+
+        # Add the new extremity and check the latest events are as expected
+        self.add_extremity(event_id_a)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(set(latest_event_ids), set((event_id_a, event_id_b)))
+
+        # Run the background update and check it did the right thing
+        self.run_background_update()
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(latest_event_ids, [event_id_b])
+
+    def test_forked_graph_cleanup(self):
+        r"""Test that extremities are correctly calculated in the presence of
+        soft failed events.
+
+        Tests a graph like the following, where time flows down the page:
+
+                A     B
+               / \   /
+              /   \ /
+            SF1   SF2
+             |     |
+            SF3    |
+           /   \   |
+           |    \  |
+           C     SF4
+
+        Where SF* are soft failed, and A, B and C are all marked as
+        extremities. This should resolve to only B and C remaining as
+        extremities.
+        """
+        # Create the room graph
+        event_id_a = self.create_and_send_event()
+        event_id_b = self.create_and_send_event()
+        event_id_sf1 = self.create_and_send_event(True, [event_id_a])
+        event_id_sf2 = self.create_and_send_event(True, [event_id_a, event_id_b])
+        event_id_sf3 = self.create_and_send_event(True, [event_id_sf1])
+        self.create_and_send_event(True, [event_id_sf2, event_id_sf3])  # SF4
+        event_id_c = self.create_and_send_event(False, [event_id_sf3])
+
+        # Add the new extremity and check the latest events are as expected
+        self.add_extremity(event_id_a)
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(
+            set(latest_event_ids), set((event_id_a, event_id_b, event_id_c))
+        )
+
+        # Run the background update and check it did the right thing
+        self.run_background_update()
+
+        latest_event_ids = self.get_success(
+            self.store.get_latest_event_ids_in_room(self.room_id)
+        )
+        self.assertEqual(set(latest_event_ids), set([event_id_b, event_id_c]))
diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py
index 6bfaa00fe9..e07ff01201 100644
--- a/tests/storage/test_keys.py
+++ b/tests/storage/test_keys.py
@@ -17,6 +17,8 @@ import signedjson.key
 
 from twisted.internet.defer import Deferred
 
+from synapse.storage.keys import FetchKeyResult
+
 import tests.unittest
 
 KEY_1 = signedjson.key.decode_verify_key_base64(
@@ -31,23 +33,34 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
     def test_get_server_verify_keys(self):
         store = self.hs.get_datastore()
 
-        d = store.store_server_verify_key("server1", "from_server", 0, KEY_1)
-        self.get_success(d)
-        d = store.store_server_verify_key("server1", "from_server", 0, KEY_2)
+        key_id_1 = "ed25519:key1"
+        key_id_2 = "ed25519:KEY_ID_2"
+        d = store.store_server_verify_keys(
+            "from_server",
+            10,
+            [
+                ("server1", key_id_1, FetchKeyResult(KEY_1, 100)),
+                ("server1", key_id_2, FetchKeyResult(KEY_2, 200)),
+            ],
+        )
         self.get_success(d)
 
         d = store.get_server_verify_keys(
-            [
-                ("server1", "ed25519:key1"),
-                ("server1", "ed25519:key2"),
-                ("server1", "ed25519:key3"),
-            ]
+            [("server1", key_id_1), ("server1", key_id_2), ("server1", "ed25519:key3")]
         )
         res = self.get_success(d)
 
         self.assertEqual(len(res.keys()), 3)
-        self.assertEqual(res[("server1", "ed25519:key1")].version, "key1")
-        self.assertEqual(res[("server1", "ed25519:key2")].version, "key2")
+        res1 = res[("server1", key_id_1)]
+        self.assertEqual(res1.verify_key, KEY_1)
+        self.assertEqual(res1.verify_key.version, "key1")
+        self.assertEqual(res1.valid_until_ts, 100)
+
+        res2 = res[("server1", key_id_2)]
+        self.assertEqual(res2.verify_key, KEY_2)
+        # version comes from the ID it was stored with
+        self.assertEqual(res2.verify_key.version, "KEY_ID_2")
+        self.assertEqual(res2.valid_until_ts, 200)
 
         # non-existent result gives None
         self.assertIsNone(res[("server1", "ed25519:key3")])
@@ -60,32 +73,51 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         key_id_1 = "ed25519:key1"
         key_id_2 = "ed25519:key2"
 
-        d = store.store_server_verify_key("srv1", "from_server", 0, KEY_1)
-        self.get_success(d)
-        d = store.store_server_verify_key("srv1", "from_server", 0, KEY_2)
+        d = store.store_server_verify_keys(
+            "from_server",
+            0,
+            [
+                ("srv1", key_id_1, FetchKeyResult(KEY_1, 100)),
+                ("srv1", key_id_2, FetchKeyResult(KEY_2, 200)),
+            ],
+        )
         self.get_success(d)
 
         d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)])
         res = self.get_success(d)
         self.assertEqual(len(res.keys()), 2)
-        self.assertEqual(res[("srv1", key_id_1)], KEY_1)
-        self.assertEqual(res[("srv1", key_id_2)], KEY_2)
+
+        res1 = res[("srv1", key_id_1)]
+        self.assertEqual(res1.verify_key, KEY_1)
+        self.assertEqual(res1.valid_until_ts, 100)
+
+        res2 = res[("srv1", key_id_2)]
+        self.assertEqual(res2.verify_key, KEY_2)
+        self.assertEqual(res2.valid_until_ts, 200)
 
         # we should be able to look up the same thing again without a db hit
         res = store.get_server_verify_keys([("srv1", key_id_1)])
         if isinstance(res, Deferred):
             res = self.successResultOf(res)
         self.assertEqual(len(res.keys()), 1)
-        self.assertEqual(res[("srv1", key_id_1)], KEY_1)
+        self.assertEqual(res[("srv1", key_id_1)].verify_key, KEY_1)
 
         new_key_2 = signedjson.key.get_verify_key(
             signedjson.key.generate_signing_key("key2")
         )
-        d = store.store_server_verify_key("srv1", "from_server", 10, new_key_2)
+        d = store.store_server_verify_keys(
+            "from_server", 10, [("srv1", key_id_2, FetchKeyResult(new_key_2, 300))]
+        )
         self.get_success(d)
 
         d = store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)])
         res = self.get_success(d)
         self.assertEqual(len(res.keys()), 2)
-        self.assertEqual(res[("srv1", key_id_1)], KEY_1)
-        self.assertEqual(res[("srv1", key_id_2)], new_key_2)
+
+        res1 = res[("srv1", key_id_1)]
+        self.assertEqual(res1.verify_key, KEY_1)
+        self.assertEqual(res1.valid_until_ts, 100)
+
+        res2 = res[("srv1", key_id_2)]
+        self.assertEqual(res2.verify_key, new_key_2)
+        self.assertEqual(res2.valid_until_ts, 300)
diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py
index f412985d2c..52739fbabc 100644
--- a/tests/test_terms_auth.py
+++ b/tests/test_terms_auth.py
@@ -59,7 +59,7 @@ class TermsTestCase(unittest.HomeserverTestCase):
         for flow in channel.json_body["flows"]:
             self.assertIsInstance(flow["stages"], list)
             self.assertTrue(len(flow["stages"]) > 0)
-            self.assertEquals(flow["stages"][-1], "m.login.terms")
+            self.assertTrue("m.login.terms" in flow["stages"])
 
         expected_params = {
             "m.login.terms": {
diff --git a/tox.ini b/tox.ini
index d0e519ce46..543b232ae7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -94,7 +94,7 @@ commands =
     # Make all greater-thans equals so we test the oldest version of our direct
     # dependencies, but make the pyopenssl 17.0, which can work against an
     # OpenSSL 1.1 compiled cryptography (as older ones don't compile on Travis).
-    /bin/sh -c 'python -m synapse.python_dependencies | sed -e "s/>=/==/g" -e "s/psycopg2==2.6//" -e "s/pyopenssl==16.0.0/pyopenssl==17.0.0/" | xargs pip install'
+    /bin/sh -c 'python -m synapse.python_dependencies | sed -e "s/>=/==/g" -e "s/psycopg2==2.6//" -e "s/pyopenssl==16.0.0/pyopenssl==17.0.0/" | xargs -d"\n" pip install'
 
     # Add this so that coverage will run on subprocesses
     /bin/sh -c 'echo "import coverage; coverage.process_startup()" > {envsitepackagesdir}/../sitecustomize.py'