From 3ca4c7c516a349cbb9dace0ace33bb889955eda1 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 23 Oct 2019 12:02:36 +0100
Subject: Use new EventPersistenceStore

---
 synapse/server.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/synapse/server.py b/synapse/server.py
index 1fcc7375d3..54a7f4aa5f 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -95,6 +95,7 @@ from synapse.server_notices.worker_server_notices_sender import (
     WorkerServerNoticesSender,
 )
 from synapse.state import StateHandler, StateResolutionHandler
+from synapse.storage import DataStores, Storage
 from synapse.streams.events import EventSources
 from synapse.util import Clock
 from synapse.util.distributor import Distributor
@@ -196,6 +197,7 @@ class HomeServer(object):
         "account_validity_handler",
         "saml_handler",
         "event_client_serializer",
+        "storage",
     ]

     REQUIRED_ON_MASTER_STARTUP = ["user_directory_handler", "stats_handler"]

@@ -225,6 +227,7 @@ class HomeServer(object):
         self.registration_ratelimiter = Ratelimiter()

         self.datastore = None
+        self.datastores = None

         # Other kwargs are explicit dependencies
         for depname in kwargs:
@@ -234,6 +237,7 @@ class HomeServer(object):
         logger.info("Setting up.")
         with self.get_db_conn() as conn:
             self.datastore = self.DATASTORE_CLASS(conn, self)
+            self.datastores = DataStores(self.datastore, conn, self)
             conn.commit()

         logger.info("Finished setting up.")

@@ -266,7 +270,7 @@ class HomeServer(object):
         return self.clock

     def get_datastore(self):
-        return self.datastore
+        return self.datastores.main

     def get_config(self):
         return self.config
@@ -537,6 +541,9 @@ class HomeServer(object):
     def build_event_client_serializer(self):
         return EventClientSerializer(self)

+    def build_storage(self) -> Storage:
+        return Storage(self, self.datastores)
+
     def remove_pusher(self, app_id, push_key, user_id):
         return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)

--
cgit 1.5.1


From a8d16f6c00d5adb204af5fa73ffaea40eea4b632 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 30 Oct 2019 13:33:38 +0000
Subject: Review comments

---
 synapse/server.py                          |  5 ++---
 synapse/storage/__init__.py                |  4 ++--
 synapse/storage/data_stores/main/events.py | 19 ++++++++++++++-----
 synapse/storage/persist_events.py          | 13 ++++++++++---
 tests/crypto/test_keyring.py               |  4 ++--
 tests/test_federation.py                   | 11 +++++++----
 6 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/synapse/server.py b/synapse/server.py
index 54a7f4aa5f..0b81af646c 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -226,7 +226,6 @@ class HomeServer(object):
         self.admin_redaction_ratelimiter = Ratelimiter()
         self.registration_ratelimiter = Ratelimiter()

-        self.datastore = None
         self.datastores = None

         # Other kwargs are explicit dependencies
@@ -236,8 +235,8 @@ class HomeServer(object):
     def setup(self):
         logger.info("Setting up.")
         with self.get_db_conn() as conn:
-            self.datastore = self.DATASTORE_CLASS(conn, self)
-            self.datastores = DataStores(self.datastore, conn, self)
+            datastore = self.DATASTORE_CLASS(conn, self)
+            self.datastores = DataStores(datastore, conn, self)
             conn.commit()

         logger.info("Finished setting up.")
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index a58187a76f..a6429d17ed 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -29,7 +29,7 @@ stored in `synapse.storage.schema`.
from synapse.storage.data_stores import DataStores from synapse.storage.data_stores.main import DataStore -from synapse.storage.persist_events import EventsPersistenceStore +from synapse.storage.persist_events import EventsPersistenceStorage __all__ = ["DataStores", "DataStore"] @@ -44,7 +44,7 @@ class Storage(object): # interfaces. self.main = stores.main - self.persistence = EventsPersistenceStore(hs, stores) + self.persistence = EventsPersistenceStorage(hs, stores) def are_all_users_on_domain(txn, database_engine, domain): diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 6304531cd5..813f34528c 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -146,7 +146,7 @@ class EventsStore( @_retry_on_integrity_error @defer.inlineCallbacks - def _persist_events( + def _persist_events_and_state_updates( self, events_and_contexts, current_state_for_room, @@ -155,18 +155,27 @@ class EventsStore( backfilled=False, delete_existing=False, ): - """Persist events to db + """Persist a set of events alongside updates to the current state and + forward extremities tables. Args: events_and_contexts (list[(EventBase, EventContext)]): - backfilled (bool): + current_state_for_room (dict[str, dict]): Map from room_id to the + current state of the room based on forward extremities + state_delta_for_room (dict[str, tuple]): Map from room_id to tuple + of `(to_delete, to_insert)` where to_delete is a list + of type/state keys to remove from current state, and to_insert + is a map (type,key)->event_id giving the state delta in each + room. + new_forward_extremities (dict[str, list[str]]): Map from room_id + to list of event IDs that are the new forward extremities of + the room. + backfilled (bool) delete_existing (bool): Returns: Deferred: resolves when the events have been persisted """ - if not events_and_contexts: - return # We want to calculate the stream orderings as late as possible, as # we only notify after all events with a lesser stream ordering have diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 9a63953d4d..cf66225574 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -171,7 +171,13 @@ class _EventPeristenceQueue(object): pass -class EventsPersistenceStore(object): +class EventsPersistenceStorage(object): + """High level interface for handling persisting newly received events. + + Takes care of batching up events by room, and calculating the necessary + current state and forward extremity changes. + """ + def __init__(self, hs, stores: DataStores): # We ultimately want to split out the state store from the main store, # so we use separate variables here even though they point to the same @@ -257,7 +263,8 @@ class EventsPersistenceStore(object): def _persist_events( self, events_and_contexts, backfilled=False, delete_existing=False ): - """Persist events to db + """Calculates the change to current state and forward extremities, and + persists the given events and with those updates. 
Args: events_and_contexts (list[(EventBase, EventContext)]): @@ -399,7 +406,7 @@ class EventsPersistenceStore(object): if current_state is not None: current_state_for_room[room_id] = current_state - yield self.main_store._persist_events( + yield self.main_store._persist_events_and_state_updates( chunk, current_state_for_room=current_state_for_room, state_delta_for_room=state_delta_for_room, diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index c4f0bbd3dd..8efd39c7f7 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -178,7 +178,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): kr = keyring.Keyring(self.hs) key1 = signedjson.key.generate_signing_key(1) - r = self.hs.datastore.store_server_verify_keys( + r = self.hs.get_datastore().store_server_verify_keys( "server9", time.time() * 1000, [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), 1000))], @@ -209,7 +209,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): ) key1 = signedjson.key.generate_signing_key(1) - r = self.hs.datastore.store_server_verify_keys( + r = self.hs.get_datastore().store_server_verify_keys( "server9", time.time() * 1000, [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), None))], diff --git a/tests/test_federation.py b/tests/test_federation.py index a73f18f88e..d1acb16f30 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -36,7 +36,8 @@ class MessageAcceptTests(unittest.TestCase): # Figure out what the most recent event is most_recent = self.successResultOf( maybeDeferred( - self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + self.homeserver.get_datastore().get_latest_event_ids_in_room, + self.room_id, ) )[0] @@ -75,7 +76,8 @@ class MessageAcceptTests(unittest.TestCase): self.assertEqual( self.successResultOf( maybeDeferred( - self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + self.homeserver.get_datastore().get_latest_event_ids_in_room, + self.room_id, ) )[0], "$join:test.serv", @@ -97,7 +99,8 @@ class MessageAcceptTests(unittest.TestCase): # Figure out what the most recent event is most_recent = self.successResultOf( maybeDeferred( - self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + self.homeserver.get_datastore().get_latest_event_ids_in_room, + self.room_id, ) )[0] @@ -137,6 +140,6 @@ class MessageAcceptTests(unittest.TestCase): # Make sure the invalid event isn't there extrem = maybeDeferred( - self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + self.homeserver.get_datastore().get_latest_event_ids_in_room, self.room_id ) self.assertEqual(self.successResultOf(extrem)[0], "$join:test.serv") -- cgit 1.5.1 From 1cb84c6486a5131dd284f341bb657434becda255 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Nov 2019 14:07:44 +0000 Subject: Support for routing outbound HTTP requests via a proxy (#6239) The `http_proxy` and `HTTPS_PROXY` env vars can be set to a `host[:port]` value which should point to a proxy. The address of the proxy should be excluded from IP blacklists such as the `url_preview_ip_range_blacklist`. The proxy will then be used for * push * url previews * phone-home stats * recaptcha validation * CAS auth validation It will *not* be used for: * Application Services * Identity servers * Outbound federation * In worker configurations, connections from workers to masters Fixes #4198. 
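As an illustrative sketch (not part of this patch; the proxy address below is
a made-up example, and os.environb is only available on POSIX platforms), the
new ProxyAgent can be driven directly, like any other twisted IAgent:

    import os

    from twisted.internet import reactor

    from synapse.http.proxyagent import ProxyAgent

    # Both proxy settings are expected as bytes in host[:port] form; passing
    # None for either one simply disables proxying for that scheme.
    agent = ProxyAgent(
        reactor,
        http_proxy=os.environb.get(b"http_proxy"),
        https_proxy=os.environb.get(b"HTTPS_PROXY"),
    )

    # http URIs are sent to the proxy as absolute-form requests; https URIs
    # are tunnelled through the proxy via an HTTP CONNECT request first.
    d = agent.request(b"GET", b"http://example.com/")

The same wiring is what hs.get_proxied_http_client() sets up for the
workloads listed above.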
---
 changelog.d/6238.feature                       |   1 +
 synapse/app/homeserver.py                      |   2 +-
 synapse/handlers/ui_auth/checkers.py           |   2 +-
 synapse/http/client.py                         |  17 +-
 synapse/http/connectproxyclient.py             | 195 ++++++++++++
 synapse/http/proxyagent.py                     | 195 ++++++++++++
 synapse/push/httppusher.py                     |   2 +-
 synapse/rest/client/v1/login.py                |   2 +-
 synapse/rest/media/v1/preview_url_resource.py  |   2 +
 synapse/server.py                              |   9 +
 synapse/server.pyi                             |   9 +
 tests/http/__init__.py                         |  17 ++
 .../federation/test_matrix_federation_agent.py |  11 +-
 tests/http/test_proxyagent.py                  | 334 +++++++++++++++++++++
 tests/push/test_http.py                        |   2 +-
 tests/server.py                                |  24 +-
 16 files changed, 812 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/6238.feature
 create mode 100644 synapse/http/connectproxyclient.py
 create mode 100644 synapse/http/proxyagent.py
 create mode 100644 tests/http/test_proxyagent.py

diff --git a/changelog.d/6238.feature b/changelog.d/6238.feature
new file mode 100644
index 0000000000..d225ac33b6
--- /dev/null
+++ b/changelog.d/6238.feature
@@ -0,0 +1 @@
+Add support for outbound http proxying via http_proxy/HTTPS_PROXY env vars.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 8997c1f9e7..8d28076d92 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -565,7 +565,7 @@ def run(hs):
             "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)
         )
         try:
-            yield hs.get_simple_http_client().put_json(
+            yield hs.get_proxied_http_client().put_json(
                 hs.config.report_stats_endpoint, stats
             )
         except Exception as e:
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
index 29aa1e5aaf..8363d887a9 100644
--- a/synapse/handlers/ui_auth/checkers.py
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -81,7 +81,7 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker):
     def __init__(self, hs):
         super().__init__(hs)
         self._enabled = bool(hs.config.recaptcha_private_key)
-        self._http_client = hs.get_simple_http_client()
+        self._http_client = hs.get_proxied_http_client()
         self._url = hs.config.recaptcha_siteverify_api
         self._secret = hs.config.recaptcha_private_key
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 2df5b383b5..d4c285445e 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -45,6 +45,7 @@ from synapse.http import (
     cancelled_to_request_timed_out_error,
     redact_uri,
 )
+from synapse.http.proxyagent import ProxyAgent
 from synapse.logging.context import make_deferred_yieldable
 from synapse.logging.opentracing import set_tag, start_active_span, tags
 from synapse.util.async_helpers import timeout_deferred
@@ -183,7 +184,15 @@ class SimpleHttpClient(object):
     using HTTP in Matrix
     """

-    def __init__(self, hs, treq_args={}, ip_whitelist=None, ip_blacklist=None):
+    def __init__(
+        self,
+        hs,
+        treq_args={},
+        ip_whitelist=None,
+        ip_blacklist=None,
+        http_proxy=None,
+        https_proxy=None,
+    ):
         """
         Args:
             hs (synapse.server.HomeServer)
             treq_args (dict): Extra keyword arguments to be given to treq.request.
             ip_blacklist (netaddr.IPSet): The blacklisted IP addresses, that
                 we may not request.
             ip_whitelist (netaddr.IPSet): The whitelisted IP addresses, that we can
                 request if it were otherwise caught in a blacklist.
+            http_proxy (bytes): proxy server to use for http connections. host[:port]
+            https_proxy (bytes): proxy server to use for https connections. host[:port]
        """

        self.hs = hs
@@ -236,11 +247,13 @@ class SimpleHttpClient(object):
        # The default context factory in Twisted 14.0.0 (which we require) is
        # BrowserLikePolicyForHTTPS which will do regular cert validation
        # 'like a browser'
-       self.agent = Agent(
+       self.agent = ProxyAgent(
            self.reactor,
            connectTimeout=15,
            contextFactory=self.hs.get_http_client_context_factory(),
            pool=pool,
+           http_proxy=http_proxy,
+           https_proxy=https_proxy,
        )

        if self._ip_blacklist:
diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py
new file mode 100644
index 0000000000..be7b2ceb8e
--- /dev/null
+++ b/synapse/http/connectproxyclient.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from zope.interface import implementer
+
+from twisted.internet import defer, protocol
+from twisted.internet.error import ConnectError
+from twisted.internet.interfaces import IStreamClientEndpoint
+from twisted.internet.protocol import connectionDone
+from twisted.web import http
+
+logger = logging.getLogger(__name__)
+
+
+class ProxyConnectError(ConnectError):
+    pass
+
+
+@implementer(IStreamClientEndpoint)
+class HTTPConnectProxyEndpoint(object):
+    """An Endpoint implementation which will send a CONNECT request to an http proxy
+
+    Wraps an existing HostnameEndpoint for the proxy.
+
+    When we get the connect() request from the connection pool (via the TLS wrapper),
+    we'll first connect to the proxy endpoint with a ProtocolFactory which will make the
+    CONNECT request. Once that completes, we invoke the protocolFactory which was passed
+    in.
+
+    Args:
+        reactor: the Twisted reactor to use for the connection
+        proxy_endpoint (IStreamClientEndpoint): the endpoint to use to connect to the
+            proxy
+        host (bytes): hostname that we want to CONNECT to
+        port (int): port that we want to connect to
+    """
+
+    def __init__(self, reactor, proxy_endpoint, host, port):
+        self._reactor = reactor
+        self._proxy_endpoint = proxy_endpoint
+        self._host = host
+        self._port = port
+
+    def __repr__(self):
+        return "<HTTPConnectProxyEndpoint %s>" % (self._proxy_endpoint,)
+
+    def connect(self, protocolFactory):
+        f = HTTPProxiedClientFactory(self._host, self._port, protocolFactory)
+        d = self._proxy_endpoint.connect(f)
+        # once the tcp socket connects successfully, we need to wait for the
+        # CONNECT to complete.
+        d.addCallback(lambda conn: f.on_connection)
+        return d
+
+
+class HTTPProxiedClientFactory(protocol.ClientFactory):
+    """ClientFactory wrapper that triggers an HTTP proxy CONNECT on connect.
+
+    Once the CONNECT completes, invokes the original ClientFactory to build the
+    HTTP Protocol object and run the rest of the connection.
+ + Args: + dst_host (bytes): hostname that we want to CONNECT to + dst_port (int): port that we want to connect to + wrapped_factory (protocol.ClientFactory): The original Factory + """ + + def __init__(self, dst_host, dst_port, wrapped_factory): + self.dst_host = dst_host + self.dst_port = dst_port + self.wrapped_factory = wrapped_factory + self.on_connection = defer.Deferred() + + def startedConnecting(self, connector): + return self.wrapped_factory.startedConnecting(connector) + + def buildProtocol(self, addr): + wrapped_protocol = self.wrapped_factory.buildProtocol(addr) + + return HTTPConnectProtocol( + self.dst_host, self.dst_port, wrapped_protocol, self.on_connection + ) + + def clientConnectionFailed(self, connector, reason): + logger.debug("Connection to proxy failed: %s", reason) + if not self.on_connection.called: + self.on_connection.errback(reason) + return self.wrapped_factory.clientConnectionFailed(connector, reason) + + def clientConnectionLost(self, connector, reason): + logger.debug("Connection to proxy lost: %s", reason) + if not self.on_connection.called: + self.on_connection.errback(reason) + return self.wrapped_factory.clientConnectionLost(connector, reason) + + +class HTTPConnectProtocol(protocol.Protocol): + """Protocol that wraps an existing Protocol to do a CONNECT handshake at connect + + Args: + host (bytes): The original HTTP(s) hostname or IPv4 or IPv6 address literal + to put in the CONNECT request + + port (int): The original HTTP(s) port to put in the CONNECT request + + wrapped_protocol (interfaces.IProtocol): the original protocol (probably + HTTPChannel or TLSMemoryBIOProtocol, but could be anything really) + + connected_deferred (Deferred): a Deferred which will be callbacked with + wrapped_protocol when the CONNECT completes + """ + + def __init__(self, host, port, wrapped_protocol, connected_deferred): + self.host = host + self.port = port + self.wrapped_protocol = wrapped_protocol + self.connected_deferred = connected_deferred + self.http_setup_client = HTTPConnectSetupClient(self.host, self.port) + self.http_setup_client.on_connected.addCallback(self.proxyConnected) + + def connectionMade(self): + self.http_setup_client.makeConnection(self.transport) + + def connectionLost(self, reason=connectionDone): + if self.wrapped_protocol.connected: + self.wrapped_protocol.connectionLost(reason) + + self.http_setup_client.connectionLost(reason) + + if not self.connected_deferred.called: + self.connected_deferred.errback(reason) + + def proxyConnected(self, _): + self.wrapped_protocol.makeConnection(self.transport) + + self.connected_deferred.callback(self.wrapped_protocol) + + # Get any pending data from the http buf and forward it to the original protocol + buf = self.http_setup_client.clearLineBuffer() + if buf: + self.wrapped_protocol.dataReceived(buf) + + def dataReceived(self, data): + # if we've set up the HTTP protocol, we can send the data there + if self.wrapped_protocol.connected: + return self.wrapped_protocol.dataReceived(data) + + # otherwise, we must still be setting up the connection: send the data to the + # setup client + return self.http_setup_client.dataReceived(data) + + +class HTTPConnectSetupClient(http.HTTPClient): + """HTTPClient protocol to send a CONNECT message for proxies and read the response. 
+ + Args: + host (bytes): The hostname to send in the CONNECT message + port (int): The port to send in the CONNECT message + """ + + def __init__(self, host, port): + self.host = host + self.port = port + self.on_connected = defer.Deferred() + + def connectionMade(self): + logger.debug("Connected to proxy, sending CONNECT") + self.sendCommand(b"CONNECT", b"%s:%d" % (self.host, self.port)) + self.endHeaders() + + def handleStatus(self, version, status, message): + logger.debug("Got Status: %s %s %s", status, message, version) + if status != b"200": + raise ProxyConnectError("Unexpected status on CONNECT: %s" % status) + + def handleEndHeaders(self): + logger.debug("End Headers") + self.on_connected.callback(None) + + def handleResponse(self, body): + pass diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py new file mode 100644 index 0000000000..332da02a8d --- /dev/null +++ b/synapse/http/proxyagent.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import re + +from zope.interface import implementer + +from twisted.internet import defer +from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.python.failure import Failure +from twisted.web.client import URI, BrowserLikePolicyForHTTPS, _AgentBase +from twisted.web.error import SchemeNotSupported +from twisted.web.iweb import IAgent + +from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint + +logger = logging.getLogger(__name__) + +_VALID_URI = re.compile(br"\A[\x21-\x7e]+\Z") + + +@implementer(IAgent) +class ProxyAgent(_AgentBase): + """An Agent implementation which will use an HTTP proxy if one was requested + + Args: + reactor: twisted reactor to place outgoing + connections. + + contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the + verification parameters of OpenSSL. The default is to use a + `BrowserLikePolicyForHTTPS`, so unless you have special + requirements you can leave this as-is. + + connectTimeout (float): The amount of time that this Agent will wait + for the peer to accept a connection. + + bindAddress (bytes): The local address for client sockets to bind to. + + pool (HTTPConnectionPool|None): connection pool to be used. If None, a + non-persistent pool instance will be created. 
+ """ + + def __init__( + self, + reactor, + contextFactory=BrowserLikePolicyForHTTPS(), + connectTimeout=None, + bindAddress=None, + pool=None, + http_proxy=None, + https_proxy=None, + ): + _AgentBase.__init__(self, reactor, pool) + + self._endpoint_kwargs = {} + if connectTimeout is not None: + self._endpoint_kwargs["timeout"] = connectTimeout + if bindAddress is not None: + self._endpoint_kwargs["bindAddress"] = bindAddress + + self.http_proxy_endpoint = _http_proxy_endpoint( + http_proxy, reactor, **self._endpoint_kwargs + ) + + self.https_proxy_endpoint = _http_proxy_endpoint( + https_proxy, reactor, **self._endpoint_kwargs + ) + + self._policy_for_https = contextFactory + self._reactor = reactor + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a request to the server indicated by the given uri. + + Supports `http` and `https` schemes. + + An existing connection from the connection pool may be used or a new one may be + created. + + See also: twisted.web.iweb.IAgent.request + + Args: + method (bytes): The request method to use, such as `GET`, `POST`, etc + + uri (bytes): The location of the resource to request. + + headers (Headers|None): Extra headers to send with the request + + bodyProducer (IBodyProducer|None): An object which can generate bytes to + make up the body of this request (for example, the properly encoded + contents of a file for a file upload). Or, None if the request is to + have no body. + + Returns: + Deferred[IResponse]: completes when the header of the response has + been received (regardless of the response status code). + """ + uri = uri.strip() + if not _VALID_URI.match(uri): + raise ValueError("Invalid URI {!r}".format(uri)) + + parsed_uri = URI.fromBytes(uri) + pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port) + request_path = parsed_uri.originForm + + if parsed_uri.scheme == b"http" and self.http_proxy_endpoint: + # Cache *all* connections under the same key, since we are only + # connecting to a single destination, the proxy: + pool_key = ("http-proxy", self.http_proxy_endpoint) + endpoint = self.http_proxy_endpoint + request_path = uri + elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: + endpoint = HTTPConnectProxyEndpoint( + self._reactor, + self.https_proxy_endpoint, + parsed_uri.host, + parsed_uri.port, + ) + else: + # not using a proxy + endpoint = HostnameEndpoint( + self._reactor, parsed_uri.host, parsed_uri.port, **self._endpoint_kwargs + ) + + logger.debug("Requesting %s via %s", uri, endpoint) + + if parsed_uri.scheme == b"https": + tls_connection_creator = self._policy_for_https.creatorForNetloc( + parsed_uri.host, parsed_uri.port + ) + endpoint = wrapClientTLS(tls_connection_creator, endpoint) + elif parsed_uri.scheme == b"http": + pass + else: + return defer.fail( + Failure( + SchemeNotSupported("Unsupported scheme: %r" % (parsed_uri.scheme,)) + ) + ) + + return self._requestWithEndpoint( + pool_key, endpoint, method, parsed_uri, headers, bodyProducer, request_path + ) + + +def _http_proxy_endpoint(proxy, reactor, **kwargs): + """Parses an http proxy setting and returns an endpoint for the proxy + + Args: + proxy (bytes|None): the proxy setting + reactor: reactor to be used to connect to the proxy + kwargs: other args to be passed to HostnameEndpoint + + Returns: + interfaces.IStreamClientEndpoint|None: endpoint to use to connect to the proxy, + or None + """ + if proxy is None: + return None + + # currently we only support hostname:port. 
Some apps also support + # protocol://[:port], which allows a way of requiring a TLS connection to the + # proxy. + + host, port = parse_host_port(proxy, default_port=1080) + return HostnameEndpoint(reactor, host, port, **kwargs) + + +def parse_host_port(hostport, default_port=None): + # could have sworn we had one of these somewhere else... + if b":" in hostport: + host, port = hostport.rsplit(b":", 1) + try: + port = int(port) + return host, port + except ValueError: + # the thing after the : wasn't a valid port; presumably this is an + # IPv6 address. + pass + + return hostport, default_port diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 7dde2ad055..e994037be6 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -103,7 +103,7 @@ class HttpPusher(object): if "url" not in self.data: raise PusherConfigException("'url' required in data for HTTP pusher") self.url = self.data["url"] - self.http_client = hs.get_simple_http_client() + self.http_client = hs.get_proxied_http_client() self.data_minus_url = {} self.data_minus_url.update(self.data) del self.data_minus_url["url"] diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 00a7dd6d09..24a0ce74f2 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -381,7 +381,7 @@ class CasTicketServlet(RestServlet): self.cas_displayname_attribute = hs.config.cas_displayname_attribute self.cas_required_attributes = hs.config.cas_required_attributes self._sso_auth_handler = SSOAuthHandler(hs) - self._http_client = hs.get_simple_http_client() + self._http_client = hs.get_proxied_http_client() @defer.inlineCallbacks def on_GET(self, request): diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5a25b6b3fc..531d923f76 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -74,6 +74,8 @@ class PreviewUrlResource(DirectServeResource): treq_args={"browser_like_redirects": True}, ip_whitelist=hs.config.url_preview_ip_range_whitelist, ip_blacklist=hs.config.url_preview_ip_range_blacklist, + http_proxy=os.getenv("http_proxy"), + https_proxy=os.getenv("HTTPS_PROXY"), ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path diff --git a/synapse/server.py b/synapse/server.py index 0b81af646c..f8aeebcff8 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -23,6 +23,7 @@ # Imports required for the default HomeServer() implementation import abc import logging +import os from twisted.enterprise import adbapi from twisted.mail.smtp import sendmail @@ -168,6 +169,7 @@ class HomeServer(object): "filtering", "http_client_context_factory", "simple_http_client", + "proxied_http_client", "media_repository", "media_repository_resource", "federation_transport_client", @@ -311,6 +313,13 @@ class HomeServer(object): def build_simple_http_client(self): return SimpleHttpClient(self) + def build_proxied_http_client(self): + return SimpleHttpClient( + self, + http_proxy=os.getenv("http_proxy"), + https_proxy=os.getenv("HTTPS_PROXY"), + ) + def build_room_creation_handler(self): return RoomCreationHandler(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 83d1f11283..b5e0b57095 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -12,6 +12,7 @@ import synapse.handlers.message import synapse.handlers.room import synapse.handlers.room_member import synapse.handlers.set_password +import synapse.http.client 
import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager import synapse.server_notices.server_notices_sender @@ -38,6 +39,14 @@ class HomeServer(object): pass def get_state_resolution_handler(self) -> synapse.state.StateResolutionHandler: pass + def get_simple_http_client(self) -> synapse.http.client.SimpleHttpClient: + """Fetch an HTTP client implementation which doesn't do any blacklisting + or support any HTTP_PROXY settings""" + pass + def get_proxied_http_client(self) -> synapse.http.client.SimpleHttpClient: + """Fetch an HTTP client implementation which doesn't do any blacklisting + but does support HTTP_PROXY settings""" + pass def get_deactivate_account_handler( self, ) -> synapse.handlers.deactivate_account.DeactivateAccountHandler: diff --git a/tests/http/__init__.py b/tests/http/__init__.py index 2d5dba6464..2096ba3c91 100644 --- a/tests/http/__init__.py +++ b/tests/http/__init__.py @@ -20,6 +20,23 @@ from zope.interface import implementer from OpenSSL import SSL from OpenSSL.SSL import Connection from twisted.internet.interfaces import IOpenSSLServerConnectionCreator +from twisted.internet.ssl import Certificate, trustRootFromCertificates +from twisted.web.client import BrowserLikePolicyForHTTPS # noqa: F401 +from twisted.web.iweb import IPolicyForHTTPS # noqa: F401 + + +def get_test_https_policy(): + """Get a test IPolicyForHTTPS which trusts the test CA cert + + Returns: + IPolicyForHTTPS + """ + ca_file = get_test_ca_cert_file() + with open(ca_file) as stream: + content = stream.read() + cert = Certificate.loadPEM(content) + trust_root = trustRootFromCertificates([cert]) + return BrowserLikePolicyForHTTPS(trustRoot=trust_root) def get_test_ca_cert_file(): diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 71d7025264..cfcd98ff7d 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -124,19 +124,24 @@ class MatrixFederationAgentTests(unittest.TestCase): FakeTransport(client_protocol, self.reactor, server_tls_protocol) ) + # grab a hold of the TLS connection, in case it gets torn down + server_tls_connection = server_tls_protocol._tlsConnection + + # fish the test server back out of the server-side TLS protocol. + http_protocol = server_tls_protocol.wrappedProtocol + # give the reactor a pump to get the TLS juices flowing. self.reactor.pump((0.1,)) # check the SNI - server_name = server_tls_protocol._tlsConnection.get_servername() + server_name = server_tls_connection.get_servername() self.assertEqual( server_name, expected_sni, "Expected SNI %s but got %s" % (expected_sni, server_name), ) - # fish the test server back out of the server-side TLS protocol. - return server_tls_protocol.wrappedProtocol + return http_protocol @defer.inlineCallbacks def _make_get_request(self, uri): diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py new file mode 100644 index 0000000000..22abf76515 --- /dev/null +++ b/tests/http/test_proxyagent.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +import treq + +from twisted.internet import interfaces # noqa: F401 +from twisted.internet.protocol import Factory +from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.web.http import HTTPChannel + +from synapse.http.proxyagent import ProxyAgent + +from tests.http import TestServerTLSConnectionFactory, get_test_https_policy +from tests.server import FakeTransport, ThreadedMemoryReactorClock +from tests.unittest import TestCase + +logger = logging.getLogger(__name__) + +HTTPFactory = Factory.forProtocol(HTTPChannel) + + +class MatrixFederationAgentTests(TestCase): + def setUp(self): + self.reactor = ThreadedMemoryReactorClock() + + def _make_connection( + self, client_factory, server_factory, ssl=False, expected_sni=None + ): + """Builds a test server, and completes the outgoing client connection + + Args: + client_factory (interfaces.IProtocolFactory): the the factory that the + application is trying to use to make the outbound connection. We will + invoke it to build the client Protocol + + server_factory (interfaces.IProtocolFactory): a factory to build the + server-side protocol + + ssl (bool): If true, we will expect an ssl connection and wrap + server_factory with a TLSMemoryBIOFactory + + expected_sni (bytes|None): the expected SNI value + + Returns: + IProtocol: the server Protocol returned by server_factory + """ + if ssl: + server_factory = _wrap_server_factory_for_tls(server_factory) + + server_protocol = server_factory.buildProtocol(None) + + # now, tell the client protocol factory to build the client protocol, + # and wire the output of said protocol up to the server via + # a FakeTransport. + # + # Normally this would be done by the TCP socket code in Twisted, but we are + # stubbing that out here. 
+ client_protocol = client_factory.buildProtocol(None) + client_protocol.makeConnection( + FakeTransport(server_protocol, self.reactor, client_protocol) + ) + + # tell the server protocol to send its stuff back to the client, too + server_protocol.makeConnection( + FakeTransport(client_protocol, self.reactor, server_protocol) + ) + + if ssl: + http_protocol = server_protocol.wrappedProtocol + tls_connection = server_protocol._tlsConnection + else: + http_protocol = server_protocol + tls_connection = None + + # give the reactor a pump to get the TLS juices flowing (if needed) + self.reactor.advance(0) + + if expected_sni is not None: + server_name = tls_connection.get_servername() + self.assertEqual( + server_name, + expected_sni, + "Expected SNI %s but got %s" % (expected_sni, server_name), + ) + + return http_protocol + + def test_http_request(self): + agent = ProxyAgent(self.reactor) + + self.reactor.lookups["test.com"] = "1.2.3.4" + d = agent.request(b"GET", b"http://test.com") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 80) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, _get_test_protocol_factory() + ) + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + + def test_https_request(self): + agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy()) + + self.reactor.lookups["test.com"] = "1.2.3.4" + d = agent.request(b"GET", b"https://test.com/abc") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + _get_test_protocol_factory(), + ssl=True, + expected_sni=b"test.com", + ) + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/abc") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + + def test_http_request_via_proxy(self): + agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888") + + self.reactor.lookups["proxy.com"] = "1.2.3.5" + d = agent.request(b"GET", b"http://test.com") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, 
_timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.5") + self.assertEqual(port, 8888) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, _get_test_protocol_factory() + ) + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"http://test.com") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + + def test_https_request_via_proxy(self): + agent = ProxyAgent( + self.reactor, + contextFactory=get_test_https_policy(), + https_proxy=b"proxy.com", + ) + + self.reactor.lookups["proxy.com"] = "1.2.3.5" + d = agent.request(b"GET", b"https://test.com/abc") + + # there should be a pending TCP connection + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.5") + self.assertEqual(port, 1080) + + # make a test HTTP server, and wire up the client + proxy_server = self._make_connection( + client_factory, _get_test_protocol_factory() + ) + + # fish the transports back out so that we can do the old switcheroo + s2c_transport = proxy_server.transport + client_protocol = s2c_transport.other + c2s_transport = client_protocol.transport + + # the FakeTransport is async, so we need to pump the reactor + self.reactor.advance(0) + + # now there should be a pending CONNECT request + self.assertEqual(len(proxy_server.requests), 1) + + request = proxy_server.requests[0] + self.assertEqual(request.method, b"CONNECT") + self.assertEqual(request.path, b"test.com:443") + + # tell the proxy server not to close the connection + proxy_server.persistent = True + + # this just stops the http Request trying to do a chunked response + # request.setHeader(b"Content-Length", b"0") + request.finish() + + # now we can replace the proxy channel with a new, SSL-wrapped HTTP channel + ssl_factory = _wrap_server_factory_for_tls(_get_test_protocol_factory()) + ssl_protocol = ssl_factory.buildProtocol(None) + http_server = ssl_protocol.wrappedProtocol + + ssl_protocol.makeConnection( + FakeTransport(client_protocol, self.reactor, ssl_protocol) + ) + c2s_transport.other = ssl_protocol + + self.reactor.advance(0) + + server_name = ssl_protocol._tlsConnection.get_servername() + expected_sni = b"test.com" + self.assertEqual( + server_name, + expected_sni, + "Expected SNI %s but got %s" % (expected_sni, server_name), + ) + + # now there should be a pending request + self.assertEqual(len(http_server.requests), 1) + + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/abc") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + request.write(b"result") + request.finish() + + self.reactor.advance(0) + + resp = self.successResultOf(d) + body = self.successResultOf(treq.content(resp)) + self.assertEqual(body, b"result") + + +def _wrap_server_factory_for_tls(factory, sanlist=None): + """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory + + The resultant factory will create a TLS server which 
presents a certificate + signed by our test CA, valid for the domains in `sanlist` + + Args: + factory (interfaces.IProtocolFactory): protocol factory to wrap + sanlist (iterable[bytes]): list of domains the cert should be valid for + + Returns: + interfaces.IProtocolFactory + """ + if sanlist is None: + sanlist = [b"DNS:test.com"] + + connection_creator = TestServerTLSConnectionFactory(sanlist=sanlist) + return TLSMemoryBIOFactory( + connection_creator, isClient=False, wrappedFactory=factory + ) + + +def _get_test_protocol_factory(): + """Get a protocol Factory which will build an HTTPChannel + + Returns: + interfaces.IProtocolFactory + """ + server_factory = Factory.forProtocol(HTTPChannel) + + # Request.finish expects the factory to have a 'log' method. + server_factory.log = _log_request + + return server_factory + + +def _log_request(request): + """Implements Factory.log, which is expected by Request.finish""" + logger.info("Completed request %s", request) diff --git a/tests/push/test_http.py b/tests/push/test_http.py index 8ce6bb62da..af2327fb66 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -50,7 +50,7 @@ class HTTPPusherTests(HomeserverTestCase): config = self.default_config() config["start_pushers"] = True - hs = self.setup_test_homeserver(config=config, simple_http_client=m) + hs = self.setup_test_homeserver(config=config, proxied_http_client=m) return hs diff --git a/tests/server.py b/tests/server.py index 469efb4edb..f878aeaada 100644 --- a/tests/server.py +++ b/tests/server.py @@ -395,11 +395,24 @@ class FakeTransport(object): self.disconnecting = True if self._protocol: self._protocol.connectionLost(reason) - self.disconnected = True + + # if we still have data to write, delay until that is done + if self.buffer: + logger.info( + "FakeTransport: Delaying disconnect until buffer is flushed" + ) + else: + self.disconnected = True def abortConnection(self): logger.info("FakeTransport: abortConnection()") - self.loseConnection() + + if not self.disconnecting: + self.disconnecting = True + if self._protocol: + self._protocol.connectionLost(None) + + self.disconnected = True def pauseProducing(self): if not self.producer: @@ -430,6 +443,9 @@ class FakeTransport(object): self._reactor.callLater(0.0, _produce) def write(self, byt): + if self.disconnecting: + raise Exception("Writing to disconnecting FakeTransport") + self.buffer = self.buffer + byt # always actually do the write asynchronously. 
Some protocols (notably the
@@ -474,6 +490,10 @@ class FakeTransport(object):
         if self.buffer and self.autoflush:
             self._reactor.callLater(0.0, self.flush)

+        if not self.buffer and self.disconnecting:
+            logger.info("FakeTransport: Buffer now empty, completing disconnect")
+            self.disconnected = True
+

 def connect_client(reactor: IReactorTCP, client_id: int) -> AccumulatingProtocol:
     """
--
cgit 1.5.1


From 4e1c7b79fa3498c48106c17d0edbab2f7bcc0c38 Mon Sep 17 00:00:00 2001
From: Amber Brown
Date: Tue, 5 Nov 2019 05:05:48 +1100
Subject: Remove the psutil dependency (#6318)

* remove psutil and replace with resource
---
 changelog.d/6318.misc          |   1 +
 synapse/app/homeserver.py      | 174 ++++++++++++++++++++++------------------
 synapse/python_dependencies.py |   1 -
 synapse/server.py              |   2 +
 tests/test_phone_home.py       |  51 ++++++++++++
 5 files changed, 146 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/6318.misc
 create mode 100644 tests/test_phone_home.py

diff --git a/changelog.d/6318.misc b/changelog.d/6318.misc
new file mode 100644
index 0000000000..63527ccef4
--- /dev/null
+++ b/changelog.d/6318.misc
@@ -0,0 +1 @@
+Remove the dependency on psutil and replace functionality with the stdlib `resource` module.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 8d28076d92..00a7f8330e 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -19,12 +19,13 @@ from __future__ import print_function

 import gc
 import logging
+import math
 import os
+import resource
 import sys

 from six import iteritems

-import psutil
 from prometheus_client import Gauge

 from twisted.application import service
@@ -471,6 +472,87 @@ class SynapseService(service.Service):
         return self._port.stopListening()


+# Contains the list of processes we will be monitoring
+# currently either 0 or 1
+_stats_process = []
+
+
+@defer.inlineCallbacks
+def phone_stats_home(hs, stats, stats_process=_stats_process):
+    logger.info("Gathering stats for reporting")
+    now = int(hs.get_clock().time())
+    uptime = int(now - hs.start_time)
+    if uptime < 0:
+        uptime = 0
+
+    stats["homeserver"] = hs.config.server_name
+    stats["server_context"] = hs.config.server_context
+    stats["timestamp"] = now
+    stats["uptime_seconds"] = uptime
+    version = sys.version_info
+    stats["python_version"] = "{}.{}.{}".format(
+        version.major, version.minor, version.micro
+    )
+    stats["total_users"] = yield hs.get_datastore().count_all_users()
+
+    total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users()
+    stats["total_nonbridged_users"] = total_nonbridged_users
+
+    daily_user_type_results = yield hs.get_datastore().count_daily_user_type()
+    for name, count in iteritems(daily_user_type_results):
+        stats["daily_user_type_" + name] = count
+
+    room_count = yield hs.get_datastore().get_room_count()
+    stats["total_room_count"] = room_count
+
+    stats["daily_active_users"] = yield hs.get_datastore().count_daily_users()
+    stats["monthly_active_users"] = yield hs.get_datastore().count_monthly_users()
+    stats["daily_active_rooms"] = yield hs.get_datastore().count_daily_active_rooms()
+    stats["daily_messages"] = yield hs.get_datastore().count_daily_messages()
+
+    r30_results = yield hs.get_datastore().count_r30_users()
+    for name, count in iteritems(r30_results):
+        stats["r30_users_" + name] = count
+
+    daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
+    stats["daily_sent_messages"] = daily_sent_messages
+    stats["cache_factor"] = CACHE_SIZE_FACTOR
+    stats["event_cache_size"] =
hs.config.event_cache_size + + # + # Performance statistics + # + old = stats_process[0] + new = (now, resource.getrusage(resource.RUSAGE_SELF)) + stats_process[0] = new + + # Get RSS in bytes + stats["memory_rss"] = new[1].ru_maxrss + + # Get CPU time in % of a single core, not % of all cores + used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - ( + old[1].ru_utime + old[1].ru_stime + ) + if used_cpu_time == 0 or new[0] == old[0]: + stats["cpu_average"] = 0 + else: + stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100) + + # + # Database version + # + + stats["database_engine"] = hs.get_datastore().database_engine_name + stats["database_server_version"] = hs.get_datastore().get_server_version() + logger.info("Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats)) + try: + yield hs.get_proxied_http_client().put_json( + hs.config.report_stats_endpoint, stats + ) + except Exception as e: + logger.warning("Error reporting stats: %s", e) + + def run(hs): PROFILE_SYNAPSE = False if PROFILE_SYNAPSE: @@ -497,91 +579,19 @@ def run(hs): reactor.run = profile(reactor.run) clock = hs.get_clock() - start_time = clock.time() stats = {} - # Contains the list of processes we will be monitoring - # currently either 0 or 1 - stats_process = [] + def performance_stats_init(): + _stats_process.clear() + _stats_process.append( + (int(hs.get_clock().time(), resource.getrusage(resource.RUSAGE_SELF))) + ) def start_phone_stats_home(): - return run_as_background_process("phone_stats_home", phone_stats_home) - - @defer.inlineCallbacks - def phone_stats_home(): - logger.info("Gathering stats for reporting") - now = int(hs.get_clock().time()) - uptime = int(now - start_time) - if uptime < 0: - uptime = 0 - - stats["homeserver"] = hs.config.server_name - stats["server_context"] = hs.config.server_context - stats["timestamp"] = now - stats["uptime_seconds"] = uptime - version = sys.version_info - stats["python_version"] = "{}.{}.{}".format( - version.major, version.minor, version.micro - ) - stats["total_users"] = yield hs.get_datastore().count_all_users() - - total_nonbridged_users = yield hs.get_datastore().count_nonbridged_users() - stats["total_nonbridged_users"] = total_nonbridged_users - - daily_user_type_results = yield hs.get_datastore().count_daily_user_type() - for name, count in iteritems(daily_user_type_results): - stats["daily_user_type_" + name] = count - - room_count = yield hs.get_datastore().get_room_count() - stats["total_room_count"] = room_count - - stats["daily_active_users"] = yield hs.get_datastore().count_daily_users() - stats["monthly_active_users"] = yield hs.get_datastore().count_monthly_users() - stats[ - "daily_active_rooms" - ] = yield hs.get_datastore().count_daily_active_rooms() - stats["daily_messages"] = yield hs.get_datastore().count_daily_messages() - - r30_results = yield hs.get_datastore().count_r30_users() - for name, count in iteritems(r30_results): - stats["r30_users_" + name] = count - - daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() - stats["daily_sent_messages"] = daily_sent_messages - stats["cache_factor"] = CACHE_SIZE_FACTOR - stats["event_cache_size"] = hs.config.event_cache_size - - if len(stats_process) > 0: - stats["memory_rss"] = 0 - stats["cpu_average"] = 0 - for process in stats_process: - stats["memory_rss"] += process.memory_info().rss - stats["cpu_average"] += int(process.cpu_percent(interval=None)) - - stats["database_engine"] = hs.get_datastore().database_engine_name - 
stats["database_server_version"] = hs.get_datastore().get_server_version() - logger.info( - "Reporting stats to %s: %s" % (hs.config.report_stats_endpoint, stats) + return run_as_background_process( + "phone_stats_home", phone_stats_home, hs, stats ) - try: - yield hs.get_proxied_http_client().put_json( - hs.config.report_stats_endpoint, stats - ) - except Exception as e: - logger.warning("Error reporting stats: %s", e) - - def performance_stats_init(): - try: - process = psutil.Process() - # Ensure we can fetch both, and make the initial request for cpu_percent - # so the next request will use this as the initial point. - process.memory_info().rss - process.cpu_percent(interval=None) - logger.info("report_stats can use psutil") - stats_process.append(process) - except (AttributeError): - logger.warning("Unable to read memory/cpu stats. Disabling reporting.") def generate_user_daily_visit_stats(): return run_as_background_process( @@ -626,7 +636,7 @@ def run(hs): if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") - clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000) + clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000, hs, stats) # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process @@ -634,7 +644,7 @@ def run(hs): # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes - clock.call_later(5 * 60, start_phone_stats_home) + clock.call_later(5 * 60, start_phone_stats_home, hs, stats) _base.start_reactor( "synapse-homeserver", diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index aa7da1c543..5871feaafd 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -61,7 +61,6 @@ REQUIREMENTS = [ "bcrypt>=3.1.0", "pillow>=4.3.0", "sortedcontainers>=1.4.4", - "psutil>=2.0.0", "pymacaroons>=0.13.0", "msgpack>=0.5.2", "phonenumbers>=8.2.0", diff --git a/synapse/server.py b/synapse/server.py index f8aeebcff8..90c3b072e8 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -221,6 +221,7 @@ class HomeServer(object): self.hostname = hostname self._building = {} self._listening_services = [] + self.start_time = None self.clock = Clock(reactor) self.distributor = Distributor() @@ -240,6 +241,7 @@ class HomeServer(object): datastore = self.DATASTORE_CLASS(conn, self) self.datastores = DataStores(datastore, conn, self) conn.commit() + self.start_time = int(self.get_clock().time()) logger.info("Finished setting up.") def setup_master(self): diff --git a/tests/test_phone_home.py b/tests/test_phone_home.py new file mode 100644 index 0000000000..7657bddea5 --- /dev/null +++ b/tests/test_phone_home.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import resource
+
+import mock
+
+from synapse.app.homeserver import phone_stats_home
+
+from tests.unittest import HomeserverTestCase
+
+
+class PhoneHomeStatsTestCase(HomeserverTestCase):
+    def test_performance_frozen_clock(self):
+        """
+        If time doesn't move, don't error out.
+        """
+        past_stats = [
+            (self.hs.get_clock().time(), resource.getrusage(resource.RUSAGE_SELF))
+        ]
+        stats = {}
+        self.get_success(phone_stats_home(self.hs, stats, past_stats))
+        self.assertEqual(stats["cpu_average"], 0)
+
+    def test_performance_100(self):
+        """
+        1 second of usage over 1 second is 100% CPU usage.
+        """
+        real_res = resource.getrusage(resource.RUSAGE_SELF)
+        old_resource = mock.Mock(spec=real_res)
+        old_resource.ru_utime = real_res.ru_utime - 1
+        old_resource.ru_stime = real_res.ru_stime
+        old_resource.ru_maxrss = real_res.ru_maxrss
+
+        past_stats = [(self.hs.get_clock().time(), old_resource)]
+        stats = {}
+        self.reactor.advance(1)
+        self.get_success(phone_stats_home(self.hs, stats, past_stats))
+        self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5)
--
cgit 1.5.1


From ef1a85e7733bc1979f48357dd59b638110285075 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 26 Nov 2019 18:10:50 +0000
Subject: Fix startup error when http proxy is defined. (#6421)

Guess I only tested this on python 2 :/

Fixes #6419.
---
 changelog.d/6421.bugfix                       | 1 +
 synapse/rest/media/v1/preview_url_resource.py | 4 ++--
 synapse/server.py                             | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/6421.bugfix

diff --git a/changelog.d/6421.bugfix b/changelog.d/6421.bugfix
new file mode 100644
index 0000000000..7969f7f71d
--- /dev/null
+++ b/changelog.d/6421.bugfix
@@ -0,0 +1 @@
+Fix startup error when http proxy is defined.
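A rough illustration of the failure being fixed (not part of the patch; the
proxy value is a made-up example): on Python 3, os.getenv returns str, but the
proxy-parsing code in synapse.http.proxyagent operates on bytes, and the two
cannot be mixed:

    proxy = "localhost:8888"    # what os.getenv("http_proxy") returned: str
    # b":" in proxy             # TypeError: a bytes-like object is required

    proxy = b"localhost:8888"   # what os.getenvb(b"http_proxy") returns: bytes
    assert b":" in proxy        # parse_host_port() can now split host and port

Hence the change below swaps os.getenv for os.getenvb, which yields bytes
(note that os.getenvb only exists on platforms with a bytes environment,
i.e. POSIX).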
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a23d6f5c75..fb0d02aa83 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -77,8 +77,8 @@ class PreviewUrlResource(DirectServeResource): treq_args={"browser_like_redirects": True}, ip_whitelist=hs.config.url_preview_ip_range_whitelist, ip_blacklist=hs.config.url_preview_ip_range_blacklist, - http_proxy=os.getenv("http_proxy"), - https_proxy=os.getenv("HTTPS_PROXY"), + http_proxy=os.getenvb(b"http_proxy"), + https_proxy=os.getenvb(b"HTTPS_PROXY"), ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path diff --git a/synapse/server.py b/synapse/server.py index 90c3b072e8..be9af7f986 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -318,8 +318,8 @@ class HomeServer(object): def build_proxied_http_client(self): return SimpleHttpClient( self, - http_proxy=os.getenv("http_proxy"), - https_proxy=os.getenv("HTTPS_PROXY"), + http_proxy=os.getenvb(b"http_proxy"), + https_proxy=os.getenvb(b"HTTPS_PROXY"), ) def build_room_creation_handler(self): -- cgit 1.5.1 From d537be1ebd0e7ce4c84118efa400932cc6432aa9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Dec 2019 13:40:02 +0000 Subject: Pass Database into the data store --- synapse/server.py | 3 +-- synapse/storage/_base.py | 2 +- synapse/storage/background_updates.py | 2 +- synapse/storage/data_stores/__init__.py | 7 ++++-- synapse/storage/database.py | 38 ++++++++++++++------------------- 5 files changed, 24 insertions(+), 28 deletions(-) (limited to 'synapse/server.py') diff --git a/synapse/server.py b/synapse/server.py index be9af7f986..2db3dab221 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -238,8 +238,7 @@ class HomeServer(object): def setup(self): logger.info("Setting up.") with self.get_db_conn() as conn: - datastore = self.DATASTORE_CLASS(conn, self) - self.datastores = DataStores(datastore, conn, self) + self.datastores = DataStores(self.DATASTORE_CLASS, conn, self) conn.commit() self.start_time = int(self.get_clock().time()) logger.info("Finished setting up.") diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f9e7f9a71e..b7637b5dc0 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -41,7 +41,7 @@ class SQLBaseStore(object): self.hs = hs self._clock = hs.get_clock() self.database_engine = hs.database_engine - self.db = Database(hs) # In future this will be passed in + self.db = database self.rand = random.SystemRandom() def _invalidate_state_caches(self, room_id, members_changed): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index a9a13a2658..4f97fd5ab6 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -379,7 +379,7 @@ class BackgroundUpdater(object): logger.debug("[SQL] %s", sql) c.execute(sql) - if isinstance(self.db.database_engine, engines.PostgresEngine): + if isinstance(self.db.engine, engines.PostgresEngine): runner = create_index_psql elif psql_only: runner = None diff --git a/synapse/storage/data_stores/__init__.py b/synapse/storage/data_stores/__init__.py index cb184a98cc..79ecc62735 100644 --- a/synapse/storage/data_stores/__init__.py +++ b/synapse/storage/data_stores/__init__.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from synapse.storage.database import Database + class DataStores(object): """The various data stores. @@ -20,7 +22,8 @@ class DataStores(object): These are low level interfaces to physical databases. """ - def __init__(self, main_store, db_conn, hs): + def __init__(self, main_store_class, db_conn, hs): # Note we pass in the main store here as workers use a different main # store. - self.main = main_store + database = Database(hs) + self.main = main_store_class(database, db_conn, hs) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 6843b7e7f8..ec19ae1d9d 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -234,7 +234,7 @@ class Database(object): # to watch it self._txn_perf_counters = PerformanceCounters() - self.database_engine = hs.database_engine + self.engine = hs.database_engine # A set of tables that are not safe to use native upserts in. self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) @@ -242,10 +242,10 @@ class Database(object): # We add the user_directory_search table to the blacklist on SQLite # because the existing search table does not have an index, making it # unsafe to use native upserts. - if isinstance(self.database_engine, Sqlite3Engine): + if isinstance(self.engine, Sqlite3Engine): self._unsafe_to_upsert_tables.add("user_directory_search") - if self.database_engine.can_native_upsert: + if self.engine.can_native_upsert: # Check ASAP (and then later, every 1s) to see if we have finished # background updates of tables that aren't safe to update. self._clock.call_later( @@ -331,7 +331,7 @@ class Database(object): cursor = LoggingTransaction( conn.cursor(), name, - self.database_engine, + self.engine, after_callbacks, exception_callbacks, ) @@ -339,7 +339,7 @@ class Database(object): r = func(cursor, *args, **kwargs) conn.commit() return r - except self.database_engine.module.OperationalError as e: + except self.engine.module.OperationalError as e: # This can happen if the database disappears mid # transaction. logger.warning( @@ -353,20 +353,20 @@ class Database(object): i += 1 try: conn.rollback() - except self.database_engine.module.Error as e1: + except self.engine.module.Error as e1: logger.warning( "[TXN EROLL] {%s} %s", name, exception_to_unicode(e1) ) continue raise - except self.database_engine.module.DatabaseError as e: - if self.database_engine.is_deadlock(e): + except self.engine.module.DatabaseError as e: + if self.engine.is_deadlock(e): logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N) if i < N: i += 1 try: conn.rollback() - except self.database_engine.module.Error as e1: + except self.engine.module.Error as e1: logger.warning( "[TXN EROLL] {%s} %s", name, @@ -494,7 +494,7 @@ class Database(object): sql_scheduling_timer.observe(sched_duration_sec) context.add_database_scheduled(sched_duration_sec) - if self.database_engine.is_connection_closed(conn): + if self.engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") conn.reconnect() @@ -561,7 +561,7 @@ class Database(object): """ try: yield self.runInteraction(desc, self.simple_insert_txn, table, values) - except self.database_engine.module.IntegrityError: + except self.engine.module.IntegrityError: # We have to do or_ignore flag at this layer, since we can't reuse # a cursor after we receive an error from the db. 
if not or_ignore: @@ -660,7 +660,7 @@ class Database(object): lock=lock, ) return result - except self.database_engine.module.IntegrityError as e: + except self.engine.module.IntegrityError as e: attempts += 1 if attempts >= 5: # don't retry forever, because things other than races @@ -692,10 +692,7 @@ class Database(object): upserts return True if a new entry was created, False if an existing one was updated. """ - if ( - self.database_engine.can_native_upsert - and table not in self._unsafe_to_upsert_tables - ): + if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables: return self.simple_upsert_txn_native_upsert( txn, table, keyvalues, values, insertion_values=insertion_values ) @@ -726,7 +723,7 @@ class Database(object): """ # We need to lock the table :(, unless we're *really* careful if lock: - self.database_engine.lock_table(txn, table) + self.engine.lock_table(txn, table) def _getwhere(key): # If the value we're passing in is None (aka NULL), we need to use @@ -828,10 +825,7 @@ class Database(object): Returns: None """ - if ( - self.database_engine.can_native_upsert - and table not in self._unsafe_to_upsert_tables - ): + if self.engine.can_native_upsert and table not in self._unsafe_to_upsert_tables: return self.simple_upsert_many_txn_native_upsert( txn, table, key_names, key_values, value_names, value_values ) @@ -1301,7 +1295,7 @@ class Database(object): "limit": limit, } - sql = self.database_engine.convert_param_style(sql) + sql = self.engine.convert_param_style(sql) txn = db_conn.cursor() txn.execute(sql, (int(max_value),)) -- cgit 1.5.1 From bc5cb8bfe8c5b0b551de9a97912cd00caeaf6b48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 6 Dec 2019 17:42:18 +0000 Subject: Remove database config parsing from apps. 
--- synapse/app/admin_cmd.py | 5 ----- synapse/app/appservice.py | 5 ----- synapse/app/client_reader.py | 5 ----- synapse/app/event_creator.py | 5 ----- synapse/app/federation_reader.py | 5 ----- synapse/app/federation_sender.py | 5 ----- synapse/app/frontend_proxy.py | 5 ----- synapse/app/homeserver.py | 7 +------ synapse/app/media_repository.py | 5 ----- synapse/app/pusher.py | 5 ----- synapse/app/synchrotron.py | 5 ----- synapse/app/user_dir.py | 5 ----- synapse/server.py | 10 +++++++++- tests/test_types.py | 19 ++++++++----------- tests/utils.py | 2 -- 15 files changed, 18 insertions(+), 75 deletions(-) (limited to 'synapse/server.py') diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 04751a6a5e..51a909419f 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -45,7 +45,6 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer -from synapse.storage.engines import create_engine from synapse.util.logcontext import LoggingContext from synapse.util.versionstring import get_version_string @@ -229,14 +228,10 @@ def start(config_options): synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = AdminCmdServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 02b900f382..e82e0f11e3 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -34,7 +34,6 @@ from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -143,8 +142,6 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - if config.notify_appservices: sys.stderr.write( "\nThe appservices must be disabled in the main synapse process" @@ -159,10 +156,8 @@ def start(config_options): ps = AppserviceServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ps, config, use_worker_options=True) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index dadb487d5f..3edfe19567 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -62,7 +62,6 @@ from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.rest.client.versions import VersionsRestServlet from synapse.server import HomeServer -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -181,14 +180,10 @@ def
start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = ClientReaderServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index d110599a35..d0ddbe38fc 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -57,7 +57,6 @@ from synapse.rest.client.v1.room import ( ) from synapse.server import HomeServer from synapse.storage.data_stores.main.user_directory import UserDirectoryStore -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -180,14 +179,10 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = EventCreatorServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 418c086254..311523e0ed 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -46,7 +46,6 @@ from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler from synapse.rest.key.v2 import KeyApiV2Resource from synapse.server import HomeServer -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -162,14 +161,10 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = FederationReaderServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index f24920a7d6..83c436229c 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -41,7 +41,6 @@ from synapse.replication.tcp.client import ReplicationClientHandler from synapse.replication.tcp.streams._base import ReceiptsStream from synapse.server import HomeServer from synapse.storage.database import Database -from synapse.storage.engines import create_engine from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree @@ -174,8 +173,6 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - if config.send_federation: sys.stderr.write( "\nThe send_federation must be disabled in the main synapse process" @@ -190,10 +187,8 @@ def start(config_options): ss = FederationSenderServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine,
) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e647459d0e..30e435eead 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -39,7 +39,6 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.tcp.client import ReplicationClientHandler from synapse.rest.client.v2_alpha._base import client_patterns from synapse.server import HomeServer -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -234,14 +233,10 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = FrontendProxyServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index df65d0a989..79341784c5 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -69,7 +69,7 @@ from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.rest.well_known import WellKnownResource from synapse.server import HomeServer from synapse.storage import DataStore -from synapse.storage.engines import IncorrectDatabaseSetup, create_engine +from synapse.storage.engines import IncorrectDatabaseSetup from synapse.storage.prepare_database import UpgradeDatabaseException from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.httpresourcetree import create_resource_tree @@ -328,15 +328,10 @@ def setup(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - config.database_config["args"]["cp_openfun"] = database_engine.on_new_connection - hs = SynapseHomeServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) synapse.config.logger.setup_logging(hs, config, use_worker_options=False) diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 2c6dd3ef02..4c80f257e2 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -40,7 +40,6 @@ from synapse.rest.admin import register_servlets_for_media_repo from synapse.rest.media.v0.content_repository import ContentRepoResource from synapse.server import HomeServer from synapse.storage.data_stores.main.media_repository import MediaRepositoryStore -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -157,14 +156,10 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = MediaRepositoryServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 01a5ffc363..e157cbf64b 100644 --- a/synapse/app/pusher.py +++
b/synapse/app/pusher.py @@ -36,7 +36,6 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer from synapse.storage import DataStore -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.versionstring import get_version_string @@ -198,14 +197,10 @@ def start(config_options): # Force the pushers to start since they will be disabled in the main config config.start_pushers = True - database_engine = create_engine(config.database_config) - ps = PusherServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ps, config, use_worker_options=True) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 288ee64b42..dd2132e608 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -55,7 +55,6 @@ from synapse.rest.client.v1.room import RoomInitialSyncRestServlet from synapse.rest.client.v2_alpha import sync from synapse.server import HomeServer from synapse.storage.data_stores.main.presence import UserPresenceState -from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.stringutils import random_string @@ -437,14 +436,10 @@ def start(config_options): synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - ss = SynchrotronServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, application_service_handler=SynchrotronApplicationService(), ) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index c01fb34a9b..1257098f92 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -44,7 +44,6 @@ from synapse.rest.client.v2_alpha import user_directory from synapse.server import HomeServer from synapse.storage.data_stores.main.user_directory import UserDirectoryStore from synapse.storage.database import Database -from synapse.storage.engines import create_engine from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole @@ -200,8 +199,6 @@ def start(config_options): events.USE_FROZEN_DICTS = config.use_frozen_dicts - database_engine = create_engine(config.database_config) - if config.update_user_directory: sys.stderr.write( "\nThe update_user_directory must be disabled in the main synapse process" @@ -216,10 +213,8 @@ def start(config_options): ss = UserDirectoryServer( config.server_name, - db_config=config.database_config, config=config, version_string="Synapse/" + get_version_string(synapse), - database_engine=database_engine, ) setup_logging(ss, config, use_worker_options=True) diff --git a/synapse/server.py b/synapse/server.py index 2db3dab221..a8b5459ff3 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -97,6 +97,7 @@ from synapse.server_notices.worker_server_notices_sender import ( ) from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStores, Storage +from synapse.storage.engines import create_engine from 
synapse.streams.events import EventSources from synapse.util import Clock from synapse.util.distributor import Distributor @@ -209,7 +210,7 @@ class HomeServer(object): # instantiated during setup() for future return by get_datastore() DATASTORE_CLASS = abc.abstractproperty() - def __init__(self, hostname, reactor=None, **kwargs): + def __init__(self, hostname, config, reactor=None, **kwargs): """ Args: hostname : The hostname for the server. @@ -219,6 +220,7 @@ class HomeServer(object): self._reactor = reactor self.hostname = hostname + self.config = config self._building = {} self._listening_services = [] self.start_time = None @@ -229,6 +231,12 @@ class HomeServer(object): self.admin_redaction_ratelimiter = Ratelimiter() self.registration_ratelimiter = Ratelimiter() + self.database_engine = create_engine(config.database_config) + config.database_config.setdefault("args", {})[ + "cp_openfun" + ] = self.database_engine.on_new_connection + self.db_config = config.database_config + self.datastores = None # Other kwargs are explicit dependencies diff --git a/tests/test_types.py b/tests/test_types.py index 9ab5f829b0..8d97c751ea 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -17,18 +17,15 @@ from synapse.api.errors import SynapseError from synapse.types import GroupID, RoomAlias, UserID, map_username_to_mxid_localpart from tests import unittest -from tests.utils import TestHomeServer -mock_homeserver = TestHomeServer(hostname="my.domain") - -class UserIDTestCase(unittest.TestCase): +class UserIDTestCase(unittest.HomeserverTestCase): def test_parse(self): - user = UserID.from_string("@1234abcd:my.domain") + user = UserID.from_string("@1234abcd:test") self.assertEquals("1234abcd", user.localpart) - self.assertEquals("my.domain", user.domain) - self.assertEquals(True, mock_homeserver.is_mine(user)) + self.assertEquals("test", user.domain) + self.assertEquals(True, self.hs.is_mine(user)) def test_pase_empty(self): with self.assertRaises(SynapseError): @@ -48,13 +45,13 @@ class UserIDTestCase(unittest.TestCase): self.assertTrue(userA != userB) -class RoomAliasTestCase(unittest.TestCase): +class RoomAliasTestCase(unittest.HomeserverTestCase): def test_parse(self): - room = RoomAlias.from_string("#channel:my.domain") + room = RoomAlias.from_string("#channel:test") self.assertEquals("channel", room.localpart) - self.assertEquals("my.domain", room.domain) - self.assertEquals(True, mock_homeserver.is_mine(room)) + self.assertEquals("test", room.domain) + self.assertEquals(True, self.hs.is_mine(room)) def test_build(self): room = RoomAlias("channel", "my.domain") diff --git a/tests/utils.py b/tests/utils.py index c57da59191..585f305b9a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -260,9 +260,7 @@ def setup_test_homeserver( hs = homeserverToUse( name, config=config, - db_config=config.database_config, version_string="Synapse/tests", - database_engine=db_engine, tls_server_context_factory=Mock(), tls_client_options_factory=Mock(), reactor=reactor, -- cgit 1.5.1 From cfcfb57e581bc1577a5fba3b34a1b1af7ccb6c0d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Dec 2019 17:27:46 +0000 Subject: Add new config param to docstring and add types --- synapse/server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'synapse/server.py') diff --git a/synapse/server.py b/synapse/server.py index a8b5459ff3..5021068ce0 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -34,6 +34,7 @@ from synapse.api.filtering import Filtering from synapse.api.ratelimiting import 
Ratelimiter from synapse.appservice.api import ApplicationServiceApi from synapse.appservice.scheduler import ApplicationServiceScheduler +from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory @@ -210,10 +211,11 @@ class HomeServer(object): # instantiated during setup() for future return by get_datastore() DATASTORE_CLASS = abc.abstractproperty() - def __init__(self, hostname, config, reactor=None, **kwargs): + def __init__(self, hostname: str, config: HomeServerConfig, reactor=None, **kwargs): """ Args: hostname : The hostname for the server. + config: The full config for the homeserver. """ if not reactor: from twisted.internet import reactor -- cgit 1.5.1 From 2284eb3a533a2df04784df08da28e67d6588a5ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Dec 2019 10:45:12 +0000 Subject: Add database config class (#6513) This encapsulates config for a given database and is the way to get new connections. --- changelog.d/6513.misc | 1 + scripts-dev/update_database | 9 +-- scripts/synapse_port_db | 58 ++++++----------- synapse/config/database.py | 78 ++++++++++++++++++------ synapse/handlers/presence.py | 2 +- synapse/server.py | 41 ++------------ synapse/storage/_base.py | 2 +- synapse/storage/data_stores/__init__.py | 40 ++++++++++--- synapse/storage/data_stores/main/client_ips.py | 2 +- synapse/storage/database.py | 45 ++++++++++++++- synapse/storage/engines/sqlite.py | 16 +++++- synapse/storage/prepare_database.py | 7 +-- tests/handlers/test_typing.py | 39 ++++++------- tests/replication/slave/storage/_base.py | 6 +- tests/server.py | 55 +++++++++--------- tests/storage/test_appservice.py | 37 ++++++++---- tests/storage/test_base.py | 14 +++-- tests/storage/test_registration.py | 1 - tests/utils.py | 43 +++++--------- 19 files changed, 287 insertions(+), 209 deletions(-) create mode 100644 changelog.d/6513.misc (limited to 'synapse/server.py') diff --git a/changelog.d/6513.misc b/changelog.d/6513.misc new file mode 100644 index 0000000000..36700f5657 --- /dev/null +++ b/changelog.d/6513.misc @@ -0,0 +1 @@ +Remove all assumptions of there being a single physical DB apart from the `synapse.config`. diff --git a/scripts-dev/update_database b/scripts-dev/update_database index 23017c21f8..1d62f0403a 100755 --- a/scripts-dev/update_database +++ b/scripts-dev/update_database @@ -26,7 +26,6 @@ from synapse.config.homeserver import HomeServerConfig from synapse.metrics.background_process_metrics import run_as_background_process from synapse.server import HomeServer from synapse.storage import DataStore -from synapse.storage.prepare_database import prepare_database logger = logging.getLogger("update_database") @@ -77,12 +76,8 @@ if __name__ == "__main__": # Instantiate and initialise the homeserver object. hs = MockHomeserver(config) - db_conn = hs.get_db_conn() - # Update the database to the latest schema. - prepare_database(db_conn, hs.database_engine, config=config) - db_conn.commit() - - # setup instantiates the store within the homeserver object. + # Setup instantiates the store within the homeserver object and updates the + # DB.
hs.setup() store = hs.get_datastore() diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index e393a9b2f7..5b5368988c 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -30,6 +30,7 @@ import yaml from twisted.enterprise import adbapi from twisted.internet import defer, reactor +from synapse.config.database import DatabaseConnectionConfig from synapse.config.homeserver import HomeServerConfig from synapse.logging.context import PreserveLoggingContext from synapse.storage._base import LoggingTransaction @@ -55,7 +56,7 @@ from synapse.storage.data_stores.main.stats import StatsStore from synapse.storage.data_stores.main.user_directory import ( UserDirectoryBackgroundUpdateStore, ) -from synapse.storage.database import Database +from synapse.storage.database import Database, make_conn from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database from synapse.util import Clock @@ -165,23 +166,17 @@ class Store( class MockHomeserver: - def __init__(self, config, database_engine, db_conn, db_pool): - self.database_engine = database_engine - self.db_conn = db_conn - self.db_pool = db_pool + def __init__(self, config): self.clock = Clock(reactor) self.config = config self.hostname = config.server_name - def get_db_conn(self): - return self.db_conn - - def get_db_pool(self): - return self.db_pool - def get_clock(self): return self.clock + def get_reactor(self): + return reactor + class Porter(object): def __init__(self, **kwargs): @@ -445,45 +440,36 @@ class Porter(object): else: return - def setup_db(self, db_config, database_engine): - db_conn = database_engine.module.connect( - **{ - k: v - for k, v in db_config.get("args", {}).items() - if not k.startswith("cp_") - } - ) - - prepare_database(db_conn, database_engine, config=None) + def setup_db(self, db_config: DatabaseConnectionConfig, engine): + db_conn = make_conn(db_config, engine) + prepare_database(db_conn, engine, config=None) db_conn.commit() return db_conn @defer.inlineCallbacks - def build_db_store(self, config): + def build_db_store(self, db_config: DatabaseConnectionConfig): """Builds and returns a database store using the provided configuration. Args: - config: The database configuration, i.e. a dict following the structure of - the "database" section of Synapse's configuration file. + db_config: The database configuration Returns: The built Store object. """ - engine = create_engine(config) - - self.progress.set_state("Preparing %s" % config["name"]) - conn = self.setup_db(config, engine) + self.progress.set_state("Preparing %s" % db_config.config["name"]) - db_pool = adbapi.ConnectionPool(config["name"], **config["args"]) + engine = create_engine(db_config.config) + conn = self.setup_db(db_config, engine) - hs = MockHomeserver(self.hs_config, engine, conn, db_pool) + hs = MockHomeserver(self.hs_config) - store = Store(Database(hs), conn, hs) + store = Store(Database(hs, db_config, engine), conn, hs) yield store.db.runInteraction( - "%s_engine.check_database" % config["name"], engine.check_database, + "%s_engine.check_database" % db_config.config["name"], + engine.check_database, ) return store @@ -509,7 +495,11 @@ class Porter(object): @defer.inlineCallbacks def run(self): try: - self.sqlite_store = yield self.build_db_store(self.sqlite_config) + self.sqlite_store = yield self.build_db_store( + DatabaseConnectionConfig( + "master", self.sqlite_config, data_stores=["main"] + ) + ) # Check if all background updates are done, abort if not.
updates_complete = ( @@ -524,7 +514,7 @@ class Porter(object): defer.returnValue(None) self.postgres_store = yield self.build_db_store( - self.hs_config.database_config + self.hs_config.get_single_database() ) yield self.run_background_updates_on_postgres() diff --git a/synapse/config/database.py b/synapse/config/database.py index 0e2509f0b1..5f2f3c7cfd 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -12,12 +12,43 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import os from textwrap import indent +from typing import List import yaml -from ._base import Config +from synapse.config._base import Config, ConfigError + +logger = logging.getLogger(__name__) + + +class DatabaseConnectionConfig: + """Contains the connection config for a particular database. + + Args: + name: A label for the database, used for logging. + db_config: The config for a particular database, as per `database` + section of main config. Has two fields: `name` for database + module name, and `args` for the args to give to the database + connector. + data_stores: The list of data stores that should be provisioned on the + database. + """ + + def __init__(self, name: str, db_config: dict, data_stores: List[str]): + if db_config["name"] not in ("sqlite3", "psycopg2"): + raise ConfigError("Unsupported database type %r" % (db_config["name"],)) + + if db_config["name"] == "sqlite3": + db_config.setdefault("args", {}).update( + {"cp_min": 1, "cp_max": 1, "check_same_thread": False} + ) + + self.name = name + self.config = db_config + self.data_stores = data_stores class DatabaseConfig(Config): @@ -26,20 +57,14 @@ class DatabaseConfig(Config): def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) - self.database_config = config.get("database") + database_config = config.get("database") - if self.database_config is None: - self.database_config = {"name": "sqlite3", "args": {}} + if database_config is None: + database_config = {"name": "sqlite3", "args": {}} - name = self.database_config.get("name", None) - if name == "psycopg2": - pass - elif name == "sqlite3": - self.database_config.setdefault("args", {}).update( - {"cp_min": 1, "cp_max": 1, "check_same_thread": False} - ) - else: - raise RuntimeError("Unsupported database type '%s'" % (name,)) + self.databases = [ + DatabaseConnectionConfig("master", database_config, data_stores=["main"]) + ] self.set_databasepath(config.get("database_path")) @@ -76,11 +101,24 @@ class DatabaseConfig(Config): self.set_databasepath(args.database_path) def set_databasepath(self, database_path): + if database_path is None: + return + if database_path != ":memory:": database_path = self.abspath(database_path) - if self.database_config.get("name", None) == "sqlite3": - if database_path is not None: - self.database_config["args"]["database"] = database_path + + # We only support setting a database path if we have a single sqlite3 + # database. + if len(self.databases) != 1: + raise ConfigError("Cannot specify 'database_path' with multiple databases") + + database = self.get_single_database() + if database.config["name"] != "sqlite3": + # We don't raise here as we haven't done so before for this case. 
+ logger.warn("Ignoring 'database_path' for non-sqlite3 database") + return + + database.config["args"]["database"] = database_path @staticmethod def add_arguments(parser): @@ -91,3 +129,11 @@ class DatabaseConfig(Config): metavar="SQLITE_DATABASE_PATH", help="The path to a sqlite database to use.", ) + + def get_single_database(self) -> DatabaseConnectionConfig: + """Returns the database if there is only one, useful for e.g. tests + """ + if len(self.databases) != 1: + raise Exception("More than one database exists") + + return self.databases[0] diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index eda15bc623..240c4add12 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -230,7 +230,7 @@ class PresenceHandler(object): is some spurious presence changes that will self-correct. """ # If the DB pool has already terminated, don't try updating - if not self.hs.get_db_pool().running: + if not self.store.database.is_running(): return logger.info( diff --git a/synapse/server.py b/synapse/server.py index 5021068ce0..7926867b77 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -25,7 +25,6 @@ import abc import logging import os -from twisted.enterprise import adbapi from twisted.mail.smtp import sendmail from twisted.web.client import BrowserLikePolicyForHTTPS @@ -98,7 +97,6 @@ from synapse.server_notices.worker_server_notices_sender import ( ) from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStores, Storage -from synapse.storage.engines import create_engine from synapse.streams.events import EventSources from synapse.util import Clock from synapse.util.distributor import Distributor @@ -134,7 +132,6 @@ class HomeServer(object): DEPENDENCIES = [ "http_client", - "db_pool", "federation_client", "federation_server", "handlers", @@ -233,12 +230,6 @@ class HomeServer(object): self.admin_redaction_ratelimiter = Ratelimiter() self.registration_ratelimiter = Ratelimiter() - self.database_engine = create_engine(config.database_config) - config.database_config.setdefault("args", {})[ - "cp_openfun" - ] = self.database_engine.on_new_connection - self.db_config = config.database_config - self.datastores = None # Other kwargs are explicit dependencies @@ -247,10 +238,8 @@ class HomeServer(object): def setup(self): logger.info("Setting up.") - with self.get_db_conn() as conn: - self.datastores = DataStores(self.DATASTORE_CLASS, conn, self) - conn.commit() self.start_time = int(self.get_clock().time()) + self.datastores = DataStores(self.DATASTORE_CLASS, self) logger.info("Finished setting up.") def setup_master(self): @@ -284,6 +273,9 @@ class HomeServer(object): def get_datastore(self): return self.datastores.main + def get_datastores(self): + return self.datastores + def get_config(self): return self.config @@ -433,31 +425,6 @@ class HomeServer(object): ) return MatrixFederationHttpClient(self, tls_client_options_factory) - def build_db_pool(self): - name = self.db_config["name"] - - return adbapi.ConnectionPool( - name, cp_reactor=self.get_reactor(), **self.db_config.get("args", {}) - ) - - def get_db_conn(self, run_new_connection=True): - """Makes a new connection to the database, skipping the db pool - - Returns: - Connection: a connection object implementing the PEP-249 spec - """ - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v - for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def build_media_repository_resource(self): # build the media repo resource. This indirects through the HomeServer # to ensure that we only have a single instance of diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b7637b5dc0..88546ad614 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -40,7 +40,7 @@ class SQLBaseStore(object): def __init__(self, database: Database, db_conn, hs): self.hs = hs self._clock = hs.get_clock() - self.database_engine = hs.database_engine + self.database_engine = database.engine self.db = database self.rand = random.SystemRandom() diff --git a/synapse/storage/data_stores/__init__.py b/synapse/storage/data_stores/__init__.py index cafedd5c0d..0983e059c0 100644 --- a/synapse/storage/data_stores/__init__.py +++ b/synapse/storage/data_stores/__init__.py @@ -13,24 +13,50 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.storage.database import Database +import logging + +from synapse.storage.database import Database, make_conn +from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database +logger = logging.getLogger(__name__) + class DataStores(object): """The various data stores. These are low level interfaces to physical databases. + + Attributes: + main (DataStore) """ - def __init__(self, main_store_class, db_conn, hs): + def __init__(self, main_store_class, hs): # Note we pass in the main store class here as workers use a different main # store. - database = Database(hs) - # Check that db is correctly configured. 
- database.engine.check_database(db_conn.cursor()) + self.databases = [] + + for database_config in hs.config.database.databases: + db_name = database_config.name + engine = create_engine(database_config.config) + + with make_conn(database_config, engine) as db_conn: + logger.info("Preparing database %r...", db_name) + + engine.check_database(db_conn.cursor()) + prepare_database( + db_conn, engine, hs.config, data_stores=database_config.data_stores, + ) + + database = Database(hs, database_config, engine) + + if "main" in database_config.data_stores: + logger.info("Starting 'main' data store") + self.main = main_store_class(database, db_conn, hs) + + db_conn.commit() - prepare_database(db_conn, database.engine, config=hs.config) + self.databases.append(database) - self.main = main_store_class(database, db_conn, hs) + logger.info("Database %r prepared", db_name) diff --git a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py index add3037b69..13f4c9c72e 100644 --- a/synapse/storage/data_stores/main/client_ips.py +++ b/synapse/storage/data_stores/main/client_ips.py @@ -412,7 +412,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore): def _update_client_ips_batch(self): # If the DB pool has already terminated, don't try updating - if not self.hs.get_db_pool().running: + if not self.db.is_running(): return to_update = self._batch_row_update diff --git a/synapse/storage/database.py b/synapse/storage/database.py index ec19ae1d9d..1003dd84a5 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -24,9 +24,11 @@ from six.moves import intern, range from prometheus_client import Histogram +from twisted.enterprise import adbapi from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.config.database import DatabaseConnectionConfig from synapse.logging.context import LoggingContext, make_deferred_yieldable from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater @@ -74,6 +76,37 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { } +def make_pool( + reactor, db_config: DatabaseConnectionConfig, engine +) -> adbapi.ConnectionPool: + """Get the connection pool for the database. + """ + + return adbapi.ConnectionPool( + db_config.config["name"], + cp_reactor=reactor, + cp_openfun=engine.on_new_connection, + **db_config.config.get("args", {}) + ) + + +def make_conn(db_config: DatabaseConnectionConfig, engine): + """Make a new connection to the database and return it. + + Returns: + Connection + """ + + db_params = { + k: v + for k, v in db_config.config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = engine.module.connect(**db_params) + engine.on_new_connection(db_conn) + return db_conn + + class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object passed to the constructor. 
Adds logging and metrics to the .execute() @@ -218,10 +251,11 @@ class Database(object): _TXN_ID = 0 - def __init__(self, hs): + def __init__(self, hs, database_config: DatabaseConnectionConfig, engine): self.hs = hs self._clock = hs.get_clock() - self._db_pool = hs.get_db_pool() + self._database_config = database_config + self._db_pool = make_pool(hs.get_reactor(), database_config, engine) self.updates = BackgroundUpdater(hs, self) @@ -234,7 +268,7 @@ class Database(object): # to watch it self._txn_perf_counters = PerformanceCounters() - self.engine = hs.database_engine + self.engine = engine # A set of tables that are not safe to use native upserts in. self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys()) @@ -255,6 +289,11 @@ class Database(object): self._check_safe_to_upsert, ) + def is_running(self): + """Is the database pool currently running + """ + return self._db_pool.running + @defer.inlineCallbacks def _check_safe_to_upsert(self): """ diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index ddad17dc5a..df039a072d 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -16,8 +16,6 @@ import struct import threading -from synapse.storage.prepare_database import prepare_database - class Sqlite3Engine(object): single_threaded = True @@ -25,6 +23,9 @@ class Sqlite3Engine(object): def __init__(self, database_module, database_config): self.module = database_module + database = database_config.get("args", {}).get("database") + self._is_in_memory = database in (None, ":memory:",) + # The current max state_group, or None if we haven't looked # in the DB yet. self._current_state_group_id = None @@ -59,7 +60,16 @@ class Sqlite3Engine(object): return sql def on_new_connection(self, db_conn): - prepare_database(db_conn, self, config=None) + + # We need to import here to avoid an import loop. + from synapse.storage.prepare_database import prepare_database + + if self._is_in_memory: + # In memory databases need to be rebuilt each time. Ideally we'd + # reuse the same connection as we do when starting up, but that + # would involve using adbapi before we have started the reactor. + prepare_database(db_conn, self, config=None) + db_conn.create_function("rank", 1, _rank) def is_deadlock(self, error): diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 731e1c9d9c..b4194b44ee 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -41,7 +41,7 @@ class UpgradeDatabaseException(PrepareDatabaseException): pass -def prepare_database(db_conn, database_engine, config): +def prepare_database(db_conn, database_engine, config, data_stores=["main"]): """Prepares a database for usage. Will either create all necessary tables or upgrade from an older schema version. @@ -54,11 +54,10 @@ def prepare_database(db_conn, database_engine, config): config (synapse.config.homeserver.HomeServerConfig|None): application config, or None if we are connecting to an existing database which we expect to be configured already + data_stores (list[str]): The name of the data stores that will be used + with this database. Defaults to all data stores. """ - # For now we only have the one datastore. 
- data_stores = ["main"] - try: cur = db_conn.cursor() version_info = _get_or_create_schema_state(cur, database_engine) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 92b8726093..596ddc6970 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -64,28 +64,29 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): mock_federation_client = Mock(spec=["put_json"]) mock_federation_client.put_json.return_value = defer.succeed((200, "OK")) + datastores = Mock() + datastores.main = Mock( + spec=[ + # Bits that Federation needs + "prep_send_transaction", + "delivered_txn", + "get_received_txn_response", + "set_received_txn_response", + "get_destination_retry_timings", + "get_devices_by_remote", + # Bits that user_directory needs + "get_user_directory_stream_pos", + "get_current_state_deltas", + "get_device_updates_by_remote", + ] + ) + hs = self.setup_test_homeserver( - datastore=( - Mock( - spec=[ - # Bits that Federation needs - "prep_send_transaction", - "delivered_txn", - "get_received_txn_response", - "set_received_txn_response", - "get_destination_retry_timings", - "get_device_updates_by_remote", - # Bits that user_directory needs - "get_user_directory_stream_pos", - "get_current_state_deltas", - ] - ) - ), - notifier=Mock(), - http_client=mock_federation_client, - keyring=mock_keyring, + notifier=Mock(), http_client=mock_federation_client, keyring=mock_keyring ) + hs.datastores = datastores + return hs def prepare(self, reactor, clock, hs): diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 3dae83c543..2a1e7c7166 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -20,7 +20,7 @@ from synapse.replication.tcp.client import ( ReplicationClientHandler, ) from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory -from synapse.storage.database import Database +from synapse.storage.database import make_conn from tests import unittest from tests.server import FakeTransport @@ -41,10 +41,12 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): + db_config = hs.config.database.get_single_database() self.master_store = self.hs.get_datastore() self.storage = hs.get_storage() + database = hs.get_datastores().databases[0] self.slaved_store = self.STORE_TYPE( - Database(hs), self.hs.get_db_conn(), self.hs + database, make_conn(db_config, database.engine), self.hs ) self.event_id = 0 diff --git a/tests/server.py b/tests/server.py index 2b7cf4242e..a554dfdd57 100644 --- a/tests/server.py +++ b/tests/server.py @@ -302,41 +302,42 @@ def setup_test_homeserver(cleanup_func, *args, **kwargs): Set up a synchronous test server, driven by the reactor used by the homeserver. """ - d = _sth(cleanup_func, *args, **kwargs).result + server = _sth(cleanup_func, *args, **kwargs) - if isinstance(d, Failure): - d.raiseException() + database = server.config.database.get_single_database() # Make the thread pool synchronous. 
- clock = d.get_clock() - pool = d.get_db_pool() - - def runWithConnection(func, *args, **kwargs): - return threads.deferToThreadPool( - pool._reactor, - pool.threadpool, - pool._runWithConnection, - func, - *args, - **kwargs - ) - - def runInteraction(interaction, *args, **kwargs): - return threads.deferToThreadPool( - pool._reactor, - pool.threadpool, - pool._runInteraction, - interaction, - *args, - **kwargs - ) + clock = server.get_clock() + + for database in server.get_datastores().databases: + pool = database._db_pool + + def runWithConnection(func, *args, **kwargs): + return threads.deferToThreadPool( + pool._reactor, + pool.threadpool, + pool._runWithConnection, + func, + *args, + **kwargs + ) + + def runInteraction(interaction, *args, **kwargs): + return threads.deferToThreadPool( + pool._reactor, + pool.threadpool, + pool._runInteraction, + interaction, + *args, + **kwargs + ) - if pool: pool.runWithConnection = runWithConnection pool.runInteraction = runInteraction pool.threadpool = ThreadPool(clock._reactor) pool.running = True - return d + + return server def get_clock(): diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 2e521e9ab7..fd52512696 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -28,7 +28,7 @@ from synapse.storage.data_stores.main.appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore, ) -from synapse.storage.database import Database +from synapse.storage.database import Database, make_conn from tests import unittest from tests.utils import setup_test_homeserver @@ -55,8 +55,10 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): self._add_appservice("token2", "as2", "some_url", "some_hs_token", "bob") self._add_appservice("token3", "as3", "some_url", "some_hs_token", "bob") # must be done after inserts - database = Database(hs) - self.store = ApplicationServiceStore(database, hs.get_db_conn(), hs) + database = hs.get_datastores().databases[0] + self.store = ApplicationServiceStore( + database, make_conn(database._database_config, database.engine), hs + ) def tearDown(self): # TODO: suboptimal that we need to create files for tests! 
@@ -111,9 +113,6 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs.config.event_cache_size = 1 hs.config.password_providers = [] - self.db_pool = hs.get_db_pool() - self.engine = hs.database_engine - self.as_list = [ {"token": "token1", "url": "https://matrix-as.org", "id": "id_1"}, {"token": "alpha_tok", "url": "https://alpha.com", "id": "id_alpha"}, @@ -125,8 +124,15 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.as_yaml_files = [] - database = Database(hs) - self.store = TestTransactionStore(database, hs.get_db_conn(), hs) + # We assume there is only one database in these tests + database = hs.get_datastores().databases[0] + self.db_pool = database._db_pool + self.engine = database.engine + + db_config = hs.config.get_single_database() + self.store = TestTransactionStore( + database, make_conn(db_config, self.engine), hs + ) def _add_service(self, url, as_token, id): as_yaml = dict( @@ -419,7 +425,10 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): hs.config.event_cache_size = 1 hs.config.password_providers = [] - ApplicationServiceStore(Database(hs), hs.get_db_conn(), hs) + database = hs.get_datastores().databases[0] + ApplicationServiceStore( + database, make_conn(database._database_config, database.engine), hs + ) @defer.inlineCallbacks def test_duplicate_ids(self): @@ -435,7 +444,10 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: - ApplicationServiceStore(Database(hs), hs.get_db_conn(), hs) + database = hs.get_datastores().databases[0] + ApplicationServiceStore( + database, make_conn(database._database_config, database.engine), hs + ) e = cm.exception self.assertIn(f1, str(e)) @@ -456,7 +468,10 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: - ApplicationServiceStore(Database(hs), hs.get_db_conn(), hs) + database = hs.get_datastores().databases[0] + ApplicationServiceStore( + database, make_conn(database._database_config, database.engine), hs + ) e = cm.exception self.assertIn(f1, str(e)) diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index 537cfe9f64..cdee0a9e60 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -52,15 +52,17 @@ class SQLBaseStoreTestCase(unittest.TestCase): config = Mock() config._disable_native_upserts = True config.event_cache_size = 1 - config.database_config = {"name": "sqlite3"} - engine = create_engine(config.database_config) + hs = TestHomeServer("test", config=config) + + sqlite_config = {"name": "sqlite3"} + engine = create_engine(sqlite_config) fake_engine = Mock(wraps=engine) fake_engine.can_native_upsert = False - hs = TestHomeServer( - "test", db_pool=self.db_pool, config=config, database_engine=fake_engine - ) - self.datastore = SQLBaseStore(Database(hs), None, hs) + db = Database(Mock(), Mock(config=sqlite_config), fake_engine) + db._db_pool = self.db_pool + + self.datastore = SQLBaseStore(db, None, hs) @defer.inlineCallbacks def test_insert_1col(self): diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 4578cc3b60..ed5786865a 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -26,7 +26,6 @@ class RegistrationStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): hs = yield setup_test_homeserver(self.addCleanup) - self.db_pool = hs.get_db_pool() 
self.store = hs.get_datastore() diff --git a/tests/utils.py b/tests/utils.py index 585f305b9a..9f5bf40b4b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -30,6 +30,7 @@ from twisted.internet import defer, reactor from synapse.api.constants import EventTypes from synapse.api.errors import CodeMessageException, cs_error from synapse.api.room_versions import RoomVersions +from synapse.config.database import DatabaseConnectionConfig from synapse.config.homeserver import HomeServerConfig from synapse.config.server import DEFAULT_ROOM_VERSION from synapse.federation.transport import server as federation_server @@ -177,7 +178,6 @@ class TestHomeServer(HomeServer): DATASTORE_CLASS = DataStore -@defer.inlineCallbacks def setup_test_homeserver( cleanup_func, name="test", @@ -214,7 +214,7 @@ def setup_test_homeserver( if USE_POSTGRES_FOR_TESTS: test_db = "synapse_test_%s" % uuid.uuid4().hex - config.database_config = { + database_config = { "name": "psycopg2", "args": { "database": test_db, @@ -226,12 +226,15 @@ def setup_test_homeserver( }, } else: - config.database_config = { + database_config = { "name": "sqlite3", "args": {"database": ":memory:", "cp_min": 1, "cp_max": 1}, } - db_engine = create_engine(config.database_config) + database = DatabaseConnectionConfig("master", database_config, ["main"]) + config.database.databases = [database] + + db_engine = create_engine(database.config) # Create the database before we actually try and connect to it, based off # the template database we generate in setupdb() @@ -251,11 +254,6 @@ def setup_test_homeserver( cur.close() db_conn.close() - # we need to configure the connection pool to run the on_new_connection - # function, so that we can test code that uses custom sqlite functions - # (like rank). - config.database_config["args"]["cp_openfun"] = db_engine.on_new_connection - if datastore is None: hs = homeserverToUse( name, @@ -267,21 +265,19 @@ def setup_test_homeserver( **kargs ) - # Prepare the DB on SQLite -- PostgreSQL is a copy of an already up to - # date db - if not isinstance(db_engine, PostgresEngine): - db_conn = hs.get_db_conn() - yield prepare_database(db_conn, db_engine, config) - db_conn.commit() - db_conn.close() + hs.setup() + if homeserverToUse.__name__ == "TestHomeServer": + hs.setup_master() + + if isinstance(db_engine, PostgresEngine): + database = hs.get_datastores().databases[0] - else: # We need to do cleanup on PostgreSQL def cleanup(): import psycopg2 # Close all the db pools - hs.get_db_pool().close() + database._db_pool.close() dropped = False @@ -320,23 +316,12 @@ def setup_test_homeserver( # Register the cleanup hook cleanup_func(cleanup) - hs.setup() - if homeserverToUse.__name__ == "TestHomeServer": - hs.setup_master() else: - # If we have been given an explicit datastore we probably want to mock - # out the DataStores somehow too. This all feels a bit wrong, but then - # mocking the stores feels wrong too. 
- datastores = Mock(datastore=datastore) - hs = homeserverToUse( name, - db_pool=None, datastore=datastore, - datastores=datastores, config=config, version_string="Synapse/tests", - database_engine=db_engine, tls_server_context_factory=Mock(), tls_client_options_factory=Mock(), reactor=reactor, -- cgit 1.5.1 From de2d267375069c2d22bceb0d6ef9c6f5a77380e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 7 Feb 2020 11:14:19 +0000 Subject: Allow moving group read APIs to workers (#6866) --- changelog.d/6866.feature | 1 + docs/workers.md | 8 + synapse/app/client_reader.py | 3 + synapse/app/federation_reader.py | 2 + synapse/groups/groups_server.py | 377 +++++----- synapse/handlers/groups_local.py | 270 +++---- synapse/replication/slave/storage/groups.py | 14 +- synapse/server.py | 14 +- synapse/storage/data_stores/main/group_server.py | 880 ++++++++++++----------- 9 files changed, 802 insertions(+), 767 deletions(-) create mode 100644 changelog.d/6866.feature (limited to 'synapse/server.py') diff --git a/changelog.d/6866.feature b/changelog.d/6866.feature new file mode 100644 index 0000000000..256feab6ff --- /dev/null +++ b/changelog.d/6866.feature @@ -0,0 +1 @@ +Add ability to run some group APIs on workers. diff --git a/docs/workers.md b/docs/workers.md index 09a9d8a7b8..82442d6a0a 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -177,8 +177,13 @@ endpoints matching the following regular expressions: ^/_matrix/federation/v1/event_auth/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/send/ + ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query +Additionally, the following REST endpoints can be handled for GET requests: + + ^/_matrix/federation/v1/groups/ + The above endpoints should all be routed to the federation_reader worker by the reverse-proxy configuration. 
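To see what the two new federation patterns do and do not capture, here is a
small self-contained check (the example paths are invented):

    import re

    # The two patterns added above, copied verbatim from the list.
    new_patterns = [
        re.compile(r"^/_matrix/federation/v1/get_groups_publicised$"),
        re.compile(r"^/_matrix/federation/v1/groups/"),  # GET requests only
    ]

    for path in (
        "/_matrix/federation/v1/get_groups_publicised",        # exact match
        "/_matrix/federation/v1/groups/%2Bg%3Ax/profile",      # prefix match
        "/_matrix/federation/v1/get_groups_publicised/extra",  # rejected by $
    ):
        print(path, any(p.match(path) for p in new_patterns))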
@@ -254,10 +259,13 @@ following regular expressions: ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ ^/_matrix/client/versions$ ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$ + ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$ + ^/_matrix/client/(api/v1|r0|unstable)/get_groups_publicised$ Additionally, the following REST endpoints can be handled for GET requests: ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$ + ^/_matrix/client/(api/v1|r0|unstable)/groups/.*$ Additionally, the following REST endpoints can be handled, but all requests must be routed to the same instance: diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index ca96da6a4a..7fa91a3b11 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -57,6 +57,7 @@ from synapse.rest.client.v1.room import ( RoomStateRestServlet, ) from synapse.rest.client.v1.voip import VoipRestServlet +from synapse.rest.client.v2_alpha import groups from synapse.rest.client.v2_alpha.account import ThreepidRestServlet from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet @@ -124,6 +125,8 @@ class ClientReaderServer(HomeServer): PushRuleRestServlet(self).register(resource) VersionsRestServlet(self).register(resource) + groups.register_servlets(self, resource) + resources.update({"/_matrix/client": resource}) root_resource = create_resource_tree(resources, NoResource()) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 1f1cea1416..5e17ef1396 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -35,6 +35,7 @@ from synapse.replication.slave.storage.account_data import SlavedAccountDataStor from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.groups import SlavedGroupServerStore from synapse.replication.slave.storage.keys import SlavedKeyStore from synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore @@ -66,6 +67,7 @@ class FederationReaderSlavedStore( SlavedEventStore, SlavedKeyStore, SlavedRegistrationStore, + SlavedGroupServerStore, RoomStore, DirectoryStore, SlavedTransactionStore, diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index 0ec9be3cb5..c106abae21 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) # TODO: Flairs -class GroupsServerHandler(object): +class GroupsServerWorkerHandler(object): def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() @@ -51,9 +51,6 @@ class GroupsServerHandler(object): self.transport_client = hs.get_federation_transport_client() self.profile_handler = hs.get_profile_handler() - # Ensure attestations get renewed - hs.get_groups_attestation_renewer() - @defer.inlineCallbacks def check_group_is_ours( self, group_id, requester_user_id, and_exists=False, and_is_admin=None @@ -167,6 +164,197 @@ class GroupsServerHandler(object): "user": membership_info, } + @defer.inlineCallbacks + def get_group_categories(self, group_id, requester_user_id): + """Get all categories in a group (as seen by user) + """ + yield self.check_group_is_ours(group_id, requester_user_id, 
and_exists=True) + + categories = yield self.store.get_group_categories(group_id=group_id) + return {"categories": categories} + + @defer.inlineCallbacks + def get_group_category(self, group_id, requester_user_id, category_id): + """Get a specific category in a group (as seen by user) + """ + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + res = yield self.store.get_group_category( + group_id=group_id, category_id=category_id + ) + + logger.info("group %s", res) + + return res + + @defer.inlineCallbacks + def get_group_roles(self, group_id, requester_user_id): + """Get all roles in a group (as seen by user) + """ + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + roles = yield self.store.get_group_roles(group_id=group_id) + return {"roles": roles} + + @defer.inlineCallbacks + def get_group_role(self, group_id, requester_user_id, role_id): + """Get a specific role in a group (as seen by user) + """ + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + res = yield self.store.get_group_role(group_id=group_id, role_id=role_id) + return res + + @defer.inlineCallbacks + def get_group_profile(self, group_id, requester_user_id): + """Get the group profile as seen by requester_user_id + """ + + yield self.check_group_is_ours(group_id, requester_user_id) + + group = yield self.store.get_group(group_id) + + if group: + cols = [ + "name", + "short_description", + "long_description", + "avatar_url", + "is_public", + ] + group_description = {key: group[key] for key in cols} + group_description["is_openly_joinable"] = group["join_policy"] == "open" + + return group_description + else: + raise SynapseError(404, "Unknown group") + + @defer.inlineCallbacks + def get_users_in_group(self, group_id, requester_user_id): + """Get the users in group as seen by requester_user_id. + + The ordering is arbitrary at the moment + """ + + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + is_user_in_group = yield self.store.is_user_in_group( + requester_user_id, group_id + ) + + user_results = yield self.store.get_users_in_group( + group_id, include_private=is_user_in_group + ) + + chunk = [] + for user_result in user_results: + g_user_id = user_result["user_id"] + is_public = user_result["is_public"] + is_privileged = user_result["is_admin"] + + entry = {"user_id": g_user_id} + + profile = yield self.profile_handler.get_profile_from_cache(g_user_id) + entry.update(profile) + + entry["is_public"] = bool(is_public) + entry["is_privileged"] = bool(is_privileged) + + if not self.is_mine_id(g_user_id): + attestation = yield self.store.get_remote_attestation( + group_id, g_user_id + ) + if not attestation: + continue + + entry["attestation"] = attestation + else: + entry["attestation"] = self.attestations.create_attestation( + group_id, g_user_id + ) + + chunk.append(entry) + + # TODO: If admin add lists of users whose attestations have timed out + + return {"chunk": chunk, "total_user_count_estimate": len(user_results)} + + @defer.inlineCallbacks + def get_invited_users_in_group(self, group_id, requester_user_id): + """Get the users that have been invited to a group as seen by requester_user_id. 
+ + The ordering is arbitrary at the moment + """ + + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + is_user_in_group = yield self.store.is_user_in_group( + requester_user_id, group_id + ) + + if not is_user_in_group: + raise SynapseError(403, "User not in group") + + invited_users = yield self.store.get_invited_users_in_group(group_id) + + user_profiles = [] + + for user_id in invited_users: + user_profile = {"user_id": user_id} + try: + profile = yield self.profile_handler.get_profile_from_cache(user_id) + user_profile.update(profile) + except Exception as e: + logger.warning("Error getting profile for %s: %s", user_id, e) + user_profiles.append(user_profile) + + return {"chunk": user_profiles, "total_user_count_estimate": len(invited_users)} + + @defer.inlineCallbacks + def get_rooms_in_group(self, group_id, requester_user_id): + """Get the rooms in group as seen by requester_user_id + + This returns rooms in order of decreasing number of joined users + """ + + yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) + + is_user_in_group = yield self.store.is_user_in_group( + requester_user_id, group_id + ) + + room_results = yield self.store.get_rooms_in_group( + group_id, include_private=is_user_in_group + ) + + chunk = [] + for room_result in room_results: + room_id = room_result["room_id"] + + joined_users = yield self.store.get_users_in_room(room_id) + entry = yield self.room_list_handler.generate_room_entry( + room_id, len(joined_users), with_alias=False, allow_private=True + ) + + if not entry: + continue + + entry["is_public"] = bool(room_result["is_public"]) + + chunk.append(entry) + + chunk.sort(key=lambda e: -e["num_joined_members"]) + + return {"chunk": chunk, "total_room_count_estimate": len(room_results)} + + +class GroupsServerHandler(GroupsServerWorkerHandler): + def __init__(self, hs): + super(GroupsServerHandler, self).__init__(hs) + + # Ensure attestations get renewed + hs.get_groups_attestation_renewer() + @defer.inlineCallbacks def update_group_summary_room( self, group_id, requester_user_id, room_id, category_id, content @@ -229,27 +417,6 @@ class GroupsServerHandler(object): return {} - @defer.inlineCallbacks - def get_group_categories(self, group_id, requester_user_id): - """Get all categories in a group (as seen by user) - """ - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - categories = yield self.store.get_group_categories(group_id=group_id) - return {"categories": categories} - - @defer.inlineCallbacks - def get_group_category(self, group_id, requester_user_id, category_id): - """Get a specific category in a group (as seen by user) - """ - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - res = yield self.store.get_group_category( - group_id=group_id, category_id=category_id - ) - - return res - @defer.inlineCallbacks def update_group_category(self, group_id, requester_user_id, category_id, content): """Add/Update a group category @@ -284,24 +451,6 @@ class GroupsServerHandler(object): return {} - @defer.inlineCallbacks - def get_group_roles(self, group_id, requester_user_id): - """Get all roles in a group (as seen by user) - """ - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - roles = yield self.store.get_group_roles(group_id=group_id) - return {"roles": roles} - - @defer.inlineCallbacks - def get_group_role(self, group_id, requester_user_id, role_id): - """Get a specific role in a group (as seen by user) - 
""" - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - res = yield self.store.get_group_role(group_id=group_id, role_id=role_id) - return res - @defer.inlineCallbacks def update_group_role(self, group_id, requester_user_id, role_id, content): """Add/update a role in a group @@ -370,30 +519,6 @@ class GroupsServerHandler(object): return {} - @defer.inlineCallbacks - def get_group_profile(self, group_id, requester_user_id): - """Get the group profile as seen by requester_user_id - """ - - yield self.check_group_is_ours(group_id, requester_user_id) - - group = yield self.store.get_group(group_id) - - if group: - cols = [ - "name", - "short_description", - "long_description", - "avatar_url", - "is_public", - ] - group_description = {key: group[key] for key in cols} - group_description["is_openly_joinable"] = group["join_policy"] == "open" - - return group_description - else: - raise SynapseError(404, "Unknown group") - @defer.inlineCallbacks def update_group_profile(self, group_id, requester_user_id, content): """Update the group profile @@ -412,124 +537,6 @@ class GroupsServerHandler(object): yield self.store.update_group_profile(group_id, profile) - @defer.inlineCallbacks - def get_users_in_group(self, group_id, requester_user_id): - """Get the users in group as seen by requester_user_id. - - The ordering is arbitrary at the moment - """ - - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = yield self.store.is_user_in_group( - requester_user_id, group_id - ) - - user_results = yield self.store.get_users_in_group( - group_id, include_private=is_user_in_group - ) - - chunk = [] - for user_result in user_results: - g_user_id = user_result["user_id"] - is_public = user_result["is_public"] - is_privileged = user_result["is_admin"] - - entry = {"user_id": g_user_id} - - profile = yield self.profile_handler.get_profile_from_cache(g_user_id) - entry.update(profile) - - entry["is_public"] = bool(is_public) - entry["is_privileged"] = bool(is_privileged) - - if not self.is_mine_id(g_user_id): - attestation = yield self.store.get_remote_attestation( - group_id, g_user_id - ) - if not attestation: - continue - - entry["attestation"] = attestation - else: - entry["attestation"] = self.attestations.create_attestation( - group_id, g_user_id - ) - - chunk.append(entry) - - # TODO: If admin add lists of users whose attestations have timed out - - return {"chunk": chunk, "total_user_count_estimate": len(user_results)} - - @defer.inlineCallbacks - def get_invited_users_in_group(self, group_id, requester_user_id): - """Get the users that have been invited to a group as seen by requester_user_id. 
- - The ordering is arbitrary at the moment - """ - - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = yield self.store.is_user_in_group( - requester_user_id, group_id - ) - - if not is_user_in_group: - raise SynapseError(403, "User not in group") - - invited_users = yield self.store.get_invited_users_in_group(group_id) - - user_profiles = [] - - for user_id in invited_users: - user_profile = {"user_id": user_id} - try: - profile = yield self.profile_handler.get_profile_from_cache(user_id) - user_profile.update(profile) - except Exception as e: - logger.warning("Error getting profile for %s: %s", user_id, e) - user_profiles.append(user_profile) - - return {"chunk": user_profiles, "total_user_count_estimate": len(invited_users)} - - @defer.inlineCallbacks - def get_rooms_in_group(self, group_id, requester_user_id): - """Get the rooms in group as seen by requester_user_id - - This returns rooms in order of decreasing number of joined users - """ - - yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = yield self.store.is_user_in_group( - requester_user_id, group_id - ) - - room_results = yield self.store.get_rooms_in_group( - group_id, include_private=is_user_in_group - ) - - chunk = [] - for room_result in room_results: - room_id = room_result["room_id"] - - joined_users = yield self.store.get_users_in_room(room_id) - entry = yield self.room_list_handler.generate_room_entry( - room_id, len(joined_users), with_alias=False, allow_private=True - ) - - if not entry: - continue - - entry["is_public"] = bool(room_result["is_public"]) - - chunk.append(entry) - - chunk.sort(key=lambda e: -e["num_joined_members"]) - - return {"chunk": chunk, "total_room_count_estimate": len(room_results)} - @defer.inlineCallbacks def add_room_to_group(self, group_id, requester_user_id, room_id, content): """Add room to group diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 319565510f..ad22415782 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -63,7 +63,7 @@ def _create_rerouter(func_name): return f -class GroupsLocalHandler(object): +class GroupsLocalWorkerHandler(object): def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() @@ -81,40 +81,17 @@ class GroupsLocalHandler(object): self.profile_handler = hs.get_profile_handler() - # Ensure attestations get renewed - hs.get_groups_attestation_renewer() - # The following functions merely route the query to the local groups server # or federation depending on if the group is local or remote get_group_profile = _create_rerouter("get_group_profile") - update_group_profile = _create_rerouter("update_group_profile") get_rooms_in_group = _create_rerouter("get_rooms_in_group") - get_invited_users_in_group = _create_rerouter("get_invited_users_in_group") - - add_room_to_group = _create_rerouter("add_room_to_group") - update_room_in_group = _create_rerouter("update_room_in_group") - remove_room_from_group = _create_rerouter("remove_room_from_group") - - update_group_summary_room = _create_rerouter("update_group_summary_room") - delete_group_summary_room = _create_rerouter("delete_group_summary_room") - - update_group_category = _create_rerouter("update_group_category") - delete_group_category = _create_rerouter("delete_group_category") get_group_category = _create_rerouter("get_group_category") get_group_categories = _create_rerouter("get_group_categories") - - update_group_summary_user = 
_create_rerouter("update_group_summary_user") - delete_group_summary_user = _create_rerouter("delete_group_summary_user") - - update_group_role = _create_rerouter("update_group_role") - delete_group_role = _create_rerouter("delete_group_role") get_group_role = _create_rerouter("get_group_role") get_group_roles = _create_rerouter("get_group_roles") - set_group_join_policy = _create_rerouter("set_group_join_policy") - @defer.inlineCallbacks def get_group_summary(self, group_id, requester_user_id): """Get the group summary for a group. @@ -169,6 +146,144 @@ class GroupsLocalHandler(object): return res + @defer.inlineCallbacks + def get_users_in_group(self, group_id, requester_user_id): + """Get users in a group + """ + if self.is_mine_id(group_id): + res = yield self.groups_server_handler.get_users_in_group( + group_id, requester_user_id + ) + return res + + group_server_name = get_domain_from_id(group_id) + + try: + res = yield self.transport_client.get_users_in_group( + get_domain_from_id(group_id), group_id, requester_user_id + ) + except HttpResponseException as e: + raise e.to_synapse_error() + except RequestSendFailed: + raise SynapseError(502, "Failed to contact group server") + + chunk = res["chunk"] + valid_entries = [] + for entry in chunk: + g_user_id = entry["user_id"] + attestation = entry.pop("attestation", {}) + try: + if get_domain_from_id(g_user_id) != group_server_name: + yield self.attestations.verify_attestation( + attestation, + group_id=group_id, + user_id=g_user_id, + server_name=get_domain_from_id(g_user_id), + ) + valid_entries.append(entry) + except Exception as e: + logger.info("Failed to verify user is in group: %s", e) + + res["chunk"] = valid_entries + + return res + + @defer.inlineCallbacks + def get_joined_groups(self, user_id): + group_ids = yield self.store.get_joined_groups(user_id) + return {"groups": group_ids} + + @defer.inlineCallbacks + def get_publicised_groups_for_user(self, user_id): + if self.hs.is_mine_id(user_id): + result = yield self.store.get_publicised_groups_for_user(user_id) + + # Check AS associated groups for this user - this depends on the + # RegExps in the AS registration file (under `users`) + for app_service in self.store.get_app_services(): + result.extend(app_service.get_groups_for_user(user_id)) + + return {"groups": result} + else: + try: + bulk_result = yield self.transport_client.bulk_get_publicised_groups( + get_domain_from_id(user_id), [user_id] + ) + except HttpResponseException as e: + raise e.to_synapse_error() + except RequestSendFailed: + raise SynapseError(502, "Failed to contact group server") + + result = bulk_result.get("users", {}).get(user_id) + # TODO: Verify attestations + return {"groups": result} + + @defer.inlineCallbacks + def bulk_get_publicised_groups(self, user_ids, proxy=True): + destinations = {} + local_users = set() + + for user_id in user_ids: + if self.hs.is_mine_id(user_id): + local_users.add(user_id) + else: + destinations.setdefault(get_domain_from_id(user_id), set()).add(user_id) + + if not proxy and destinations: + raise SynapseError(400, "Some user_ids are not local") + + results = {} + failed_results = [] + for destination, dest_user_ids in iteritems(destinations): + try: + r = yield self.transport_client.bulk_get_publicised_groups( + destination, list(dest_user_ids) + ) + results.update(r["users"]) + except Exception: + failed_results.extend(dest_user_ids) + + for uid in local_users: + results[uid] = yield self.store.get_publicised_groups_for_user(uid) + + # Check AS associated groups for this 
user - this depends on the + # RegExps in the AS registration file (under `users`) + for app_service in self.store.get_app_services(): + results[uid].extend(app_service.get_groups_for_user(uid)) + + return {"users": results} + + +class GroupsLocalHandler(GroupsLocalWorkerHandler): + def __init__(self, hs): + super(GroupsLocalHandler, self).__init__(hs) + + # Ensure attestations get renewed + hs.get_groups_attestation_renewer() + + # The following functions merely route the query to the local groups server + # or federation depending on if the group is local or remote + + update_group_profile = _create_rerouter("update_group_profile") + + add_room_to_group = _create_rerouter("add_room_to_group") + update_room_in_group = _create_rerouter("update_room_in_group") + remove_room_from_group = _create_rerouter("remove_room_from_group") + + update_group_summary_room = _create_rerouter("update_group_summary_room") + delete_group_summary_room = _create_rerouter("delete_group_summary_room") + + update_group_category = _create_rerouter("update_group_category") + delete_group_category = _create_rerouter("delete_group_category") + + update_group_summary_user = _create_rerouter("update_group_summary_user") + delete_group_summary_user = _create_rerouter("delete_group_summary_user") + + update_group_role = _create_rerouter("update_group_role") + delete_group_role = _create_rerouter("delete_group_role") + + set_group_join_policy = _create_rerouter("set_group_join_policy") + @defer.inlineCallbacks def create_group(self, group_id, user_id, content): """Create a group @@ -219,48 +334,6 @@ class GroupsLocalHandler(object): return res - @defer.inlineCallbacks - def get_users_in_group(self, group_id, requester_user_id): - """Get users in a group - """ - if self.is_mine_id(group_id): - res = yield self.groups_server_handler.get_users_in_group( - group_id, requester_user_id - ) - return res - - group_server_name = get_domain_from_id(group_id) - - try: - res = yield self.transport_client.get_users_in_group( - get_domain_from_id(group_id), group_id, requester_user_id - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - chunk = res["chunk"] - valid_entries = [] - for entry in chunk: - g_user_id = entry["user_id"] - attestation = entry.pop("attestation", {}) - try: - if get_domain_from_id(g_user_id) != group_server_name: - yield self.attestations.verify_attestation( - attestation, - group_id=group_id, - user_id=g_user_id, - server_name=get_domain_from_id(g_user_id), - ) - valid_entries.append(entry) - except Exception as e: - logger.info("Failed to verify user is in group: %s", e) - - res["chunk"] = valid_entries - - return res - @defer.inlineCallbacks def join_group(self, group_id, user_id, content): """Request to join a group @@ -452,68 +525,3 @@ class GroupsLocalHandler(object): group_id, user_id, membership="leave" ) self.notifier.on_new_event("groups_key", token, users=[user_id]) - - @defer.inlineCallbacks - def get_joined_groups(self, user_id): - group_ids = yield self.store.get_joined_groups(user_id) - return {"groups": group_ids} - - @defer.inlineCallbacks - def get_publicised_groups_for_user(self, user_id): - if self.hs.is_mine_id(user_id): - result = yield self.store.get_publicised_groups_for_user(user_id) - - # Check AS associated groups for this user - this depends on the - # RegExps in the AS registration file (under `users`) - for app_service in self.store.get_app_services(): - 
result.extend(app_service.get_groups_for_user(user_id)) - - return {"groups": result} - else: - try: - bulk_result = yield self.transport_client.bulk_get_publicised_groups( - get_domain_from_id(user_id), [user_id] - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - result = bulk_result.get("users", {}).get(user_id) - # TODO: Verify attestations - return {"groups": result} - - @defer.inlineCallbacks - def bulk_get_publicised_groups(self, user_ids, proxy=True): - destinations = {} - local_users = set() - - for user_id in user_ids: - if self.hs.is_mine_id(user_id): - local_users.add(user_id) - else: - destinations.setdefault(get_domain_from_id(user_id), set()).add(user_id) - - if not proxy and destinations: - raise SynapseError(400, "Some user_ids are not local") - - results = {} - failed_results = [] - for destination, dest_user_ids in iteritems(destinations): - try: - r = yield self.transport_client.bulk_get_publicised_groups( - destination, list(dest_user_ids) - ) - results.update(r["users"]) - except Exception: - failed_results.extend(dest_user_ids) - - for uid in local_users: - results[uid] = yield self.store.get_publicised_groups_for_user(uid) - - # Check AS associated groups for this user - this depends on the - # RegExps in the AS registration file (under `users`) - for app_service in self.store.get_app_services(): - results[uid].extend(app_service.get_groups_for_user(uid)) - - return {"users": results} diff --git a/synapse/replication/slave/storage/groups.py b/synapse/replication/slave/storage/groups.py index 69a4ae42f9..2d4fd08cf5 100644 --- a/synapse/replication/slave/storage/groups.py +++ b/synapse/replication/slave/storage/groups.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.storage import DataStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.data_stores.main.group_server import GroupServerWorkerStore from synapse.storage.database import Database from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedGroupServerStore(BaseSlavedStore): +class SlavedGroupServerStore(GroupServerWorkerStore, BaseSlavedStore): def __init__(self, database: Database, db_conn, hs): super(SlavedGroupServerStore, self).__init__(database, db_conn, hs) @@ -35,9 +34,8 @@ class SlavedGroupServerStore(BaseSlavedStore): self._group_updates_id_gen.get_current_token(), ) - get_groups_changes_for_user = __func__(DataStore.get_groups_changes_for_user) - get_group_stream_token = __func__(DataStore.get_group_stream_token) - get_all_groups_for_user = __func__(DataStore.get_all_groups_for_user) + def get_group_stream_token(self): + return self._group_updates_id_gen.get_current_token() def stream_positions(self): result = super(SlavedGroupServerStore, self).stream_positions() diff --git a/synapse/server.py b/synapse/server.py index 7926867b77..fd2f69e928 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -50,7 +50,7 @@ from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.sender import FederationSender from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer -from synapse.groups.groups_server import GroupsServerHandler +from synapse.groups.groups_server import GroupsServerHandler, GroupsServerWorkerHandler from synapse.handlers import Handlers from synapse.handlers.account_validity import AccountValidityHandler from synapse.handlers.acme import AcmeHandler @@ -62,7 +62,7 @@ from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler from synapse.handlers.events import EventHandler, EventStreamHandler -from synapse.handlers.groups_local import GroupsLocalHandler +from synapse.handlers.groups_local import GroupsLocalHandler, GroupsLocalWorkerHandler from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.message import EventCreationHandler, MessageHandler from synapse.handlers.pagination import PaginationHandler @@ -460,10 +460,16 @@ class HomeServer(object): return UserDirectoryHandler(self) def build_groups_local_handler(self): - return GroupsLocalHandler(self) + if self.config.worker_app: + return GroupsLocalWorkerHandler(self) + else: + return GroupsLocalHandler(self) def build_groups_server_handler(self): - return GroupsServerHandler(self) + if self.config.worker_app: + return GroupsServerWorkerHandler(self) + else: + return GroupsServerHandler(self) def build_groups_attestation_signing(self): return GroupAttestationSigning(self) diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py index 6acd45e9f3..0963e6c250 100644 --- a/synapse/storage/data_stores/main/group_server.py +++ b/synapse/storage/data_stores/main/group_server.py @@ -27,21 +27,7 @@ _DEFAULT_CATEGORY_ID = "" _DEFAULT_ROLE_ID = "" -class GroupServerStore(SQLBaseStore): - def set_group_join_policy(self, group_id, join_policy): - 
"""Set the join policy of a group. - - join_policy can be one of: - * "invite" - * "open" - """ - return self.db.simple_update_one( - table="groups", - keyvalues={"group_id": group_id}, - updatevalues={"join_policy": join_policy}, - desc="set_group_join_policy", - ) - +class GroupServerWorkerStore(SQLBaseStore): def get_group(self, group_id): return self.db.simple_select_one( table="groups", @@ -157,6 +143,366 @@ class GroupServerStore(SQLBaseStore): "get_rooms_for_summary", _get_rooms_for_summary_txn ) + @defer.inlineCallbacks + def get_group_categories(self, group_id): + rows = yield self.db.simple_select_list( + table="group_room_categories", + keyvalues={"group_id": group_id}, + retcols=("category_id", "is_public", "profile"), + desc="get_group_categories", + ) + + return { + row["category_id"]: { + "is_public": row["is_public"], + "profile": json.loads(row["profile"]), + } + for row in rows + } + + @defer.inlineCallbacks + def get_group_category(self, group_id, category_id): + category = yield self.db.simple_select_one( + table="group_room_categories", + keyvalues={"group_id": group_id, "category_id": category_id}, + retcols=("is_public", "profile"), + desc="get_group_category", + ) + + category["profile"] = json.loads(category["profile"]) + + return category + + @defer.inlineCallbacks + def get_group_roles(self, group_id): + rows = yield self.db.simple_select_list( + table="group_roles", + keyvalues={"group_id": group_id}, + retcols=("role_id", "is_public", "profile"), + desc="get_group_roles", + ) + + return { + row["role_id"]: { + "is_public": row["is_public"], + "profile": json.loads(row["profile"]), + } + for row in rows + } + + @defer.inlineCallbacks + def get_group_role(self, group_id, role_id): + role = yield self.db.simple_select_one( + table="group_roles", + keyvalues={"group_id": group_id, "role_id": role_id}, + retcols=("is_public", "profile"), + desc="get_group_role", + ) + + role["profile"] = json.loads(role["profile"]) + + return role + + def get_local_groups_for_room(self, room_id): + """Get all of the local group that contain a given room + Args: + room_id (str): The ID of a room + Returns: + Deferred[list[str]]: A twisted.Deferred containing a list of group ids + containing this room + """ + return self.db.simple_select_onecol( + table="group_rooms", + keyvalues={"room_id": room_id}, + retcol="group_id", + desc="get_local_groups_for_room", + ) + + def get_users_for_summary_by_role(self, group_id, include_private=False): + """Get the users and roles that should be included in a summary request + + Returns ([users], [roles]) + """ + + def _get_users_for_summary_txn(txn): + keyvalues = {"group_id": group_id} + if not include_private: + keyvalues["is_public"] = True + + sql = """ + SELECT user_id, is_public, role_id, user_order + FROM group_summary_users + WHERE group_id = ? + """ + + if not include_private: + sql += " AND is_public = ?" + txn.execute(sql, (group_id, True)) + else: + txn.execute(sql, (group_id,)) + + users = [ + { + "user_id": row[0], + "is_public": row[1], + "role_id": row[2] if row[2] != _DEFAULT_ROLE_ID else None, + "order": row[3], + } + for row in txn + ] + + sql = """ + SELECT role_id, is_public, profile, role_order + FROM group_summary_roles + INNER JOIN group_roles USING (group_id, role_id) + WHERE group_id = ? + """ + + if not include_private: + sql += " AND is_public = ?" 
+ txn.execute(sql, (group_id, True)) + else: + txn.execute(sql, (group_id,)) + + roles = { + row[0]: { + "is_public": row[1], + "profile": json.loads(row[2]), + "order": row[3], + } + for row in txn + } + + return users, roles + + return self.db.runInteraction( + "get_users_for_summary_by_role", _get_users_for_summary_txn + ) + + def is_user_in_group(self, user_id, group_id): + return self.db.simple_select_one_onecol( + table="group_users", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcol="user_id", + allow_none=True, + desc="is_user_in_group", + ).addCallback(lambda r: bool(r)) + + def is_user_admin_in_group(self, group_id, user_id): + return self.db.simple_select_one_onecol( + table="group_users", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcol="is_admin", + allow_none=True, + desc="is_user_admin_in_group", + ) + + def is_user_invited_to_local_group(self, group_id, user_id): + """Has the group server invited a user? + """ + return self.db.simple_select_one_onecol( + table="group_invites", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcol="user_id", + desc="is_user_invited_to_local_group", + allow_none=True, + ) + + def get_users_membership_info_in_group(self, group_id, user_id): + """Get a dict describing the membership of a user in a group. + + Example if joined: + + { + "membership": "join", + "is_public": True, + "is_privileged": False, + } + + Returns an empty dict if the user is not join/invite/etc + """ + + def _get_users_membership_in_group_txn(txn): + row = self.db.simple_select_one_txn( + txn, + table="group_users", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcols=("is_admin", "is_public"), + allow_none=True, + ) + + if row: + return { + "membership": "join", + "is_public": row["is_public"], + "is_privileged": row["is_admin"], + } + + row = self.db.simple_select_one_onecol_txn( + txn, + table="group_invites", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcol="user_id", + allow_none=True, + ) + + if row: + return {"membership": "invite"} + + return {} + + return self.db.runInteraction( + "get_users_membership_info_in_group", _get_users_membership_in_group_txn + ) + + def get_publicised_groups_for_user(self, user_id): + """Get all groups a user is publicising + """ + return self.db.simple_select_onecol( + table="local_group_membership", + keyvalues={"user_id": user_id, "membership": "join", "is_publicised": True}, + retcol="group_id", + desc="get_publicised_groups_for_user", + ) + + def get_attestations_need_renewals(self, valid_until_ms): + """Get all attestations that need to be renewed until givent time + """ + + def _get_attestations_need_renewals_txn(txn): + sql = """ + SELECT group_id, user_id FROM group_attestations_renewals + WHERE valid_until_ms <= ? + """ + txn.execute(sql, (valid_until_ms,)) + return self.db.cursor_to_dict(txn) + + return self.db.runInteraction( + "get_attestations_need_renewals", _get_attestations_need_renewals_txn + ) + + @defer.inlineCallbacks + def get_remote_attestation(self, group_id, user_id): + """Get the attestation that proves the remote agrees that the user is + in the group. 
+ """ + row = yield self.db.simple_select_one( + table="group_attestations_remote", + keyvalues={"group_id": group_id, "user_id": user_id}, + retcols=("valid_until_ms", "attestation_json"), + desc="get_remote_attestation", + allow_none=True, + ) + + now = int(self._clock.time_msec()) + if row and now < row["valid_until_ms"]: + return json.loads(row["attestation_json"]) + + return None + + def get_joined_groups(self, user_id): + return self.db.simple_select_onecol( + table="local_group_membership", + keyvalues={"user_id": user_id, "membership": "join"}, + retcol="group_id", + desc="get_joined_groups", + ) + + def get_all_groups_for_user(self, user_id, now_token): + def _get_all_groups_for_user_txn(txn): + sql = """ + SELECT group_id, type, membership, u.content + FROM local_group_updates AS u + INNER JOIN local_group_membership USING (group_id, user_id) + WHERE user_id = ? AND membership != 'leave' + AND stream_id <= ? + """ + txn.execute(sql, (user_id, now_token)) + return [ + { + "group_id": row[0], + "type": row[1], + "membership": row[2], + "content": json.loads(row[3]), + } + for row in txn + ] + + return self.db.runInteraction( + "get_all_groups_for_user", _get_all_groups_for_user_txn + ) + + def get_groups_changes_for_user(self, user_id, from_token, to_token): + from_token = int(from_token) + has_changed = self._group_updates_stream_cache.has_entity_changed( + user_id, from_token + ) + if not has_changed: + return defer.succeed([]) + + def _get_groups_changes_for_user_txn(txn): + sql = """ + SELECT group_id, membership, type, u.content + FROM local_group_updates AS u + INNER JOIN local_group_membership USING (group_id, user_id) + WHERE user_id = ? AND ? < stream_id AND stream_id <= ? + """ + txn.execute(sql, (user_id, from_token, to_token)) + return [ + { + "group_id": group_id, + "membership": membership, + "type": gtype, + "content": json.loads(content_json), + } + for group_id, membership, gtype, content_json in txn + ] + + return self.db.runInteraction( + "get_groups_changes_for_user", _get_groups_changes_for_user_txn + ) + + def get_all_groups_changes(self, from_token, to_token, limit): + from_token = int(from_token) + has_changed = self._group_updates_stream_cache.has_any_entity_changed( + from_token + ) + if not has_changed: + return defer.succeed([]) + + def _get_all_groups_changes_txn(txn): + sql = """ + SELECT stream_id, group_id, user_id, type, content + FROM local_group_updates + WHERE ? < stream_id AND stream_id <= ? + LIMIT ? + """ + txn.execute(sql, (from_token, to_token, limit)) + return [ + (stream_id, group_id, user_id, gtype, json.loads(content_json)) + for stream_id, group_id, user_id, gtype, content_json in txn + ] + + return self.db.runInteraction( + "get_all_groups_changes", _get_all_groups_changes_txn + ) + + +class GroupServerStore(GroupServerWorkerStore): + def set_group_join_policy(self, group_id, join_policy): + """Set the join policy of a group. 
+ + join_policy can be one of: + * "invite" + * "open" + """ + return self.db.simple_update_one( + table="groups", + keyvalues={"group_id": group_id}, + updatevalues={"join_policy": join_policy}, + desc="set_group_join_policy", + ) + def add_room_to_summary(self, group_id, room_id, category_id, order, is_public): return self.db.runInteraction( "add_room_to_summary", @@ -299,36 +645,6 @@ class GroupServerStore(SQLBaseStore): desc="remove_room_from_summary", ) - @defer.inlineCallbacks - def get_group_categories(self, group_id): - rows = yield self.db.simple_select_list( - table="group_room_categories", - keyvalues={"group_id": group_id}, - retcols=("category_id", "is_public", "profile"), - desc="get_group_categories", - ) - - return { - row["category_id"]: { - "is_public": row["is_public"], - "profile": json.loads(row["profile"]), - } - for row in rows - } - - @defer.inlineCallbacks - def get_group_category(self, group_id, category_id): - category = yield self.db.simple_select_one( - table="group_room_categories", - keyvalues={"group_id": group_id, "category_id": category_id}, - retcols=("is_public", "profile"), - desc="get_group_category", - ) - - category["profile"] = json.loads(category["profile"]) - - return category - def upsert_group_category(self, group_id, category_id, profile, is_public): """Add/update room category for group """ @@ -360,36 +676,6 @@ class GroupServerStore(SQLBaseStore): desc="remove_group_category", ) - @defer.inlineCallbacks - def get_group_roles(self, group_id): - rows = yield self.db.simple_select_list( - table="group_roles", - keyvalues={"group_id": group_id}, - retcols=("role_id", "is_public", "profile"), - desc="get_group_roles", - ) - - return { - row["role_id"]: { - "is_public": row["is_public"], - "profile": json.loads(row["profile"]), - } - for row in rows - } - - @defer.inlineCallbacks - def get_group_role(self, group_id, role_id): - role = yield self.db.simple_select_one( - table="group_roles", - keyvalues={"group_id": group_id, "role_id": role_id}, - retcols=("is_public", "profile"), - desc="get_group_role", - ) - - role["profile"] = json.loads(role["profile"]) - - return role - def upsert_group_role(self, group_id, role_id, profile, is_public): """Add/remove user role """ @@ -469,251 +755,99 @@ class GroupServerStore(SQLBaseStore): if not role_exists: raise SynapseError(400, "Role doesn't exist") - # TODO: Check role is part of the summary already - role_exists = self.db.simple_select_one_onecol_txn( - txn, - table="group_summary_roles", - keyvalues={"group_id": group_id, "role_id": role_id}, - retcol="group_id", - allow_none=True, - ) - if not role_exists: - # If not, add it with an order larger than all others - txn.execute( - """ - INSERT INTO group_summary_roles - (group_id, role_id, role_order) - SELECT ?, ?, COALESCE(MAX(role_order), 0) + 1 - FROM group_summary_roles - WHERE group_id = ? AND role_id = ? - """, - (group_id, role_id, group_id, role_id), - ) - - existing = self.db.simple_select_one_txn( - txn, - table="group_summary_users", - keyvalues={"group_id": group_id, "user_id": user_id, "role_id": role_id}, - retcols=("user_order", "is_public"), - allow_none=True, - ) - - if order is not None: - # Shuffle other users orders that come after the given order - sql = """ - UPDATE group_summary_users SET user_order = user_order + 1 - WHERE group_id = ? AND role_id = ? AND user_order >= ? 
- """ - txn.execute(sql, (group_id, role_id, order)) - elif not existing: - sql = """ - SELECT COALESCE(MAX(user_order), 0) + 1 FROM group_summary_users - WHERE group_id = ? AND role_id = ? - """ - txn.execute(sql, (group_id, role_id)) - (order,) = txn.fetchone() - - if existing: - to_update = {} - if order is not None: - to_update["user_order"] = order - if is_public is not None: - to_update["is_public"] = is_public - self.db.simple_update_txn( - txn, - table="group_summary_users", - keyvalues={ - "group_id": group_id, - "role_id": role_id, - "user_id": user_id, - }, - values=to_update, - ) - else: - if is_public is None: - is_public = True - - self.db.simple_insert_txn( - txn, - table="group_summary_users", - values={ - "group_id": group_id, - "role_id": role_id, - "user_id": user_id, - "user_order": order, - "is_public": is_public, - }, - ) - - def remove_user_from_summary(self, group_id, user_id, role_id): - if role_id is None: - role_id = _DEFAULT_ROLE_ID - - return self.db.simple_delete( - table="group_summary_users", - keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, - desc="remove_user_from_summary", - ) - - def get_local_groups_for_room(self, room_id): - """Get all of the local group that contain a given room - Args: - room_id (str): The ID of a room - Returns: - Deferred[list[str]]: A twisted.Deferred containing a list of group ids - containing this room - """ - return self.db.simple_select_onecol( - table="group_rooms", - keyvalues={"room_id": room_id}, - retcol="group_id", - desc="get_local_groups_for_room", - ) - - def get_users_for_summary_by_role(self, group_id, include_private=False): - """Get the users and roles that should be included in a summary request - - Returns ([users], [roles]) - """ - - def _get_users_for_summary_txn(txn): - keyvalues = {"group_id": group_id} - if not include_private: - keyvalues["is_public"] = True - - sql = """ - SELECT user_id, is_public, role_id, user_order - FROM group_summary_users - WHERE group_id = ? - """ - - if not include_private: - sql += " AND is_public = ?" - txn.execute(sql, (group_id, True)) - else: - txn.execute(sql, (group_id,)) - - users = [ - { - "user_id": row[0], - "is_public": row[1], - "role_id": row[2] if row[2] != _DEFAULT_ROLE_ID else None, - "order": row[3], - } - for row in txn - ] - - sql = """ - SELECT role_id, is_public, profile, role_order - FROM group_summary_roles - INNER JOIN group_roles USING (group_id, role_id) - WHERE group_id = ? - """ - - if not include_private: - sql += " AND is_public = ?" 
- txn.execute(sql, (group_id, True)) - else: - txn.execute(sql, (group_id,)) - - roles = { - row[0]: { - "is_public": row[1], - "profile": json.loads(row[2]), - "order": row[3], - } - for row in txn - } - - return users, roles - - return self.db.runInteraction( - "get_users_for_summary_by_role", _get_users_for_summary_txn - ) - - def is_user_in_group(self, user_id, group_id): - return self.db.simple_select_one_onecol( - table="group_users", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcol="user_id", - allow_none=True, - desc="is_user_in_group", - ).addCallback(lambda r: bool(r)) - - def is_user_admin_in_group(self, group_id, user_id): - return self.db.simple_select_one_onecol( - table="group_users", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcol="is_admin", - allow_none=True, - desc="is_user_admin_in_group", - ) - - def add_group_invite(self, group_id, user_id): - """Record that the group server has invited a user - """ - return self.db.simple_insert( - table="group_invites", - values={"group_id": group_id, "user_id": user_id}, - desc="add_group_invite", - ) + # TODO: Check role is part of the summary already + role_exists = self.db.simple_select_one_onecol_txn( + txn, + table="group_summary_roles", + keyvalues={"group_id": group_id, "role_id": role_id}, + retcol="group_id", + allow_none=True, + ) + if not role_exists: + # If not, add it with an order larger than all others + txn.execute( + """ + INSERT INTO group_summary_roles + (group_id, role_id, role_order) + SELECT ?, ?, COALESCE(MAX(role_order), 0) + 1 + FROM group_summary_roles + WHERE group_id = ? AND role_id = ? + """, + (group_id, role_id, group_id, role_id), + ) - def is_user_invited_to_local_group(self, group_id, user_id): - """Has the group server invited a user? - """ - return self.db.simple_select_one_onecol( - table="group_invites", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcol="user_id", - desc="is_user_invited_to_local_group", + existing = self.db.simple_select_one_txn( + txn, + table="group_summary_users", + keyvalues={"group_id": group_id, "user_id": user_id, "role_id": role_id}, + retcols=("user_order", "is_public"), allow_none=True, ) - def get_users_membership_info_in_group(self, group_id, user_id): - """Get a dict describing the membership of a user in a group. - - Example if joined: - - { - "membership": "join", - "is_public": True, - "is_privileged": False, - } - - Returns an empty dict if the user is not join/invite/etc - """ + if order is not None: + # Shuffle other users orders that come after the given order + sql = """ + UPDATE group_summary_users SET user_order = user_order + 1 + WHERE group_id = ? AND role_id = ? AND user_order >= ? + """ + txn.execute(sql, (group_id, role_id, order)) + elif not existing: + sql = """ + SELECT COALESCE(MAX(user_order), 0) + 1 FROM group_summary_users + WHERE group_id = ? AND role_id = ? 
+ """ + txn.execute(sql, (group_id, role_id)) + (order,) = txn.fetchone() - def _get_users_membership_in_group_txn(txn): - row = self.db.simple_select_one_txn( + if existing: + to_update = {} + if order is not None: + to_update["user_order"] = order + if is_public is not None: + to_update["is_public"] = is_public + self.db.simple_update_txn( txn, - table="group_users", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcols=("is_admin", "is_public"), - allow_none=True, + table="group_summary_users", + keyvalues={ + "group_id": group_id, + "role_id": role_id, + "user_id": user_id, + }, + values=to_update, ) + else: + if is_public is None: + is_public = True - if row: - return { - "membership": "join", - "is_public": row["is_public"], - "is_privileged": row["is_admin"], - } - - row = self.db.simple_select_one_onecol_txn( + self.db.simple_insert_txn( txn, - table="group_invites", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcol="user_id", - allow_none=True, + table="group_summary_users", + values={ + "group_id": group_id, + "role_id": role_id, + "user_id": user_id, + "user_order": order, + "is_public": is_public, + }, ) - if row: - return {"membership": "invite"} + def remove_user_from_summary(self, group_id, user_id, role_id): + if role_id is None: + role_id = _DEFAULT_ROLE_ID - return {} + return self.db.simple_delete( + table="group_summary_users", + keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, + desc="remove_user_from_summary", + ) - return self.db.runInteraction( - "get_users_membership_info_in_group", _get_users_membership_in_group_txn + def add_group_invite(self, group_id, user_id): + """Record that the group server has invited a user + """ + return self.db.simple_insert( + table="group_invites", + values={"group_id": group_id, "user_id": user_id}, + desc="add_group_invite", ) def add_user_to_group( @@ -846,16 +980,6 @@ class GroupServerStore(SQLBaseStore): "remove_room_from_group", _remove_room_from_group_txn ) - def get_publicised_groups_for_user(self, user_id): - """Get all groups a user is publicising - """ - return self.db.simple_select_onecol( - table="local_group_membership", - keyvalues={"user_id": user_id, "membership": "join", "is_publicised": True}, - retcol="group_id", - desc="get_publicised_groups_for_user", - ) - def update_group_publicity(self, group_id, user_id, publicise): """Update whether the user is publicising their membership of the group """ @@ -1000,22 +1124,6 @@ class GroupServerStore(SQLBaseStore): desc="update_group_profile", ) - def get_attestations_need_renewals(self, valid_until_ms): - """Get all attestations that need to be renewed until givent time - """ - - def _get_attestations_need_renewals_txn(txn): - sql = """ - SELECT group_id, user_id FROM group_attestations_renewals - WHERE valid_until_ms <= ? - """ - txn.execute(sql, (valid_until_ms,)) - return self.db.cursor_to_dict(txn) - - return self.db.runInteraction( - "get_attestations_need_renewals", _get_attestations_need_renewals_txn - ) - def update_attestation_renewal(self, group_id, user_id, attestation): """Update an attestation that we have renewed """ @@ -1054,112 +1162,6 @@ class GroupServerStore(SQLBaseStore): desc="remove_attestation_renewal", ) - @defer.inlineCallbacks - def get_remote_attestation(self, group_id, user_id): - """Get the attestation that proves the remote agrees that the user is - in the group. 
- """ - row = yield self.db.simple_select_one( - table="group_attestations_remote", - keyvalues={"group_id": group_id, "user_id": user_id}, - retcols=("valid_until_ms", "attestation_json"), - desc="get_remote_attestation", - allow_none=True, - ) - - now = int(self._clock.time_msec()) - if row and now < row["valid_until_ms"]: - return json.loads(row["attestation_json"]) - - return None - - def get_joined_groups(self, user_id): - return self.db.simple_select_onecol( - table="local_group_membership", - keyvalues={"user_id": user_id, "membership": "join"}, - retcol="group_id", - desc="get_joined_groups", - ) - - def get_all_groups_for_user(self, user_id, now_token): - def _get_all_groups_for_user_txn(txn): - sql = """ - SELECT group_id, type, membership, u.content - FROM local_group_updates AS u - INNER JOIN local_group_membership USING (group_id, user_id) - WHERE user_id = ? AND membership != 'leave' - AND stream_id <= ? - """ - txn.execute(sql, (user_id, now_token)) - return [ - { - "group_id": row[0], - "type": row[1], - "membership": row[2], - "content": json.loads(row[3]), - } - for row in txn - ] - - return self.db.runInteraction( - "get_all_groups_for_user", _get_all_groups_for_user_txn - ) - - def get_groups_changes_for_user(self, user_id, from_token, to_token): - from_token = int(from_token) - has_changed = self._group_updates_stream_cache.has_entity_changed( - user_id, from_token - ) - if not has_changed: - return defer.succeed([]) - - def _get_groups_changes_for_user_txn(txn): - sql = """ - SELECT group_id, membership, type, u.content - FROM local_group_updates AS u - INNER JOIN local_group_membership USING (group_id, user_id) - WHERE user_id = ? AND ? < stream_id AND stream_id <= ? - """ - txn.execute(sql, (user_id, from_token, to_token)) - return [ - { - "group_id": group_id, - "membership": membership, - "type": gtype, - "content": json.loads(content_json), - } - for group_id, membership, gtype, content_json in txn - ] - - return self.db.runInteraction( - "get_groups_changes_for_user", _get_groups_changes_for_user_txn - ) - - def get_all_groups_changes(self, from_token, to_token, limit): - from_token = int(from_token) - has_changed = self._group_updates_stream_cache.has_any_entity_changed( - from_token - ) - if not has_changed: - return defer.succeed([]) - - def _get_all_groups_changes_txn(txn): - sql = """ - SELECT stream_id, group_id, user_id, type, content - FROM local_group_updates - WHERE ? < stream_id AND stream_id <= ? - LIMIT ? - """ - txn.execute(sql, (from_token, to_token, limit)) - return [ - (stream_id, group_id, user_id, gtype, json.loads(content_json)) - for stream_id, group_id, user_id, gtype, content_json in txn - ] - - return self.db.runInteraction( - "get_all_groups_changes", _get_all_groups_changes_txn - ) - def get_group_stream_token(self): return self._group_updates_id_gen.get_current_token() -- cgit 1.5.1 From c37db0211e36cd298426ff8811e547b0acd10bf4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 17 Mar 2020 22:32:25 +0100 Subject: Share SSL contexts for non-federation requests (#7094) Extends #5794 etc to the SimpleHttpClient so that it also applies to non-federation requests. Fixes #7092. 
--- changelog.d/7094.misc | 1 + synapse/crypto/context_factory.py | 68 ++++++++++++++-------- synapse/http/client.py | 3 - synapse/http/federation/matrix_federation_agent.py | 2 +- synapse/server.py | 6 +- tests/config/test_tls.py | 29 +++++---- .../federation/test_matrix_federation_agent.py | 6 +- 7 files changed, 71 insertions(+), 44 deletions(-) create mode 100644 changelog.d/7094.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7094.misc b/changelog.d/7094.misc new file mode 100644 index 0000000000..aa093ee3c0 --- /dev/null +++ b/changelog.d/7094.misc @@ -0,0 +1 @@ +Improve performance when making HTTPS requests to sygnal, sydent, etc, by sharing the SSL context object between connections. diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py index e93f0b3705..a5a2a7815d 100644 --- a/synapse/crypto/context_factory.py +++ b/synapse/crypto/context_factory.py @@ -75,7 +75,7 @@ class ServerContextFactory(ContextFactory): @implementer(IPolicyForHTTPS) -class ClientTLSOptionsFactory(object): +class FederationPolicyForHTTPS(object): """Factory for Twisted SSLClientConnectionCreators that are used to make connections to remote servers for federation. @@ -103,15 +103,15 @@ class ClientTLSOptionsFactory(object): # let us do). minTLS = _TLS_VERSION_MAP[config.federation_client_minimum_tls_version] - self._verify_ssl = CertificateOptions( + _verify_ssl = CertificateOptions( trustRoot=trust_root, insecurelyLowerMinimumTo=minTLS ) - self._verify_ssl_context = self._verify_ssl.getContext() - self._verify_ssl_context.set_info_callback(self._context_info_cb) + self._verify_ssl_context = _verify_ssl.getContext() + self._verify_ssl_context.set_info_callback(_context_info_cb) - self._no_verify_ssl = CertificateOptions(insecurelyLowerMinimumTo=minTLS) - self._no_verify_ssl_context = self._no_verify_ssl.getContext() - self._no_verify_ssl_context.set_info_callback(self._context_info_cb) + _no_verify_ssl = CertificateOptions(insecurelyLowerMinimumTo=minTLS) + self._no_verify_ssl_context = _no_verify_ssl.getContext() + self._no_verify_ssl_context.set_info_callback(_context_info_cb) def get_options(self, host: bytes): @@ -136,23 +136,6 @@ class ClientTLSOptionsFactory(object): return SSLClientConnectionCreator(host, ssl_context, should_verify) - @staticmethod - def _context_info_cb(ssl_connection, where, ret): - """The 'information callback' for our openssl context object.""" - # we assume that the app_data on the connection object has been set to - # a TLSMemoryBIOProtocol object. (This is done by SSLClientConnectionCreator) - tls_protocol = ssl_connection.get_app_data() - try: - # ... we further assume that SSLClientConnectionCreator has set the - # '_synapse_tls_verifier' attribute to a ConnectionVerifier object. - tls_protocol._synapse_tls_verifier.verify_context_info_cb( - ssl_connection, where - ) - except: # noqa: E722, taken from the twisted implementation - logger.exception("Error during info_callback") - f = Failure() - tls_protocol.failVerification(f) - def creatorForNetloc(self, hostname, port): """Implements the IPolicyForHTTPS interace so that this can be passed directly to agents. @@ -160,6 +143,43 @@ class ClientTLSOptionsFactory(object): return self.get_options(hostname) +@implementer(IPolicyForHTTPS) +class RegularPolicyForHTTPS(object): + """Factory for Twisted SSLClientConnectionCreators that are used to make connections + to remote servers, for other than federation. 
+ + Always uses the same OpenSSL context object, which uses the default OpenSSL CA + trust root. + """ + + def __init__(self): + trust_root = platformTrust() + self._ssl_context = CertificateOptions(trustRoot=trust_root).getContext() + self._ssl_context.set_info_callback(_context_info_cb) + + def creatorForNetloc(self, hostname, port): + return SSLClientConnectionCreator(hostname, self._ssl_context, True) + + +def _context_info_cb(ssl_connection, where, ret): + """The 'information callback' for our openssl context objects. + + Note: Once this is set as the info callback on a Context object, the Context should + only be used with the SSLClientConnectionCreator. + """ + # we assume that the app_data on the connection object has been set to + # a TLSMemoryBIOProtocol object. (This is done by SSLClientConnectionCreator) + tls_protocol = ssl_connection.get_app_data() + try: + # ... we further assume that SSLClientConnectionCreator has set the + # '_synapse_tls_verifier' attribute to a ConnectionVerifier object. + tls_protocol._synapse_tls_verifier.verify_context_info_cb(ssl_connection, where) + except: # noqa: E722, taken from the twisted implementation + logger.exception("Error during info_callback") + f = Failure() + tls_protocol.failVerification(f) + + @implementer(IOpenSSLClientConnectionCreator) class SSLClientConnectionCreator(object): """Creates openssl connection objects for client connections. diff --git a/synapse/http/client.py b/synapse/http/client.py index d4c285445e..3797545824 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -244,9 +244,6 @@ class SimpleHttpClient(object): pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 - # The default context factory in Twisted 14.0.0 (which we require) is - # BrowserLikePolicyForHTTPS which will do regular cert validation - # 'like a browser' self.agent = ProxyAgent( self.reactor, connectTimeout=15, diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 647d26dc56..f5f917f5ae 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -45,7 +45,7 @@ class MatrixFederationAgent(object): Args: reactor (IReactor): twisted reactor to use for underlying requests - tls_client_options_factory (ClientTLSOptionsFactory|None): + tls_client_options_factory (FederationPolicyForHTTPS|None): factory to use for fetching client tls options, or none to disable TLS. 
_srv_resolver (SrvResolver|None): diff --git a/synapse/server.py b/synapse/server.py index fd2f69e928..1b980371de 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -26,7 +26,6 @@ import logging import os from twisted.mail.smtp import sendmail -from twisted.web.client import BrowserLikePolicyForHTTPS from synapse.api.auth import Auth from synapse.api.filtering import Filtering @@ -35,6 +34,7 @@ from synapse.appservice.api import ApplicationServiceApi from synapse.appservice.scheduler import ApplicationServiceScheduler from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory +from synapse.crypto.context_factory import RegularPolicyForHTTPS from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker @@ -310,7 +310,7 @@ class HomeServer(object): return ( InsecureInterceptableContextFactory() if self.config.use_insecure_ssl_client_just_for_testing_do_not_use - else BrowserLikePolicyForHTTPS() + else RegularPolicyForHTTPS() ) def build_simple_http_client(self): @@ -420,7 +420,7 @@ class HomeServer(object): return PusherPool(self) def build_http_client(self): - tls_client_options_factory = context_factory.ClientTLSOptionsFactory( + tls_client_options_factory = context_factory.FederationPolicyForHTTPS( self.config ) return MatrixFederationHttpClient(self, tls_client_options_factory) diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py index 1be6ff563b..ec32d4b1ca 100644 --- a/tests/config/test_tls.py +++ b/tests/config/test_tls.py @@ -23,7 +23,7 @@ from OpenSSL import SSL from synapse.config._base import Config, RootConfig from synapse.config.tls import ConfigError, TlsConfig -from synapse.crypto.context_factory import ClientTLSOptionsFactory +from synapse.crypto.context_factory import FederationPolicyForHTTPS from tests.unittest import TestCase @@ -180,12 +180,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= t = TestConfig() t.read_config(config, config_dir_path="", data_dir_path="") - cf = ClientTLSOptionsFactory(t) + cf = FederationPolicyForHTTPS(t) + options = _get_ssl_context_options(cf._verify_ssl_context) # The context has had NO_TLSv1_1 and NO_TLSv1_0 set, but not NO_TLSv1_2 - self.assertNotEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1, 0) - self.assertNotEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_1, 0) - self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_2, 0) + self.assertNotEqual(options & SSL.OP_NO_TLSv1, 0) + self.assertNotEqual(options & SSL.OP_NO_TLSv1_1, 0) + self.assertEqual(options & SSL.OP_NO_TLSv1_2, 0) def test_tls_client_minimum_set_passed_through_1_0(self): """ @@ -195,12 +196,13 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= t = TestConfig() t.read_config(config, config_dir_path="", data_dir_path="") - cf = ClientTLSOptionsFactory(t) + cf = FederationPolicyForHTTPS(t) + options = _get_ssl_context_options(cf._verify_ssl_context) # The context has not had any of the NO_TLS set. 
- self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1, 0) - self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_1, 0) - self.assertEqual(cf._verify_ssl._options & SSL.OP_NO_TLSv1_2, 0) + self.assertEqual(options & SSL.OP_NO_TLSv1, 0) + self.assertEqual(options & SSL.OP_NO_TLSv1_1, 0) + self.assertEqual(options & SSL.OP_NO_TLSv1_2, 0) def test_acme_disabled_in_generated_config_no_acme_domain_provied(self): """ @@ -273,7 +275,7 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= t = TestConfig() t.read_config(config, config_dir_path="", data_dir_path="") - cf = ClientTLSOptionsFactory(t) + cf = FederationPolicyForHTTPS(t) # Not in the whitelist opts = cf.get_options(b"notexample.com") @@ -282,3 +284,10 @@ s4niecZKPBizL6aucT59CsunNmmb5Glq8rlAcU+1ZTZZzGYqVYhF6axB9Qg= # Caught by the wildcard opts = cf.get_options(idna.encode("テスト.ドメイン.テスト")) self.assertFalse(opts._verifier._verify_certs) + + +def _get_ssl_context_options(ssl_context: SSL.Context) -> int: + """get the options bits from an openssl context object""" + # the OpenSSL.SSL.Context wrapper doesn't expose get_options, so we have to + # use the low-level interface + return SSL._lib.SSL_CTX_get_options(ssl_context._context) diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index cfcd98ff7d..fdc1d918ff 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -31,7 +31,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IPolicyForHTTPS from synapse.config.homeserver import HomeServerConfig -from synapse.crypto.context_factory import ClientTLSOptionsFactory +from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.http.federation.srv_resolver import Server from synapse.http.federation.well_known_resolver import ( @@ -79,7 +79,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self._config = config = HomeServerConfig() config.parse_config_dict(config_dict, "", "") - self.tls_factory = ClientTLSOptionsFactory(config) + self.tls_factory = FederationPolicyForHTTPS(config) self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) self.had_well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds) @@ -715,7 +715,7 @@ class MatrixFederationAgentTests(unittest.TestCase): config = default_config("test", parse=True) # Build a new agent and WellKnownResolver with a different tls factory - tls_factory = ClientTLSOptionsFactory(config) + tls_factory = FederationPolicyForHTTPS(config) agent = MatrixFederationAgent( reactor=self.reactor, tls_client_options_factory=tls_factory, -- cgit 1.5.1 From 4cff617df1ba6f241fee6957cc44859f57edcc0e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Mar 2020 14:54:01 +0000 Subject: Move catchup of replication streams to worker. (#7024) This changes the replication protocol so that the server does not send down `RDATA` for rows that happened before the client connected. Instead, the server will send a `POSITION` and clients then query the database (or master out of band) to get up to date. 
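
In outline, the new client-side flow implemented below: on connect a worker
sends a bare `REPLICATE`, the server answers with one `POSITION` per stream,
and the worker then pages itself up to date before applying live `RDATA`. A
schematic sketch of that catchup loop (simplified from `on_POSITION` in the
patch; it omits the connection-closed checks and the replay of RDATA batched
up during catchup that the real code also handles):

    # Schematic only -- the real logic lives in
    # ClientReplicationStreamProtocol.on_POSITION below.
    async def catch_up_stream(self, stream_name: str, position_token: int):
        stream = self.streams[stream_name]

        # Where this worker had previously streamed up to.
        current_token = self.handler.get_streams_to_replicate()[stream_name]

        # Page through the missed updates; `limited` stays True while the
        # server had more rows than one batch could return.
        limited = True
        while limited:
            updates, current_token, limited = await stream.get_updates_since(
                current_token, position_token
            )
            if updates:
                await self.handler.on_rdata(
                    stream_name,
                    current_token,
                    [stream.parse_row(update[1]) for update in updates],
                )

        # Only now is the stream caught up and live RDATA safe to apply.
        await self.handler.on_position(stream_name, position_token)

Depending on the stream, `get_updates_since` either queries the worker's own
database or calls the new `get_repl_stream_updates` HTTP endpoint on the
master (used for streams, such as typing, that are not persisted).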
---
 changelog.d/7024.misc | 1 +
 docs/tcp_replication.md | 46 ++---
 synapse/app/generic_worker.py | 3 +
 synapse/federation/sender/__init__.py | 9 +
 synapse/replication/http/__init__.py | 2 +
 synapse/replication/http/streams.py | 78 ++++++++
 synapse/replication/slave/storage/_base.py | 14 +-
 synapse/replication/slave/storage/pushers.py | 3 +
 synapse/replication/tcp/client.py | 3 +-
 synapse/replication/tcp/commands.py | 34 +---
 synapse/replication/tcp/protocol.py | 206 ++++++++--------------
 synapse/replication/tcp/resource.py | 19 +-
 synapse/replication/tcp/streams/__init__.py | 8 +-
 synapse/replication/tcp/streams/_base.py | 160 +++++++++++------
 synapse/replication/tcp/streams/events.py | 5 +-
 synapse/replication/tcp/streams/federation.py | 19 +-
 synapse/server.py | 5 +
 synapse/storage/data_stores/main/cache.py | 44 ++---
 synapse/storage/data_stores/main/deviceinbox.py | 88 ++++-----
 synapse/storage/data_stores/main/events.py | 114 ------------
 synapse/storage/data_stores/main/events_worker.py | 114 ++++++++++++
 synapse/storage/data_stores/main/room.py | 40 ++---
 tests/replication/tcp/streams/_base.py | 55 ++++--
 tests/replication/tcp/streams/test_receipts.py | 52 +++++-
 24 files changed, 635 insertions(+), 487 deletions(-)
 create mode 100644 changelog.d/7024.misc
 create mode 100644 synapse/replication/http/streams.py
(limited to 'synapse/server.py')

diff --git a/changelog.d/7024.misc b/changelog.d/7024.misc
new file mode 100644
index 0000000000..676f285377
--- /dev/null
+++ b/changelog.d/7024.misc
@@ -0,0 +1 @@
+Move catchup of replication streams logic to worker.
diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md
index e3a4634b14..d4f7d9ec18 100644
--- a/docs/tcp_replication.md
+++ b/docs/tcp_replication.md
@@ -14,16 +14,16 @@ example flow would be (where '>' indicates master to worker and '<'
 worker to master flows):

     > SERVER example.com
-    < REPLICATE events 53
+    < REPLICATE
+    > POSITION events 53
     > RDATA events 54 ["$foo1:bar.com", ...]
     > RDATA events 55 ["$foo4:bar.com", ...]

-The example shows the server accepting a new connection and sending its
-identity with the `SERVER` command, followed by the client asking to
-subscribe to the `events` stream from the token `53`. The server then
-periodically sends `RDATA` commands which have the format
-`RDATA <stream_name> <token> <row>`, where the format of `<row>` is
-defined by the individual streams.
+The example shows the server accepting a new connection and sending its identity
+with the `SERVER` command, followed by the client asking the server to send the
+position of all streams. The server then periodically sends `RDATA` commands
+which have the format `RDATA <stream_name> <token> <row>`, where the
+format of `<row>` is defined by the individual streams.

 Error reporting happens by either the client or server sending an ERROR
 command, and usually the connection will be closed.
@@ -32,9 +32,6 @@ Since the protocol is a simple line based, its possible to manually connect to
 the server using a tool like netcat. A few things should be noted when manually
 using the protocol:

-- When subscribing to a stream using `REPLICATE`, the special token
-  `NOW` can be used to get all future updates. The special stream name
-  `ALL` can be used with `NOW` to subscribe to all available streams.
 - The federation stream is only available if federation sending has been
   disabled on the main process.
 - The server will only time connections out that have sent a `PING`
@@ -91,9 +88,7 @@ The client:
 - Sends a `NAME` command, allowing the server to associate a human
   friendly name with the connection. This is optional.
 - Sends a `PING` as above
-- For each stream the client wishes to subscribe to it sends a
-  `REPLICATE` with the `stream_name` and token it wants to subscribe
-  from.
+- Sends a `REPLICATE` to get the current position of all streams.
 - On receipt of a `SERVER` command, checks that the server name matches
   the expected server name.

@@ -140,9 +135,7 @@ the wire:
     > PING 1490197665618
     < NAME synapse.app.appservice
     < PING 1490197665618
-    < REPLICATE events 1
-    < REPLICATE backfill 1
-    < REPLICATE caches 1
+    < REPLICATE
     > POSITION events 1
     > POSITION backfill 1
     > POSITION caches 1
@@ -181,9 +174,9 @@ client (C):

 #### POSITION (S)

-   The position of the stream has been updated. Sent to the client
-   after all missing updates for a stream have been sent to the client
-   and they're now up to date.
+   On receipt of a POSITION command clients should check if they have missed any
+   updates, and if so then fetch them out of band. Sent in response to a
+   REPLICATE command (but can happen at any time).

 #### ERROR (S, C)

@@ -199,20 +192,7 @@ client (C):

 #### REPLICATE (C)

-Asks the server to replicate a given stream. The syntax is:
-
-```
-    REPLICATE <stream_name> <token>
-```
-
-Where `<token>` may be either:
- * a numeric stream_id to stream updates since (exclusive)
- * `NOW` to stream all subsequent updates.
-
-The `<stream_name>` is the name of a replication stream to subscribe
-to (see [here](../synapse/replication/tcp/streams/_base.py) for a list
-of streams). It can also be `ALL` to subscribe to all known streams,
-in which case the `<token>` must be set to `NOW`.
+Asks the server for the current position of all streams.

 #### USER_SYNC (C)

diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index bd1733573b..fba7ad9551 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -401,6 +401,9 @@ class GenericWorkerTyping(object):
             self._room_serials[row.room_id] = token
             self._room_typing[row.room_id] = row.user_ids

+    def get_current_token(self) -> int:
+        return self._latest_room_serial
+

 class GenericWorkerSlavedStore(
     # FIXME(#3714): We need to add UserDirectoryStore as we write directly
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 233cb33daf..a477578e44 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -499,4 +499,13 @@ class FederationSender(object):
         self._get_per_destination_queue(destination).attempt_new_transaction()

     def get_current_token(self) -> int:
+        # Dummy implementation for case where federation sender isn't offloaded
+        # to a worker.
         return 0
+
+    async def get_replication_rows(
+        self, from_token, to_token, limit, federation_ack=None
+    ):
+        # Dummy implementation for case where federation sender isn't offloaded
+        # to a worker.
+        return []
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 28dbc6fcba..4613b2538c 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -21,6 +21,7 @@ from synapse.replication.http import (
     membership,
     register,
     send_event,
+    streams,
 )

 REPLICATION_PREFIX = "/_synapse/replication"
@@ -38,3 +39,4 @@ class ReplicationRestResource(JsonResource):
         login.register_servlets(hs, self)
         register.register_servlets(hs, self)
         devices.register_servlets(hs, self)
+        streams.register_servlets(hs, self)
diff --git a/synapse/replication/http/streams.py b/synapse/replication/http/streams.py
new file mode 100644
index 0000000000..ffd4c61993
--- /dev/null
+++ b/synapse/replication/http/streams.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+import logging
+
+from synapse.api.errors import SynapseError
+from synapse.http.servlet import parse_integer
+from synapse.replication.http._base import ReplicationEndpoint
+
+logger = logging.getLogger(__name__)
+
+
+class ReplicationGetStreamUpdates(ReplicationEndpoint):
+    """Fetches stream updates from a server. Used for streams not persisted to
+    the database, e.g. typing notifications.
+
+    The API looks like:
+
+        GET /_synapse/replication/get_repl_stream_updates/events?from_token=0&to_token=10&limit=100
+
+        200 OK
+
+        {
+            updates: [ ... ],
+            upto_token: 10,
+            limited: False,
+        }
+
+    """
+
+    NAME = "get_repl_stream_updates"
+    PATH_ARGS = ("stream_name",)
+    METHOD = "GET"
+
+    def __init__(self, hs):
+        super().__init__(hs)
+
+        # We pull the streams from the replication streamer (if we try and make
+        # them ourselves we end up in an import loop).
+ self.streams = hs.get_replication_streamer().get_streams() + + @staticmethod + def _serialize_payload(stream_name, from_token, upto_token, limit): + return {"from_token": from_token, "upto_token": upto_token, "limit": limit} + + async def _handle_request(self, request, stream_name): + stream = self.streams.get(stream_name) + if stream is None: + raise SynapseError(400, "Unknown stream") + + from_token = parse_integer(request, "from_token", required=True) + upto_token = parse_integer(request, "upto_token", required=True) + limit = parse_integer(request, "limit", required=True) + + updates, upto_token, limited = await stream.get_updates_since( + from_token, upto_token, limit + ) + + return ( + 200, + {"updates": updates, "upto_token": upto_token, "limited": limited}, + ) + + +def register_servlets(hs, http_server): + ReplicationGetStreamUpdates(hs).register(http_server) diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py index f45cbd37a0..751c799d94 100644 --- a/synapse/replication/slave/storage/_base.py +++ b/synapse/replication/slave/storage/_base.py @@ -18,8 +18,10 @@ from typing import Dict, Optional import six -from synapse.storage._base import SQLBaseStore -from synapse.storage.data_stores.main.cache import CURRENT_STATE_CACHE_NAME +from synapse.storage.data_stores.main.cache import ( + CURRENT_STATE_CACHE_NAME, + CacheInvalidationWorkerStore, +) from synapse.storage.database import Database from synapse.storage.engines import PostgresEngine @@ -35,7 +37,7 @@ def __func__(inp): return inp.__func__ -class BaseSlavedStore(SQLBaseStore): +class BaseSlavedStore(CacheInvalidationWorkerStore): def __init__(self, database: Database, db_conn, hs): super(BaseSlavedStore, self).__init__(database, db_conn, hs) if isinstance(self.database_engine, PostgresEngine): @@ -60,6 +62,12 @@ class BaseSlavedStore(SQLBaseStore): pos["caches"] = self._cache_id_gen.get_current_token() return pos + def get_cache_stream_token(self): + if self._cache_id_gen: + return self._cache_id_gen.get_current_token() + else: + return 0 + def process_replication_rows(self, stream_name, token, rows): if stream_name == "caches": if self._cache_id_gen: diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index f22c2d44a3..bce8a3d115 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -33,6 +33,9 @@ class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): result["pushers"] = self._pushers_id_gen.get_current_token() return result + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + def process_replication_rows(self, stream_name, token, rows): if stream_name == "pushers": self._pushers_id_gen.advance(token) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 02ab5b66ea..7e7ad0f798 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -55,6 +55,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): self.client_name = client_name self.handler = handler self.server_name = hs.config.server_name + self.hs = hs self._clock = hs.get_clock() # As self.clock is defined in super class hs.get_reactor().addSystemEventTrigger("before", "shutdown", self.stopTrying) @@ -65,7 +66,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): def buildProtocol(self, addr): logger.info("Connected to replication: %r", addr) return ClientReplicationStreamProtocol( - 
self.client_name, self.server_name, self._clock, self.handler
+            self.hs, self.client_name, self.server_name, self._clock, self.handler,
         )

     def clientConnectionLost(self, connector, reason):
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 451671412d..5a6b734094 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -136,8 +136,8 @@ class PositionCommand(Command):
     """Sent by the server to tell the client the stream postition without
     needing to send an RDATA.

-    Sent to the client after all missing updates for a stream have been sent
-    to the client and they're now up to date.
+    On receipt of a POSITION command clients should check if they have missed
+    any updates, and if so then fetch them out of band.
     """

     NAME = "POSITION"
@@ -179,42 +179,24 @@ class NameCommand(Command):


 class ReplicateCommand(Command):
-    """Sent by the client to subscribe to the stream.
+    """Sent by the client to subscribe to streams.

     Format::

-        REPLICATE <stream_name> <token>
-
-    Where <token> may be either:
-     * a numeric stream_id to stream updates from
-     * "NOW" to stream all subsequent updates.
-
-    The <stream_name> can be "ALL" to subscribe to all known streams, in which
-    case the <token> must be set to "NOW", i.e.::
-
-        REPLICATE ALL NOW
+        REPLICATE
     """

     NAME = "REPLICATE"

-    def __init__(self, stream_name, token):
-        self.stream_name = stream_name
-        self.token = token
+    def __init__(self):
+        pass

     @classmethod
     def from_line(cls, line):
-        stream_name, token = line.split(" ", 1)
-        if token in ("NOW", "now"):
-            token = "NOW"
-        else:
-            token = int(token)
-        return cls(stream_name, token)
+        return cls()

     def to_line(self):
-        return " ".join((self.stream_name, str(self.token)))
-
-    def get_logcontext_id(self):
-        return "REPLICATE-" + self.stream_name
+        return ""


 class UserSyncCommand(Command):
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index bc1482a9bb..f81d2e2442 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -35,9 +35,7 @@ indicate which side is sending, these are *not* included on the wire::
     > PING 1490197665618
     < NAME synapse.app.appservice
     < PING 1490197665618
-    < REPLICATE events 1
-    < REPLICATE backfill 1
-    < REPLICATE caches 1
+    < REPLICATE
     > POSITION events 1
     > POSITION backfill 1
     > POSITION caches 1
@@ -53,17 +51,15 @@
 import fcntl
 import logging
 import struct
 from collections import defaultdict
-from typing import Any, DefaultDict, Dict, List, Set, Tuple
+from typing import Any, DefaultDict, Dict, List, Set

-from six import iteritems, iterkeys
+from six import iteritems

 from prometheus_client import Counter

-from twisted.internet import defer
 from twisted.protocols.basic import LineOnlyReceiver
 from twisted.python.failure import Failure

-from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.tcp.commands import (
@@ -82,11 +78,16 @@ from synapse.replication.tcp.commands import (
     SyncCommand,
     UserSyncCommand,
 )
-from synapse.replication.tcp.streams import STREAMS_MAP
+from synapse.replication.tcp.streams import STREAMS_MAP, Stream
 from synapse.types import Collection
 from synapse.util import Clock
 from synapse.util.stringutils import random_string

+MYPY = False
+if MYPY:
+    from synapse.server import HomeServer
+
+
 connection_close_counter = Counter(
     "synapse_replication_tcp_protocol_close_reason", "", ["reason_type"]
 )
@@ -411,16
+412,6 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): self.server_name = server_name self.streamer = streamer - # The streams the client has subscribed to and is up to date with - self.replication_streams = set() # type: Set[str] - - # The streams the client is currently subscribing to. - self.connecting_streams = set() # type: Set[str] - - # Map from stream name to list of updates to send once we've finished - # subscribing the client to the stream. - self.pending_rdata = {} # type: Dict[str, List[Tuple[int, Any]]] - def connectionMade(self): self.send_command(ServerCommand(self.server_name)) BaseReplicationStreamProtocol.connectionMade(self) @@ -436,21 +427,10 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): ) async def on_REPLICATE(self, cmd): - stream_name = cmd.stream_name - token = cmd.token - - if stream_name == "ALL": - # Subscribe to all streams we're publishing to. - deferreds = [ - run_in_background(self.subscribe_to_stream, stream, token) - for stream in iterkeys(self.streamer.streams_by_name) - ] - - await make_deferred_yieldable( - defer.gatherResults(deferreds, consumeErrors=True) - ) - else: - await self.subscribe_to_stream(stream_name, token) + # Subscribe to all streams we're publishing to. + for stream_name in self.streamer.streams_by_name: + current_token = self.streamer.get_stream_token(stream_name) + self.send_command(PositionCommand(stream_name, current_token)) async def on_FEDERATION_ACK(self, cmd): self.streamer.federation_ack(cmd.token) @@ -474,87 +454,12 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): cmd.last_seen, ) - async def subscribe_to_stream(self, stream_name, token): - """Subscribe the remote to a stream. - - This invloves checking if they've missed anything and sending those - updates down if they have. During that time new updates for the stream - are queued and sent once we've sent down any missed updates. - """ - self.replication_streams.discard(stream_name) - self.connecting_streams.add(stream_name) - - try: - # Get missing updates - updates, current_token = await self.streamer.get_stream_updates( - stream_name, token - ) - - # Send all the missing updates - for update in updates: - token, row = update[0], update[1] - self.send_command(RdataCommand(stream_name, token, row)) - - # We send a POSITION command to ensure that they have an up to - # date token (especially useful if we didn't send any updates - # above) - self.send_command(PositionCommand(stream_name, current_token)) - - # Now we can send any updates that came in while we were subscribing - pending_rdata = self.pending_rdata.pop(stream_name, []) - updates = [] - for token, update in pending_rdata: - # If the token is null, it is part of a batch update. Batches - # are multiple updates that share a single token. To denote - # this, the token is set to None for all tokens in the batch - # except for the last. If we find a None token, we keep looking - # through tokens until we find one that is not None and then - # process all previous updates in the batch as if they had the - # final token. 
- if token is None: - # Store this update as part of a batch - updates.append(update) - continue - - if token <= current_token: - # This update or batch of updates is older than - # current_token, dismiss it - updates = [] - continue - - updates.append(update) - - # Send all updates that are part of this batch with the - # found token - for update in updates: - self.send_command(RdataCommand(stream_name, token, update)) - - # Clear stored updates - updates = [] - - # They're now fully subscribed - self.replication_streams.add(stream_name) - except Exception as e: - logger.exception("[%s] Failed to handle REPLICATE command", self.id()) - self.send_error("failed to handle replicate: %r", e) - finally: - self.connecting_streams.discard(stream_name) - def stream_update(self, stream_name, token, data): """Called when a new update is available to stream to clients. We need to check if the client is interested in the stream or not """ - if stream_name in self.replication_streams: - # The client is subscribed to the stream - self.send_command(RdataCommand(stream_name, token, data)) - elif stream_name in self.connecting_streams: - # The client is being subscribed to the stream - logger.debug("[%s] Queuing RDATA %r %r", self.id(), stream_name, token) - self.pending_rdata.setdefault(stream_name, []).append((token, data)) - else: - # The client isn't subscribed - logger.debug("[%s] Dropping RDATA %r %r", self.id(), stream_name, token) + self.send_command(RdataCommand(stream_name, token, data)) def send_sync(self, data): self.send_command(SyncCommand(data)) @@ -638,6 +543,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): def __init__( self, + hs: "HomeServer", client_name: str, server_name: str, clock: Clock, @@ -649,22 +555,25 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.server_name = server_name self.handler = handler + self.streams = { + stream.NAME: stream(hs) for stream in STREAMS_MAP.values() + } # type: Dict[str, Stream] + # Set of stream names that have been subscribe to, but haven't yet # caught up with. This is used to track when the client has been fully # connected to the remote. - self.streams_connecting = set() # type: Set[str] + self.streams_connecting = set(STREAMS_MAP) # type: Set[str] # Map of stream to batched updates. See RdataCommand for info on how # batching works. 
- self.pending_batches = {} # type: Dict[str, Any] + self.pending_batches = {} # type: Dict[str, List[Any]] def connectionMade(self): self.send_command(NameCommand(self.client_name)) BaseReplicationStreamProtocol.connectionMade(self) # Once we've connected subscribe to the necessary streams - for stream_name, token in iteritems(self.handler.get_streams_to_replicate()): - self.replicate(stream_name, token) + self.replicate() # Tell the server if we have any users currently syncing (should only # happen on synchrotrons) @@ -676,10 +585,6 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # We've now finished connecting to so inform the client handler self.handler.update_connection(self) - # This will happen if we don't actually subscribe to any streams - if not self.streams_connecting: - self.handler.finished_connecting() - async def on_SERVER(self, cmd): if cmd.data != self.server_name: logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) @@ -697,7 +602,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): ) raise - if cmd.token is None: + if cmd.token is None or stream_name in self.streams_connecting: # I.e. this is part of a batch of updates for this stream. Batch # until we get an update for the stream with a non None token self.pending_batches.setdefault(stream_name, []).append(row) @@ -707,14 +612,55 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): rows.append(row) await self.handler.on_rdata(stream_name, cmd.token, rows) - async def on_POSITION(self, cmd): - # When we get a `POSITION` command it means we've finished getting - # missing updates for the given stream, and are now up to date. + async def on_POSITION(self, cmd: PositionCommand): + stream = self.streams.get(cmd.stream_name) + if not stream: + logger.error("Got POSITION for unknown stream: %s", cmd.stream_name) + return + + # Find where we previously streamed up to. + current_token = self.handler.get_streams_to_replicate().get(cmd.stream_name) + if current_token is None: + logger.warning( + "Got POSITION for stream we're not subscribed to: %s", cmd.stream_name + ) + return + + # Fetch all updates between then and now. + limited = True + while limited: + updates, current_token, limited = await stream.get_updates_since( + current_token, cmd.token + ) + + # Check if the connection was closed underneath us, if so we bail + # rather than risk having concurrent catch ups going on. + if self.state == ConnectionStates.CLOSED: + return + + if updates: + await self.handler.on_rdata( + cmd.stream_name, + current_token, + [stream.parse_row(update[1]) for update in updates], + ) + + # We've now caught up to position sent to us, notify handler. + await self.handler.on_position(cmd.stream_name, cmd.token) + self.streams_connecting.discard(cmd.stream_name) if not self.streams_connecting: self.handler.finished_connecting() - await self.handler.on_position(cmd.stream_name, cmd.token) + # Check if the connection was closed underneath us, if so we bail + # rather than risk having concurrent catch ups going on. + if self.state == ConnectionStates.CLOSED: + return + + # Handle any RDATA that came in while we were catching up. 
+        rows = self.pending_batches.pop(cmd.stream_name, [])
+        if rows:
+            await self.handler.on_rdata(cmd.stream_name, rows[-1].token, rows)

     async def on_SYNC(self, cmd):
         self.handler.on_sync(cmd.data)
@@ -722,22 +668,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
     async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand):
         self.handler.on_remote_server_up(cmd.data)

-    def replicate(self, stream_name, token):
+    def replicate(self):
         """Send the subscription request to the server
         """
-        if stream_name not in STREAMS_MAP:
-            raise Exception("Invalid stream name %r" % (stream_name,))
-
-        logger.info(
-            "[%s] Subscribing to replication stream: %r from %r",
-            self.id(),
-            stream_name,
-            token,
-        )
-
-        self.streams_connecting.add(stream_name)
+        logger.info("[%s] Subscribing to replication streams", self.id())

-        self.send_command(ReplicateCommand(stream_name, token))
+        self.send_command(ReplicateCommand())

     def on_connection_closed(self):
         BaseReplicationStreamProtocol.on_connection_closed(self)
diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 6e2ebaf614..4374e99e32 100644
--- a/synapse/replication/tcp/resource.py
+++ b/synapse/replication/tcp/resource.py
@@ -17,7 +17,7 @@

 import logging
 import random
-from typing import Any, List
+from typing import Any, Dict, List

 from six import itervalues

@@ -30,7 +30,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util.metrics import Measure, measure_func

 from .protocol import ServerReplicationStreamProtocol
-from .streams import STREAMS_MAP
+from .streams import STREAMS_MAP, Stream
 from .streams.federation import FederationStream

 stream_updates_counter = Counter(
@@ -52,7 +52,7 @@ class ReplicationStreamProtocolFactory(Factory):
     """

     def __init__(self, hs):
-        self.streamer = ReplicationStreamer(hs)
+        self.streamer = hs.get_replication_streamer()
         self.clock = hs.get_clock()
         self.server_name = hs.config.server_name

@@ -133,6 +133,11 @@ class ReplicationStreamer(object):
             for conn in self.connections:
                 conn.send_error("server shutting down")

+    def get_streams(self) -> Dict[str, Stream]:
+        """Get a map from stream name to stream instance.
+        """
+        return self.streams_by_name
+
     def on_notifier_poke(self):
         """Checks if there is actually any new data and sends it to the
         connections if there are.
@@ -190,7 +195,8 @@ class ReplicationStreamer(object):
                             stream.current_token(),
                         )
                         try:
-                            updates, current_token = await stream.get_updates()
+                            updates, current_token, limited = await stream.get_updates()
+                            self.pending_updates |= limited
                         except Exception:
                             logger.info("Failed to handle stream %s", stream.NAME)
                             raise
@@ -226,8 +232,7 @@
             self.pending_updates = False
             self.is_looping = False

-    @measure_func("repl.get_stream_updates")
-    async def get_stream_updates(self, stream_name, token):
+    def get_stream_token(self, stream_name):
         """For a given stream get all updates since token.

         This is called when a client first subscribes to a stream.
""" @@ -235,7 +240,7 @@ class ReplicationStreamer(object): if not stream: raise Exception("unknown stream %s", stream_name) - return await stream.get_updates_since(token) + return stream.current_token() @measure_func("repl.federation_ack") def federation_ack(self, token): diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py index 29199f5b46..37bcd3de66 100644 --- a/synapse/replication/tcp/streams/__init__.py +++ b/synapse/replication/tcp/streams/__init__.py @@ -24,6 +24,9 @@ Each stream is defined by the following information: current_token: The function that returns the current token for the stream update_function: The function that returns a list of updates between two tokens """ + +from typing import Dict, Type + from synapse.replication.tcp.streams._base import ( AccountDataStream, BackfillStream, @@ -35,6 +38,7 @@ from synapse.replication.tcp.streams._base import ( PushersStream, PushRulesStream, ReceiptsStream, + Stream, TagAccountDataStream, ToDeviceStream, TypingStream, @@ -63,10 +67,12 @@ STREAMS_MAP = { GroupServerStream, UserSignatureStream, ) -} +} # type: Dict[str, Type[Stream]] + __all__ = [ "STREAMS_MAP", + "Stream", "BackfillStream", "PresenceStream", "TypingStream", diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 32d9514883..c14dff6c64 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -14,13 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import itertools import logging from collections import namedtuple -from typing import Any, List, Optional, Tuple +from typing import Any, Awaitable, Callable, List, Optional, Tuple import attr +from synapse.replication.http.streams import ReplicationGetStreamUpdates from synapse.types import JsonDict logger = logging.getLogger(__name__) @@ -29,6 +29,15 @@ logger = logging.getLogger(__name__) MAX_EVENTS_BEHIND = 500000 +# Some type aliases to make things a bit easier. + +# A stream position token +Token = int + +# A pair of position in stream and args used to create an instance of `ROW_TYPE`. +StreamRow = Tuple[Token, tuple] + + class Stream(object): """Base class for the streams. @@ -56,6 +65,7 @@ class Stream(object): return cls.ROW_TYPE(*row) def __init__(self, hs): + # The token from which we last asked for updates self.last_token = self.current_token() @@ -65,61 +75,46 @@ class Stream(object): """ self.last_token = self.current_token() - async def get_updates(self): + async def get_updates(self) -> Tuple[List[Tuple[Token, JsonDict]], Token, bool]: """Gets all updates since the last time this function was called (or since the stream was constructed if it hadn't been called before). Returns: - Deferred[Tuple[List[Tuple[int, Any]], int]: - Resolves to a pair ``(updates, current_token)``, where ``updates`` is a - list of ``(token, row)`` entries. ``row`` will be json-serialised and - sent over the replication steam. + A triplet `(updates, new_last_token, limited)`, where `updates` is + a list of `(token, row)` entries, `new_last_token` is the new + position in stream, and `limited` is whether there are more updates + to fetch. 
""" - updates, current_token = await self.get_updates_since(self.last_token) + current_token = self.current_token() + updates, current_token, limited = await self.get_updates_since( + self.last_token, current_token + ) self.last_token = current_token - return updates, current_token + return updates, current_token, limited async def get_updates_since( - self, from_token: int - ) -> Tuple[List[Tuple[int, JsonDict]], int]: + self, from_token: Token, upto_token: Token, limit: int = 100 + ) -> Tuple[List[Tuple[Token, JsonDict]], Token, bool]: """Like get_updates except allows specifying from when we should stream updates Returns: - Resolves to a pair `(updates, new_last_token)`, where `updates` is - a list of `(token, row)` entries and `new_last_token` is the new - position in stream. + A triplet `(updates, new_last_token, limited)`, where `updates` is + a list of `(token, row)` entries, `new_last_token` is the new + position in stream, and `limited` is whether there are more updates + to fetch. """ - if from_token in ("NOW", "now"): - return [], self.current_token() - - current_token = self.current_token() - from_token = int(from_token) - if from_token == current_token: - return [], current_token + if from_token == upto_token: + return [], upto_token, False - rows = await self.update_function( - from_token, current_token, limit=MAX_EVENTS_BEHIND + 1 + updates, upto_token, limited = await self.update_function( + from_token, upto_token, limit=limit, ) - - # never turn more than MAX_EVENTS_BEHIND + 1 into updates. - rows = itertools.islice(rows, MAX_EVENTS_BEHIND + 1) - - updates = [(row[0], row[1:]) for row in rows] - - # check we didn't get more rows than the limit. - # doing it like this allows the update_function to be a generator. - if len(updates) >= MAX_EVENTS_BEHIND: - raise Exception("stream %s has fallen behind" % (self.NAME)) - - # The update function didn't hit the limit, so we must have got all - # the updates to `current_token`, and can return that as our new - # stream position. - return updates, current_token + return updates, upto_token, limited def current_token(self): """Gets the current token of the underlying streams. Should be provided @@ -141,6 +136,48 @@ class Stream(object): raise NotImplementedError() +def db_query_to_update_function( + query_function: Callable[[Token, Token, int], Awaitable[List[tuple]]] +) -> Callable[[Token, Token, int], Awaitable[Tuple[List[StreamRow], Token, bool]]]: + """Wraps a db query function which returns a list of rows to make it + suitable for use as an `update_function` for the Stream class + """ + + async def update_function(from_token, upto_token, limit): + rows = await query_function(from_token, upto_token, limit) + updates = [(row[0], row[1:]) for row in rows] + limited = False + if len(updates) == limit: + upto_token = rows[-1][0] + limited = True + + return updates, upto_token, limited + + return update_function + + +def make_http_update_function( + hs, stream_name: str +) -> Callable[[Token, Token, Token], Awaitable[Tuple[List[StreamRow], Token, bool]]]: + """Makes a suitable function for use as an `update_function` that queries + the master process for updates. 
+ """ + + client = ReplicationGetStreamUpdates.make_client(hs) + + async def update_function( + from_token: int, upto_token: int, limit: int + ) -> Tuple[List[Tuple[int, tuple]], int, bool]: + return await client( + stream_name=stream_name, + from_token=from_token, + upto_token=upto_token, + limit=limit, + ) + + return update_function + + class BackfillStream(Stream): """We fetched some old events and either we had never seen that event before or it went from being an outlier to not. @@ -164,7 +201,7 @@ class BackfillStream(Stream): def __init__(self, hs): store = hs.get_datastore() self.current_token = store.get_current_backfill_token # type: ignore - self.update_function = store.get_all_new_backfill_event_rows # type: ignore + self.update_function = db_query_to_update_function(store.get_all_new_backfill_event_rows) # type: ignore super(BackfillStream, self).__init__(hs) @@ -190,8 +227,15 @@ class PresenceStream(Stream): store = hs.get_datastore() presence_handler = hs.get_presence_handler() + self._is_worker = hs.config.worker_app is not None + self.current_token = store.get_current_presence_token # type: ignore - self.update_function = presence_handler.get_all_presence_updates # type: ignore + + if hs.config.worker_app is None: + self.update_function = db_query_to_update_function(presence_handler.get_all_presence_updates) # type: ignore + else: + # Query master process + self.update_function = make_http_update_function(hs, self.NAME) # type: ignore super(PresenceStream, self).__init__(hs) @@ -208,7 +252,12 @@ class TypingStream(Stream): typing_handler = hs.get_typing_handler() self.current_token = typing_handler.get_current_token # type: ignore - self.update_function = typing_handler.get_all_typing_updates # type: ignore + + if hs.config.worker_app is None: + self.update_function = db_query_to_update_function(typing_handler.get_all_typing_updates) # type: ignore + else: + # Query master process + self.update_function = make_http_update_function(hs, self.NAME) # type: ignore super(TypingStream, self).__init__(hs) @@ -232,7 +281,7 @@ class ReceiptsStream(Stream): store = hs.get_datastore() self.current_token = store.get_max_receipt_stream_id # type: ignore - self.update_function = store.get_all_updated_receipts # type: ignore + self.update_function = db_query_to_update_function(store.get_all_updated_receipts) # type: ignore super(ReceiptsStream, self).__init__(hs) @@ -256,7 +305,13 @@ class PushRulesStream(Stream): async def update_function(self, from_token, to_token, limit): rows = await self.store.get_all_push_rule_updates(from_token, to_token, limit) - return [(row[0], row[2]) for row in rows] + + limited = False + if len(rows) == limit: + to_token = rows[-1][0] + limited = True + + return [(row[0], (row[2],)) for row in rows], to_token, limited class PushersStream(Stream): @@ -275,7 +330,7 @@ class PushersStream(Stream): store = hs.get_datastore() self.current_token = store.get_pushers_stream_token # type: ignore - self.update_function = store.get_all_updated_pushers_rows # type: ignore + self.update_function = db_query_to_update_function(store.get_all_updated_pushers_rows) # type: ignore super(PushersStream, self).__init__(hs) @@ -307,7 +362,7 @@ class CachesStream(Stream): store = hs.get_datastore() self.current_token = store.get_cache_stream_token # type: ignore - self.update_function = store.get_all_updated_caches # type: ignore + self.update_function = db_query_to_update_function(store.get_all_updated_caches) # type: ignore super(CachesStream, self).__init__(hs) @@ -333,7 +388,7 
@@ class PublicRoomsStream(Stream): store = hs.get_datastore() self.current_token = store.get_current_public_room_stream_id # type: ignore - self.update_function = store.get_all_new_public_rooms # type: ignore + self.update_function = db_query_to_update_function(store.get_all_new_public_rooms) # type: ignore super(PublicRoomsStream, self).__init__(hs) @@ -354,7 +409,7 @@ class DeviceListsStream(Stream): store = hs.get_datastore() self.current_token = store.get_device_stream_token # type: ignore - self.update_function = store.get_all_device_list_changes_for_remotes # type: ignore + self.update_function = db_query_to_update_function(store.get_all_device_list_changes_for_remotes) # type: ignore super(DeviceListsStream, self).__init__(hs) @@ -372,7 +427,7 @@ class ToDeviceStream(Stream): store = hs.get_datastore() self.current_token = store.get_to_device_stream_token # type: ignore - self.update_function = store.get_all_new_device_messages # type: ignore + self.update_function = db_query_to_update_function(store.get_all_new_device_messages) # type: ignore super(ToDeviceStream, self).__init__(hs) @@ -392,7 +447,7 @@ class TagAccountDataStream(Stream): store = hs.get_datastore() self.current_token = store.get_max_account_data_stream_id # type: ignore - self.update_function = store.get_all_updated_tags # type: ignore + self.update_function = db_query_to_update_function(store.get_all_updated_tags) # type: ignore super(TagAccountDataStream, self).__init__(hs) @@ -412,10 +467,11 @@ class AccountDataStream(Stream): self.store = hs.get_datastore() self.current_token = self.store.get_max_account_data_stream_id # type: ignore + self.update_function = db_query_to_update_function(self._update_function) # type: ignore super(AccountDataStream, self).__init__(hs) - async def update_function(self, from_token, to_token, limit): + async def _update_function(self, from_token, to_token, limit): global_results, room_results = await self.store.get_all_updated_account_data( from_token, from_token, to_token, limit ) @@ -442,7 +498,7 @@ class GroupServerStream(Stream): store = hs.get_datastore() self.current_token = store.get_group_stream_token # type: ignore - self.update_function = store.get_all_groups_changes # type: ignore + self.update_function = db_query_to_update_function(store.get_all_groups_changes) # type: ignore super(GroupServerStream, self).__init__(hs) @@ -460,6 +516,6 @@ class UserSignatureStream(Stream): store = hs.get_datastore() self.current_token = store.get_device_stream_token # type: ignore - self.update_function = store.get_all_user_signature_changes_for_remotes # type: ignore + self.update_function = db_query_to_update_function(store.get_all_user_signature_changes_for_remotes) # type: ignore super(UserSignatureStream, self).__init__(hs) diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index b3afabb8cd..c6a595629f 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -19,7 +19,7 @@ from typing import Tuple, Type import attr -from ._base import Stream +from ._base import Stream, db_query_to_update_function """Handling of the 'events' replication stream @@ -117,10 +117,11 @@ class EventsStream(Stream): def __init__(self, hs): self._store = hs.get_datastore() self.current_token = self._store.get_current_events_token # type: ignore + self.update_function = db_query_to_update_function(self._update_function) # type: ignore super(EventsStream, self).__init__(hs) - async def update_function(self, 
from_token, current_token, limit=None):
+    async def _update_function(self, from_token, current_token, limit=None):
         event_rows = await self._store.get_all_new_forward_event_rows(
             from_token, current_token, limit
         )
diff --git a/synapse/replication/tcp/streams/federation.py b/synapse/replication/tcp/streams/federation.py
index f5f9336430..48c1d45718 100644
--- a/synapse/replication/tcp/streams/federation.py
+++ b/synapse/replication/tcp/streams/federation.py
@@ -15,7 +15,9 @@
 # limitations under the License.
 from collections import namedtuple

-from ._base import Stream
+from twisted.internet import defer
+
+from synapse.replication.tcp.streams._base import Stream, db_query_to_update_function


 class FederationStream(Stream):
@@ -33,11 +35,18 @@ class FederationStream(Stream):

     NAME = "federation"
     ROW_TYPE = FederationStreamRow
+    _QUERY_MASTER = True

     def __init__(self, hs):
-        federation_sender = hs.get_federation_sender()
-
-        self.current_token = federation_sender.get_current_token  # type: ignore
-        self.update_function = federation_sender.get_replication_rows  # type: ignore
+        # Not all synapse instances will have a federation sender instance,
+        # whether that's a `FederationSender` or a `FederationRemoteSendQueue`,
+        # so we stub the stream out when that is the case.
+        if hs.config.worker_app is None or hs.should_send_federation():
+            federation_sender = hs.get_federation_sender()
+            self.current_token = federation_sender.get_current_token  # type: ignore
+            self.update_function = db_query_to_update_function(federation_sender.get_replication_rows)  # type: ignore
+        else:
+            self.current_token = lambda: 0  # type: ignore
+            self.update_function = lambda from_token, upto_token, limit: defer.succeed(([], upto_token, False))  # type: ignore

         super(FederationStream, self).__init__(hs)
diff --git a/synapse/server.py b/synapse/server.py
index 1b980371de..9426eb1672 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -85,6 +85,7 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.notifier import Notifier
 from synapse.push.action_generator import ActionGenerator
 from synapse.push.pusherpool import PusherPool
+from synapse.replication.tcp.resource import ReplicationStreamer
 from synapse.rest.media.v1.media_repository import (
     MediaRepository,
     MediaRepositoryResource,
@@ -199,6 +200,7 @@ class HomeServer(object):
         "saml_handler",
         "event_client_serializer",
         "storage",
+        "replication_streamer",
     ]

     REQUIRED_ON_MASTER_STARTUP = ["user_directory_handler", "stats_handler"]
@@ -536,6 +538,9 @@ class HomeServer(object):
     def build_storage(self) -> Storage:
         return Storage(self, self.datastores)

+    def build_replication_streamer(self) -> ReplicationStreamer:
+        return ReplicationStreamer(self)
+
     def remove_pusher(self, app_id, push_key, user_id):
         return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)

diff --git a/synapse/storage/data_stores/main/cache.py b/synapse/storage/data_stores/main/cache.py
index d4c44dcc75..4dc5da3fe8 100644
--- a/synapse/storage/data_stores/main/cache.py
+++ b/synapse/storage/data_stores/main/cache.py
@@ -32,7 +32,29 @@ logger = logging.getLogger(__name__)
 CURRENT_STATE_CACHE_NAME = "cs_cache_fake"


-class CacheInvalidationStore(SQLBaseStore):
+class CacheInvalidationWorkerStore(SQLBaseStore):
+    def get_all_updated_caches(self, last_id, current_id, limit):
+        if last_id == current_id:
+            return defer.succeed([])
+
+        def get_all_updated_caches_txn(txn):
+            # We purposefully don't bound by the current token, as we want to
+            # send across cache
invalidations as quickly as possible. Cache + # invalidations are idempotent, so duplicates are fine. + sql = ( + "SELECT stream_id, cache_func, keys, invalidation_ts" + " FROM cache_invalidation_stream" + " WHERE stream_id > ? ORDER BY stream_id ASC LIMIT ?" + ) + txn.execute(sql, (last_id, limit)) + return txn.fetchall() + + return self.db.runInteraction( + "get_all_updated_caches", get_all_updated_caches_txn + ) + + +class CacheInvalidationStore(CacheInvalidationWorkerStore): async def invalidate_cache_and_stream(self, cache_name: str, keys: Tuple[Any, ...]): """Invalidates the cache and adds it to the cache stream so slaves will know to invalidate their caches. @@ -145,26 +167,6 @@ class CacheInvalidationStore(SQLBaseStore): }, ) - def get_all_updated_caches(self, last_id, current_id, limit): - if last_id == current_id: - return defer.succeed([]) - - def get_all_updated_caches_txn(txn): - # We purposefully don't bound by the current token, as we want to - # send across cache invalidations as quickly as possible. Cache - # invalidations are idempotent, so duplicates are fine. - sql = ( - "SELECT stream_id, cache_func, keys, invalidation_ts" - " FROM cache_invalidation_stream" - " WHERE stream_id > ? ORDER BY stream_id ASC LIMIT ?" - ) - txn.execute(sql, (last_id, limit)) - return txn.fetchall() - - return self.db.runInteraction( - "get_all_updated_caches", get_all_updated_caches_txn - ) - def get_cache_stream_token(self): if self._cache_id_gen: return self._cache_id_gen.get_current_token() diff --git a/synapse/storage/data_stores/main/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py index 0613b49f4a..9a1178fb39 100644 --- a/synapse/storage/data_stores/main/deviceinbox.py +++ b/synapse/storage/data_stores/main/deviceinbox.py @@ -207,6 +207,50 @@ class DeviceInboxWorkerStore(SQLBaseStore): "delete_device_msgs_for_remote", delete_messages_for_remote_destination_txn ) + def get_all_new_device_messages(self, last_pos, current_pos, limit): + """ + Args: + last_pos(int): + current_pos(int): + limit(int): + Returns: + A deferred list of rows from the device inbox + """ + if last_pos == current_pos: + return defer.succeed([]) + + def get_all_new_device_messages_txn(txn): + # We limit like this as we might have multiple rows per stream_id, and + # we want to make sure we always get all entries for any stream_id + # we return. + upper_pos = min(current_pos, last_pos + limit) + sql = ( + "SELECT max(stream_id), user_id" + " FROM device_inbox" + " WHERE ? < stream_id AND stream_id <= ?" + " GROUP BY user_id" + ) + txn.execute(sql, (last_pos, upper_pos)) + rows = txn.fetchall() + + sql = ( + "SELECT max(stream_id), destination" + " FROM device_federation_outbox" + " WHERE ? < stream_id AND stream_id <= ?" 
+ " GROUP BY destination" + ) + txn.execute(sql, (last_pos, upper_pos)) + rows.extend(txn) + + # Order by ascending stream ordering + rows.sort() + + return rows + + return self.db.runInteraction( + "get_all_new_device_messages", get_all_new_device_messages_txn + ) + class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" @@ -411,47 +455,3 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) rows.append((user_id, device_id, stream_id, message_json)) txn.executemany(sql, rows) - - def get_all_new_device_messages(self, last_pos, current_pos, limit): - """ - Args: - last_pos(int): - current_pos(int): - limit(int): - Returns: - A deferred list of rows from the device inbox - """ - if last_pos == current_pos: - return defer.succeed([]) - - def get_all_new_device_messages_txn(txn): - # We limit like this as we might have multiple rows per stream_id, and - # we want to make sure we always get all entries for any stream_id - # we return. - upper_pos = min(current_pos, last_pos + limit) - sql = ( - "SELECT max(stream_id), user_id" - " FROM device_inbox" - " WHERE ? < stream_id AND stream_id <= ?" - " GROUP BY user_id" - ) - txn.execute(sql, (last_pos, upper_pos)) - rows = txn.fetchall() - - sql = ( - "SELECT max(stream_id), destination" - " FROM device_federation_outbox" - " WHERE ? < stream_id AND stream_id <= ?" - " GROUP BY destination" - ) - txn.execute(sql, (last_pos, upper_pos)) - rows.extend(txn) - - # Order by ascending stream ordering - rows.sort() - - return rows - - return self.db.runInteraction( - "get_all_new_device_messages", get_all_new_device_messages_txn - ) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index d593ef47b8..e71c23541d 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1267,104 +1267,6 @@ class EventsStore( ret = yield self.db.runInteraction("count_daily_active_rooms", _count) return ret - def get_current_backfill_token(self): - """The current minimum token that backfilled events have reached""" - return -self._backfill_id_gen.get_current_token() - - def get_current_events_token(self): - """The current maximum token that events have reached""" - return self._stream_id_gen.get_current_token() - - def get_all_new_forward_event_rows(self, last_id, current_id, limit): - if last_id == current_id: - return defer.succeed([]) - - def get_all_new_forward_event_rows(txn): - sql = ( - "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" - " FROM events AS e" - " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" - " LEFT JOIN event_relations USING (event_id)" - " WHERE ? < stream_ordering AND stream_ordering <= ?" - " ORDER BY stream_ordering ASC" - " LIMIT ?" - ) - txn.execute(sql, (last_id, current_id, limit)) - new_event_updates = txn.fetchall() - - if len(new_event_updates) == limit: - upper_bound = new_event_updates[-1][0] - else: - upper_bound = current_id - - sql = ( - "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" - " FROM events AS e" - " INNER JOIN ex_outlier_stream USING (event_id)" - " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" - " LEFT JOIN event_relations USING (event_id)" - " WHERE ? < event_stream_ordering" - " AND event_stream_ordering <= ?" 
- " ORDER BY event_stream_ordering DESC" - ) - txn.execute(sql, (last_id, upper_bound)) - new_event_updates.extend(txn) - - return new_event_updates - - return self.db.runInteraction( - "get_all_new_forward_event_rows", get_all_new_forward_event_rows - ) - - def get_all_new_backfill_event_rows(self, last_id, current_id, limit): - if last_id == current_id: - return defer.succeed([]) - - def get_all_new_backfill_event_rows(txn): - sql = ( - "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" - " FROM events AS e" - " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" - " LEFT JOIN event_relations USING (event_id)" - " WHERE ? > stream_ordering AND stream_ordering >= ?" - " ORDER BY stream_ordering ASC" - " LIMIT ?" - ) - txn.execute(sql, (-last_id, -current_id, limit)) - new_event_updates = txn.fetchall() - - if len(new_event_updates) == limit: - upper_bound = new_event_updates[-1][0] - else: - upper_bound = current_id - - sql = ( - "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" - " FROM events AS e" - " INNER JOIN ex_outlier_stream USING (event_id)" - " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" - " LEFT JOIN event_relations USING (event_id)" - " WHERE ? > event_stream_ordering" - " AND event_stream_ordering >= ?" - " ORDER BY event_stream_ordering DESC" - ) - txn.execute(sql, (-last_id, -upper_bound)) - new_event_updates.extend(txn.fetchall()) - - return new_event_updates - - return self.db.runInteraction( - "get_all_new_backfill_event_rows", get_all_new_backfill_event_rows - ) - @cached(num_args=5, max_entries=10) def get_all_new_events( self, @@ -1850,22 +1752,6 @@ class EventsStore( return (int(res["topological_ordering"]), int(res["stream_ordering"])) - def get_all_updated_current_state_deltas(self, from_token, to_token, limit): - def get_all_updated_current_state_deltas_txn(txn): - sql = """ - SELECT stream_id, room_id, type, state_key, event_id - FROM current_state_delta_stream - WHERE ? < stream_id AND stream_id <= ? - ORDER BY stream_id ASC LIMIT ? 
- """ - txn.execute(sql, (from_token, to_token, limit)) - return txn.fetchall() - - return self.db.runInteraction( - "get_all_updated_current_state_deltas", - get_all_updated_current_state_deltas_txn, - ) - def insert_labels_for_event_txn( self, txn, event_id, labels, room_id, topological_ordering ): diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py index 3013f49d32..16ea8948b1 100644 --- a/synapse/storage/data_stores/main/events_worker.py +++ b/synapse/storage/data_stores/main/events_worker.py @@ -963,3 +963,117 @@ class EventsWorkerStore(SQLBaseStore): complexity_v1 = round(state_events / 500, 2) return {"v1": complexity_v1} + + def get_current_backfill_token(self): + """The current minimum token that backfilled events have reached""" + return -self._backfill_id_gen.get_current_token() + + def get_current_events_token(self): + """The current maximum token that events have reached""" + return self._stream_id_gen.get_current_token() + + def get_all_new_forward_event_rows(self, last_id, current_id, limit): + if last_id == current_id: + return defer.succeed([]) + + def get_all_new_forward_event_rows(txn): + sql = ( + "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," + " state_key, redacts, relates_to_id" + " FROM events AS e" + " LEFT JOIN redactions USING (event_id)" + " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" + " WHERE ? < stream_ordering AND stream_ordering <= ?" + " ORDER BY stream_ordering ASC" + " LIMIT ?" + ) + txn.execute(sql, (last_id, current_id, limit)) + new_event_updates = txn.fetchall() + + if len(new_event_updates) == limit: + upper_bound = new_event_updates[-1][0] + else: + upper_bound = current_id + + sql = ( + "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," + " state_key, redacts, relates_to_id" + " FROM events AS e" + " INNER JOIN ex_outlier_stream USING (event_id)" + " LEFT JOIN redactions USING (event_id)" + " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" + " WHERE ? < event_stream_ordering" + " AND event_stream_ordering <= ?" + " ORDER BY event_stream_ordering DESC" + ) + txn.execute(sql, (last_id, upper_bound)) + new_event_updates.extend(txn) + + return new_event_updates + + return self.db.runInteraction( + "get_all_new_forward_event_rows", get_all_new_forward_event_rows + ) + + def get_all_new_backfill_event_rows(self, last_id, current_id, limit): + if last_id == current_id: + return defer.succeed([]) + + def get_all_new_backfill_event_rows(txn): + sql = ( + "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," + " state_key, redacts, relates_to_id" + " FROM events AS e" + " LEFT JOIN redactions USING (event_id)" + " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" + " WHERE ? > stream_ordering AND stream_ordering >= ?" + " ORDER BY stream_ordering ASC" + " LIMIT ?" + ) + txn.execute(sql, (-last_id, -current_id, limit)) + new_event_updates = txn.fetchall() + + if len(new_event_updates) == limit: + upper_bound = new_event_updates[-1][0] + else: + upper_bound = current_id + + sql = ( + "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type," + " state_key, redacts, relates_to_id" + " FROM events AS e" + " INNER JOIN ex_outlier_stream USING (event_id)" + " LEFT JOIN redactions USING (event_id)" + " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN event_relations USING (event_id)" + " WHERE ? 
> event_stream_ordering" + " AND event_stream_ordering >= ?" + " ORDER BY event_stream_ordering DESC" + ) + txn.execute(sql, (-last_id, -upper_bound)) + new_event_updates.extend(txn.fetchall()) + + return new_event_updates + + return self.db.runInteraction( + "get_all_new_backfill_event_rows", get_all_new_backfill_event_rows + ) + + def get_all_updated_current_state_deltas(self, from_token, to_token, limit): + def get_all_updated_current_state_deltas_txn(txn): + sql = """ + SELECT stream_id, room_id, type, state_key, event_id + FROM current_state_delta_stream + WHERE ? < stream_id AND stream_id <= ? + ORDER BY stream_id ASC LIMIT ? + """ + txn.execute(sql, (from_token, to_token, limit)) + return txn.fetchall() + + return self.db.runInteraction( + "get_all_updated_current_state_deltas", + get_all_updated_current_state_deltas_txn, + ) diff --git a/synapse/storage/data_stores/main/room.py b/synapse/storage/data_stores/main/room.py index e6c10c6316..aaebe427d3 100644 --- a/synapse/storage/data_stores/main/room.py +++ b/synapse/storage/data_stores/main/room.py @@ -732,6 +732,26 @@ class RoomWorkerStore(SQLBaseStore): return total_media_quarantined + def get_all_new_public_rooms(self, prev_id, current_id, limit): + def get_all_new_public_rooms(txn): + sql = """ + SELECT stream_id, room_id, visibility, appservice_id, network_id + FROM public_room_list_stream + WHERE stream_id > ? AND stream_id <= ? + ORDER BY stream_id ASC + LIMIT ? + """ + + txn.execute(sql, (prev_id, current_id, limit)) + return txn.fetchall() + + if prev_id == current_id: + return defer.succeed([]) + + return self.db.runInteraction( + "get_all_new_public_rooms", get_all_new_public_rooms + ) + class RoomBackgroundUpdateStore(SQLBaseStore): REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" @@ -1249,26 +1269,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): def get_current_public_room_stream_id(self): return self._public_room_id_gen.get_current_token() - def get_all_new_public_rooms(self, prev_id, current_id, limit): - def get_all_new_public_rooms(txn): - sql = """ - SELECT stream_id, room_id, visibility, appservice_id, network_id - FROM public_room_list_stream - WHERE stream_id > ? AND stream_id <= ? - ORDER BY stream_id ASC - LIMIT ? - """ - - txn.execute(sql, (prev_id, current_id, limit)) - return txn.fetchall() - - if prev_id == current_id: - return defer.succeed([]) - - return self.db.runInteraction( - "get_all_new_public_rooms", get_all_new_public_rooms - ) - @defer.inlineCallbacks def block_room(self, room_id, user_id): """Marks the room as blocked. Can be called multiple times. diff --git a/tests/replication/tcp/streams/_base.py b/tests/replication/tcp/streams/_base.py index e96ad4ca4e..a755fe2879 100644 --- a/tests/replication/tcp/streams/_base.py +++ b/tests/replication/tcp/streams/_base.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+
 from mock import Mock

 from synapse.replication.tcp.commands import ReplicateCommand
@@ -29,19 +30,37 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         # build a replication server
         server_factory = ReplicationStreamProtocolFactory(self.hs)
         self.streamer = server_factory.streamer
-        server = server_factory.buildProtocol(None)
+        self.server = server_factory.buildProtocol(None)

-        # build a replication client, with a dummy handler
-        handler_factory = Mock()
-        self.test_handler = TestReplicationClientHandler()
-        self.test_handler.factory = handler_factory
+        self.test_handler = Mock(wraps=TestReplicationClientHandler())
         self.client = ClientReplicationStreamProtocol(
-            "client", "test", clock, self.test_handler
+            hs, "client", "test", clock, self.test_handler,
         )

-        # wire them together
-        self.client.makeConnection(FakeTransport(server, reactor))
-        server.makeConnection(FakeTransport(self.client, reactor))
+        self._client_transport = None
+        self._server_transport = None
+
+    def reconnect(self):
+        if self._client_transport:
+            self.client.close()
+
+        if self._server_transport:
+            self.server.close()
+
+        self._client_transport = FakeTransport(self.server, self.reactor)
+        self.client.makeConnection(self._client_transport)
+
+        self._server_transport = FakeTransport(self.client, self.reactor)
+        self.server.makeConnection(self._server_transport)
+
+    def disconnect(self):
+        if self._client_transport:
+            self._client_transport = None
+            self.client.close()
+
+        if self._server_transport:
+            self._server_transport = None
+            self.server.close()

     def replicate(self):
         """Tell the master side of replication that something has happened, and then
@@ -50,19 +69,24 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         self.streamer.on_notifier_poke()
         self.pump(0.1)

-    def replicate_stream(self, stream, token="NOW"):
+    def replicate_stream(self):
         """Make the client send a REPLICATE command to set up a subscription to a stream"""
-        self.client.send_command(ReplicateCommand(stream, token))
+        self.client.send_command(ReplicateCommand())


 class TestReplicationClientHandler(object):
     """Drop-in for ReplicationClientHandler which just collects RDATA rows"""

     def __init__(self):
-        self.received_rdata_rows = []
+        self.streams = set()
+        self._received_rdata_rows = []

     def get_streams_to_replicate(self):
-        return {}
+        positions = {s: 0 for s in self.streams}
+        for stream, token, _ in self._received_rdata_rows:
+            if stream in self.streams:
+                positions[stream] = max(token, positions.get(stream, 0))
+        return positions

     def get_currently_syncing_users(self):
         return []
@@ -73,6 +97,9 @@ class TestReplicationClientHandler(object):
     def finished_connecting(self):
         pass

+    async def on_position(self, stream_name, token):
+        """Called when we get new position data."""
+
     async def on_rdata(self, stream_name, token, rows):
         for r in rows:
-            self.received_rdata_rows.append((stream_name, token, r))
+            self._received_rdata_rows.append((stream_name, token, r))
diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py
index fa2493cad6..0ec0825a0e 100644
--- a/tests/replication/tcp/streams/test_receipts.py
+++ b/tests/replication/tcp/streams/test_receipts.py
@@ -17,30 +17,64 @@ from synapse.replication.tcp.streams._base import ReceiptsStream
 from tests.replication.tcp.streams._base import BaseStreamTestCase

 USER_ID = "@feeling:blue"
-ROOM_ID = "!room:blue"
-EVENT_ID = "$event:blue"


 class ReceiptsStreamTestCase(BaseStreamTestCase):
     def test_receipt(self):
+        self.reconnect()
+
         # make the client subscribe
to the receipts stream - self.replicate_stream("receipts", "NOW") + self.replicate_stream() + self.test_handler.streams.add("receipts") # tell the master to send a new receipt self.get_success( self.hs.get_datastore().insert_receipt( - ROOM_ID, "m.read", USER_ID, [EVENT_ID], {"a": 1} + "!room:blue", "m.read", USER_ID, ["$event:blue"], {"a": 1} ) ) self.replicate() # there should be one RDATA command - rdata_rows = self.test_handler.received_rdata_rows + self.test_handler.on_rdata.assert_called_once() + stream_name, token, rdata_rows = self.test_handler.on_rdata.call_args[0] + self.assertEqual(stream_name, "receipts") self.assertEqual(1, len(rdata_rows)) - self.assertEqual(rdata_rows[0][0], "receipts") - row = rdata_rows[0][2] # type: ReceiptsStream.ReceiptsStreamRow - self.assertEqual(ROOM_ID, row.room_id) + row = rdata_rows[0] # type: ReceiptsStream.ReceiptsStreamRow + self.assertEqual("!room:blue", row.room_id) self.assertEqual("m.read", row.receipt_type) self.assertEqual(USER_ID, row.user_id) - self.assertEqual(EVENT_ID, row.event_id) + self.assertEqual("$event:blue", row.event_id) self.assertEqual({"a": 1}, row.data) + + # Now let's disconnect and insert some data. + self.disconnect() + + self.test_handler.on_rdata.reset_mock() + + self.get_success( + self.hs.get_datastore().insert_receipt( + "!room2:blue", "m.read", USER_ID, ["$event2:foo"], {"a": 2} + ) + ) + self.replicate() + + # Nothing should have happened as we are disconnected + self.test_handler.on_rdata.assert_not_called() + + self.reconnect() + self.pump(0.1) + + # We should now have caught up and get the missing data + self.test_handler.on_rdata.assert_called_once() + stream_name, token, rdata_rows = self.test_handler.on_rdata.call_args[0] + self.assertEqual(stream_name, "receipts") + self.assertEqual(token, 3) + self.assertEqual(1, len(rdata_rows)) + + row = rdata_rows[0] # type: ReceiptsStream.ReceiptsStreamRow + self.assertEqual("!room2:blue", row.room_id) + self.assertEqual("m.read", row.receipt_type) + self.assertEqual(USER_ID, row.user_id) + self.assertEqual("$event2:foo", row.event_id) + self.assertEqual({"a": 2}, row.data) -- cgit 1.5.1 From e8e2ddb60ae11db488f159901d918cb159695912 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:51:13 +0100 Subject: Allow server admins to define and enforce a password policy (MSC2000). (#7118) --- changelog.d/7118.feature | 1 + docs/sample_config.yaml | 35 ++++ synapse/api/errors.py | 21 +++ synapse/config/password.py | 39 +++++ synapse/handlers/password_policy.py | 93 +++++++++++ synapse/handlers/set_password.py | 2 + synapse/rest/__init__.py | 2 + synapse/rest/client/v2_alpha/password_policy.py | 58 +++++++ synapse/rest/client/v2_alpha/register.py | 2 + synapse/server.py | 5 + tests/rest/client/v2_alpha/test_password_policy.py | 179 +++++++++++++++++++++ 11 files changed, 437 insertions(+) create mode 100644 changelog.d/7118.feature create mode 100644 synapse/handlers/password_policy.py create mode 100644 synapse/rest/client/v2_alpha/password_policy.py create mode 100644 tests/rest/client/v2_alpha/test_password_policy.py (limited to 'synapse/server.py') diff --git a/changelog.d/7118.feature b/changelog.d/7118.feature new file mode 100644 index 0000000000..5cbfd98160 --- /dev/null +++ b/changelog.d/7118.feature @@ -0,0 +1 @@ +Allow server admins to define and enforce a password policy (MSC2000). 
\ No newline at end of file
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 2ef83646b3..1a1d061759 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1482,6 +1482,41 @@ password_config:
    #
    #pepper: "EVEN_MORE_SECRET"

+   # Define and enforce a password policy. Each parameter is optional.
+   # This is an implementation of MSC2000.
+   #
+   policy:
+      # Whether to enforce the password policy.
+      # Defaults to 'false'.
+      #
+      #enabled: true
+
+      # Minimum accepted length for a password.
+      # Defaults to 0.
+      #
+      #minimum_length: 15
+
+      # Whether a password must contain at least one digit.
+      # Defaults to 'false'.
+      #
+      #require_digit: true
+
+      # Whether a password must contain at least one symbol.
+      # A symbol is any character that's not a number or a letter.
+      # Defaults to 'false'.
+      #
+      #require_symbol: true
+
+      # Whether a password must contain at least one lowercase letter.
+      # Defaults to 'false'.
+      #
+      #require_lowercase: true
+
+      # Whether a password must contain at least one uppercase letter.
+      # Defaults to 'false'.
+      #
+      #require_uppercase: true
+

 # Configuration for sending emails from Synapse.
 #
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 616942b057..11da016ac5 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -64,6 +64,13 @@ class Codes(object):
     INCOMPATIBLE_ROOM_VERSION = "M_INCOMPATIBLE_ROOM_VERSION"
     WRONG_ROOM_KEYS_VERSION = "M_WRONG_ROOM_KEYS_VERSION"
     EXPIRED_ACCOUNT = "ORG_MATRIX_EXPIRED_ACCOUNT"
+    PASSWORD_TOO_SHORT = "M_PASSWORD_TOO_SHORT"
+    PASSWORD_NO_DIGIT = "M_PASSWORD_NO_DIGIT"
+    PASSWORD_NO_UPPERCASE = "M_PASSWORD_NO_UPPERCASE"
+    PASSWORD_NO_LOWERCASE = "M_PASSWORD_NO_LOWERCASE"
+    PASSWORD_NO_SYMBOL = "M_PASSWORD_NO_SYMBOL"
+    PASSWORD_IN_DICTIONARY = "M_PASSWORD_IN_DICTIONARY"
+    WEAK_PASSWORD = "M_WEAK_PASSWORD"
     INVALID_SIGNATURE = "M_INVALID_SIGNATURE"
     USER_DEACTIVATED = "M_USER_DEACTIVATED"
     BAD_ALIAS = "M_BAD_ALIAS"
@@ -439,6 +446,20 @@ class IncompatibleRoomVersionError(SynapseError):
         return cs_error(self.msg, self.errcode, room_version=self._room_version)


+class PasswordRefusedError(SynapseError):
+    """A password has been refused, either during password reset/change or registration.
+    """
+
+    def __init__(
+        self,
+        msg="This password doesn't comply with the server's policy",
+        errcode=Codes.WEAK_PASSWORD,
+    ):
+        super(PasswordRefusedError, self).__init__(
+            code=400, msg=msg, errcode=errcode,
+        )
+
+
 class RequestSendFailed(RuntimeError):
     """Sending a HTTP request over federation failed due to not being able
     to talk to the remote server for some reason.
diff --git a/synapse/config/password.py b/synapse/config/password.py
index 2a634ac751..9c0ea8c30a 100644
--- a/synapse/config/password.py
+++ b/synapse/config/password.py
@@ -31,6 +31,10 @@ class PasswordConfig(Config):
         self.password_localdb_enabled = password_config.get("localdb_enabled", True)
         self.password_pepper = password_config.get("pepper", "")

+        # Password policy
+        self.password_policy = password_config.get("policy") or {}
+        self.password_policy_enabled = self.password_policy.get("enabled", False)
+
     def generate_config_section(self, config_dir_path, server_name, **kwargs):
         return """\
         password_config:
@@ -48,4 +52,39 @@ class PasswordConfig(Config):
            # DO NOT CHANGE THIS AFTER INITIAL SETUP!
            #
            #pepper: "EVEN_MORE_SECRET"
+
+           # Define and enforce a password policy. Each parameter is optional.
+           # This is an implementation of MSC2000.
+           #
+           policy:
+              # Whether to enforce the password policy.
+              # Defaults to 'false'.
+              #
+              #enabled: true
+
+              # Minimum accepted length for a password.
+              # Defaults to 0.
+              #
+              #minimum_length: 15
+
+              # Whether a password must contain at least one digit.
+              # Defaults to 'false'.
+              #
+              #require_digit: true
+
+              # Whether a password must contain at least one symbol.
+              # A symbol is any character that's not a number or a letter.
+              # Defaults to 'false'.
+              #
+              #require_symbol: true
+
+              # Whether a password must contain at least one lowercase letter.
+              # Defaults to 'false'.
+              #
+              #require_lowercase: true
+
+              # Whether a password must contain at least one uppercase letter.
+              # Defaults to 'false'.
+              #
+              #require_uppercase: true
         """
diff --git a/synapse/handlers/password_policy.py b/synapse/handlers/password_policy.py
new file mode 100644
index 0000000000..d06b110269
--- /dev/null
+++ b/synapse/handlers/password_policy.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import re
+
+from synapse.api.errors import Codes, PasswordRefusedError
+
+logger = logging.getLogger(__name__)
+
+
+class PasswordPolicyHandler(object):
+    def __init__(self, hs):
+        self.policy = hs.config.password_policy
+        self.enabled = hs.config.password_policy_enabled
+
+        # Regexps for the spec'd policy parameters.
+        self.regexp_digit = re.compile("[0-9]")
+        self.regexp_symbol = re.compile("[^a-zA-Z0-9]")
+        self.regexp_uppercase = re.compile("[A-Z]")
+        self.regexp_lowercase = re.compile("[a-z]")
+
+    def validate_password(self, password):
+        """Checks whether a given password complies with the server's policy.
+
+        Args:
+            password (str): The password to check against the server's policy.
+
+        Raises:
+            PasswordRefusedError: The password doesn't comply with the server's policy.
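+
+        A minimal illustrative call, assuming the ``password_policy_handler``
+        accessor that this patch adds to ``HomeServer``::
+
+            hs.get_password_policy_handler().validate_password("Str0ng-passw0rd")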
+ """ + + if not self.enabled: + return + + minimum_accepted_length = self.policy.get("minimum_length", 0) + if len(password) < minimum_accepted_length: + raise PasswordRefusedError( + msg=( + "The password must be at least %d characters long" + % minimum_accepted_length + ), + errcode=Codes.PASSWORD_TOO_SHORT, + ) + + if ( + self.policy.get("require_digit", False) + and self.regexp_digit.search(password) is None + ): + raise PasswordRefusedError( + msg="The password must include at least one digit", + errcode=Codes.PASSWORD_NO_DIGIT, + ) + + if ( + self.policy.get("require_symbol", False) + and self.regexp_symbol.search(password) is None + ): + raise PasswordRefusedError( + msg="The password must include at least one symbol", + errcode=Codes.PASSWORD_NO_SYMBOL, + ) + + if ( + self.policy.get("require_uppercase", False) + and self.regexp_uppercase.search(password) is None + ): + raise PasswordRefusedError( + msg="The password must include at least one uppercase letter", + errcode=Codes.PASSWORD_NO_UPPERCASE, + ) + + if ( + self.policy.get("require_lowercase", False) + and self.regexp_lowercase.search(password) is None + ): + raise PasswordRefusedError( + msg="The password must include at least one lowercase letter", + errcode=Codes.PASSWORD_NO_LOWERCASE, + ) diff --git a/synapse/handlers/set_password.py b/synapse/handlers/set_password.py index 12657ca698..7d1263caf2 100644 --- a/synapse/handlers/set_password.py +++ b/synapse/handlers/set_password.py @@ -32,6 +32,7 @@ class SetPasswordHandler(BaseHandler): super(SetPasswordHandler, self).__init__(hs) self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() + self._password_policy_handler = hs.get_password_policy_handler() @defer.inlineCallbacks def set_password( @@ -44,6 +45,7 @@ class SetPasswordHandler(BaseHandler): if not self.hs.config.password_localdb_enabled: raise SynapseError(403, "Password change disabled", errcode=Codes.FORBIDDEN) + self._password_policy_handler.validate_password(new_password) password_hash = yield self._auth_handler.hash(new_password) try: diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 4a1fc2ec2b..46e458e95b 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -41,6 +41,7 @@ from synapse.rest.client.v2_alpha import ( keys, notifications, openid, + password_policy, read_marker, receipts, register, @@ -118,6 +119,7 @@ class ClientRestResource(JsonResource): capabilities.register_servlets(hs, client_resource) account_validity.register_servlets(hs, client_resource) relations.register_servlets(hs, client_resource) + password_policy.register_servlets(hs, client_resource) # moving to /_synapse/admin synapse.rest.admin.register_servlets_for_client_rest_resource( diff --git a/synapse/rest/client/v2_alpha/password_policy.py b/synapse/rest/client/v2_alpha/password_policy.py new file mode 100644 index 0000000000..968403cca4 --- /dev/null +++ b/synapse/rest/client/v2_alpha/password_policy.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from synapse.http.servlet import RestServlet + +from ._base import client_patterns + +logger = logging.getLogger(__name__) + + +class PasswordPolicyServlet(RestServlet): + PATTERNS = client_patterns("/password_policy$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + super(PasswordPolicyServlet, self).__init__() + + self.policy = hs.config.password_policy + self.enabled = hs.config.password_policy_enabled + + def on_GET(self, request): + if not self.enabled or not self.policy: + return (200, {}) + + policy = {} + + for param in [ + "minimum_length", + "require_digit", + "require_symbol", + "require_lowercase", + "require_uppercase", + ]: + if param in self.policy: + policy["m.%s" % param] = self.policy[param] + + return (200, policy) + + +def register_servlets(hs, http_server): + PasswordPolicyServlet(hs).register(http_server) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 6963d79310..66fc8ec179 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -373,6 +373,7 @@ class RegisterRestServlet(RestServlet): self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() self.ratelimiter = hs.get_registration_ratelimiter() + self.password_policy_handler = hs.get_password_policy_handler() self.clock = hs.get_clock() self._registration_flows = _calculate_registration_flows( @@ -420,6 +421,7 @@ class RegisterRestServlet(RestServlet): or len(body["password"]) > 512 ): raise SynapseError(400, "Invalid password") + self.password_policy_handler.validate_password(body["password"]) desired_username = None if "username" in body: diff --git a/synapse/server.py b/synapse/server.py index 9426eb1672..d0d80e8ac5 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -66,6 +66,7 @@ from synapse.handlers.groups_local import GroupsLocalHandler, GroupsLocalWorkerH from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.message import EventCreationHandler, MessageHandler from synapse.handlers.pagination import PaginationHandler +from synapse.handlers.password_policy import PasswordPolicyHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.profile import BaseProfileHandler, MasterProfileHandler from synapse.handlers.read_marker import ReadMarkerHandler @@ -199,6 +200,7 @@ class HomeServer(object): "account_validity_handler", "saml_handler", "event_client_serializer", + "password_policy_handler", "storage", "replication_streamer", ] @@ -535,6 +537,9 @@ class HomeServer(object): def build_event_client_serializer(self): return EventClientSerializer(self) + def build_password_policy_handler(self): + return PasswordPolicyHandler(self) + def build_storage(self) -> Storage: return Storage(self, self.datastores) diff --git a/tests/rest/client/v2_alpha/test_password_policy.py b/tests/rest/client/v2_alpha/test_password_policy.py new file mode 100644 index 0000000000..c57072f50c --- /dev/null +++ b/tests/rest/client/v2_alpha/test_password_policy.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from synapse.api.constants import LoginType +from synapse.api.errors import Codes +from synapse.rest import admin +from synapse.rest.client.v1 import login +from synapse.rest.client.v2_alpha import account, password_policy, register + +from tests import unittest + + +class PasswordPolicyTestCase(unittest.HomeserverTestCase): + """Tests the password policy feature and its compliance with MSC2000. + + When validating a password, Synapse does the necessary checks in this order: + + 1. Password is long enough + 2. Password contains digit(s) + 3. Password contains symbol(s) + 4. Password contains uppercase letter(s) + 5. Password contains lowercase letter(s) + + For each test below that checks whether a password triggers the right error code, + that test provides a password good enough to pass the previous tests, but not the + one it is currently testing (nor any test that comes afterward). + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + login.register_servlets, + register.register_servlets, + password_policy.register_servlets, + account.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + self.register_url = "/_matrix/client/r0/register" + self.policy = { + "enabled": True, + "minimum_length": 10, + "require_digit": True, + "require_symbol": True, + "require_lowercase": True, + "require_uppercase": True, + } + + config = self.default_config() + config["password_config"] = { + "policy": self.policy, + } + + hs = self.setup_test_homeserver(config=config) + return hs + + def test_get_policy(self): + """Tests if the /password_policy endpoint returns the configured policy.""" + + request, channel = self.make_request( + "GET", "/_matrix/client/r0/password_policy" + ) + self.render(request) + + self.assertEqual(channel.code, 200, channel.result) + self.assertEqual( + channel.json_body, + { + "m.minimum_length": 10, + "m.require_digit": True, + "m.require_symbol": True, + "m.require_lowercase": True, + "m.require_uppercase": True, + }, + channel.result, + ) + + def test_password_too_short(self): + request_data = json.dumps({"username": "kermit", "password": "shorty"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual( + channel.json_body["errcode"], Codes.PASSWORD_TOO_SHORT, channel.result, + ) + + def test_password_no_digit(self): + request_data = json.dumps({"username": "kermit", "password": "longerpassword"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual( + channel.json_body["errcode"], Codes.PASSWORD_NO_DIGIT, channel.result, + ) + + def test_password_no_symbol(self): + request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual( + channel.json_body["errcode"], 
Codes.PASSWORD_NO_SYMBOL, channel.result, + ) + + def test_password_no_uppercase(self): + request_data = json.dumps({"username": "kermit", "password": "l0ngerpassword!"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual( + channel.json_body["errcode"], Codes.PASSWORD_NO_UPPERCASE, channel.result, + ) + + def test_password_no_lowercase(self): + request_data = json.dumps({"username": "kermit", "password": "L0NGERPASSWORD!"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual( + channel.json_body["errcode"], Codes.PASSWORD_NO_LOWERCASE, channel.result, + ) + + def test_password_compliant(self): + request_data = json.dumps({"username": "kermit", "password": "L0ngerpassword!"}) + request, channel = self.make_request("POST", self.register_url, request_data) + self.render(request) + + # Getting a 401 here means the password has passed validation and the server has + # responded with a list of registration flows. + self.assertEqual(channel.code, 401, channel.result) + + def test_password_change(self): + """This doesn't test every possible use case, only that hitting /account/password + triggers the password validation code. + """ + compliant_password = "C0mpl!antpassword" + not_compliant_password = "notcompliantpassword" + + user_id = self.register_user("kermit", compliant_password) + tok = self.login("kermit", compliant_password) + + request_data = json.dumps( + { + "new_password": not_compliant_password, + "auth": { + "password": compliant_password, + "type": LoginType.PASSWORD, + "user": user_id, + }, + } + ) + request, channel = self.make_request( + "POST", + "/_matrix/client/r0/account/password", + request_data, + access_token=tok, + ) + self.render(request) + + self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.json_body["errcode"], Codes.PASSWORD_NO_DIGIT) -- cgit 1.5.1 From fa4f12102d52b75d252d9209b45251d2b1591fdf Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 Mar 2020 15:05:26 -0400 Subject: Refactor the CAS code (move the logic out of the REST layer to a handler) (#7136) --- changelog.d/7136.misc | 1 + synapse/handlers/cas_handler.py | 204 ++++++++++++++++++++++++++++++++++++++++ synapse/rest/client/v1/login.py | 171 ++++----------------------------- synapse/server.py | 5 + tox.ini | 1 + 5 files changed, 227 insertions(+), 155 deletions(-) create mode 100644 changelog.d/7136.misc create mode 100644 synapse/handlers/cas_handler.py (limited to 'synapse/server.py') diff --git a/changelog.d/7136.misc b/changelog.d/7136.misc new file mode 100644 index 0000000000..3f666d25fd --- /dev/null +++ b/changelog.d/7136.misc @@ -0,0 +1 @@ +Refactored the CAS authentication logic to a separate class. diff --git a/synapse/handlers/cas_handler.py b/synapse/handlers/cas_handler.py new file mode 100644 index 0000000000..f8dc274b78 --- /dev/null +++ b/synapse/handlers/cas_handler.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import xml.etree.ElementTree as ET
+from typing import AnyStr, Dict, Optional, Tuple
+
+from six.moves import urllib
+
+from twisted.web.client import PartialDownloadError
+
+from synapse.api.errors import Codes, LoginError
+from synapse.http.site import SynapseRequest
+from synapse.types import UserID, map_username_to_mxid_localpart
+
+logger = logging.getLogger(__name__)
+
+
+class CasHandler:
+    """
+    Utility class to handle the response from a CAS SSO service.
+
+    Args:
+        hs (synapse.server.HomeServer)
+    """
+
+    def __init__(self, hs):
+        self._hostname = hs.hostname
+        self._auth_handler = hs.get_auth_handler()
+        self._registration_handler = hs.get_registration_handler()
+
+        self._cas_server_url = hs.config.cas_server_url
+        self._cas_service_url = hs.config.cas_service_url
+        self._cas_displayname_attribute = hs.config.cas_displayname_attribute
+        self._cas_required_attributes = hs.config.cas_required_attributes
+
+        self._http_client = hs.get_proxied_http_client()
+
+    def _build_service_param(self, client_redirect_url: AnyStr) -> str:
+        return "%s%s?%s" % (
+            self._cas_service_url,
+            "/_matrix/client/r0/login/cas/ticket",
+            urllib.parse.urlencode({"redirectUrl": client_redirect_url}),
+        )
+
+    async def _handle_cas_response(
+        self, request: SynapseRequest, cas_response_body: str, client_redirect_url: str
+    ) -> None:
+        """
+        Retrieves the user and display name from the CAS response and continues with the authentication.
+
+        Args:
+            request: The original client request.
+            cas_response_body: The response from the CAS server.
+            client_redirect_url: The URL to redirect the client to when
+                everything is done.
+        """
+        user, attributes = self._parse_cas_response(cas_response_body)
+        displayname = attributes.pop(self._cas_displayname_attribute, None)
+
+        for required_attribute, required_value in self._cas_required_attributes.items():
+            # If required attribute was not in CAS Response - Forbidden
+            if required_attribute not in attributes:
+                raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED)
+
+            # Also need to check value
+            if required_value is not None:
+                actual_value = attributes[required_attribute]
+                # If required attribute value does not match expected - Forbidden
+                if required_value != actual_value:
+                    raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED)
+
+        await self._on_successful_auth(user, request, client_redirect_url, displayname)
+
+    def _parse_cas_response(
+        self, cas_response_body: str
+    ) -> Tuple[str, Dict[str, Optional[str]]]:
+        """
+        Retrieve the user and other parameters from the CAS response.
+
+        Args:
+            cas_response_body: The response from the CAS query.
+
+        Returns:
+            A tuple of the user and a mapping of other attributes.
+ """ + user = None + attributes = {} + try: + root = ET.fromstring(cas_response_body) + if not root.tag.endswith("serviceResponse"): + raise Exception("root of CAS response is not serviceResponse") + success = root[0].tag.endswith("authenticationSuccess") + for child in root[0]: + if child.tag.endswith("user"): + user = child.text + if child.tag.endswith("attributes"): + for attribute in child: + # ElementTree library expands the namespace in + # attribute tags to the full URL of the namespace. + # We don't care about namespace here and it will always + # be encased in curly braces, so we remove them. + tag = attribute.tag + if "}" in tag: + tag = tag.split("}")[1] + attributes[tag] = attribute.text + if user is None: + raise Exception("CAS response does not contain user") + except Exception: + logger.exception("Error parsing CAS response") + raise LoginError(401, "Invalid CAS response", errcode=Codes.UNAUTHORIZED) + if not success: + raise LoginError( + 401, "Unsuccessful CAS response", errcode=Codes.UNAUTHORIZED + ) + return user, attributes + + async def _on_successful_auth( + self, + username: str, + request: SynapseRequest, + client_redirect_url: str, + user_display_name: Optional[str] = None, + ) -> None: + """Called once the user has successfully authenticated with the SSO. + + Registers the user if necessary, and then returns a redirect (with + a login token) to the client. + + Args: + username: the remote user id. We'll map this onto + something sane for a MXID localpath. + + request: the incoming request from the browser. We'll + respond to it with a redirect. + + client_redirect_url: the redirect_url the client gave us when + it first started the process. + + user_display_name: if set, and we have to register a new user, + we will set their displayname to this. + """ + localpart = map_username_to_mxid_localpart(username) + user_id = UserID(localpart, self._hostname).to_string() + registered_user_id = await self._auth_handler.check_user_exists(user_id) + if not registered_user_id: + registered_user_id = await self._registration_handler.register_user( + localpart=localpart, default_display_name=user_display_name + ) + + self._auth_handler.complete_sso_login( + registered_user_id, request, client_redirect_url + ) + + def handle_redirect_request(self, client_redirect_url: bytes) -> bytes: + """ + Generates a URL to the CAS server where the client should be redirected. + + Args: + client_redirect_url: The final URL the client should go to after the + user has negotiated SSO. + + Returns: + The URL to redirect to. + """ + args = urllib.parse.urlencode( + {"service": self._build_service_param(client_redirect_url)} + ) + + return ("%s/login?%s" % (self._cas_server_url, args)).encode("ascii") + + async def handle_ticket_request( + self, request: SynapseRequest, client_redirect_url: str, ticket: str + ) -> None: + """ + Validates a CAS ticket sent by the client for login/registration. + + On a successful request, writes a redirect to the request. 
+ """ + uri = self._cas_server_url + "/proxyValidate" + args = { + "ticket": ticket, + "service": self._build_service_param(client_redirect_url), + } + try: + body = await self._http_client.get_raw(uri, args) + except PartialDownloadError as pde: + # Twisted raises this error if the connection is closed, + # even if that's being used old-http style to signal end-of-data + body = pde.response + + await self._handle_cas_response(request, body, client_redirect_url) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 56d713462a..59593cbf6e 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -14,11 +14,6 @@ # limitations under the License. import logging -import xml.etree.ElementTree as ET - -from six.moves import urllib - -from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError from synapse.api.ratelimiting import Ratelimiter @@ -28,9 +23,10 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.http.site import SynapseRequest from synapse.rest.client.v2_alpha._base import client_patterns from synapse.rest.well_known import WellKnownBuilder -from synapse.types import UserID, map_username_to_mxid_localpart +from synapse.types import UserID from synapse.util.msisdn import phone_number_to_msisdn logger = logging.getLogger(__name__) @@ -72,14 +68,6 @@ def login_id_thirdparty_from_phone(identifier): return {"type": "m.id.thirdparty", "medium": "msisdn", "address": msisdn} -def build_service_param(cas_service_url, client_redirect_url): - return "%s%s?redirectUrl=%s" % ( - cas_service_url, - "/_matrix/client/r0/login/cas/ticket", - urllib.parse.quote(client_redirect_url, safe=""), - ) - - class LoginRestServlet(RestServlet): PATTERNS = client_patterns("/login$", v1=True) CAS_TYPE = "m.login.cas" @@ -409,7 +397,7 @@ class BaseSSORedirectServlet(RestServlet): PATTERNS = client_patterns("/login/(cas|sso)/redirect", v1=True) - def on_GET(self, request): + def on_GET(self, request: SynapseRequest): args = request.args if b"redirectUrl" not in args: return 400, "Redirect URL not specified for SSO auth" @@ -418,15 +406,15 @@ class BaseSSORedirectServlet(RestServlet): request.redirect(sso_url) finish_request(request) - def get_sso_url(self, client_redirect_url): + def get_sso_url(self, client_redirect_url: bytes) -> bytes: """Get the URL to redirect to, to perform SSO auth Args: - client_redirect_url (bytes): the URL that we should redirect the + client_redirect_url: the URL that we should redirect the client to when everything is done Returns: - bytes: URL to redirect to + URL to redirect to """ # to be implemented by subclasses raise NotImplementedError() @@ -434,16 +422,10 @@ class BaseSSORedirectServlet(RestServlet): class CasRedirectServlet(BaseSSORedirectServlet): def __init__(self, hs): - super(CasRedirectServlet, self).__init__() - self.cas_server_url = hs.config.cas_server_url - self.cas_service_url = hs.config.cas_service_url + self._cas_handler = hs.get_cas_handler() - def get_sso_url(self, client_redirect_url): - args = urllib.parse.urlencode( - {"service": build_service_param(self.cas_service_url, client_redirect_url)} - ) - - return "%s/login?%s" % (self.cas_server_url, args) + def get_sso_url(self, client_redirect_url: bytes) -> bytes: + return self._cas_handler.handle_redirect_request(client_redirect_url) class CasTicketServlet(RestServlet): @@ -451,81 +433,15 @@ class CasTicketServlet(RestServlet): def __init__(self, hs): 
super(CasTicketServlet, self).__init__() - self.cas_server_url = hs.config.cas_server_url - self.cas_service_url = hs.config.cas_service_url - self.cas_displayname_attribute = hs.config.cas_displayname_attribute - self.cas_required_attributes = hs.config.cas_required_attributes - self._sso_auth_handler = SSOAuthHandler(hs) - self._http_client = hs.get_proxied_http_client() - - async def on_GET(self, request): - client_redirect_url = parse_string(request, "redirectUrl", required=True) - uri = self.cas_server_url + "/proxyValidate" - args = { - "ticket": parse_string(request, "ticket", required=True), - "service": build_service_param(self.cas_service_url, client_redirect_url), - } - try: - body = await self._http_client.get_raw(uri, args) - except PartialDownloadError as pde: - # Twisted raises this error if the connection is closed, - # even if that's being used old-http style to signal end-of-data - body = pde.response - result = await self.handle_cas_response(request, body, client_redirect_url) - return result + self._cas_handler = hs.get_cas_handler() - def handle_cas_response(self, request, cas_response_body, client_redirect_url): - user, attributes = self.parse_cas_response(cas_response_body) - displayname = attributes.pop(self.cas_displayname_attribute, None) - - for required_attribute, required_value in self.cas_required_attributes.items(): - # If required attribute was not in CAS Response - Forbidden - if required_attribute not in attributes: - raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED) - - # Also need to check value - if required_value is not None: - actual_value = attributes[required_attribute] - # If required attribute value does not match expected - Forbidden - if required_value != actual_value: - raise LoginError(401, "Unauthorized", errcode=Codes.UNAUTHORIZED) - - return self._sso_auth_handler.on_successful_auth( - user, request, client_redirect_url, displayname + async def on_GET(self, request: SynapseRequest) -> None: + client_redirect_url = parse_string(request, "redirectUrl", required=True) + ticket = parse_string(request, "ticket", required=True) + await self._cas_handler.handle_ticket_request( + request, client_redirect_url, ticket ) - def parse_cas_response(self, cas_response_body): - user = None - attributes = {} - try: - root = ET.fromstring(cas_response_body) - if not root.tag.endswith("serviceResponse"): - raise Exception("root of CAS response is not serviceResponse") - success = root[0].tag.endswith("authenticationSuccess") - for child in root[0]: - if child.tag.endswith("user"): - user = child.text - if child.tag.endswith("attributes"): - for attribute in child: - # ElementTree library expands the namespace in - # attribute tags to the full URL of the namespace. - # We don't care about namespace here and it will always - # be encased in curly braces, so we remove them. 
- tag = attribute.tag - if "}" in tag: - tag = tag.split("}")[1] - attributes[tag] = attribute.text - if user is None: - raise Exception("CAS response does not contain user") - except Exception: - logger.exception("Error parsing CAS response") - raise LoginError(401, "Invalid CAS response", errcode=Codes.UNAUTHORIZED) - if not success: - raise LoginError( - 401, "Unsuccessful CAS response", errcode=Codes.UNAUTHORIZED - ) - return user, attributes - class SAMLRedirectServlet(BaseSSORedirectServlet): PATTERNS = client_patterns("/login/sso/redirect", v1=True) @@ -533,65 +449,10 @@ class SAMLRedirectServlet(BaseSSORedirectServlet): def __init__(self, hs): self._saml_handler = hs.get_saml_handler() - def get_sso_url(self, client_redirect_url): + def get_sso_url(self, client_redirect_url: bytes) -> bytes: return self._saml_handler.handle_redirect_request(client_redirect_url) -class SSOAuthHandler(object): - """ - Utility class for Resources and Servlets which handle the response from a SSO - service - - Args: - hs (synapse.server.HomeServer) - """ - - def __init__(self, hs): - self._hostname = hs.hostname - self._auth_handler = hs.get_auth_handler() - self._registration_handler = hs.get_registration_handler() - self._macaroon_gen = hs.get_macaroon_generator() - - # cast to tuple for use with str.startswith - self._whitelisted_sso_clients = tuple(hs.config.sso_client_whitelist) - - async def on_successful_auth( - self, username, request, client_redirect_url, user_display_name=None - ): - """Called once the user has successfully authenticated with the SSO. - - Registers the user if necessary, and then returns a redirect (with - a login token) to the client. - - Args: - username (unicode|bytes): the remote user id. We'll map this onto - something sane for a MXID localpath. - - request (SynapseRequest): the incoming request from the browser. We'll - respond to it with a redirect. - - client_redirect_url (unicode): the redirect_url the client gave us when - it first started the process. - - user_display_name (unicode|None): if set, and we have to register a new user, - we will set their displayname to this. - - Returns: - Deferred[none]: Completes once we have handled the request. 
- """ - localpart = map_username_to_mxid_localpart(username) - user_id = UserID(localpart, self._hostname).to_string() - registered_user_id = await self._auth_handler.check_user_exists(user_id) - if not registered_user_id: - registered_user_id = await self._registration_handler.register_user( - localpart=localpart, default_display_name=user_display_name - ) - - self._auth_handler.complete_sso_login( - registered_user_id, request, client_redirect_url - ) - - def register_servlets(hs, http_server): LoginRestServlet(hs).register(http_server) if hs.config.cas_enabled: diff --git a/synapse/server.py b/synapse/server.py index d0d80e8ac5..c7ca2bda0d 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -56,6 +56,7 @@ from synapse.handlers.account_validity import AccountValidityHandler from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator +from synapse.handlers.cas_handler import CasHandler from synapse.handlers.deactivate_account import DeactivateAccountHandler from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler from synapse.handlers.devicemessage import DeviceMessageHandler @@ -198,6 +199,7 @@ class HomeServer(object): "sendmail", "registration_handler", "account_validity_handler", + "cas_handler", "saml_handler", "event_client_serializer", "password_policy_handler", @@ -529,6 +531,9 @@ class HomeServer(object): def build_account_validity_handler(self): return AccountValidityHandler(self) + def build_cas_handler(self): + return CasHandler(self) + def build_saml_handler(self): from synapse.handlers.saml_handler import SamlHandler diff --git a/tox.ini b/tox.ini index 8e3f09e638..a79fc93b57 100644 --- a/tox.ini +++ b/tox.ini @@ -186,6 +186,7 @@ commands = mypy \ synapse/federation/sender \ synapse/federation/transport \ synapse/handlers/auth.py \ + synapse/handlers/cas_handler.py \ synapse/handlers/directory.py \ synapse/handlers/presence.py \ synapse/handlers/sync.py \ -- cgit 1.5.1 From 4f21c33be301b8ea6369039c3ad8baa51878e4d5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 30 Mar 2020 16:37:24 +0100 Subject: Remove usage of "conn_id" for presence. (#7128) * Remove `conn_id` usage for UserSyncCommand. Each tcp replication connection is assigned a "conn_id", which is used to give an ID to a remotely connected worker. In a redis world, there will no longer be a one to one mapping between connection and instance, so instead we need to replace such usages with an ID generated by the remote instances and included in the replicaiton commands. This really only effects UserSyncCommand. * Add CLEAR_USER_SYNCS command that is sent on shutdown. This should help with the case where a synchrotron gets restarted gracefully, rather than rely on 5 minute timeout. 
--- changelog.d/7128.misc | 1 + docs/tcp_replication.md | 6 ++++++ synapse/app/generic_worker.py | 20 ++++++++++++++++---- synapse/replication/tcp/client.py | 6 ++++-- synapse/replication/tcp/commands.py | 36 ++++++++++++++++++++++++++++++++---- synapse/replication/tcp/protocol.py | 9 +++++++-- synapse/replication/tcp/resource.py | 17 +++++++---------- synapse/server.py | 11 +++++++++++ synapse/server.pyi | 2 ++ 9 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 changelog.d/7128.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7128.misc b/changelog.d/7128.misc new file mode 100644 index 0000000000..5703f6d2ec --- /dev/null +++ b/changelog.d/7128.misc @@ -0,0 +1 @@ +Add explicit `instance_id` for USER_SYNC commands and remove implicit `conn_id` usage. diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md index d4f7d9ec18..3be8e50c4c 100644 --- a/docs/tcp_replication.md +++ b/docs/tcp_replication.md @@ -198,6 +198,12 @@ Asks the server for the current position of all streams. A user has started or stopped syncing +#### CLEAR_USER_SYNC (C) + + The server should clear all associated user sync data from the worker. + + This is used when a worker is shutting down. + #### FEDERATION_ACK (C) Acknowledge receipt of some federation data diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index fba7ad9551..1ee266f7c5 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -65,6 +65,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import ( AccountDataStream, DeviceListsStream, @@ -124,7 +125,6 @@ from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole -from synapse.util.stringutils import random_string from synapse.util.versionstring import get_version_string logger = logging.getLogger("synapse.app.generic_worker") @@ -233,6 +233,7 @@ class GenericWorkerPresence(object): self.user_to_num_current_syncs = {} self.clock = hs.get_clock() self.notifier = hs.get_notifier() + self.instance_id = hs.get_instance_id() active_presence = self.store.take_presence_startup_info() self.user_to_current_state = {state.user_id: state for state in active_presence} @@ -245,13 +246,24 @@ class GenericWorkerPresence(object): self.send_stop_syncing, UPDATE_SYNCING_USERS_MS ) - self.process_id = random_string(16) - logger.info("Presence process_id is %r", self.process_id) + hs.get_reactor().addSystemEventTrigger( + "before", + "shutdown", + run_as_background_process, + "generic_presence.on_shutdown", + self._on_shutdown, + ) + + def _on_shutdown(self): + if self.hs.config.use_presence: + self.hs.get_tcp_replication().send_command( + ClearUserSyncsCommand(self.instance_id) + ) def send_user_sync(self, user_id, is_syncing, last_sync_ms): if self.hs.config.use_presence: self.hs.get_tcp_replication().send_user_sync( - user_id, is_syncing, last_sync_ms + self.instance_id, user_id, is_syncing, last_sync_ms ) def mark_as_coming_online(self, user_id): diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 7e7ad0f798..e86d9805f1 
100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -189,10 +189,12 @@ class ReplicationClientHandler(AbstractReplicationClientHandler): """ self.send_command(FederationAckCommand(token)) - def send_user_sync(self, user_id, is_syncing, last_sync_ms): + def send_user_sync(self, instance_id, user_id, is_syncing, last_sync_ms): """Poke the master that a user has started/stopped syncing. """ - self.send_command(UserSyncCommand(user_id, is_syncing, last_sync_ms)) + self.send_command( + UserSyncCommand(instance_id, user_id, is_syncing, last_sync_ms) + ) def send_remove_pusher(self, app_id, push_key, user_id): """Poke the master to remove a pusher for a user diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 5a6b734094..e4eec643f7 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -207,30 +207,32 @@ class UserSyncCommand(Command): Format:: - USER_SYNC <user_id> <state> <last_sync_ms> + USER_SYNC <instance_id> <user_id> <state> <last_sync_ms> Where <state> is either "start" or "stop" """ NAME = "USER_SYNC" - def __init__(self, user_id, is_syncing, last_sync_ms): + def __init__(self, instance_id, user_id, is_syncing, last_sync_ms): + self.instance_id = instance_id self.user_id = user_id self.is_syncing = is_syncing self.last_sync_ms = last_sync_ms @classmethod def from_line(cls, line): - user_id, state, last_sync_ms = line.split(" ", 2) + instance_id, user_id, state, last_sync_ms = line.split(" ", 3) if state not in ("start", "end"): raise Exception("Invalid USER_SYNC state %r" % (state,)) - return cls(user_id, state == "start", int(last_sync_ms)) + return cls(instance_id, user_id, state == "start", int(last_sync_ms)) def to_line(self): return " ".join( ( + self.instance_id, self.user_id, "start" if self.is_syncing else "end", str(self.last_sync_ms), @@ -238,6 +240,30 @@ class UserSyncCommand(Command): ) +class ClearUserSyncsCommand(Command): + """Sent by the client to inform the server that it should drop all + information about syncing users sent by the client. + + Mainly used when the client is about to shut down. + + Format:: + + CLEAR_USER_SYNC <instance_id> + """ + + NAME = "CLEAR_USER_SYNC" + + def __init__(self, instance_id): + self.instance_id = instance_id + + @classmethod + def from_line(cls, line): + return cls(line) + + def to_line(self): + return self.instance_id + + class FederationAckCommand(Command): """Sent by the client when it has processed up to a given point in the federation stream. This allows the master to drop in-memory caches of the @@ -398,6 +424,7 @@ _COMMANDS = ( InvalidateCacheCommand, UserIpCommand, RemoteServerUpCommand, + ClearUserSyncsCommand, ) # type: Tuple[Type[Command], ...] # Map of command name to command type.
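To illustrate how the new payloads serialize (example values only; `to_line` emits everything that follows the command name on the wire)::

    cmd = UserSyncCommand("worker1", "@user:example.com", True, 1490197665618)
    assert cmd.to_line() == "worker1 @user:example.com start 1490197665618"
    assert UserSyncCommand.from_line(cmd.to_line()).instance_id == "worker1"

    # CLEAR_USER_SYNC carries only the instance_id:
    assert ClearUserSyncsCommand("worker1").to_line() == "worker1"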
@@ -420,6 +447,7 @@ VALID_CLIENT_COMMANDS = ( ReplicateCommand.NAME, PingCommand.NAME, UserSyncCommand.NAME, + ClearUserSyncsCommand.NAME, FederationAckCommand.NAME, RemovePusherCommand.NAME, InvalidateCacheCommand.NAME, diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index f81d2e2442..dae246825f 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -423,9 +423,12 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): async def on_USER_SYNC(self, cmd): await self.streamer.on_user_sync( - self.conn_id, cmd.user_id, cmd.is_syncing, cmd.last_sync_ms + cmd.instance_id, cmd.user_id, cmd.is_syncing, cmd.last_sync_ms ) + async def on_CLEAR_USER_SYNC(self, cmd): + await self.streamer.on_clear_user_syncs(cmd.instance_id) + async def on_REPLICATE(self, cmd): # Subscribe to all streams we're publishing to. for stream_name in self.streamer.streams_by_name: @@ -551,6 +554,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): ): BaseReplicationStreamProtocol.__init__(self, clock) + self.instance_id = hs.get_instance_id() + self.client_name = client_name self.server_name = server_name self.handler = handler @@ -580,7 +585,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): currently_syncing = self.handler.get_currently_syncing_users() now = self.clock.time_msec() for user_id in currently_syncing: - self.send_command(UserSyncCommand(user_id, True, now)) + self.send_command(UserSyncCommand(self.instance_id, user_id, True, now)) # We've now finished connecting to so inform the client handler self.handler.update_connection(self) diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 4374e99e32..8b6067e20d 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -251,14 +251,19 @@ class ReplicationStreamer(object): self.federation_sender.federation_ack(token) @measure_func("repl.on_user_sync") - async def on_user_sync(self, conn_id, user_id, is_syncing, last_sync_ms): + async def on_user_sync(self, instance_id, user_id, is_syncing, last_sync_ms): """A client has started/stopped syncing on a worker. """ user_sync_counter.inc() await self.presence_handler.update_external_syncs_row( - conn_id, user_id, is_syncing, last_sync_ms + instance_id, user_id, is_syncing, last_sync_ms ) + async def on_clear_user_syncs(self, instance_id): + """A replication client wants us to drop all their UserSync data. + """ + await self.presence_handler.update_external_syncs_clear(instance_id) + @measure_func("repl.on_remove_pusher") async def on_remove_pusher(self, app_id, push_key, user_id): """A client has asked us to remove a pusher @@ -321,14 +326,6 @@ class ReplicationStreamer(object): except ValueError: pass - # We need to tell the presence handler that the connection has been - # lost so that it can handle any ongoing syncs on that connection. 
- run_as_background_process( - "update_external_syncs_clear", - self.presence_handler.update_external_syncs_clear, - connection.conn_id, - ) - def _batch_updates(updates): """Takes a list of updates of form [(token, row)] and sets the token to diff --git a/synapse/server.py b/synapse/server.py index c7ca2bda0d..cd86475d6b 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -103,6 +103,7 @@ from synapse.storage import DataStores, Storage from synapse.streams.events import EventSources from synapse.util import Clock from synapse.util.distributor import Distributor +from synapse.util.stringutils import random_string logger = logging.getLogger(__name__) @@ -230,6 +231,8 @@ class HomeServer(object): self._listening_services = [] self.start_time = None + self.instance_id = random_string(5) + self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() @@ -242,6 +245,14 @@ class HomeServer(object): for depname in kwargs: setattr(self, depname, kwargs[depname]) + def get_instance_id(self): + """A unique ID for this synapse process instance. + + This is used to distinguish running instances in worker-based + deployments. + """ + return self.instance_id + def setup(self): logger.info("Setting up.") self.start_time = int(self.get_clock().time()) diff --git a/synapse/server.pyi b/synapse/server.pyi index 3844f0e12f..9d1dfa71e7 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -114,3 +114,5 @@ class HomeServer(object): pass def is_mine_id(self, domain_id: str) -> bool: pass + def get_instance_id(self) -> str: + pass -- cgit 1.5.1 From 62a7289133840b4f4a55844b4f24ec664c3d917b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 31 Mar 2020 13:09:16 +0100 Subject: Fix a bug which could cause incorrect 'cyclic dependency' error. (#7178) If there was an exception setting up one of the attributes of the Homeserver god object, then future attempts to fetch that attribute would raise a confusing "Cyclic dependency" error. Let's make sure that we clear the `building` flag so that we just get the original exception. Ref: #7169 --- changelog.d/7178.bugfix | 1 + synapse/server.py | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 changelog.d/7178.bugfix (limited to 'synapse/server.py') diff --git a/changelog.d/7178.bugfix b/changelog.d/7178.bugfix new file mode 100644 index 0000000000..35ea645d75 --- /dev/null +++ b/changelog.d/7178.bugfix @@ -0,0 +1 @@ +Fix a bug which could cause incorrect 'cyclic dependency' error. 
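To make the failure mode concrete, a minimal sketch (the failing builder and the elided setup here are hypothetical)::

    class BrokenHomeServer(HomeServer):
        def build_state_handler(self):
            raise RuntimeError("boom")

    hs = BrokenHomeServer("test")  # remaining setup elided

    try:
        hs.get_state_handler()  # raises the real RuntimeError("boom")
    except RuntimeError:
        pass

    # Before the change below, "state_handler" was left in hs._building,
    # so this second call raised a confusing "Cyclic dependency while
    # building state_handler" instead of the original exception.
    hs.get_state_handler()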
diff --git a/synapse/server.py b/synapse/server.py index cd86475d6b..9228e1c892 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -583,24 +583,22 @@ def _make_dependency_method(depname): try: builder = getattr(hs, "build_%s" % (depname)) except AttributeError: - builder = None + raise NotImplementedError( + "%s has no %s nor a builder for it" % (type(hs).__name__, depname) + ) - if builder: - # Prevent cyclic dependencies from deadlocking - if depname in hs._building: - raise ValueError("Cyclic dependency while building %s" % (depname,)) - hs._building[depname] = 1 + # Prevent cyclic dependencies from deadlocking + if depname in hs._building: + raise ValueError("Cyclic dependency while building %s" % (depname,)) + hs._building[depname] = 1 + try: dep = builder() setattr(hs, depname, dep) - + finally: del hs._building[depname] - return dep - - raise NotImplementedError( - "%s has no %s nor a builder for it" % (type(hs).__name__, depname) - ) + return dep setattr(HomeServer, "get_%s" % (depname), _get) -- cgit 1.5.1 From 5016b162fcf0372fe35404c64f80aeaf21461f31 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 6 Apr 2020 09:58:42 +0100 Subject: Move client command handling out of TCP protocol (#7185) The aim here is to move the command handling out of the TCP protocol classes and to also merge the client and server command handling (so that we can reuse them for the redis protocol). This PR simply moves the client paths to the new `ReplicationCommandHandler`; a future PR will move the server paths too. --- changelog.d/7185.misc | 1 + synapse/app/admin_cmd.py | 12 -- synapse/app/generic_worker.py | 9 +- synapse/replication/tcp/__init__.py | 30 ++- synapse/replication/tcp/client.py | 179 +++--------------- synapse/replication/tcp/handler.py | 252 +++++++++++++++++++++++++ synapse/replication/tcp/protocol.py | 197 +++---------------- synapse/server.py | 8 +- synapse/server.pyi | 7 +- tests/replication/slave/storage/_base.py | 15 +- tests/replication/tcp/streams/_base.py | 38 ++-- tests/replication/tcp/streams/test_receipts.py | 1 - 12 files changed, 378 insertions(+), 371 deletions(-) create mode 100644 changelog.d/7185.misc create mode 100644 synapse/replication/tcp/handler.py (limited to 'synapse/server.py') diff --git a/changelog.d/7185.misc b/changelog.d/7185.misc new file mode 100644 index 0000000000..deb9ca7021 --- /dev/null +++ b/changelog.d/7185.misc @@ -0,0 +1 @@ +Move client command handling out of TCP protocol.
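The heart of the new arrangement is the dynamic dispatch shown in the protocol diff below; stripped of the protocol-level special cases, it amounts to this sketch::

    # The protocol no longer interprets commands itself: it looks up an
    # "on_<NAME>" coroutine on the shared command handler, so the same
    # handler can sit behind a TCP connection today and redis later.
    async def handle_command(handler, cmd):
        cmd_func = getattr(handler, "on_%s" % (cmd.NAME,), None)
        if cmd_func:
            await cmd_func(cmd)
        else:
            logger.warning("Unhandled command: %r", cmd)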
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 1c7c6ec0c8..a37818fe9a 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -43,7 +43,6 @@ from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore -from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer from synapse.util.logcontext import LoggingContext from synapse.util.versionstring import get_version_string @@ -79,17 +78,6 @@ class AdminCmdServer(HomeServer): def start_listening(self, listeners): pass - def build_tcp_replication(self): - return AdminCmdReplicationHandler(self) - - -class AdminCmdReplicationHandler(ReplicationClientHandler): - async def on_rdata(self, stream_name, token, rows): - pass - - def get_streams_to_replicate(self): - return {} - @defer.inlineCallbacks def export_data_command(hs, args): diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 174bef360f..dcd0709a02 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -64,7 +64,7 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore -from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.client import ReplicationDataHandler from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import ( AccountDataStream, @@ -603,7 +603,7 @@ class GenericWorkerServer(HomeServer): def remove_pusher(self, app_id, push_key, user_id): self.get_tcp_replication().send_remove_pusher(app_id, push_key, user_id) - def build_tcp_replication(self): + def build_replication_data_handler(self): return GenericWorkerReplicationHandler(self) def build_presence_handler(self): @@ -613,7 +613,7 @@ class GenericWorkerServer(HomeServer): return GenericWorkerTyping(self) -class GenericWorkerReplicationHandler(ReplicationClientHandler): +class GenericWorkerReplicationHandler(ReplicationDataHandler): def __init__(self, hs): super(GenericWorkerReplicationHandler, self).__init__(hs.get_datastore()) @@ -644,9 +644,6 @@ class GenericWorkerReplicationHandler(ReplicationClientHandler): args.update(self.send_handler.stream_positions()) return args - def get_currently_syncing_users(self): - return self.presence_handler.get_currently_syncing_users() - async def process_and_notify(self, stream_name, token, rows): try: if self.send_handler: diff --git a/synapse/replication/tcp/__init__.py b/synapse/replication/tcp/__init__.py index 81c2ea7ee9..523a1358d4 100644 --- a/synapse/replication/tcp/__init__.py +++ b/synapse/replication/tcp/__init__.py @@ -20,11 +20,31 @@ Further details can be found in docs/tcp_replication.rst Structure of the module: - * client.py - the client classes used for workers to connect to master + * handler.py - the classes used to handle sending/receiving commands to + replication * command.py - the definitions of all the valid commands - * protocol.py - contains bot the client and server protocol implementations, - these should not be used directly - * resource.py - the server classes 
that accept and handle client connections - * streams.py - the definitions of all the valid streams + * protocol.py - the TCP protocol classes + * resource.py - handles streaming stream updates to replication + * streams/ - the definitions of all the valid streams + +The general interaction of the classes is: + + +---------------------+ + | ReplicationStreamer | + +---------------------+ + | + v + +---------------------------+ +----------------------+ + | ReplicationCommandHandler |---->|ReplicationDataHandler| + +---------------------------+ +----------------------+ + | ^ + v | + +-------------+ + | Protocols | + | (TCP/redis) | + +-------------+ + +Where the ReplicationDataHandler (or subclasses) handles incoming stream +updates. """ diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e86d9805f1..700ae79158 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -16,26 +16,16 @@ """ import logging -from typing import Dict, List, Optional +from typing import TYPE_CHECKING, Dict -from twisted.internet import defer from twisted.internet.protocol import ReconnectingClientFactory from synapse.replication.slave.storage._base import BaseSlavedStore -from synapse.replication.tcp.protocol import ( - AbstractReplicationClientHandler, - ClientReplicationStreamProtocol, -) - -from .commands import ( - Command, - FederationAckCommand, - InvalidateCacheCommand, - RemoteServerUpCommand, - RemovePusherCommand, - UserIpCommand, - UserSyncCommand, -) +from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol + +if TYPE_CHECKING: + from synapse.server import HomeServer + from synapse.replication.tcp.handler import ReplicationCommandHandler logger = logging.getLogger(__name__) @@ -44,16 +34,20 @@ class ReplicationClientFactory(ReconnectingClientFactory): """Factory for building connections to the master. Will reconnect if the connection is lost. - Accepts a handler that will be called when new data is available or data - is required. + Accepts a handler that is passed to `ClientReplicationStreamProtocol`. """ initialDelay = 0.1 maxDelay = 1 # Try at least once every N seconds - def __init__(self, hs, client_name, handler: AbstractReplicationClientHandler): + def __init__( + self, + hs: "HomeServer", + client_name: str, + command_handler: "ReplicationCommandHandler", + ): self.client_name = client_name - self.handler = handler + self.command_handler = command_handler self.server_name = hs.config.server_name self.hs = hs self._clock = hs.get_clock() # As self.clock is defined in super class @@ -66,7 +60,11 @@ class ReplicationClientFactory(ReconnectingClientFactory): def buildProtocol(self, addr): logger.info("Connected to replication: %r", addr) return ClientReplicationStreamProtocol( - self.hs, self.client_name, self.server_name, self._clock, self.handler, + self.hs, + self.client_name, + self.server_name, + self._clock, + self.command_handler, ) def clientConnectionLost(self, connector, reason): @@ -78,41 +76,17 @@ class ReplicationClientFactory(ReconnectingClientFactory): ReconnectingClientFactory.clientConnectionFailed(self, connector, reason) -class ReplicationClientHandler(AbstractReplicationClientHandler): - """A base handler that can be passed to the ReplicationClientFactory. +class ReplicationDataHandler: + """Handles incoming stream updates from replication. - By default proxies incoming replication data to the SlaveStore. + This instance notifies the slave data store about updates.
Can be subclassed + to handle updates in additional ways. """ def __init__(self, store: BaseSlavedStore): self.store = store - # The current connection. None if we are currently (re)connecting - self.connection = None - - # Any pending commands to be sent once a new connection has been - # established - self.pending_commands = [] # type: List[Command] - - # Map from string -> deferred, to wake up when receiveing a SYNC with - # the given string. - # Used for tests. - self.awaiting_syncs = {} # type: Dict[str, defer.Deferred] - - # The factory used to create connections. - self.factory = None # type: Optional[ReplicationClientFactory] - - def start_replication(self, hs): - """Helper method to start a replication connection to the remote server - using TCP. - """ - client_name = hs.config.worker_name - self.factory = ReplicationClientFactory(hs, client_name, self) - host = hs.config.worker_replication_host - port = hs.config.worker_replication_port - hs.get_reactor().connectTCP(host, port, self.factory) - - async def on_rdata(self, stream_name, token, rows): + async def on_rdata(self, stream_name: str, token: int, rows: list): """Called to handle a batch of replication data with a given stream token. By default this just pokes the slave store. Can be overridden in subclasses to @@ -124,30 +98,8 @@ class ReplicationClientHandler(AbstractReplicationClientHandler): rows (list): a list of Stream.ROW_TYPE objects as returned by Stream.parse_row. """ - logger.debug("Received rdata %s -> %s", stream_name, token) self.store.process_replication_rows(stream_name, token, rows) - async def on_position(self, stream_name, token): - """Called when we get new position data. By default this just pokes - the slave store. - - Can be overriden in subclasses to handle more. - """ - self.store.process_replication_rows(stream_name, token, []) - - def on_sync(self, data): - """When we received a SYNC we wake up any deferreds that were waiting - for the sync with the given data. - - Used by tests. - """ - d = self.awaiting_syncs.pop(data, None) - if d: - d.callback(data) - - def on_remote_server_up(self, server: str): - """Called when get a new REMOTE_SERVER_UP command.""" - def get_streams_to_replicate(self) -> Dict[str, int]: """Called when a new connection has been established and we need to subscribe to streams. @@ -163,85 +115,10 @@ class ReplicationClientHandler(AbstractReplicationClientHandler): args["account_data"] = user_account_data elif room_account_data: args["account_data"] = room_account_data - return args - def get_currently_syncing_users(self): - """Get the list of currently syncing users (if any). This is called - when a connection has been established and we need to send the - currently syncing users. (Overriden by the synchrotron's only) - """ - return [] - - def send_command(self, cmd): - """Send a command to master (when we get establish a connection if we - don't have one already.) - """ - if self.connection: - self.connection.send_command(cmd) - else: - logger.warning("Queuing command as not connected: %r", cmd.NAME) - self.pending_commands.append(cmd) - - def send_federation_ack(self, token): - """Ack data for the federation stream. This allows the master to drop - data stored purely in memory. - """ - self.send_command(FederationAckCommand(token)) - - def send_user_sync(self, instance_id, user_id, is_syncing, last_sync_ms): - """Poke the master that a user has started/stopped syncing. 
- """ - self.send_command( - UserSyncCommand(instance_id, user_id, is_syncing, last_sync_ms) - ) - - def send_remove_pusher(self, app_id, push_key, user_id): - """Poke the master to remove a pusher for a user - """ - cmd = RemovePusherCommand(app_id, push_key, user_id) - self.send_command(cmd) - - def send_invalidate_cache(self, cache_func, keys): - """Poke the master to invalidate a cache. - """ - cmd = InvalidateCacheCommand(cache_func.__name__, keys) - self.send_command(cmd) - - def send_user_ip(self, user_id, access_token, ip, user_agent, device_id, last_seen): - """Tell the master that the user made a request. - """ - cmd = UserIpCommand(user_id, access_token, ip, user_agent, device_id, last_seen) - self.send_command(cmd) - - def send_remote_server_up(self, server: str): - self.send_command(RemoteServerUpCommand(server)) - - def await_sync(self, data): - """Returns a deferred that is resolved when we receive a SYNC command - with given data. - - [Not currently] used by tests. - """ - return self.awaiting_syncs.setdefault(data, defer.Deferred()) - - def update_connection(self, connection): - """Called when a connection has been established (or lost with None). - """ - self.connection = connection - if connection: - for cmd in self.pending_commands: - connection.send_command(cmd) - self.pending_commands = [] - - def finished_connecting(self): - """Called when we have successfully subscribed and caught up to all - streams we're interested in. - """ - logger.info("Finished connecting to server") + async def on_position(self, stream_name: str, token: int): + self.store.process_replication_rows(stream_name, token, []) - # We don't reset the delay any earlier as otherwise if there is a - # problem during start up we'll end up tight looping connecting to the - # server. - if self.factory: - self.factory.resetDelay() + def on_remote_server_up(self, server: str): + """Called when get a new REMOTE_SERVER_UP command.""" diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py new file mode 100644 index 0000000000..12a1cfd6d1 --- /dev/null +++ b/synapse/replication/tcp/handler.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 Vector Creations Ltd +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from typing import Any, Callable, Dict, List, Optional, Set + +from prometheus_client import Counter + +from synapse.replication.tcp.client import ReplicationClientFactory +from synapse.replication.tcp.commands import ( + Command, + FederationAckCommand, + InvalidateCacheCommand, + PositionCommand, + RdataCommand, + RemoteServerUpCommand, + RemovePusherCommand, + SyncCommand, + UserIpCommand, + UserSyncCommand, +) +from synapse.replication.tcp.streams import STREAMS_MAP, Stream +from synapse.util.async_helpers import Linearizer + +logger = logging.getLogger(__name__) + + +# number of updates received for each RDATA stream +inbound_rdata_count = Counter( + "synapse_replication_tcp_protocol_inbound_rdata_count", "", ["stream_name"] +) + + +class ReplicationCommandHandler: + """Handles incoming commands from replication as well as sending commands + back out to connections. + """ + + def __init__(self, hs): + self._replication_data_handler = hs.get_replication_data_handler() + self._presence_handler = hs.get_presence_handler() + + # Set of streams that we've caught up with. + self._streams_connected = set() # type: Set[str] + + self._streams = { + stream.NAME: stream(hs) for stream in STREAMS_MAP.values() + } # type: Dict[str, Stream] + + self._position_linearizer = Linearizer("replication_position") + + # Map of stream to batched updates. See RdataCommand for info on how + # batching works. + self._pending_batches = {} # type: Dict[str, List[Any]] + + # The factory used to create connections. + self._factory = None # type: Optional[ReplicationClientFactory] + + # The current connection. None if we are currently (re)connecting + self._connection = None + + def start_replication(self, hs): + """Helper method to start a replication connection to the remote server + using TCP. + """ + client_name = hs.config.worker_name + self._factory = ReplicationClientFactory(hs, client_name, self) + host = hs.config.worker_replication_host + port = hs.config.worker_replication_port + hs.get_reactor().connectTCP(host, port, self._factory) + + async def on_RDATA(self, cmd: RdataCommand): + stream_name = cmd.stream_name + inbound_rdata_count.labels(stream_name).inc() + + try: + row = STREAMS_MAP[stream_name].parse_row(cmd.row) + except Exception: + logger.exception("Failed to parse RDATA: %r %r", stream_name, cmd.row) + raise + + if cmd.token is None or stream_name not in self._streams_connected: + # I.e. either this is part of a batch of updates for this stream (in + # which case batch until we get an update for the stream with a non + # None token) or we're currently connecting so we queue up rows. + self._pending_batches.setdefault(stream_name, []).append(row) + else: + # Check if this is the last of a batch of updates + rows = self._pending_batches.pop(stream_name, []) + rows.append(row) + await self.on_rdata(stream_name, cmd.token, rows) + + async def on_rdata(self, stream_name: str, token: int, rows: list): + """Called to handle a batch of replication data with a given stream token. + + Args: + stream_name: name of the replication stream for this batch of rows + token: stream token for this batch of rows + rows: a list of Stream.ROW_TYPE objects as returned by + Stream.parse_row. 
+ """ + logger.debug("Received rdata %s -> %s", stream_name, token) + await self._replication_data_handler.on_rdata(stream_name, token, rows) + + async def on_POSITION(self, cmd: PositionCommand): + stream = self._streams.get(cmd.stream_name) + if not stream: + logger.error("Got POSITION for unknown stream: %s", cmd.stream_name) + return + + # We protect catching up with a linearizer in case the replication + # connection reconnects under us. + with await self._position_linearizer.queue(cmd.stream_name): + # We're about to go and catch up with the stream, so mark as connecting + # to stop RDATA being handled at the same time by removing stream from + # list of connected streams. We also clear any batched up RDATA from + # before we got the POSITION. + self._streams_connected.discard(cmd.stream_name) + self._pending_batches.clear() + + # Find where we previously streamed up to. + current_token = self._replication_data_handler.get_streams_to_replicate().get( + cmd.stream_name + ) + if current_token is None: + logger.warning( + "Got POSITION for stream we're not subscribed to: %s", + cmd.stream_name, + ) + return + + # Fetch all updates between then and now. + limited = True + while limited: + updates, current_token, limited = await stream.get_updates_since( + current_token, cmd.token + ) + if updates: + await self.on_rdata( + cmd.stream_name, + current_token, + [stream.parse_row(update[1]) for update in updates], + ) + + # We've now caught up to position sent to us, notify handler. + await self._replication_data_handler.on_position(cmd.stream_name, cmd.token) + + # Handle any RDATA that came in while we were catching up. + rows = self._pending_batches.pop(cmd.stream_name, []) + if rows: + await self._replication_data_handler.on_rdata( + cmd.stream_name, rows[-1].token, rows + ) + + self._streams_connected.add(cmd.stream_name) + + async def on_SYNC(self, cmd: SyncCommand): + pass + + async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand): + """"Called when get a new REMOTE_SERVER_UP command.""" + self._replication_data_handler.on_remote_server_up(cmd.data) + + def get_currently_syncing_users(self): + """Get the list of currently syncing users (if any). This is called + when a connection has been established and we need to send the + currently syncing users. + """ + return self._presence_handler.get_currently_syncing_users() + + def update_connection(self, connection): + """Called when a connection has been established (or lost with None). + """ + self._connection = connection + + def finished_connecting(self): + """Called when we have successfully subscribed and caught up to all + streams we're interested in. + """ + logger.info("Finished connecting to server") + + # We don't reset the delay any earlier as otherwise if there is a + # problem during start up we'll end up tight looping connecting to the + # server. + if self._factory: + self._factory.resetDelay() + + def send_command(self, cmd: Command): + """Send a command to master (when we get establish a connection if we + don't have one already.) + """ + if self._connection: + self._connection.send_command(cmd) + else: + logger.warning("Dropping command as not connected: %r", cmd.NAME) + + def send_federation_ack(self, token: int): + """Ack data for the federation stream. This allows the master to drop + data stored purely in memory. 
+ """ + self.send_command(FederationAckCommand(token)) + + def send_user_sync( + self, instance_id: str, user_id: str, is_syncing: bool, last_sync_ms: int + ): + """Poke the master that a user has started/stopped syncing. + """ + self.send_command( + UserSyncCommand(instance_id, user_id, is_syncing, last_sync_ms) + ) + + def send_remove_pusher(self, app_id: str, push_key: str, user_id: str): + """Poke the master to remove a pusher for a user + """ + cmd = RemovePusherCommand(app_id, push_key, user_id) + self.send_command(cmd) + + def send_invalidate_cache(self, cache_func: Callable, keys: tuple): + """Poke the master to invalidate a cache. + """ + cmd = InvalidateCacheCommand(cache_func.__name__, keys) + self.send_command(cmd) + + def send_user_ip( + self, + user_id: str, + access_token: str, + ip: str, + user_agent: str, + device_id: str, + last_seen: int, + ): + """Tell the master that the user made a request. + """ + cmd = UserIpCommand(user_id, access_token, ip, user_agent, device_id, last_seen) + self.send_command(cmd) + + def send_remote_server_up(self, server: str): + self.send_command(RemoteServerUpCommand(server)) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index dae246825f..f2a37f568e 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -46,12 +46,11 @@ indicate which side is sending, these are *not* included on the wire:: > ERROR server stopping * connection closed by server * """ -import abc import fcntl import logging import struct from collections import defaultdict -from typing import Any, DefaultDict, Dict, List, Set +from typing import TYPE_CHECKING, DefaultDict, List from six import iteritems @@ -78,13 +77,12 @@ from synapse.replication.tcp.commands import ( SyncCommand, UserSyncCommand, ) -from synapse.replication.tcp.streams import STREAMS_MAP, Stream from synapse.types import Collection from synapse.util import Clock from synapse.util.stringutils import random_string -MYPY = False -if MYPY: +if TYPE_CHECKING: + from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.server import HomeServer @@ -475,71 +473,6 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): self.streamer.lost_connection(self) -class AbstractReplicationClientHandler(metaclass=abc.ABCMeta): - """ - The interface for the handler that should be passed to - ClientReplicationStreamProtocol - """ - - @abc.abstractmethod - async def on_rdata(self, stream_name, token, rows): - """Called to handle a batch of replication data with a given stream token. - - Args: - stream_name (str): name of the replication stream for this batch of rows - token (int): stream token for this batch of rows - rows (list): a list of Stream.ROW_TYPE objects as returned by - Stream.parse_row. - """ - raise NotImplementedError() - - @abc.abstractmethod - async def on_position(self, stream_name, token): - """Called when we get new position data.""" - raise NotImplementedError() - - @abc.abstractmethod - def on_sync(self, data): - """Called when get a new SYNC command.""" - raise NotImplementedError() - - @abc.abstractmethod - async def on_remote_server_up(self, server: str): - """Called when get a new REMOTE_SERVER_UP command.""" - raise NotImplementedError() - - @abc.abstractmethod - def get_streams_to_replicate(self): - """Called when a new connection has been established and we need to - subscribe to streams. 
- - Returns: - map from stream name to the most recent update we have for - that stream (ie, the point we want to start replicating from) - """ - raise NotImplementedError() - - @abc.abstractmethod - def get_currently_syncing_users(self): - """Get the list of currently syncing users (if any). This is called - when a connection has been established and we need to send the - currently syncing users.""" - raise NotImplementedError() - - @abc.abstractmethod - def update_connection(self, connection): - """Called when a connection has been established (or lost with None). - """ - raise NotImplementedError() - - @abc.abstractmethod - def finished_connecting(self): - """Called when we have successfully subscribed and caught up to all - streams we're interested in. - """ - raise NotImplementedError() - - class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): VALID_INBOUND_COMMANDS = VALID_SERVER_COMMANDS VALID_OUTBOUND_COMMANDS = VALID_CLIENT_COMMANDS @@ -550,7 +483,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): client_name: str, server_name: str, clock: Clock, - handler: AbstractReplicationClientHandler, + command_handler: "ReplicationCommandHandler", ): BaseReplicationStreamProtocol.__init__(self, clock) @@ -558,20 +491,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.client_name = client_name self.server_name = server_name - self.handler = handler - - self.streams = { - stream.NAME: stream(hs) for stream in STREAMS_MAP.values() - } # type: Dict[str, Stream] - - # Set of stream names that have been subscribe to, but haven't yet - # caught up with. This is used to track when the client has been fully - # connected to the remote. - self.streams_connecting = set(STREAMS_MAP) # type: Set[str] - - # Map of stream to batched updates. See RdataCommand for info on how - # batching works. - self.pending_batches = {} # type: Dict[str, List[Any]] + self.handler = command_handler def connectionMade(self): self.send_command(NameCommand(self.client_name)) @@ -589,89 +509,39 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # We've now finished connecting to so inform the client handler self.handler.update_connection(self) + self.handler.finished_connecting() - async def on_SERVER(self, cmd): - if cmd.data != self.server_name: - logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) - self.send_error("Wrong remote") - - async def on_RDATA(self, cmd): - stream_name = cmd.stream_name - inbound_rdata_count.labels(stream_name).inc() - - try: - row = STREAMS_MAP[stream_name].parse_row(cmd.row) - except Exception: - logger.exception( - "[%s] Failed to parse RDATA: %r %r", self.id(), stream_name, cmd.row - ) - raise - - if cmd.token is None or stream_name in self.streams_connecting: - # I.e. this is part of a batch of updates for this stream. Batch - # until we get an update for the stream with a non None token - self.pending_batches.setdefault(stream_name, []).append(row) - else: - # Check if this is the last of a batch of updates - rows = self.pending_batches.pop(stream_name, []) - rows.append(row) - await self.handler.on_rdata(stream_name, cmd.token, rows) - - async def on_POSITION(self, cmd: PositionCommand): - stream = self.streams.get(cmd.stream_name) - if not stream: - logger.error("Got POSITION for unknown stream: %s", cmd.stream_name) - return - - # Find where we previously streamed up to. 
- current_token = self.handler.get_streams_to_replicate().get(cmd.stream_name) - if current_token is None: - logger.warning( - "Got POSITION for stream we're not subscribed to: %s", cmd.stream_name - ) - return - - # Fetch all updates between then and now. - limited = True - while limited: - updates, current_token, limited = await stream.get_updates_since( - current_token, cmd.token - ) - - # Check if the connection was closed underneath us, if so we bail - # rather than risk having concurrent catch ups going on. - if self.state == ConnectionStates.CLOSED: - return - - if updates: - await self.handler.on_rdata( - cmd.stream_name, - current_token, - [stream.parse_row(update[1]) for update in updates], - ) + async def handle_command(self, cmd: Command): + """Handle a command we have received over the replication stream. - # We've now caught up to position sent to us, notify handler. - await self.handler.on_position(cmd.stream_name, cmd.token) + Delegates to `command_handler.on_`, which must return an + awaitable. - self.streams_connecting.discard(cmd.stream_name) - if not self.streams_connecting: - self.handler.finished_connecting() + Args: + cmd: received command + """ + handled = False - # Check if the connection was closed underneath us, if so we bail - # rather than risk having concurrent catch ups going on. - if self.state == ConnectionStates.CLOSED: - return + # First call any command handlers on this instance. These are for TCP + # specific handling. + cmd_func = getattr(self, "on_%s" % (cmd.NAME,), None) + if cmd_func: + await cmd_func(cmd) + handled = True - # Handle any RDATA that came in while we were catching up. - rows = self.pending_batches.pop(cmd.stream_name, []) - if rows: - await self.handler.on_rdata(cmd.stream_name, rows[-1].token, rows) + # Then call out to the handler. 
+ cmd_func = getattr(self.handler, "on_%s" % (cmd.NAME,), None) + if cmd_func: + await cmd_func(cmd) + handled = True - async def on_SYNC(self, cmd): - self.handler.on_sync(cmd.data) + if not handled: + logger.warning("Unhandled command: %r", cmd) - async def on_REMOTE_SERVER_UP(self, cmd: RemoteServerUpCommand): - self.handler.on_remote_server_up(cmd.data) + async def on_SERVER(self, cmd): + if cmd.data != self.server_name: + logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) + self.send_error("Wrong remote") def replicate(self): """Send the subscription request to the server @@ -768,8 +638,3 @@ tcp_outbound_commands = LaterGauge( for k, count in iteritems(p.outbound_commands_counter) }, ) - -# number of updates received for each RDATA stream -inbound_rdata_count = Counter( - "synapse_replication_tcp_protocol_inbound_rdata_count", "", ["stream_name"] -) diff --git a/synapse/server.py b/synapse/server.py index 9228e1c892..9d273c980c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -87,6 +87,8 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.notifier import Notifier from synapse.push.action_generator import ActionGenerator from synapse.push.pusherpool import PusherPool +from synapse.replication.tcp.client import ReplicationDataHandler +from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.replication.tcp.resource import ReplicationStreamer from synapse.rest.media.v1.media_repository import ( MediaRepository, @@ -206,6 +208,7 @@ class HomeServer(object): "password_policy_handler", "storage", "replication_streamer", + "replication_data_handler", ] REQUIRED_ON_MASTER_STARTUP = ["user_directory_handler", "stats_handler"] @@ -468,7 +471,7 @@ class HomeServer(object): return ReadMarkerHandler(self) def build_tcp_replication(self): - raise NotImplementedError() + return ReplicationCommandHandler(self) def build_action_generator(self): return ActionGenerator(self) @@ -562,6 +565,9 @@ class HomeServer(object): def build_replication_streamer(self) -> ReplicationStreamer: return ReplicationStreamer(self) + def build_replication_data_handler(self): + return ReplicationDataHandler(self.get_datastore()) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/server.pyi b/synapse/server.pyi index 9d1dfa71e7..9013e9bac9 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -19,6 +19,7 @@ import synapse.handlers.set_password import synapse.http.client import synapse.notifier import synapse.replication.tcp.client +import synapse.replication.tcp.handler import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager import synapse.server_notices.server_notices_sender @@ -106,7 +107,11 @@ class HomeServer(object): pass def get_tcp_replication( self, - ) -> synapse.replication.tcp.client.ReplicationClientHandler: + ) -> synapse.replication.tcp.handler.ReplicationCommandHandler: + pass + def get_replication_data_handler( + self, + ) -> synapse.replication.tcp.client.ReplicationDataHandler: pass def get_federation_registry( self, diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 2a1e7c7166..8902a5ab69 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -17,8 +17,9 @@ from mock import Mock, NonCallableMock from synapse.replication.tcp.client import ( ReplicationClientFactory, - 
ReplicationClientHandler, + ReplicationDataHandler, ) +from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.storage.database import make_conn @@ -51,15 +52,19 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): self.event_id = 0 server_factory = ReplicationStreamProtocolFactory(self.hs) - self.streamer = server_factory.streamer + self.streamer = hs.get_replication_streamer() - handler_factory = Mock() - self.replication_handler = ReplicationClientHandler(self.slaved_store) - self.replication_handler.factory = handler_factory + # We now do some gut wrenching so that we have a client that is based + # off of the slave store rather than the main store. + self.replication_handler = ReplicationCommandHandler(self.hs) + self.replication_handler._replication_data_handler = ReplicationDataHandler( + self.slaved_store + ) client_factory = ReplicationClientFactory( self.hs, "client_name", self.replication_handler ) + client_factory.handler = self.replication_handler server = server_factory.buildProtocol(None) client = client_factory.buildProtocol(None) diff --git a/tests/replication/tcp/streams/_base.py b/tests/replication/tcp/streams/_base.py index a755fe2879..32238fe79a 100644 --- a/tests/replication/tcp/streams/_base.py +++ b/tests/replication/tcp/streams/_base.py @@ -15,7 +15,7 @@ from mock import Mock -from synapse.replication.tcp.commands import ReplicateCommand +from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory @@ -26,15 +26,20 @@ from tests.server import FakeTransport class BaseStreamTestCase(unittest.HomeserverTestCase): """Base class for tests of the replication streams""" + def make_homeserver(self, reactor, clock): + self.test_handler = Mock(wraps=TestReplicationDataHandler()) + return self.setup_test_homeserver(replication_data_handler=self.test_handler) + def prepare(self, reactor, clock, hs): # build a replication server - server_factory = ReplicationStreamProtocolFactory(self.hs) - self.streamer = server_factory.streamer + server_factory = ReplicationStreamProtocolFactory(hs) + self.streamer = hs.get_replication_streamer() self.server = server_factory.buildProtocol(None) - self.test_handler = Mock(wraps=TestReplicationClientHandler()) + repl_handler = ReplicationCommandHandler(hs) + repl_handler.handler = self.test_handler self.client = ClientReplicationStreamProtocol( - hs, "client", "test", clock, self.test_handler, + hs, "client", "test", clock, repl_handler, ) self._client_transport = None @@ -69,13 +74,9 @@ class BaseStreamTestCase(unittest.HomeserverTestCase): self.streamer.on_notifier_poke() self.pump(0.1) - def replicate_stream(self): - """Make the client end a REPLICATE command to set up a subscription to a stream""" - self.client.send_command(ReplicateCommand()) - -class TestReplicationClientHandler(object): - """Drop-in for ReplicationClientHandler which just collects RDATA rows""" +class TestReplicationDataHandler: + """Drop-in for ReplicationDataHandler which just collects RDATA rows""" def __init__(self): self.streams = set() @@ -88,18 +89,9 @@ class TestReplicationClientHandler(object): positions[stream] = max(token, positions.get(stream, 0)) return positions - def get_currently_syncing_users(self): - return [] - - def update_connection(self, connection): - pass - - def 
finished_connecting(self): - pass - - async def on_position(self, stream_name, token): - """Called when we get new position data.""" - async def on_rdata(self, stream_name, token, rows): for r in rows: self._received_rdata_rows.append((stream_name, token, r)) + + async def on_position(self, stream_name, token): + pass diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py index 0ec0825a0e..a0206f7363 100644 --- a/tests/replication/tcp/streams/test_receipts.py +++ b/tests/replication/tcp/streams/test_receipts.py @@ -24,7 +24,6 @@ class ReceiptsStreamTestCase(BaseStreamTestCase): self.reconnect() # make the client subscribe to the receipts stream - self.replicate_stream() self.test_handler.streams.add("receipts") # tell the master to send a new receipt -- cgit 1.5.1 From 37f6823f5b91f27b9dd8de8fc0e52d5ea889647c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Apr 2020 16:23:08 +0100 Subject: Add instance name to RDATA/POSITION commands (#7364) This is primarily for allowing us to send those commands from workers, but for now simply allows us to ignore echoed RDATA/POSITION commands that we sent (we get echoes of sent commands when using redis). Currently we log a WARNING on the master process every time we receive an echoed RDATA. --- changelog.d/7364.misc | 1 + docs/tcp_replication.md | 41 +++++++++++++++++++------------- synapse/app/_base.py | 4 ++-- synapse/logging/opentracing.py | 23 ++++++++---------- synapse/replication/tcp/commands.py | 37 +++++++++++++++++++--------- synapse/replication/tcp/handler.py | 17 ++++++++++--- synapse/server.py | 13 ++++++++-- synapse/server.pyi | 2 ++ tests/replication/slave/storage/_base.py | 1 + tests/replication/tcp/test_commands.py | 6 +++-- 10 files changed, 95 insertions(+), 50 deletions(-) create mode 100644 changelog.d/7364.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7364.misc b/changelog.d/7364.misc new file mode 100644 index 0000000000..bb5d727cf4 --- /dev/null +++ b/changelog.d/7364.misc @@ -0,0 +1 @@ +Add an `instance_name` to `RDATA` and `POSITION` replication commands. diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md index b922d9cf7e..ab2fffbfe4 100644 --- a/docs/tcp_replication.md +++ b/docs/tcp_replication.md @@ -15,15 +15,17 @@ example flow would be (where '>' indicates master to worker and > SERVER example.com < REPLICATE - > POSITION events 53 - > RDATA events 54 ["$foo1:bar.com", ...] - > RDATA events 55 ["$foo4:bar.com", ...] + > POSITION events master 53 + > RDATA events master 54 ["$foo1:bar.com", ...] + > RDATA events master 55 ["$foo4:bar.com", ...] The example shows the server accepting a new connection and sending its identity with the `SERVER` command, followed by the client asking the server to respond with the position of all streams. The server then periodically sends `RDATA` commands -which have the format `RDATA <stream_name> <token> <row>`, where the format of -`<row>` is defined by the individual streams. +which have the format `RDATA <stream_name> <instance_name> <token> <row>`, where +the format of `<row>` is defined by the individual streams. The +`<instance_name>` is the name of the Synapse process that generated the data +(usually "master"). Error reporting happens by either the client or server sending an ERROR command, and usually the connection will be closed. @@ -52,7 +54,7 @@ The basic structure of the protocol is line based, where the initial word of each line specifies the command. The rest of the line is parsed based on the command.
For example, the RDATA command is defined as: - RDATA <stream_name> <token> <row-json> + RDATA <stream_name> <instance_name> <token> <row-json> (Note that <row-json> may contain spaces, but cannot contain newlines.) @@ -136,11 +138,11 @@ the wire: < NAME synapse.app.appservice < PING 1490197665618 < REPLICATE - > POSITION events 1 - > POSITION backfill 1 - > POSITION caches 1 - > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] - > RDATA events 14 ["$149019767112vOHxz:localhost:8823", + > POSITION events master 1 + > POSITION backfill master 1 + > POSITION caches master 1 + > RDATA caches master 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513] + > RDATA events master 14 ["$149019767112vOHxz:localhost:8823", "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null] < PING 1490197675618 > ERROR server stopping * connection closed by server * @@ -151,10 +153,10 @@ position without needing to send data with the `RDATA` command. An example of a batched set of `RDATA` is: - > RDATA caches batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] - > RDATA caches batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] - > RDATA caches 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] + > RDATA caches master batch ["get_user_by_id",["@test:localhost:8823"],1490197670513] + > RDATA caches master batch ["get_user_by_id",["@test2:localhost:8823"],1490197670513] + > RDATA caches master batch ["get_user_by_id",["@test3:localhost:8823"],1490197670513] + > RDATA caches master 54 ["get_user_by_id",["@test4:localhost:8823"],1490197670513] In this case the client shouldn't advance their caches token until it sees the last `RDATA`. @@ -178,6 +180,11 @@ client (C): updates, and if so then fetch them out of band. Sent in response to a REPLICATE command (but can happen at any time). + The POSITION command includes the source of the stream. Currently all streams + are written by a single process (usually "master"). If fetching missing + updates via HTTP API, rather than via the DB, then processes should make the + request to the appropriate process. + #### ERROR (S, C) There was an error @@ -234,12 +241,12 @@ Each individual cache invalidation results in a row being sent down replication, which includes the cache name (the name of the function) and the key to invalidate. For example: - > RDATA caches 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251] + > RDATA caches master 550953771 ["get_user_by_id", ["@bob:example.com"], 1550574873251] Alternatively, an entire cache can be invalidated by sending down a `null` instead of the key. For example: - > RDATA caches 550953772 ["get_user_by_id", null, 1550574873252] + > RDATA caches master 550953772 ["get_user_by_id", null, 1550574873252] However, there are times when a number of caches need to be invalidated at the same time with the same key. To reduce traffic we batch those diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 4d84f4595a..628292b890 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -270,7 +270,7 @@ def start(hs, listeners=None): # Start the tracer synapse.logging.opentracing.init_tracer( # type: ignore[attr-defined] # noqa - hs.config + hs ) # It is now safe to start your Synapse.
@@ -316,7 +316,7 @@ def setup_sentry(hs): scope.set_tag("matrix_server_name", hs.config.server_name) app = hs.config.worker_app if hs.config.worker_app else "synapse.app.homeserver" - name = hs.config.worker_name if hs.config.worker_name else "master" + name = hs.get_instance_name() scope.set_tag("worker_app", app) scope.set_tag("worker_name", name) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 0638cec429..5dddf57008 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -171,7 +171,7 @@ import logging import re import types from functools import wraps -from typing import Dict +from typing import TYPE_CHECKING, Dict from canonicaljson import json @@ -179,6 +179,9 @@ from twisted.internet import defer from synapse.config import ConfigError +if TYPE_CHECKING: + from synapse.server import HomeServer + # Helper class @@ -297,14 +300,11 @@ def _noop_context_manager(*args, **kwargs): # Setup -def init_tracer(config): +def init_tracer(hs: "HomeServer"): """Set the whitelists and initialise the JaegerClient tracer - - Args: - config (HomeserverConfig): The config used by the homeserver """ global opentracing - if not config.opentracer_enabled: + if not hs.config.opentracer_enabled: # We don't have a tracer opentracing = None return @@ -315,18 +315,15 @@ def init_tracer(config): "installed." ) - # Include the worker name - name = config.worker_name if config.worker_name else "master" - # Pull out the jaeger config if it was given. Otherwise set it to something sensible. # See https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/config.py - set_homeserver_whitelist(config.opentracer_whitelist) + set_homeserver_whitelist(hs.config.opentracer_whitelist) JaegerConfig( - config=config.jaeger_config, - service_name="{} {}".format(config.server_name, name), - scope_manager=LogContextScopeManager(config), + config=hs.config.jaeger_config, + service_name="{} {}".format(hs.config.server_name, hs.get_instance_name()), + scope_manager=LogContextScopeManager(hs.config), ).initialize_tracer() diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index c7880d4b63..f58e384d17 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -95,7 +95,7 @@ class RdataCommand(Command): Format:: - RDATA <stream_name> <token> <row_json> + RDATA <stream_name> <instance_name> <token> <row_json> The `<token>` may either be a numeric stream id OR "batch". The latter case is used to support sending multiple updates with the same stream ID. This @@ -105,33 +105,40 @@ class RdataCommand(Command): The client should batch all incoming RDATA with a token of "batch" (per stream_name) until it sees an RDATA with a numeric stream ID. + The `<instance_name>` is the source of the new data (usually "master"). + A `<token>` of "batch" maps to the instance variable `token` being None. An example of a batched series of RDATA:: - RDATA presence batch ["@foo:example.com", "online", ...] - RDATA presence batch ["@bar:example.com", "online", ...] - RDATA presence 59 ["@baz:example.com", "online", ...] + RDATA presence master batch ["@foo:example.com", "online", ...] + RDATA presence master batch ["@bar:example.com", "online", ...] + RDATA presence master 59 ["@baz:example.com", "online", ...] """ NAME = "RDATA" - def __init__(self, stream_name, token, row): + def __init__(self, stream_name, instance_name, token, row): self.stream_name = stream_name + self.instance_name = instance_name self.token = token self.row = row @classmethod def from_line(cls, line): - stream_name, token, row_json = line.split(" ", 2) + stream_name, instance_name, token, row_json = line.split(" ", 3) return cls( - stream_name, None if token == "batch" else int(token), json.loads(row_json) + stream_name, + instance_name, + None if token == "batch" else int(token), + json.loads(row_json), ) def to_line(self): return " ".join( ( self.stream_name, + self.instance_name, str(self.token) if self.token is not None else "batch", _json_encoder.encode(self.row), ) @@ -145,23 +152,31 @@ class PositionCommand(Command): """Sent by the server to tell the client the stream position without needing to send an RDATA. + Format:: + + POSITION <stream_name> <instance_name> <token> + On receipt of a POSITION command clients should check if they have missed any updates, and if so then fetch them out of band. + + The `<instance_name>` is the process that sent the command and is the source + of the stream. """ NAME = "POSITION" - def __init__(self, stream_name, token): + def __init__(self, stream_name, instance_name, token): self.stream_name = stream_name + self.instance_name = instance_name self.token = token @classmethod def from_line(cls, line): - stream_name, token = line.split(" ", 1) - return cls(stream_name, int(token)) + stream_name, instance_name, token = line.split(" ", 2) + return cls(stream_name, instance_name, int(token)) def to_line(self): - return " ".join((self.stream_name, str(self.token))) + return " ".join((self.stream_name, self.instance_name, str(self.token))) class ErrorCommand(_SimpleCommand): diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index b8f49a8d0f..6f7054d5af 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -79,6 +79,7 @@ class ReplicationCommandHandler: self._notifier = hs.get_notifier() self._clock = hs.get_clock() self._instance_id = hs.get_instance_id() + self._instance_name = hs.get_instance_name() # Set of streams that we've caught up with.
self._streams_connected = set() # type: Set[str] @@ -156,7 +157,7 @@ class ReplicationCommandHandler: hs.config.redis.redis_host, hs.config.redis.redis_port, self._factory, ) else: - client_name = hs.config.worker_name + client_name = hs.get_instance_name() self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) host = hs.config.worker_replication_host port = hs.config.worker_replication_port @@ -170,7 +171,9 @@ class ReplicationCommandHandler: for stream_name, stream in self._streams.items(): current_token = stream.current_token() - self.send_command(PositionCommand(stream_name, current_token)) + self.send_command( + PositionCommand(stream_name, self._instance_name, current_token) + ) async def on_USER_SYNC(self, conn: AbstractConnection, cmd: UserSyncCommand): user_sync_counter.inc() @@ -235,6 +238,10 @@ class ReplicationCommandHandler: await self._server_notices_sender.on_user_ip(cmd.user_id) async def on_RDATA(self, conn: AbstractConnection, cmd: RdataCommand): + if cmd.instance_name == self._instance_name: + # Ignore RDATA that are just our own echoes + return + stream_name = cmd.stream_name inbound_rdata_count.labels(stream_name).inc() @@ -286,6 +293,10 @@ class ReplicationCommandHandler: await self._replication_data_handler.on_rdata(stream_name, token, rows) async def on_POSITION(self, conn: AbstractConnection, cmd: PositionCommand): + if cmd.instance_name == self._instance_name: + # Ignore POSITION that are just our own echoes + return + stream = self._streams.get(cmd.stream_name) if not stream: logger.error("Got POSITION for unknown stream: %s", cmd.stream_name) @@ -485,7 +496,7 @@ class ReplicationCommandHandler: We need to check if the client is interested in the stream or not """ - self.send_command(RdataCommand(stream_name, token, data)) + self.send_command(RdataCommand(stream_name, self._instance_name, token, data)) UpdateToken = TypeVar("UpdateToken") diff --git a/synapse/server.py b/synapse/server.py index 9d273c980c..bf97a16c09 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -234,7 +234,8 @@ class HomeServer(object): self._listening_services = [] self.start_time = None - self.instance_id = random_string(5) + self._instance_id = random_string(5) + self._instance_name = config.worker_name or "master" self.clock = Clock(reactor) self.distributor = Distributor() @@ -254,7 +255,15 @@ class HomeServer(object): This is used to distinguish running instances in worker-based deployments. """ - return self.instance_id + return self._instance_id + + def get_instance_name(self) -> str: + """A unique name for this synapse process. + + Used to identify the process over replication and in config. Does not + change over restarts. 
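+
+        For the master process this is "master"; for a worker it is the
+        worker_name from its config (see ``__init__`` above).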
+ """ + return self._instance_name def setup(self): logger.info("Setting up.") diff --git a/synapse/server.pyi b/synapse/server.pyi index fc5886f762..18043a2593 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -122,6 +122,8 @@ class HomeServer(object): pass def get_instance_id(self) -> str: pass + def get_instance_name(self) -> str: + pass def get_event_builder_factory(self) -> EventBuilderFactory: pass def get_storage(self) -> synapse.storage.Storage: diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 395c7d0306..1615dfab5e 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -57,6 +57,7 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): # We now do some gut wrenching so that we have a client that is based # off of the slave store rather than the main store. self.replication_handler = ReplicationCommandHandler(self.hs) + self.replication_handler._instance_name = "worker" self.replication_handler._replication_data_handler = ReplicationDataHandler( self.slaved_store ) diff --git a/tests/replication/tcp/test_commands.py b/tests/replication/tcp/test_commands.py index 3cbcb513cc..7ddfd0a733 100644 --- a/tests/replication/tcp/test_commands.py +++ b/tests/replication/tcp/test_commands.py @@ -28,15 +28,17 @@ class ParseCommandTestCase(TestCase): self.assertIsInstance(cmd, ReplicateCommand) def test_parse_rdata(self): - line = 'RDATA events 6287863 ["ev", ["$eventid", "!roomid", "type", null, null, null]]' + line = 'RDATA events master 6287863 ["ev", ["$eventid", "!roomid", "type", null, null, null]]' cmd = parse_command_from_line(line) self.assertIsInstance(cmd, RdataCommand) self.assertEqual(cmd.stream_name, "events") + self.assertEqual(cmd.instance_name, "master") self.assertEqual(cmd.token, 6287863) def test_parse_rdata_batch(self): - line = 'RDATA presence batch ["@foo:example.com", "online"]' + line = 'RDATA presence master batch ["@foo:example.com", "online"]' cmd = parse_command_from_line(line) self.assertIsInstance(cmd, RdataCommand) self.assertEqual(cmd.stream_name, "presence") + self.assertEqual(cmd.instance_name, "master") self.assertIsNone(cmd.token) -- cgit 1.5.1 From 616af44137c78d481024da83bb51ed0d50a49522 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Fri, 8 May 2020 14:30:40 +0200 Subject: Implement OpenID Connect-based login (#7256) --- changelog.d/7256.feature | 1 + docs/dev/oidc.md | 175 ++++++ docs/sample_config.yaml | 95 ++++ mypy.ini | 3 + synapse/app/homeserver.py | 12 + synapse/config/_base.pyi | 2 + synapse/config/homeserver.py | 2 + synapse/config/oidc_config.py | 177 ++++++ synapse/config/sso.py | 17 +- synapse/handlers/auth.py | 4 +- synapse/handlers/oidc_handler.py | 998 +++++++++++++++++++++++++++++++++ synapse/http/client.py | 7 + synapse/python_dependencies.py | 1 + synapse/res/templates/sso_error.html | 18 + synapse/rest/client/v1/login.py | 28 +- synapse/rest/oidc/__init__.py | 27 + synapse/rest/oidc/callback_resource.py | 31 + synapse/server.py | 6 + synapse/server.pyi | 5 + tests/handlers/test_oidc.py | 565 +++++++++++++++++++ tox.ini | 1 + 21 files changed, 2163 insertions(+), 12 deletions(-) create mode 100644 changelog.d/7256.feature create mode 100644 docs/dev/oidc.md create mode 100644 synapse/config/oidc_config.py create mode 100644 synapse/handlers/oidc_handler.py create mode 100644 synapse/res/templates/sso_error.html create mode 100644 synapse/rest/oidc/__init__.py create mode 100644 synapse/rest/oidc/callback_resource.py 
create mode 100644 tests/handlers/test_oidc.py
(limited to 'synapse/server.py')

diff --git a/changelog.d/7256.feature b/changelog.d/7256.feature
new file mode 100644
index 0000000000..7ad767bf71
--- /dev/null
+++ b/changelog.d/7256.feature
@@ -0,0 +1 @@
+Add OpenID Connect login/registration support. Contributed by Quentin Gliech, on behalf of [les Connecteurs](https://connecteu.rs).
diff --git a/docs/dev/oidc.md b/docs/dev/oidc.md
new file mode 100644
index 0000000000..a90c5d2441
--- /dev/null
+++ b/docs/dev/oidc.md
@@ -0,0 +1,175 @@
+# How to test OpenID Connect
+
+Any OpenID Connect Provider (OP) should work with Synapse, as long as it supports the authorization code flow.
+There are a few options for that:
+
+ - start a local OP. Synapse has been tested with [Hydra][hydra] and [Dex][dex-idp].
+   Note that for an OP to work, it should be served under a secure (HTTPS) origin.
+   A certificate signed with a self-signed, locally trusted CA should work. In that case, start Synapse with an `SSL_CERT_FILE` environment variable set to the path of the CA.
+ - use a publicly available OP. Synapse has been tested with [Google][google-idp].
+ - set up a SaaS OP, like [Auth0][auth0] and [Okta][okta]. Auth0 has a free tier which has been tested with Synapse.
+
+[google-idp]: https://developers.google.com/identity/protocols/OpenIDConnect#authenticatingtheuser
+[auth0]: https://auth0.com/
+[okta]: https://www.okta.com/
+[dex-idp]: https://github.com/dexidp/dex
+[hydra]: https://www.ory.sh/docs/hydra/
+
+
+## Sample configs
+
+Here are a few configs for providers that should work with Synapse.
+
+### [Dex][dex-idp]
+
+[Dex][dex-idp] is a simple, open-source, certified OpenID Connect Provider.
+Although it is designed to help build a full-blown provider, with some external database, it can be configured with static passwords in a config file.
+
+Follow the [Getting Started guide](https://github.com/dexidp/dex/blob/master/Documentation/getting-started.md) to install Dex.
+
+Edit the `examples/config-dev.yaml` config file from the Dex repo to add a client:
+
+```yaml
+staticClients:
+- id: synapse
+  secret: secret
+  redirectURIs:
+  - '[synapse base url]/_synapse/oidc/callback'
+  name: 'Synapse'
+```
+
+Run with `dex serve examples/config-dev.yaml`.
+
+Synapse config:
+
+```yaml
+oidc_config:
+  enabled: true
+  skip_verification: true # This is needed as Dex is served on an insecure endpoint
+  issuer: "http://127.0.0.1:5556/dex"
+  discover: true
+  client_id: "synapse"
+  client_secret: "secret"
+  scopes:
+    - openid
+    - profile
+  user_mapping_provider:
+    config:
+      localpart_template: '{{ user.name }}'
+      display_name_template: '{{ user.name|capitalize }}'
+```
+
+### [Auth0][auth0]
+
+1. Create a regular web application for Synapse
+2. Set the Allowed Callback URLs to `[synapse base url]/_synapse/oidc/callback`
+3. Add a rule to add the `preferred_username` claim.
+   <details>
+    <summary>Code sample</summary>
+
+    ```js
+    function addPersistenceAttribute(user, context, callback) {
+      user.user_metadata = user.user_metadata || {};
+      user.user_metadata.preferred_username = user.user_metadata.preferred_username || user.user_id;
+      context.idToken.preferred_username = user.user_metadata.preferred_username;
+
+      auth0.users.updateUserMetadata(user.user_id, user.user_metadata)
+        .then(function(){
+            callback(null, user, context);
+        })
+        .catch(function(err){
+            callback(err);
+        });
+    }
+    ```
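+
+    This rule seeds `preferred_username` from the Auth0 user ID when it is not
+    already set, and persists it in the user metadata so the same value is
+    returned on every subsequent login.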
+   </details>
+
+```yaml
+oidc_config:
+  enabled: true
+  issuer: "https://your-tier.eu.auth0.com/" # TO BE FILLED
+  discover: true
+  client_id: "your-client-id" # TO BE FILLED
+  client_secret: "your-client-secret" # TO BE FILLED
+  scopes:
+    - openid
+    - profile
+  user_mapping_provider:
+    config:
+      localpart_template: '{{ user.preferred_username }}'
+      display_name_template: '{{ user.name }}'
+```
+
+### GitHub
+
+GitHub is a bit special as it is not an OpenID Connect compliant provider, but just a regular OAuth2 provider.
+The `/user` API endpoint can be used to retrieve information about the user.
+As the OIDC login mechanism needs an attribute to uniquely identify users, and that endpoint does not return a `sub` property, an alternative `subject_claim` has to be set.
+
+1. Create a new OAuth application: https://github.com/settings/applications/new
+2. Set the callback URL to `[synapse base url]/_synapse/oidc/callback`
+
+```yaml
+oidc_config:
+  enabled: true
+  issuer: "https://github.com/"
+  discover: false
+  client_id: "your-client-id" # TO BE FILLED
+  client_secret: "your-client-secret" # TO BE FILLED
+  authorization_endpoint: "https://github.com/login/oauth/authorize"
+  token_endpoint: "https://github.com/login/oauth/access_token"
+  userinfo_endpoint: "https://api.github.com/user"
+  scopes:
+    - read:user
+  user_mapping_provider:
+    config:
+      subject_claim: 'id'
+      localpart_template: '{{ user.login }}'
+      display_name_template: '{{ user.name }}'
+```
+
+### Google
+
+1. Set up a project in the Google API Console
+2. Obtain the OAuth 2.0 credentials (see the Credentials page of the Google API Console)
+3. Add this Authorized redirect URI: `[synapse base url]/_synapse/oidc/callback`
+
+```yaml
+oidc_config:
+  enabled: true
+  issuer: "https://accounts.google.com/"
+  discover: true
+  client_id: "your-client-id" # TO BE FILLED
+  client_secret: "your-client-secret" # TO BE FILLED
+  scopes:
+    - openid
+    - profile
+  user_mapping_provider:
+    config:
+      localpart_template: '{{ user.given_name|lower }}'
+      display_name_template: '{{ user.name }}'
+```
+
+### Twitch
+
+1. Set up a developer account on [Twitch](https://dev.twitch.tv/)
+2. Obtain the OAuth 2.0 credentials by [creating an app](https://dev.twitch.tv/console/apps/)
+3. Add this OAuth Redirect URL: `[synapse base url]/_synapse/oidc/callback`
+
+```yaml
+oidc_config:
+  enabled: true
+  issuer: "https://id.twitch.tv/oauth2/"
+  discover: true
+  client_id: "your-client-id" # TO BE FILLED
+  client_secret: "your-client-secret" # TO BE FILLED
+  client_auth_method: "client_secret_post"
+  scopes:
+    - openid
+  user_mapping_provider:
+    config:
+      localpart_template: '{{ user.preferred_username }}'
+      display_name_template: '{{ user.name }}'
+```
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 98ead7dc0e..1e397f7734 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -1470,6 +1470,94 @@ saml2_config:
   #template_dir: "res/templates"


+# Enable OpenID Connect for registration and login. Uses authlib.
+#
+oidc_config:
+  # enable OpenID Connect. Defaults to false.
+  #
+  #enabled: true
+
+  # use the OIDC discovery mechanism to discover endpoints. Defaults to true.
+  #
+  #discover: true
+
+  # the OIDC issuer. Used to validate tokens and discover the provider's endpoints. Required.
+  #
+  #issuer: "https://accounts.example.com/"
+
+  # oauth2 client id to use. Required.
+  #
+  #client_id: "provided-by-your-issuer"
+
+  # oauth2 client secret to use. Required.
+  #
+  #client_secret: "provided-by-your-issuer"
+
+  # auth method to use when exchanging the token.
+  # Valid values are "client_secret_basic" (default), "client_secret_post" and "none".
+  #
+  #client_auth_method: "client_secret_basic"
+
+  # list of scopes to request. This should include the "openid" scope. Defaults to ["openid"].
+  #
+  #scopes: ["openid"]
+
+  # the oauth2 authorization endpoint. Required if provider discovery is disabled.
+  #
+  #authorization_endpoint: "https://accounts.example.com/oauth2/auth"
+
+  # the oauth2 token endpoint. Required if provider discovery is disabled.
+  #
+  #token_endpoint: "https://accounts.example.com/oauth2/token"
+
+  # the OIDC userinfo endpoint. Required if discovery is disabled and the "openid" scope is not requested.
+  #
+  #userinfo_endpoint: "https://accounts.example.com/userinfo"
+
+  # URI from which to fetch the JWKS. Required if discovery is disabled and the "openid" scope is used.
+  #
+  #jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
+
+  # skip metadata verification. Defaults to false.
+  # Use this if you are connecting to a provider that is not OpenID Connect compliant.
+  # Avoid this in production.
+  #
+  #skip_verification: false
+
+
+  # An external module can be provided here as a custom solution to mapping
+  # attributes returned from an OIDC provider onto a matrix user.
+  #
+  user_mapping_provider:
+    # The custom module's class. Uncomment to use a custom module.
+    # Default is 'synapse.handlers.oidc_handler.JinjaOidcMappingProvider'.
+    #
+    #module: mapping_provider.OidcMappingProvider
+
+    # Custom configuration values for the module. Below options are intended
+    # for the built-in provider, they should be changed if using a custom
+    # module. This section will be passed as a Python dictionary to the
+    # module's `parse_config` method.
+    #
+    # Below is the config of the default mapping provider, based on Jinja2
+    # templates. Those templates are used to render user attributes, where the
+    # userinfo object is available through the `user` variable.
+    #
+    config:
+      # name of the claim containing a unique identifier for the user.
+      # Defaults to `sub`, which OpenID Connect compliant providers should provide.
+      #
+      #subject_claim: "sub"
+
+      # Jinja2 template for the localpart of the MXID
+      #
+      localpart_template: "{{ user.preferred_username }}"
+
+      # Jinja2 template for the display name to set on first login. Optional.
+      #
+      #display_name_template: "{{ user.given_name }} {{ user.last_name }}"
+
+
 # Enable CAS for registration and login.
 #
@@ -1554,6 +1642,13 @@ sso:
     #     #
     #     # This template has no additional variables.
     #
+    #   * HTML page to display to users if something goes wrong during the
+    #     OpenID Connect authentication process: 'sso_error.html'.
+    #
+    #     When rendering, this template is given two variables:
+    #         * error: the technical name of the error
+    #         * error_description: a human-readable message for the error
+    #
     # You can see the default templates at:
     # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates
     #
diff --git a/mypy.ini b/mypy.ini
index 69be2f67ad..3533797d68 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -75,3 +75,6 @@ ignore_missing_imports = True

 [mypy-jwt.*]
 ignore_missing_imports = True
+
+[mypy-authlib.*]
+ignore_missing_imports = True
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index cbd1ea475a..bc8695d8dd 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -192,6 +192,11 @@ class SynapseHomeServer(HomeServer):
                 }
             )

+        if self.get_config().oidc_enabled:
+            from synapse.rest.oidc import OIDCResource
+
+            resources["/_synapse/oidc"] = OIDCResource(self)
+
         if self.get_config().saml2_enabled:
             from synapse.rest.saml2 import SAML2Resource

@@ -422,6 +427,13 @@ def setup(config_options):
             # Check if it needs to be reprovisioned every day.
             hs.get_clock().looping_call(reprovision_acme, 24 * 60 * 60 * 1000)

+        # Load the OIDC provider metadata, if OIDC is enabled.
+        if hs.config.oidc_enabled:
+            oidc = hs.get_oidc_handler()
+            # Loading the provider metadata also ensures the provider config is valid.
+            yield defer.ensureDeferred(oidc.load_metadata())
+            yield defer.ensureDeferred(oidc.load_jwks())
+
         _base.start(hs, config.listeners)

         hs.get_datastore().db.updates.start_doing_background_updates()
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index 3053fc9d27..9e576060d4 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -13,6 +13,7 @@ from synapse.config import (
     key,
     logger,
     metrics,
+    oidc_config,
     password,
     password_auth_providers,
     push,
@@ -59,6 +60,7 @@ class RootConfig:
     saml2: saml2_config.SAML2Config
     cas: cas.CasConfig
     sso: sso.SSOConfig
+    oidc: oidc_config.OIDCConfig
     jwt: jwt_config.JWTConfig
     password: password.PasswordConfig
     email: emailconfig.EmailConfig
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index be6c6afa74..996d3e6bf7 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -27,6 +27,7 @@ from .jwt_config import JWTConfig
 from .key import KeyConfig
 from .logger import LoggingConfig
 from .metrics import MetricsConfig
+from .oidc_config import OIDCConfig
 from .password import PasswordConfig
 from .password_auth_providers import PasswordAuthProviderConfig
 from .push import PushConfig
@@ -66,6 +67,7 @@ class HomeServerConfig(RootConfig):
         AppServiceConfig,
         KeyConfig,
         SAML2Config,
+        OIDCConfig,
         CasConfig,
         SSOConfig,
         JWTConfig,
diff --git a/synapse/config/oidc_config.py b/synapse/config/oidc_config.py
new file mode 100644
index 0000000000..5af110745e
--- /dev/null
+++ b/synapse/config/oidc_config.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Quentin Gliech
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.python_dependencies import DependencyException, check_requirements
+from synapse.util.module_loader import load_module
+
+from ._base import Config, ConfigError
+
+DEFAULT_USER_MAPPING_PROVIDER = "synapse.handlers.oidc_handler.JinjaOidcMappingProvider"
+
+
+class OIDCConfig(Config):
+    section = "oidc"
+
+    def read_config(self, config, **kwargs):
+        self.oidc_enabled = False
+
+        oidc_config = config.get("oidc_config")
+
+        if not oidc_config or not oidc_config.get("enabled", False):
+            return
+
+        try:
+            check_requirements("oidc")
+        except DependencyException as e:
+            raise ConfigError(e.message)
+
+        public_baseurl = self.public_baseurl
+        if public_baseurl is None:
+            raise ConfigError("oidc_config requires a public_baseurl to be set")
+        self.oidc_callback_url = public_baseurl + "_synapse/oidc/callback"
+
+        self.oidc_enabled = True
+        self.oidc_discover = oidc_config.get("discover", True)
+        self.oidc_issuer = oidc_config["issuer"]
+        self.oidc_client_id = oidc_config["client_id"]
+        self.oidc_client_secret = oidc_config["client_secret"]
+        self.oidc_client_auth_method = oidc_config.get(
+            "client_auth_method", "client_secret_basic"
+        )
+        self.oidc_scopes = oidc_config.get("scopes", ["openid"])
+        self.oidc_authorization_endpoint = oidc_config.get("authorization_endpoint")
+        self.oidc_token_endpoint = oidc_config.get("token_endpoint")
+        self.oidc_userinfo_endpoint = oidc_config.get("userinfo_endpoint")
+        self.oidc_jwks_uri = oidc_config.get("jwks_uri")
+        self.oidc_subject_claim = oidc_config.get("subject_claim", "sub")
+        self.oidc_skip_verification = oidc_config.get("skip_verification", False)
+
+        ump_config = oidc_config.get("user_mapping_provider", {})
+        ump_config.setdefault("module", DEFAULT_USER_MAPPING_PROVIDER)
+        ump_config.setdefault("config", {})
+
+        (
+            self.oidc_user_mapping_provider_class,
+            self.oidc_user_mapping_provider_config,
+        ) = load_module(ump_config)
+
+        # Ensure the loaded user mapping module has defined all necessary methods
+        required_methods = [
+            "get_remote_user_id",
+            "map_user_attributes",
+        ]
+        missing_methods = [
+            method
+            for method in required_methods
+            if not hasattr(self.oidc_user_mapping_provider_class, method)
+        ]
+        if missing_methods:
+            raise ConfigError(
+                "Class specified by oidc_config."
+                "user_mapping_provider.module is missing required "
+                "methods: %s" % (", ".join(missing_methods),)
+            )
+
+    def generate_config_section(self, config_dir_path, server_name, **kwargs):
+        return """\
+        # Enable OpenID Connect for registration and login. Uses authlib.
+        #
+        oidc_config:
+          # enable OpenID Connect. Defaults to false.
+          #
+          #enabled: true
+
+          # use the OIDC discovery mechanism to discover endpoints. Defaults to true.
+          #
+          #discover: true
+
+          # the OIDC issuer. Used to validate tokens and discover the provider's endpoints. Required.
+          #
+          #issuer: "https://accounts.example.com/"
+
+          # oauth2 client id to use. Required.
+          #
+          #client_id: "provided-by-your-issuer"
+
+          # oauth2 client secret to use. Required.
+          #
+          #client_secret: "provided-by-your-issuer"
+
+          # auth method to use when exchanging the token.
+          # Valid values are "client_secret_basic" (default), "client_secret_post" and "none".
+          #
+          #client_auth_method: "client_secret_basic"
+
+          # list of scopes to request. This should include the "openid" scope. Defaults to ["openid"].
+          #
+          #scopes: ["openid"]
+
+          # the oauth2 authorization endpoint. Required if provider discovery is disabled.
+          #
+          #authorization_endpoint: "https://accounts.example.com/oauth2/auth"
+
+          # the oauth2 token endpoint. Required if provider discovery is disabled.
+          #
+          #token_endpoint: "https://accounts.example.com/oauth2/token"
+
+          # the OIDC userinfo endpoint. Required if discovery is disabled and the "openid" scope is not requested.
+          #
+          #userinfo_endpoint: "https://accounts.example.com/userinfo"
+
+          # URI from which to fetch the JWKS. Required if discovery is disabled and the "openid" scope is used.
+          #
+          #jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
+
+          # skip metadata verification. Defaults to false.
+          # Use this if you are connecting to a provider that is not OpenID Connect compliant.
+          # Avoid this in production.
+          #
+          #skip_verification: false
+
+
+          # An external module can be provided here as a custom solution to mapping
+          # attributes returned from an OIDC provider onto a matrix user.
+          #
+          user_mapping_provider:
+            # The custom module's class. Uncomment to use a custom module.
+            # Default is {mapping_provider!r}.
+            #
+            #module: mapping_provider.OidcMappingProvider
+
+            # Custom configuration values for the module. Below options are intended
+            # for the built-in provider, they should be changed if using a custom
+            # module. This section will be passed as a Python dictionary to the
+            # module's `parse_config` method.
+            #
+            # Below is the config of the default mapping provider, based on Jinja2
+            # templates. Those templates are used to render user attributes, where the
+            # userinfo object is available through the `user` variable.
+            #
+            config:
+              # name of the claim containing a unique identifier for the user.
+              # Defaults to `sub`, which OpenID Connect compliant providers should provide.
+              #
+              #subject_claim: "sub"
+
+              # Jinja2 template for the localpart of the MXID
+              #
+              localpart_template: "{{{{ user.preferred_username }}}}"
+
+              # Jinja2 template for the display name to set on first login. Optional.
+              #
+              #display_name_template: "{{{{ user.given_name }}}} {{{{ user.last_name }}}}"
+        """.format(
+            mapping_provider=DEFAULT_USER_MAPPING_PROVIDER
+        )
diff --git a/synapse/config/sso.py b/synapse/config/sso.py
index cac6bc0139..aff642f015 100644
--- a/synapse/config/sso.py
+++ b/synapse/config/sso.py
@@ -36,17 +36,13 @@ class SSOConfig(Config):
         if not template_dir:
             template_dir = pkg_resources.resource_filename("synapse", "res/templates",)

-        self.sso_redirect_confirm_template_dir = template_dir
+        self.sso_template_dir = template_dir
         self.sso_account_deactivated_template = self.read_file(
-            os.path.join(
-                self.sso_redirect_confirm_template_dir, "sso_account_deactivated.html"
-            ),
+            os.path.join(self.sso_template_dir, "sso_account_deactivated.html"),
             "sso_account_deactivated_template",
         )
         self.sso_auth_success_template = self.read_file(
-            os.path.join(
-                self.sso_redirect_confirm_template_dir, "sso_auth_success.html"
-            ),
+            os.path.join(self.sso_template_dir, "sso_auth_success.html"),
             "sso_auth_success_template",
         )

@@ -137,6 +133,13 @@ class SSOConfig(Config):
     #     #
     #     # This template has no additional variables.
     #
+    #   * HTML page to display to users if something goes wrong during the
+    #     OpenID Connect authentication process: 'sso_error.html'.
+ # + # When rendering, this template is given two variables: + # * error: the technical name of the error + # * error_description: a human-readable message for the error + # # You can see the default templates at: # https://github.com/matrix-org/synapse/tree/master/synapse/res/templates # diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 7613e5b6ab..f8d2331bf1 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -126,13 +126,13 @@ class AuthHandler(BaseHandler): # It notifies the user they are about to give access to their matrix account # to the client. self._sso_redirect_confirm_template = load_jinja2_templates( - hs.config.sso_redirect_confirm_template_dir, ["sso_redirect_confirm.html"], + hs.config.sso_template_dir, ["sso_redirect_confirm.html"], )[0] # The following template is shown during user interactive authentication # in the fallback auth scenario. It notifies the user that they are # authenticating for an operation to occur on their account. self._sso_auth_confirm_template = load_jinja2_templates( - hs.config.sso_redirect_confirm_template_dir, ["sso_auth_confirm.html"], + hs.config.sso_template_dir, ["sso_auth_confirm.html"], )[0] # The following template is shown after a successful user interactive # authentication session. It tells the user they can close the window. diff --git a/synapse/handlers/oidc_handler.py b/synapse/handlers/oidc_handler.py new file mode 100644 index 0000000000..178f263439 --- /dev/null +++ b/synapse/handlers/oidc_handler.py @@ -0,0 +1,998 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Quentin Gliech +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import logging +from typing import Dict, Generic, List, Optional, Tuple, TypeVar +from urllib.parse import urlencode + +import attr +import pymacaroons +from authlib.common.security import generate_token +from authlib.jose import JsonWebToken +from authlib.oauth2.auth import ClientAuth +from authlib.oauth2.rfc6749.parameters import prepare_grant_uri +from authlib.oidc.core import CodeIDToken, ImplicitIDToken, UserInfo +from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url +from jinja2 import Environment, Template +from pymacaroons.exceptions import ( + MacaroonDeserializationException, + MacaroonInvalidSignatureException, +) +from typing_extensions import TypedDict + +from twisted.web.client import readBody + +from synapse.config import ConfigError +from synapse.http.server import finish_request +from synapse.http.site import SynapseRequest +from synapse.push.mailer import load_jinja2_templates +from synapse.server import HomeServer +from synapse.types import UserID, map_username_to_mxid_localpart + +logger = logging.getLogger(__name__) + +SESSION_COOKIE_NAME = b"oidc_session" + +#: A token exchanged from the token endpoint, as per RFC6749 sec 5.1. and +#: OpenID.Core sec 3.1.3.3. 
+Token = TypedDict(
+    "Token",
+    {
+        "access_token": str,
+        "token_type": str,
+        "id_token": Optional[str],
+        "refresh_token": Optional[str],
+        "expires_in": int,
+        "scope": Optional[str],
+    },
+)
+
+#: A JWK, as per RFC7517 sec 4. The type could be more precise than that, but
+#: there is no real point in doing so in our case.
+JWK = Dict[str, str]
+
+#: A JWK Set, as per RFC7517 sec 5.
+JWKS = TypedDict("JWKS", {"keys": List[JWK]})
+
+
+class OidcError(Exception):
+    """Used to catch errors when calling the token_endpoint
+    """
+
+    def __init__(self, error, error_description=None):
+        self.error = error
+        self.error_description = error_description
+
+    def __str__(self):
+        if self.error_description:
+            return "{}: {}".format(self.error, self.error_description)
+        return self.error
+
+
+class MappingException(Exception):
+    """Used to catch errors when mapping the UserInfo object
+    """
+
+
+class OidcHandler:
+    """Handles requests related to the OpenID Connect login flow.
+    """
+
+    def __init__(self, hs: HomeServer):
+        self._callback_url = hs.config.oidc_callback_url  # type: str
+        self._scopes = hs.config.oidc_scopes  # type: List[str]
+        self._client_auth = ClientAuth(
+            hs.config.oidc_client_id,
+            hs.config.oidc_client_secret,
+            hs.config.oidc_client_auth_method,
+        )  # type: ClientAuth
+        self._client_auth_method = hs.config.oidc_client_auth_method  # type: str
+        self._subject_claim = hs.config.oidc_subject_claim
+        self._provider_metadata = OpenIDProviderMetadata(
+            issuer=hs.config.oidc_issuer,
+            authorization_endpoint=hs.config.oidc_authorization_endpoint,
+            token_endpoint=hs.config.oidc_token_endpoint,
+            userinfo_endpoint=hs.config.oidc_userinfo_endpoint,
+            jwks_uri=hs.config.oidc_jwks_uri,
+        )  # type: OpenIDProviderMetadata
+        self._provider_needs_discovery = hs.config.oidc_discover  # type: bool
+        self._user_mapping_provider = hs.config.oidc_user_mapping_provider_class(
+            hs.config.oidc_user_mapping_provider_config
+        )  # type: OidcMappingProvider
+        self._skip_verification = hs.config.oidc_skip_verification  # type: bool
+
+        self._http_client = hs.get_proxied_http_client()
+        self._auth_handler = hs.get_auth_handler()
+        self._registration_handler = hs.get_registration_handler()
+        self._datastore = hs.get_datastore()
+        self._clock = hs.get_clock()
+        self._hostname = hs.hostname  # type: str
+        self._server_name = hs.config.server_name  # type: str
+        self._macaroon_secret_key = hs.config.macaroon_secret_key
+        self._error_template = load_jinja2_templates(
+            hs.config.sso_template_dir, ["sso_error.html"]
+        )[0]
+
+        # identifier for the external_ids table
+        self._auth_provider_id = "oidc"
+
+    def _render_error(
+        self, request, error: str, error_description: Optional[str] = None
+    ) -> None:
+        """Renders the error template and responds with it.
+
+        This is used to show errors to the user. The template of this page can
+        be found under ``synapse/res/templates/sso_error.html``.
+
+        Args:
+            request: The incoming request from the browser.
+                We'll respond with an HTML page describing the error.
+            error: A technical identifier for this error. Those include
+                well-known OAuth2/OIDC error types like invalid_request or
+                access_denied.
+            error_description: A human-readable description of the error.
+ """ + html_bytes = self._error_template.render( + error=error, error_description=error_description + ).encode("utf-8") + + request.setResponseCode(400) + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.setHeader(b"Content-Length", b"%i" % len(html_bytes)) + request.write(html_bytes) + finish_request(request) + + def _validate_metadata(self): + """Verifies the provider metadata. + + This checks the validity of the currently loaded provider. Not + everything is checked, only: + + - ``issuer`` + - ``authorization_endpoint`` + - ``token_endpoint`` + - ``response_types_supported`` (checks if "code" is in it) + - ``jwks_uri`` + + Raises: + ValueError: if something in the provider is not valid + """ + # Skip verification to allow non-compliant providers (e.g. issuers not running on a secure origin) + if self._skip_verification is True: + return + + m = self._provider_metadata + m.validate_issuer() + m.validate_authorization_endpoint() + m.validate_token_endpoint() + + if m.get("token_endpoint_auth_methods_supported") is not None: + m.validate_token_endpoint_auth_methods_supported() + if ( + self._client_auth_method + not in m["token_endpoint_auth_methods_supported"] + ): + raise ValueError( + '"{auth_method}" not in "token_endpoint_auth_methods_supported" ({supported!r})'.format( + auth_method=self._client_auth_method, + supported=m["token_endpoint_auth_methods_supported"], + ) + ) + + if m.get("response_types_supported") is not None: + m.validate_response_types_supported() + + if "code" not in m["response_types_supported"]: + raise ValueError( + '"code" not in "response_types_supported" (%r)' + % (m["response_types_supported"],) + ) + + # If the openid scope was not requested, we need a userinfo endpoint to fetch user infos + if self._uses_userinfo: + if m.get("userinfo_endpoint") is None: + raise ValueError( + 'provider has no "userinfo_endpoint", even though it is required because the "openid" scope is not requested' + ) + else: + # If we're not using userinfo, we need a valid jwks to validate the ID token + if m.get("jwks") is None: + if m.get("jwks_uri") is not None: + m.validate_jwks_uri() + else: + raise ValueError('"jwks_uri" must be set') + + @property + def _uses_userinfo(self) -> bool: + """Returns True if the ``userinfo_endpoint`` should be used. + + This is based on the requested scopes: if the scopes include + ``openid``, the provider should give use an ID token containing the + user informations. If not, we should fetch them using the + ``access_token`` with the ``userinfo_endpoint``. + """ + + # Maybe that should be user-configurable and not inferred? + return "openid" not in self._scopes + + async def load_metadata(self) -> OpenIDProviderMetadata: + """Load and validate the provider metadata. + + The values metadatas are discovered if ``oidc_config.discovery`` is + ``True`` and then cached. + + Raises: + ValueError: if something in the provider is not valid + + Returns: + The provider's metadata. + """ + # If we are using the OpenID Discovery documents, it needs to be loaded once + # FIXME: should there be a lock here? + if self._provider_needs_discovery: + url = get_well_known_url(self._provider_metadata["issuer"], external=True) + metadata_response = await self._http_client.get_json(url) + # TODO: maybe update the other way around to let user override some values? 
+            self._provider_metadata.update(metadata_response)
+            self._provider_needs_discovery = False
+
+        self._validate_metadata()
+
+        return self._provider_metadata
+
+    async def load_jwks(self, force: bool = False) -> JWKS:
+        """Load the JSON Web Key Set used to sign ID tokens.
+
+        If we're not using the ``userinfo_endpoint``, user info is extracted
+        from the ID token, which is a JWT signed by keys given by the provider.
+        The keys are then cached.
+
+        Args:
+            force: Force reloading the keys.
+
+        Returns:
+            The key set
+
+            Looks like this::
+
+                {
+                    'keys': [
+                        {
+                            'kid': 'abcdef',
+                            'kty': 'RSA',
+                            'alg': 'RS256',
+                            'use': 'sig',
+                            'e': 'XXXX',
+                            'n': 'XXXX',
+                        }
+                    ]
+                }
+        """
+        if self._uses_userinfo:
+            # We're not using jwt signing, return an empty jwk set
+            return {"keys": []}
+
+        # First check if the JWKS are loaded in the provider metadata.
+        # It can happen either if the provider gives its JWKS in the discovery
+        # document directly or if it was already loaded once.
+        metadata = await self.load_metadata()
+        jwk_set = metadata.get("jwks")
+        if jwk_set is not None and not force:
+            return jwk_set
+
+        # Loading the JWKS using the `jwks_uri` metadata
+        uri = metadata.get("jwks_uri")
+        if not uri:
+            raise RuntimeError('Missing "jwks_uri" in metadata')
+
+        jwk_set = await self._http_client.get_json(uri)
+
+        # Caching the JWKS in the provider's metadata
+        self._provider_metadata["jwks"] = jwk_set
+        return jwk_set
+
+    async def _exchange_code(self, code: str) -> Token:
+        """Exchange an authorization code for a token.
+
+        This calls the ``token_endpoint`` with the authorization code we
+        received in the callback to exchange it for a token. The call uses the
+        ``ClientAuth`` to authenticate with the client with its ID and secret.
+
+        Args:
+            code: The authorization code we got from the callback.
+
+        Returns:
+            A dict containing various tokens.
+
+            May look like this::
+
+                {
+                    'token_type': 'bearer',
+                    'access_token': 'abcdef',
+                    'expires_in': 3599,
+                    'id_token': 'ghijkl',
+                    'refresh_token': 'mnopqr',
+                }
+
+        Raises:
+            OidcError: when the ``token_endpoint`` returned an error.
+        """
+        metadata = await self.load_metadata()
+        token_endpoint = metadata.get("token_endpoint")
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "User-Agent": self._http_client.user_agent,
+            "Accept": "application/json",
+        }
+
+        args = {
+            "grant_type": "authorization_code",
+            "code": code,
+            "redirect_uri": self._callback_url,
+        }
+        body = urlencode(args, True)
+
+        # Fill the body/headers with credentials
+        uri, headers, body = self._client_auth.prepare(
+            method="POST", uri=token_endpoint, headers=headers, body=body
+        )
+        headers = {k: [v] for (k, v) in headers.items()}
+
+        # Do the actual request
+        # We're not using the SimpleHttpClient util methods as we don't want to
+        # check the HTTP status code, and we do the body encoding ourselves.
+        response = await self._http_client.request(
+            method="POST", uri=uri, data=body.encode("utf-8"), headers=headers,
+        )
+
+        # This is used in multiple error messages below
+        status = "{code} {phrase}".format(
+            code=response.code, phrase=response.phrase.decode("utf-8")
+        )
+
+        resp_body = await readBody(response)
+
+        if response.code >= 500:
+            # In case of a server error, we should first try to decode the body
+            # and check for an error field. If not, we respond with a generic
+            # error message.
+            try:
+                resp = json.loads(resp_body.decode("utf-8"))
+                error = resp["error"]
+                description = resp.get("error_description", error)
+            except (ValueError, KeyError):
+                # Catch ValueError for the JSON decoding and KeyError for the "error" field
+                error = "server_error"
+                description = (
+                    'Authorization server responded with a "{status}" error '
+                    "while exchanging the authorization code."
+                ).format(status=status)
+
+            raise OidcError(error, description)
+
+        # Since it is not a 5xx code, the body should be valid JSON. It will
+        # raise if not.
+        resp = json.loads(resp_body.decode("utf-8"))
+
+        if "error" in resp:
+            error = resp["error"]
+            # In case the authorization server responded with an error field,
+            # it should be a 4xx code. If not, warn about it but don't do
+            # anything special and report the original error message.
+            if response.code < 400:
+                logger.debug(
+                    "Invalid response from the authorization server: "
+                    'responded with a "{status}" '
+                    "but body has an error field: {error!r}".format(
+                        status=status, error=resp["error"]
+                    )
+                )
+
+            description = resp.get("error_description", error)
+            raise OidcError(error, description)
+
+        # Now, this should not be an error. According to RFC6749 sec 5.1, it
+        # should be a 200 code. We're a bit more flexible than that, and will
+        # only throw on a 4xx code.
+        if response.code >= 400:
+            description = (
+                'Authorization server responded with a "{status}" error '
+                'but did not include an "error" field in its response.'.format(
+                    status=status
+                )
+            )
+            logger.warning(description)
+            # Body was still valid JSON. Might be useful to log it for debugging.
+            logger.warning("Code exchange response: {resp!r}".format(resp=resp))
+            raise OidcError("server_error", description)
+
+        return resp
+
+    async def _fetch_userinfo(self, token: Token) -> UserInfo:
+        """Fetch user information from the ``userinfo_endpoint``.
+
+        Args:
+            token: the token given by the ``token_endpoint``.
+                Must include an ``access_token`` field.
+
+        Returns:
+            UserInfo: an object representing the user.
+        """
+        metadata = await self.load_metadata()
+
+        resp = await self._http_client.get_json(
+            metadata["userinfo_endpoint"],
+            headers={"Authorization": ["Bearer {}".format(token["access_token"])]},
+        )
+
+        return UserInfo(resp)
+
+    async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo:
+        """Return an instance of UserInfo from the token's ``id_token``.
+
+        Args:
+            token: the token given by the ``token_endpoint``.
+                Must include an ``id_token`` field.
+            nonce: the nonce value originally sent in the initial authorization
+                request. This value should match the one inside the token.
+
+        Returns:
+            An object representing the user.
+        """
+        metadata = await self.load_metadata()
+        claims_params = {
+            "nonce": nonce,
+            "client_id": self._client_auth.client_id,
+        }
+        if "access_token" in token:
+            # If we got an `access_token`, there should be an `at_hash` claim
+            # in the `id_token` that we can check against.
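+            # (authlib's CodeIDToken claims class performs that `at_hash`
+            # check during validation.)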
+ claims_params["access_token"] = token["access_token"] + claims_cls = CodeIDToken + else: + claims_cls = ImplicitIDToken + + alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"]) + + jwt = JsonWebToken(alg_values) + + claim_options = {"iss": {"values": [metadata["issuer"]]}} + + # Try to decode the keys in cache first, then retry by forcing the keys + # to be reloaded + jwk_set = await self.load_jwks() + try: + claims = jwt.decode( + token["id_token"], + key=jwk_set, + claims_cls=claims_cls, + claims_options=claim_options, + claims_params=claims_params, + ) + except ValueError: + jwk_set = await self.load_jwks(force=True) # try reloading the jwks + claims = jwt.decode( + token["id_token"], + key=jwk_set, + claims_cls=claims_cls, + claims_options=claim_options, + claims_params=claims_params, + ) + + claims.validate(leeway=120) # allows 2 min of clock skew + return UserInfo(claims) + + async def handle_redirect_request( + self, request: SynapseRequest, client_redirect_url: bytes + ) -> None: + """Handle an incoming request to /login/sso/redirect + + It redirects the browser to the authorization endpoint with a few + parameters: + + - ``client_id``: the client ID set in ``oidc_config.client_id`` + - ``response_type``: ``code`` + - ``redirect_uri``: the callback URL ; ``{base url}/_synapse/oidc/callback`` + - ``scope``: the list of scopes set in ``oidc_config.scopes`` + - ``state``: a random string + - ``nonce``: a random string + + In addition to redirecting the client, we are setting a cookie with + a signed macaroon token containing the state, the nonce and the + client_redirect_url params. Those are then checked when the client + comes back from the provider. + + + Args: + request: the incoming request from the browser. + We'll respond to it with a redirect and a cookie. + client_redirect_url: the URL that we should redirect the client to + when everything is done + """ + + state = generate_token() + nonce = generate_token() + + cookie = self._generate_oidc_session_token( + state=state, nonce=nonce, client_redirect_url=client_redirect_url.decode(), + ) + request.addCookie( + SESSION_COOKIE_NAME, + cookie, + path="/_synapse/oidc", + max_age="3600", + httpOnly=True, + sameSite="lax", + ) + + metadata = await self.load_metadata() + authorization_endpoint = metadata.get("authorization_endpoint") + uri = prepare_grant_uri( + authorization_endpoint, + client_id=self._client_auth.client_id, + response_type="code", + redirect_uri=self._callback_url, + scope=self._scopes, + state=state, + nonce=nonce, + ) + request.redirect(uri) + finish_request(request) + + async def handle_oidc_callback(self, request: SynapseRequest) -> None: + """Handle an incoming request to /_synapse/oidc/callback + + Since we might want to display OIDC-related errors in a user-friendly + way, we don't raise SynapseError from here. Instead, we call + ``self._render_error`` which displays an HTML page for the error. 
+
+        Most of the OpenID Connect logic happens here:
+
+          - first, we check if there was any error returned by the provider and
+            display it
+          - then we fetch the session cookie, decode and verify it
+          - the ``state`` query parameter should match with the one stored in the
+            session cookie
+          - once we know this session is legit, exchange the code with the
+            provider using the ``token_endpoint`` (see ``_exchange_code``)
+          - once we have the token, use it to either extract the UserInfo from
+            the ``id_token`` (``_parse_id_token``), or use the ``access_token``
+            to fetch UserInfo from the ``userinfo_endpoint``
+            (``_fetch_userinfo``)
+          - map that UserInfo to a Matrix user (``_map_userinfo_to_user``) and
+            finish the login
+
+        Args:
+            request: the incoming request from the browser.
+        """
+
+        # The provider might redirect with an error.
+        # In that case, just display it as-is.
+        if b"error" in request.args:
+            error = request.args[b"error"][0].decode()
+            description = request.args.get(b"error_description", [b""])[0].decode()
+
+            # Most of the errors returned by the provider could be due to
+            # either the provider misbehaving or Synapse being misconfigured.
+            # The only exception to that is "access_denied", where the user
+            # probably cancelled the login flow. In other cases, log those errors.
+            if error != "access_denied":
+                logger.error("Error from the OIDC provider: %s %s", error, description)
+
+            self._render_error(request, error, description)
+            return
+
+        # Fetch the session cookie
+        session = request.getCookie(SESSION_COOKIE_NAME)
+        if session is None:
+            logger.info("No session cookie found")
+            self._render_error(request, "missing_session", "No session cookie found")
+            return
+
+        # Remove the cookie. There is a good chance that if the callback failed
+        # once, it will fail next time and the code will already be exchanged.
+        # Removing it early avoids spamming the provider with token requests.
+        request.addCookie(
+            SESSION_COOKIE_NAME,
+            b"",
+            path="/_synapse/oidc",
+            expires="Thu, Jan 01 1970 00:00:00 UTC",
+            httpOnly=True,
+            sameSite="lax",
+        )
+
+        # Check for the state query parameter
+        if b"state" not in request.args:
+            logger.info("State parameter is missing")
+            self._render_error(request, "invalid_request", "State parameter is missing")
+            return
+
+        state = request.args[b"state"][0].decode()
+
+        # Deserialize the session token and verify it.
+        try:
+            nonce, client_redirect_url = self._verify_oidc_session_token(session, state)
+        except MacaroonDeserializationException as e:
+            logger.exception("Invalid session")
+            self._render_error(request, "invalid_session", str(e))
+            return
+        except MacaroonInvalidSignatureException as e:
+            logger.exception("Could not verify session")
+            self._render_error(request, "mismatching_session", str(e))
+            return
+
+        # Exchange the code with the provider
+        if b"code" not in request.args:
+            logger.info("Code parameter is missing")
+            self._render_error(request, "invalid_request", "Code parameter is missing")
+            return
+
+        logger.info("Exchanging code")
+        code = request.args[b"code"][0].decode()
+        try:
+            token = await self._exchange_code(code)
+        except OidcError as e:
+            logger.exception("Could not exchange code")
+            self._render_error(request, e.error, e.error_description)
+            return
+
+        # Now that we have a token, get the userinfo, either by decoding the
+        # `id_token` or by fetching the `userinfo_endpoint`.
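+        # (`_uses_userinfo` is true exactly when the "openid" scope was not
+        # requested, in which case there is no `id_token` to decode.)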
+        if self._uses_userinfo:
+            logger.info("Fetching userinfo")
+            try:
+                userinfo = await self._fetch_userinfo(token)
+            except Exception as e:
+                logger.exception("Could not fetch userinfo")
+                self._render_error(request, "fetch_error", str(e))
+                return
+        else:
+            logger.info("Extracting userinfo from id_token")
+            try:
+                userinfo = await self._parse_id_token(token, nonce=nonce)
+            except Exception as e:
+                logger.exception("Invalid id_token")
+                self._render_error(request, "invalid_token", str(e))
+                return
+
+        # Call the mapper to register/login the user
+        try:
+            user_id = await self._map_userinfo_to_user(userinfo, token)
+        except MappingException as e:
+            logger.exception("Could not map user")
+            self._render_error(request, "mapping_error", str(e))
+            return
+
+        # and finally complete the login
+        await self._auth_handler.complete_sso_login(
+            user_id, request, client_redirect_url
+        )
+
+    def _generate_oidc_session_token(
+        self,
+        state: str,
+        nonce: str,
+        client_redirect_url: str,
+        duration_in_ms: int = (60 * 60 * 1000),
+    ) -> str:
+        """Generates a signed token storing data about an OIDC session.
+
+        When Synapse initiates an authorization flow, it creates a random state
+        and a random nonce. Those parameters are given to the provider and
+        should be verified when the client comes back from the provider.
+        The token is also used to store the client_redirect_url, which is used
+        to complete the SSO login flow.
+
+        Args:
+            state: The ``state`` parameter passed to the OIDC provider.
+            nonce: The ``nonce`` parameter passed to the OIDC provider.
+            client_redirect_url: The URL the client gave when it initiated the
+                flow.
+            duration_in_ms: An optional duration for the token in milliseconds.
+                Defaults to an hour.
+
+        Returns:
+            A signed macaroon token with the session information.
+        """
+        macaroon = pymacaroons.Macaroon(
+            location=self._server_name, identifier="key", key=self._macaroon_secret_key,
+        )
+        macaroon.add_first_party_caveat("gen = 1")
+        macaroon.add_first_party_caveat("type = session")
+        macaroon.add_first_party_caveat("state = %s" % (state,))
+        macaroon.add_first_party_caveat("nonce = %s" % (nonce,))
+        macaroon.add_first_party_caveat(
+            "client_redirect_url = %s" % (client_redirect_url,)
+        )
+        now = self._clock.time_msec()
+        expiry = now + duration_in_ms
+        macaroon.add_first_party_caveat("time < %d" % (expiry,))
+        return macaroon.serialize()
+
+    def _verify_oidc_session_token(self, session: str, state: str) -> Tuple[str, str]:
+        """Verifies and extracts an OIDC session token.
+
+        This verifies that a given session token was issued by this homeserver
+        and extracts the nonce and client_redirect_url caveats.
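+
+        The ``state`` caveat must match the ``state`` value the provider sent
+        back to the callback.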
+ + Args: + session: The session token to verify + state: The state the OIDC provider gave back + + Returns: + The nonce and the client_redirect_url for this session + """ + macaroon = pymacaroons.Macaroon.deserialize(session) + + v = pymacaroons.Verifier() + v.satisfy_exact("gen = 1") + v.satisfy_exact("type = session") + v.satisfy_exact("state = %s" % (state,)) + v.satisfy_general(lambda c: c.startswith("nonce = ")) + v.satisfy_general(lambda c: c.startswith("client_redirect_url = ")) + v.satisfy_general(self._verify_expiry) + + v.verify(macaroon, self._macaroon_secret_key) + + # Extract the `nonce` and `client_redirect_url` from the token + nonce = self._get_value_from_macaroon(macaroon, "nonce") + client_redirect_url = self._get_value_from_macaroon( + macaroon, "client_redirect_url" + ) + + return nonce, client_redirect_url + + def _get_value_from_macaroon(self, macaroon: pymacaroons.Macaroon, key: str) -> str: + """Extracts a caveat value from a macaroon token. + + Args: + macaroon: the token + key: the key of the caveat to extract + + Returns: + The extracted value + + Raises: + Exception: if the caveat was not in the macaroon + """ + prefix = key + " = " + for caveat in macaroon.caveats: + if caveat.caveat_id.startswith(prefix): + return caveat.caveat_id[len(prefix) :] + raise Exception("No %s caveat in macaroon" % (key,)) + + def _verify_expiry(self, caveat: str) -> bool: + prefix = "time < " + if not caveat.startswith(prefix): + return False + expiry = int(caveat[len(prefix) :]) + now = self._clock.time_msec() + return now < expiry + + async def _map_userinfo_to_user(self, userinfo: UserInfo, token: Token) -> str: + """Maps a UserInfo object to a mxid. + + UserInfo should have a claim that uniquely identifies users. This claim + is usually `sub`, but can be configured with `oidc_config.subject_claim`. + It is then used as an `external_id`. + + If we don't find the user that way, we should register the user, + mapping the localpart and the display name from the UserInfo. + + If a user already exists with the mxid we've mapped, raise an exception. 
+
+        Args:
+            userinfo: an object representing the user
+            token: a dict with the tokens obtained from the provider
+
+        Raises:
+            MappingException: if there was an error while mapping some properties
+
+        Returns:
+            The mxid of the user
+        """
+        try:
+            remote_user_id = self._user_mapping_provider.get_remote_user_id(userinfo)
+        except Exception as e:
+            raise MappingException(
+                "Failed to extract subject from OIDC response: %s" % (e,)
+            )
+
+        logger.info(
+            "Looking for existing mapping for user %s:%s",
+            self._auth_provider_id,
+            remote_user_id,
+        )
+
+        registered_user_id = await self._datastore.get_user_by_external_id(
+            self._auth_provider_id, remote_user_id,
+        )
+
+        if registered_user_id is not None:
+            logger.info("Found existing mapping %s", registered_user_id)
+            return registered_user_id
+
+        try:
+            attributes = await self._user_mapping_provider.map_user_attributes(
+                userinfo, token
+            )
+        except Exception as e:
+            raise MappingException(
+                "Could not extract user attributes from OIDC response: " + str(e)
+            )
+
+        logger.debug(
+            "Retrieved user attributes from user mapping provider: %r", attributes
+        )
+
+        if not attributes["localpart"]:
+            raise MappingException("localpart is empty")
+
+        localpart = map_username_to_mxid_localpart(attributes["localpart"])
+
+        user_id = UserID(localpart, self._hostname)
+        if await self._datastore.get_users_by_id_case_insensitive(user_id.to_string()):
+            # This mxid is taken
+            raise MappingException(
+                "mxid '{}' is already taken".format(user_id.to_string())
+            )
+
+        # This is the first time this user is logging in and the mapped mxid is
+        # not taken, so register the user
+        registered_user_id = await self._registration_handler.register_user(
+            localpart=localpart, default_display_name=attributes["display_name"],
+        )
+
+        await self._datastore.record_user_external_id(
+            self._auth_provider_id, remote_user_id, registered_user_id,
+        )
+        return registered_user_id
+
+
+UserAttribute = TypedDict(
+    "UserAttribute", {"localpart": str, "display_name": Optional[str]}
+)
+C = TypeVar("C")
+
+
+class OidcMappingProvider(Generic[C]):
+    """A mapping provider maps a UserInfo object to user attributes.
+
+    It should provide the API described by this class.
+    """
+
+    def __init__(self, config: C):
+        """
+        Args:
+            config: A custom config object from this module, parsed by ``parse_config()``
+        """
+
+    @staticmethod
+    def parse_config(config: dict) -> C:
+        """Parse the dict provided by the homeserver's config
+
+        Args:
+            config: A dictionary containing configuration options for this provider
+
+        Returns:
+            A custom config object for this module
+        """
+        raise NotImplementedError()
+
+    def get_remote_user_id(self, userinfo: UserInfo) -> str:
+        """Get a unique user ID for this user.
+
+        Usually, in an OIDC-compliant scenario, it should be the ``sub`` claim from the UserInfo object.
+
+        Args:
+            userinfo: An object representing the user given by the OIDC provider
+
+        Returns:
+            A unique user ID
+        """
+        raise NotImplementedError()
+
+    async def map_user_attributes(
+        self, userinfo: UserInfo, token: Token
+    ) -> UserAttribute:
+        """Map a ``UserInfo`` object into user attributes.
+ + Args: + userinfo: An object representing the user given by the OIDC provider + token: A dict with the tokens returned by the provider + + Returns: + A dict containing the ``localpart`` and (optionally) the ``display_name`` + """ + raise NotImplementedError() + + +# Used to clear out "None" values in templates +def jinja_finalize(thing): + return thing if thing is not None else "" + + +env = Environment(finalize=jinja_finalize) + + +@attr.s +class JinjaOidcMappingConfig: + subject_claim = attr.ib() # type: str + localpart_template = attr.ib() # type: Template + display_name_template = attr.ib() # type: Optional[Template] + + +class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]): + """An implementation of a mapping provider based on Jinja templates. + + This is the default mapping provider. + """ + + def __init__(self, config: JinjaOidcMappingConfig): + self._config = config + + @staticmethod + def parse_config(config: dict) -> JinjaOidcMappingConfig: + subject_claim = config.get("subject_claim", "sub") + + if "localpart_template" not in config: + raise ConfigError( + "missing key: oidc_config.user_mapping_provider.config.localpart_template" + ) + + try: + localpart_template = env.from_string(config["localpart_template"]) + except Exception as e: + raise ConfigError( + "invalid jinja template for oidc_config.user_mapping_provider.config.localpart_template: %r" + % (e,) + ) + + display_name_template = None # type: Optional[Template] + if "display_name_template" in config: + try: + display_name_template = env.from_string(config["display_name_template"]) + except Exception as e: + raise ConfigError( + "invalid jinja template for oidc_config.user_mapping_provider.config.display_name_template: %r" + % (e,) + ) + + return JinjaOidcMappingConfig( + subject_claim=subject_claim, + localpart_template=localpart_template, + display_name_template=display_name_template, + ) + + def get_remote_user_id(self, userinfo: UserInfo) -> str: + return userinfo[self._config.subject_claim] + + async def map_user_attributes( + self, userinfo: UserInfo, token: Token + ) -> UserAttribute: + localpart = self._config.localpart_template.render(user=userinfo).strip() + + display_name = None # type: Optional[str] + if self._config.display_name_template is not None: + display_name = self._config.display_name_template.render( + user=userinfo + ).strip() + + if display_name == "": + display_name = None + + return UserAttribute(localpart=localpart, display_name=display_name) diff --git a/synapse/http/client.py b/synapse/http/client.py index 3797545824..58eb47c69c 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -359,6 +359,7 @@ class SimpleHttpClient(object): actual_headers = { b"Content-Type": [b"application/x-www-form-urlencoded"], b"User-Agent": [self.user_agent], + b"Accept": [b"application/json"], } if headers: actual_headers.update(headers) @@ -399,6 +400,7 @@ class SimpleHttpClient(object): actual_headers = { b"Content-Type": [b"application/json"], b"User-Agent": [self.user_agent], + b"Accept": [b"application/json"], } if headers: actual_headers.update(headers) @@ -434,6 +436,10 @@ class SimpleHttpClient(object): ValueError: if the response was not JSON """ + actual_headers = {b"Accept": [b"application/json"]} + if headers: + actual_headers.update(headers) + body = yield self.get_raw(uri, args, headers=headers) return json.loads(body) @@ -467,6 +473,7 @@ class SimpleHttpClient(object): actual_headers = { b"Content-Type": [b"application/json"], b"User-Agent": 
+            b"Accept": [b"application/json"],
         }
         if headers:
             actual_headers.update(headers)
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 39c99a2802..8b4312e5a3 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -92,6 +92,7 @@ CONDITIONAL_REQUIREMENTS = {
         'eliot<1.8.0;python_version<"3.5.3"',
     ],
     "saml2": ["pysaml2>=4.5.0"],
+    "oidc": ["authlib>=0.14.0"],
    "systemd": ["systemd-python>=231"],
     "url_preview": ["lxml>=3.5.0"],
     "test": ["mock>=2.0", "parameterized"],
diff --git a/synapse/res/templates/sso_error.html b/synapse/res/templates/sso_error.html
new file mode 100644
index 0000000000..43a211386b
--- /dev/null
+++ b/synapse/res/templates/sso_error.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>SSO error</title>
+</head>
+<body>
+    <p>Oops! Something went wrong during authentication.</p>
+    <p>
+        Try logging in again from your Matrix client and if the problem persists
+        please contact the server's administrator.
+    </p>
+    <p>Error: <code>{{ error }}</code></p>
+    {% if error_description %}
+    <pre>{{ error_description }}</pre>
+    {% endif %}
+</body>
+</html>
diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 4de2f97d06..de7eca21f8 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -83,6 +83,7 @@ class LoginRestServlet(RestServlet):
         self.jwt_algorithm = hs.config.jwt_algorithm
         self.saml2_enabled = hs.config.saml2_enabled
         self.cas_enabled = hs.config.cas_enabled
+        self.oidc_enabled = hs.config.oidc_enabled
         self.auth_handler = self.hs.get_auth_handler()
         self.registration_handler = hs.get_registration_handler()
         self.handlers = hs.get_handlers()
@@ -96,9 +97,7 @@ class LoginRestServlet(RestServlet):
         flows = []
         if self.jwt_enabled:
             flows.append({"type": LoginRestServlet.JWT_TYPE})
-        if self.saml2_enabled:
-            flows.append({"type": LoginRestServlet.SSO_TYPE})
-            flows.append({"type": LoginRestServlet.TOKEN_TYPE})
+
         if self.cas_enabled:
             flows.append({"type": LoginRestServlet.SSO_TYPE})
 
@@ -114,6 +113,11 @@ class LoginRestServlet(RestServlet):
             # fall back to the fallback API if they don't understand one of the
             # login flow types returned.
             flows.append({"type": LoginRestServlet.TOKEN_TYPE})
+        elif self.saml2_enabled:
+            flows.append({"type": LoginRestServlet.SSO_TYPE})
+            flows.append({"type": LoginRestServlet.TOKEN_TYPE})
+        elif self.oidc_enabled:
+            flows.append({"type": LoginRestServlet.SSO_TYPE})
 
         flows.extend(
             ({"type": t} for t in self.auth_handler.get_supported_login_types())
@@ -465,6 +469,22 @@ class SAMLRedirectServlet(BaseSSORedirectServlet):
         return self._saml_handler.handle_redirect_request(client_redirect_url)
 
 
+class OIDCRedirectServlet(RestServlet):
+    """Implementation for /login/sso/redirect for the OIDC login flow."""
+
+    PATTERNS = client_patterns("/login/sso/redirect", v1=True)
+
+    def __init__(self, hs):
+        self._oidc_handler = hs.get_oidc_handler()
+
+    async def on_GET(self, request):
+        args = request.args
+        if b"redirectUrl" not in args:
+            return 400, "Redirect URL not specified for SSO auth"
+        client_redirect_url = args[b"redirectUrl"][0]
+        await self._oidc_handler.handle_redirect_request(request, client_redirect_url)
+
+
 def register_servlets(hs, http_server):
     LoginRestServlet(hs).register(http_server)
     if hs.config.cas_enabled:
@@ -472,3 +492,5 @@ def register_servlets(hs, http_server):
         CasTicketServlet(hs).register(http_server)
     elif hs.config.saml2_enabled:
         SAMLRedirectServlet(hs).register(http_server)
+    elif hs.config.oidc_enabled:
+        OIDCRedirectServlet(hs).register(http_server)
diff --git a/synapse/rest/oidc/__init__.py b/synapse/rest/oidc/__init__.py
new file mode 100644
index 0000000000..d958dd65bb
--- /dev/null
+++ b/synapse/rest/oidc/__init__.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Quentin Gliech
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging + +from twisted.web.resource import Resource + +from synapse.rest.oidc.callback_resource import OIDCCallbackResource + +logger = logging.getLogger(__name__) + + +class OIDCResource(Resource): + def __init__(self, hs): + Resource.__init__(self) + self.putChild(b"callback", OIDCCallbackResource(hs)) diff --git a/synapse/rest/oidc/callback_resource.py b/synapse/rest/oidc/callback_resource.py new file mode 100644 index 0000000000..c03194f001 --- /dev/null +++ b/synapse/rest/oidc/callback_resource.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Quentin Gliech +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from synapse.http.server import DirectServeResource, wrap_html_request_handler + +logger = logging.getLogger(__name__) + + +class OIDCCallbackResource(DirectServeResource): + isLeaf = 1 + + def __init__(self, hs): + super().__init__() + self._oidc_handler = hs.get_oidc_handler() + + @wrap_html_request_handler + async def _async_render_GET(self, request): + return await self._oidc_handler.handle_oidc_callback(request) diff --git a/synapse/server.py b/synapse/server.py index bf97a16c09..b4aea81e24 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -204,6 +204,7 @@ class HomeServer(object): "account_validity_handler", "cas_handler", "saml_handler", + "oidc_handler", "event_client_serializer", "password_policy_handler", "storage", @@ -562,6 +563,11 @@ class HomeServer(object): return SamlHandler(self) + def build_oidc_handler(self): + from synapse.handlers.oidc_handler import OidcHandler + + return OidcHandler(self) + def build_event_client_serializer(self): return EventClientSerializer(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 18043a2593..31a9cc0389 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -13,6 +13,7 @@ import synapse.handlers.device import synapse.handlers.e2e_keys import synapse.handlers.message import synapse.handlers.presence +import synapse.handlers.register import synapse.handlers.room import synapse.handlers.room_member import synapse.handlers.set_password @@ -128,3 +129,7 @@ class HomeServer(object): pass def get_storage(self) -> synapse.storage.Storage: pass + def get_registration_handler(self) -> synapse.handlers.register.RegistrationHandler: + pass + def get_macaroon_generator(self) -> synapse.handlers.auth.MacaroonGenerator: + pass diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py new file mode 100644 index 0000000000..61963aa90d --- /dev/null +++ b/tests/handlers/test_oidc.py @@ -0,0 +1,565 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Quentin Gliech +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from urllib.parse import parse_qs, urlparse + +from mock import Mock, patch + +import attr +import pymacaroons + +from twisted.internet import defer +from twisted.python.failure import Failure +from twisted.web._newclient import ResponseDone + +from synapse.handlers.oidc_handler import ( + MappingException, + OidcError, + OidcHandler, + OidcMappingProvider, +) +from synapse.types import UserID + +from tests.unittest import HomeserverTestCase, override_config + + +@attr.s +class FakeResponse: + code = attr.ib() + body = attr.ib() + phrase = attr.ib() + + def deliverBody(self, protocol): + protocol.dataReceived(self.body) + protocol.connectionLost(Failure(ResponseDone())) + + +# These are a few constants that are used as config parameters in the tests. +ISSUER = "https://issuer/" +CLIENT_ID = "test-client-id" +CLIENT_SECRET = "test-client-secret" +BASE_URL = "https://synapse/" +CALLBACK_URL = BASE_URL + "_synapse/oidc/callback" +SCOPES = ["openid"] + +AUTHORIZATION_ENDPOINT = ISSUER + "authorize" +TOKEN_ENDPOINT = ISSUER + "token" +USERINFO_ENDPOINT = ISSUER + "userinfo" +WELL_KNOWN = ISSUER + ".well-known/openid-configuration" +JWKS_URI = ISSUER + ".well-known/jwks.json" + +# config for common cases +COMMON_CONFIG = { + "discover": False, + "authorization_endpoint": AUTHORIZATION_ENDPOINT, + "token_endpoint": TOKEN_ENDPOINT, + "jwks_uri": JWKS_URI, +} + + +# The cookie name and path don't really matter, just that it has to be coherent +# between the callback & redirect handlers. 
+COOKIE_NAME = b"oidc_session" +COOKIE_PATH = "/_synapse/oidc" + +MockedMappingProvider = Mock(OidcMappingProvider) + + +def simple_async_mock(return_value=None, raises=None): + # AsyncMock is not available in python3.5, this mimics part of its behaviour + async def cb(*args, **kwargs): + if raises: + raise raises + return return_value + + return Mock(side_effect=cb) + + +async def get_json(url): + # Mock get_json calls to handle jwks & oidc discovery endpoints + if url == WELL_KNOWN: + # Minimal discovery document, as defined in OpenID.Discovery + # https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata + return { + "issuer": ISSUER, + "authorization_endpoint": AUTHORIZATION_ENDPOINT, + "token_endpoint": TOKEN_ENDPOINT, + "jwks_uri": JWKS_URI, + "userinfo_endpoint": USERINFO_ENDPOINT, + "response_types_supported": ["code"], + "subject_types_supported": ["public"], + "id_token_signing_alg_values_supported": ["RS256"], + } + elif url == JWKS_URI: + return {"keys": []} + + +class OidcHandlerTestCase(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + + self.http_client = Mock(spec=["get_json"]) + self.http_client.get_json.side_effect = get_json + self.http_client.user_agent = "Synapse Test" + + config = self.default_config() + config["public_baseurl"] = BASE_URL + oidc_config = config.get("oidc_config", {}) + oidc_config["enabled"] = True + oidc_config["client_id"] = CLIENT_ID + oidc_config["client_secret"] = CLIENT_SECRET + oidc_config["issuer"] = ISSUER + oidc_config["scopes"] = SCOPES + oidc_config["user_mapping_provider"] = { + "module": __name__ + ".MockedMappingProvider" + } + config["oidc_config"] = oidc_config + + hs = self.setup_test_homeserver( + http_client=self.http_client, + proxied_http_client=self.http_client, + config=config, + ) + + self.handler = OidcHandler(hs) + + return hs + + def metadata_edit(self, values): + return patch.dict(self.handler._provider_metadata, values) + + def assertRenderedError(self, error, error_description=None): + args = self.handler._render_error.call_args[0] + self.assertEqual(args[1], error) + if error_description is not None: + self.assertEqual(args[2], error_description) + # Reset the render_error mock + self.handler._render_error.reset_mock() + + def test_config(self): + """Basic config correctly sets up the callback URL and client auth correctly.""" + self.assertEqual(self.handler._callback_url, CALLBACK_URL) + self.assertEqual(self.handler._client_auth.client_id, CLIENT_ID) + self.assertEqual(self.handler._client_auth.client_secret, CLIENT_SECRET) + + @override_config({"oidc_config": {"discover": True}}) + @defer.inlineCallbacks + def test_discovery(self): + """The handler should discover the endpoints from OIDC discovery document.""" + # This would throw if some metadata were invalid + metadata = yield defer.ensureDeferred(self.handler.load_metadata()) + self.http_client.get_json.assert_called_once_with(WELL_KNOWN) + + self.assertEqual(metadata.issuer, ISSUER) + self.assertEqual(metadata.authorization_endpoint, AUTHORIZATION_ENDPOINT) + self.assertEqual(metadata.token_endpoint, TOKEN_ENDPOINT) + self.assertEqual(metadata.jwks_uri, JWKS_URI) + # FIXME: it seems like authlib does not have that defined in its metadata models + # self.assertEqual(metadata.userinfo_endpoint, USERINFO_ENDPOINT) + + # subsequent calls should be cached + self.http_client.reset_mock() + yield defer.ensureDeferred(self.handler.load_metadata()) + self.http_client.get_json.assert_not_called() + + @override_config({"oidc_config": 
COMMON_CONFIG}) + @defer.inlineCallbacks + def test_no_discovery(self): + """When discovery is disabled, it should not try to load from discovery document.""" + yield defer.ensureDeferred(self.handler.load_metadata()) + self.http_client.get_json.assert_not_called() + + @override_config({"oidc_config": COMMON_CONFIG}) + @defer.inlineCallbacks + def test_load_jwks(self): + """JWKS loading is done once (then cached) if used.""" + jwks = yield defer.ensureDeferred(self.handler.load_jwks()) + self.http_client.get_json.assert_called_once_with(JWKS_URI) + self.assertEqual(jwks, {"keys": []}) + + # subsequent calls should be cached… + self.http_client.reset_mock() + yield defer.ensureDeferred(self.handler.load_jwks()) + self.http_client.get_json.assert_not_called() + + # …unless forced + self.http_client.reset_mock() + yield defer.ensureDeferred(self.handler.load_jwks(force=True)) + self.http_client.get_json.assert_called_once_with(JWKS_URI) + + # Throw if the JWKS uri is missing + with self.metadata_edit({"jwks_uri": None}): + with self.assertRaises(RuntimeError): + yield defer.ensureDeferred(self.handler.load_jwks(force=True)) + + # Return empty key set if JWKS are not used + self.handler._scopes = [] # not asking the openid scope + self.http_client.get_json.reset_mock() + jwks = yield defer.ensureDeferred(self.handler.load_jwks(force=True)) + self.http_client.get_json.assert_not_called() + self.assertEqual(jwks, {"keys": []}) + + @override_config({"oidc_config": COMMON_CONFIG}) + def test_validate_config(self): + """Provider metadatas are extensively validated.""" + h = self.handler + + # Default test config does not throw + h._validate_metadata() + + with self.metadata_edit({"issuer": None}): + self.assertRaisesRegex(ValueError, "issuer", h._validate_metadata) + + with self.metadata_edit({"issuer": "http://insecure/"}): + self.assertRaisesRegex(ValueError, "issuer", h._validate_metadata) + + with self.metadata_edit({"issuer": "https://invalid/?because=query"}): + self.assertRaisesRegex(ValueError, "issuer", h._validate_metadata) + + with self.metadata_edit({"authorization_endpoint": None}): + self.assertRaisesRegex( + ValueError, "authorization_endpoint", h._validate_metadata + ) + + with self.metadata_edit({"authorization_endpoint": "http://insecure/auth"}): + self.assertRaisesRegex( + ValueError, "authorization_endpoint", h._validate_metadata + ) + + with self.metadata_edit({"token_endpoint": None}): + self.assertRaisesRegex(ValueError, "token_endpoint", h._validate_metadata) + + with self.metadata_edit({"token_endpoint": "http://insecure/token"}): + self.assertRaisesRegex(ValueError, "token_endpoint", h._validate_metadata) + + with self.metadata_edit({"jwks_uri": None}): + self.assertRaisesRegex(ValueError, "jwks_uri", h._validate_metadata) + + with self.metadata_edit({"jwks_uri": "http://insecure/jwks.json"}): + self.assertRaisesRegex(ValueError, "jwks_uri", h._validate_metadata) + + with self.metadata_edit({"response_types_supported": ["id_token"]}): + self.assertRaisesRegex( + ValueError, "response_types_supported", h._validate_metadata + ) + + with self.metadata_edit( + {"token_endpoint_auth_methods_supported": ["client_secret_basic"]} + ): + # should not throw, as client_secret_basic is the default auth method + h._validate_metadata() + + with self.metadata_edit( + {"token_endpoint_auth_methods_supported": ["client_secret_post"]} + ): + self.assertRaisesRegex( + ValueError, + "token_endpoint_auth_methods_supported", + h._validate_metadata, + ) + + # Tests for configs that the userinfo 
endpoint + self.assertFalse(h._uses_userinfo) + h._scopes = [] # do not request the openid scope + self.assertTrue(h._uses_userinfo) + self.assertRaisesRegex(ValueError, "userinfo_endpoint", h._validate_metadata) + + with self.metadata_edit( + {"userinfo_endpoint": USERINFO_ENDPOINT, "jwks_uri": None} + ): + # Shouldn't raise with a valid userinfo, even without + h._validate_metadata() + + @override_config({"oidc_config": {"skip_verification": True}}) + def test_skip_verification(self): + """Provider metadata validation can be disabled by config.""" + with self.metadata_edit({"issuer": "http://insecure"}): + # This should not throw + self.handler._validate_metadata() + + @defer.inlineCallbacks + def test_redirect_request(self): + """The redirect request has the right arguments & generates a valid session cookie.""" + req = Mock(spec=["addCookie", "redirect", "finish"]) + yield defer.ensureDeferred( + self.handler.handle_redirect_request(req, b"http://client/redirect") + ) + url = req.redirect.call_args[0][0] + url = urlparse(url) + auth_endpoint = urlparse(AUTHORIZATION_ENDPOINT) + + self.assertEqual(url.scheme, auth_endpoint.scheme) + self.assertEqual(url.netloc, auth_endpoint.netloc) + self.assertEqual(url.path, auth_endpoint.path) + + params = parse_qs(url.query) + self.assertEqual(params["redirect_uri"], [CALLBACK_URL]) + self.assertEqual(params["response_type"], ["code"]) + self.assertEqual(params["scope"], [" ".join(SCOPES)]) + self.assertEqual(params["client_id"], [CLIENT_ID]) + self.assertEqual(len(params["state"]), 1) + self.assertEqual(len(params["nonce"]), 1) + + # Check what is in the cookie + # note: python3.5 mock does not have the .called_once() method + calls = req.addCookie.call_args_list + self.assertEqual(len(calls), 1) # called once + # For some reason, call.args does not work with python3.5 + args = calls[0][0] + kwargs = calls[0][1] + self.assertEqual(args[0], COOKIE_NAME) + self.assertEqual(kwargs["path"], COOKIE_PATH) + cookie = args[1] + + macaroon = pymacaroons.Macaroon.deserialize(cookie) + state = self.handler._get_value_from_macaroon(macaroon, "state") + nonce = self.handler._get_value_from_macaroon(macaroon, "nonce") + redirect = self.handler._get_value_from_macaroon( + macaroon, "client_redirect_url" + ) + + self.assertEqual(params["state"], [state]) + self.assertEqual(params["nonce"], [nonce]) + self.assertEqual(redirect, "http://client/redirect") + + @defer.inlineCallbacks + def test_callback_error(self): + """Errors from the provider returned in the callback are displayed.""" + self.handler._render_error = Mock() + request = Mock(args={}) + request.args[b"error"] = [b"invalid_client"] + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_client", "") + + request.args[b"error_description"] = [b"some description"] + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_client", "some description") + + @defer.inlineCallbacks + def test_callback(self): + """Code callback works and display errors if something went wrong. 
+ + A lot of scenarios are tested here: + - when the callback works, with userinfo from ID token + - when the user mapping fails + - when ID token verification fails + - when the callback works, with userinfo fetched from the userinfo endpoint + - when the userinfo fetching fails + - when the code exchange fails + """ + token = { + "type": "bearer", + "id_token": "id_token", + "access_token": "access_token", + } + userinfo = { + "sub": "foo", + "preferred_username": "bar", + } + user_id = UserID("foo", "domain.org") + self.handler._render_error = Mock(return_value=None) + self.handler._exchange_code = simple_async_mock(return_value=token) + self.handler._parse_id_token = simple_async_mock(return_value=userinfo) + self.handler._fetch_userinfo = simple_async_mock(return_value=userinfo) + self.handler._map_userinfo_to_user = simple_async_mock(return_value=user_id) + self.handler._auth_handler.complete_sso_login = simple_async_mock() + request = Mock(spec=["args", "getCookie", "addCookie"]) + + code = "code" + state = "state" + nonce = "nonce" + client_redirect_url = "http://client/redirect" + session = self.handler._generate_oidc_session_token( + state=state, nonce=nonce, client_redirect_url=client_redirect_url, + ) + request.getCookie.return_value = session + + request.args = {} + request.args[b"code"] = [code.encode("utf-8")] + request.args[b"state"] = [state.encode("utf-8")] + + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + + self.handler._auth_handler.complete_sso_login.assert_called_once_with( + user_id, request, client_redirect_url, + ) + self.handler._exchange_code.assert_called_once_with(code) + self.handler._parse_id_token.assert_called_once_with(token, nonce=nonce) + self.handler._map_userinfo_to_user.assert_called_once_with(userinfo, token) + self.handler._fetch_userinfo.assert_not_called() + self.handler._render_error.assert_not_called() + + # Handle mapping errors + self.handler._map_userinfo_to_user = simple_async_mock( + raises=MappingException() + ) + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("mapping_error") + self.handler._map_userinfo_to_user = simple_async_mock(return_value=user_id) + + # Handle ID token errors + self.handler._parse_id_token = simple_async_mock(raises=Exception()) + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_token") + + self.handler._auth_handler.complete_sso_login.reset_mock() + self.handler._exchange_code.reset_mock() + self.handler._parse_id_token.reset_mock() + self.handler._map_userinfo_to_user.reset_mock() + self.handler._fetch_userinfo.reset_mock() + + # With userinfo fetching + self.handler._scopes = [] # do not ask the "openid" scope + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + + self.handler._auth_handler.complete_sso_login.assert_called_once_with( + user_id, request, client_redirect_url, + ) + self.handler._exchange_code.assert_called_once_with(code) + self.handler._parse_id_token.assert_not_called() + self.handler._map_userinfo_to_user.assert_called_once_with(userinfo, token) + self.handler._fetch_userinfo.assert_called_once_with(token) + self.handler._render_error.assert_not_called() + + # Handle userinfo fetching error + self.handler._fetch_userinfo = simple_async_mock(raises=Exception()) + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("fetch_error") + + # Handle code exchange failure + self.handler._exchange_code = 
simple_async_mock( + raises=OidcError("invalid_request") + ) + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_request") + + @defer.inlineCallbacks + def test_callback_session(self): + """The callback verifies the session presence and validity""" + self.handler._render_error = Mock(return_value=None) + request = Mock(spec=["args", "getCookie", "addCookie"]) + + # Missing cookie + request.args = {} + request.getCookie.return_value = None + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("missing_session", "No session cookie found") + + # Missing session parameter + request.args = {} + request.getCookie.return_value = "session" + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_request", "State parameter is missing") + + # Invalid cookie + request.args = {} + request.args[b"state"] = [b"state"] + request.getCookie.return_value = "session" + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_session") + + # Mismatching session + session = self.handler._generate_oidc_session_token( + state="state", nonce="nonce", client_redirect_url="http://client/redirect", + ) + request.args = {} + request.args[b"state"] = [b"mismatching state"] + request.getCookie.return_value = session + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("mismatching_session") + + # Valid session + request.args = {} + request.args[b"state"] = [b"state"] + request.getCookie.return_value = session + yield defer.ensureDeferred(self.handler.handle_oidc_callback(request)) + self.assertRenderedError("invalid_request") + + @override_config({"oidc_config": {"client_auth_method": "client_secret_post"}}) + @defer.inlineCallbacks + def test_exchange_code(self): + """Code exchange behaves correctly and handles various error scenarios.""" + token = {"type": "bearer"} + token_json = json.dumps(token).encode("utf-8") + self.http_client.request = simple_async_mock( + return_value=FakeResponse(code=200, phrase=b"OK", body=token_json) + ) + code = "code" + ret = yield defer.ensureDeferred(self.handler._exchange_code(code)) + kwargs = self.http_client.request.call_args[1] + + self.assertEqual(ret, token) + self.assertEqual(kwargs["method"], "POST") + self.assertEqual(kwargs["uri"], TOKEN_ENDPOINT) + + args = parse_qs(kwargs["data"].decode("utf-8")) + self.assertEqual(args["grant_type"], ["authorization_code"]) + self.assertEqual(args["code"], [code]) + self.assertEqual(args["client_id"], [CLIENT_ID]) + self.assertEqual(args["client_secret"], [CLIENT_SECRET]) + self.assertEqual(args["redirect_uri"], [CALLBACK_URL]) + + # Test error handling + self.http_client.request = simple_async_mock( + return_value=FakeResponse( + code=400, + phrase=b"Bad Request", + body=b'{"error": "foo", "error_description": "bar"}', + ) + ) + with self.assertRaises(OidcError) as exc: + yield defer.ensureDeferred(self.handler._exchange_code(code)) + self.assertEqual(exc.exception.error, "foo") + self.assertEqual(exc.exception.error_description, "bar") + + # Internal server error with no JSON body + self.http_client.request = simple_async_mock( + return_value=FakeResponse( + code=500, phrase=b"Internal Server Error", body=b"Not JSON", + ) + ) + with self.assertRaises(OidcError) as exc: + yield defer.ensureDeferred(self.handler._exchange_code(code)) + self.assertEqual(exc.exception.error, "server_error") + 
+ # Internal server error with JSON body + self.http_client.request = simple_async_mock( + return_value=FakeResponse( + code=500, + phrase=b"Internal Server Error", + body=b'{"error": "internal_server_error"}', + ) + ) + with self.assertRaises(OidcError) as exc: + yield defer.ensureDeferred(self.handler._exchange_code(code)) + self.assertEqual(exc.exception.error, "internal_server_error") + + # 4xx error without "error" field + self.http_client.request = simple_async_mock( + return_value=FakeResponse(code=400, phrase=b"Bad request", body=b"{}",) + ) + with self.assertRaises(OidcError) as exc: + yield defer.ensureDeferred(self.handler._exchange_code(code)) + self.assertEqual(exc.exception.error, "server_error") + + # 2xx error with "error" field + self.http_client.request = simple_async_mock( + return_value=FakeResponse( + code=200, phrase=b"OK", body=b'{"error": "some_error"}', + ) + ) + with self.assertRaises(OidcError) as exc: + yield defer.ensureDeferred(self.handler._exchange_code(code)) + self.assertEqual(exc.exception.error, "some_error") diff --git a/tox.ini b/tox.ini index c699f3e46a..ad1902d47d 100644 --- a/tox.ini +++ b/tox.ini @@ -185,6 +185,7 @@ commands = mypy \ synapse/handlers/auth.py \ synapse/handlers/cas_handler.py \ synapse/handlers/directory.py \ + synapse/handlers/oidc_handler.py \ synapse/handlers/presence.py \ synapse/handlers/saml_handler.py \ synapse/handlers/sync.py \ -- cgit 1.5.1 From 4734a7bbe4d08d68c5f04dd76cd5bcfb4cd9b6be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 May 2020 14:01:39 +0100 Subject: Move EventStream handling into default ReplicationDataHandler (#7493) This is so that the logic can happen on both master and workers when we move event persistence out. --- changelog.d/7493.misc | 1 + synapse/app/generic_worker.py | 33 ++------------------------------- synapse/replication/tcp/client.py | 37 +++++++++++++++++++++++++++++++++---- synapse/server.py | 2 +- synapse/server.pyi | 3 +++ 5 files changed, 40 insertions(+), 36 deletions(-) create mode 100644 changelog.d/7493.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7493.misc b/changelog.d/7493.misc new file mode 100644 index 0000000000..575c55a99b --- /dev/null +++ b/changelog.d/7493.misc @@ -0,0 +1 @@ +Move EventStream handling into default ReplicationDataHandler. 
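To make the effect of this refactoring concrete, here is a minimal sketch (not part of the patch) of what a worker-side replication handler looks like afterwards. The events-stream handling shown in the diffs below now lives in the base ReplicationDataHandler, so a subclass only calls super().on_rdata() and deals with its own streams; the PushRulesStream branch is copied from the generic worker for illustration, and the class name is hypothetical.

    from synapse.replication.tcp.client import ReplicationDataHandler
    from synapse.replication.tcp.streams import PushRulesStream

    # Sketch only: a hypothetical subclass written against the post-refactor API.
    class ExampleWorkerReplicationHandler(ReplicationDataHandler):
        def __init__(self, hs):
            # The base constructor now takes the whole HomeServer, not just a store.
            super().__init__(hs)
            self.notifier = hs.get_notifier()

        async def on_rdata(self, stream_name, instance_name, token, rows):
            # The base class processes the replication rows and handles the
            # events stream (notifier wake-ups, pusher notifications) itself.
            await super().on_rdata(stream_name, instance_name, token, rows)

            # Subclasses only add handling for their remaining streams.
            if stream_name == PushRulesStream.NAME:
                self.notifier.on_new_event(
                    "push_rules_key", token, users=[row.user_id for row in rows]
                )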
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index bccb1140b2..2e3add7ac5 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -26,7 +26,6 @@ from twisted.web.resource import NoResource import synapse import synapse.events -from synapse.api.constants import EventTypes from synapse.api.errors import HttpResponseException, SynapseError from synapse.api.urls import ( CLIENT_API_PREFIX, @@ -81,11 +80,6 @@ from synapse.replication.tcp.streams import ( ToDeviceStream, TypingStream, ) -from synapse.replication.tcp.streams.events import ( - EventsStream, - EventsStreamEventRow, - EventsStreamRow, -) from synapse.rest.admin import register_servlets_for_media_repo from synapse.rest.client.v1 import events from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet @@ -633,7 +627,7 @@ class GenericWorkerServer(HomeServer): class GenericWorkerReplicationHandler(ReplicationDataHandler): def __init__(self, hs): - super(GenericWorkerReplicationHandler, self).__init__(hs.get_datastore()) + super(GenericWorkerReplicationHandler, self).__init__(hs) self.store = hs.get_datastore() self.typing_handler = hs.get_typing_handler() @@ -659,30 +653,7 @@ class GenericWorkerReplicationHandler(ReplicationDataHandler): stream_name, token, rows ) - if stream_name == EventsStream.NAME: - # We shouldn't get multiple rows per token for events stream, so - # we don't need to optimise this for multiple rows. - for row in rows: - if row.type != EventsStreamEventRow.TypeId: - continue - assert isinstance(row, EventsStreamRow) - - event = await self.store.get_event( - row.data.event_id, allow_rejected=True - ) - if event.rejected_reason: - continue - - extra_users = () - if event.type == EventTypes.Member: - extra_users = (event.state_key,) - max_token = self.store.get_room_max_stream_ordering() - self.notifier.on_new_room_event( - event, token, max_token, extra_users - ) - - await self.pusher_pool.on_new_notifications(token, token) - elif stream_name == PushRulesStream.NAME: + if stream_name == PushRulesStream.NAME: self.notifier.on_new_event( "push_rules_key", token, users=[row.user_id for row in rows] ) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 20cb8a654f..28826302f5 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -16,12 +16,17 @@ """ import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Tuple from twisted.internet.protocol import ReconnectingClientFactory -from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.api.constants import EventTypes from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol +from synapse.replication.tcp.streams.events import ( + EventsStream, + EventsStreamEventRow, + EventsStreamRow, +) if TYPE_CHECKING: from synapse.server import HomeServer @@ -83,8 +88,10 @@ class ReplicationDataHandler: to handle updates in additional ways. 
""" - def __init__(self, store: BaseSlavedStore): - self.store = store + def __init__(self, hs: "HomeServer"): + self.store = hs.get_datastore() + self.pusher_pool = hs.get_pusherpool() + self.notifier = hs.get_notifier() async def on_rdata( self, stream_name: str, instance_name: str, token: int, rows: list @@ -102,6 +109,28 @@ class ReplicationDataHandler: """ self.store.process_replication_rows(stream_name, instance_name, token, rows) + if stream_name == EventsStream.NAME: + # We shouldn't get multiple rows per token for events stream, so + # we don't need to optimise this for multiple rows. + for row in rows: + if row.type != EventsStreamEventRow.TypeId: + continue + assert isinstance(row, EventsStreamRow) + + event = await self.store.get_event( + row.data.event_id, allow_rejected=True + ) + if event.rejected_reason: + continue + + extra_users = () # type: Tuple[str, ...] + if event.type == EventTypes.Member: + extra_users = (event.state_key,) + max_token = self.store.get_room_max_stream_ordering() + self.notifier.on_new_room_event(event, token, max_token, extra_users) + + await self.pusher_pool.on_new_notifications(token, token) + async def on_position(self, stream_name: str, instance_name: str, token: int): self.store.process_replication_rows(stream_name, instance_name, token, []) diff --git a/synapse/server.py b/synapse/server.py index b4aea81e24..c530f1aa1a 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -581,7 +581,7 @@ class HomeServer(object): return ReplicationStreamer(self) def build_replication_data_handler(self): - return ReplicationDataHandler(self.get_datastore()) + return ReplicationDataHandler(self) def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/server.pyi b/synapse/server.pyi index 31a9cc0389..9e7fad7e6e 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -19,6 +19,7 @@ import synapse.handlers.room_member import synapse.handlers.set_password import synapse.http.client import synapse.notifier +import synapse.push.pusherpool import synapse.replication.tcp.client import synapse.replication.tcp.handler import synapse.rest.media.v1.media_repository @@ -133,3 +134,5 @@ class HomeServer(object): pass def get_macaroon_generator(self) -> synapse.handlers.auth.MacaroonGenerator: pass + def get_pusherpool(self) -> synapse.push.pusherpool.PusherPool: + pass -- cgit 1.5.1 From 1531b214fc57714c14046a8f66c7b5fe5ec5dcdd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 May 2020 14:21:54 +0100 Subject: Add ability to wait for replication streams (#7542) The idea here is that if an instance persists an event via the replication HTTP API it can return before we receive that event over replication, which can lead to races where code assumes that persisting an event immediately updates various caches (e.g. current state of the room). Most of Synapse doesn't hit such races, so we don't do the waiting automagically, instead we do so where necessary to avoid unnecessary delays. We may decide to change our minds here if it turns out there are a lot of subtle races going on. People probably want to look at this commit by commit. 
--- changelog.d/7542.misc | 1 + synapse/handlers/federation.py | 33 ++++++--- synapse/handlers/message.py | 36 ++++++--- synapse/handlers/room.py | 65 +++++++++++----- synapse/handlers/room_member.py | 65 ++++++++++------ synapse/handlers/room_member_worker.py | 11 +-- synapse/replication/http/federation.py | 13 +++- synapse/replication/http/membership.py | 14 ++-- synapse/replication/http/send_event.py | 4 +- synapse/replication/http/streams.py | 5 +- synapse/replication/tcp/client.py | 90 ++++++++++++++++++++++- synapse/rest/admin/rooms.py | 10 ++- synapse/rest/client/v1/room.py | 20 +++-- synapse/rest/client/v2_alpha/relations.py | 2 +- synapse/server.py | 5 ++ synapse/server.pyi | 5 ++ synapse/server_notices/server_notices_manager.py | 6 +- synapse/storage/data_stores/main/events_worker.py | 6 +- synapse/storage/data_stores/main/roommember.py | 2 + tests/federation/test_complexity.py | 8 +- tests/handlers/test_typing.py | 5 +- tests/storage/test_cleanup_extrems.py | 4 +- tests/storage/test_event_metrics.py | 2 +- tests/test_federation.py | 4 +- 24 files changed, 304 insertions(+), 112 deletions(-) create mode 100644 changelog.d/7542.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7542.misc b/changelog.d/7542.misc new file mode 100644 index 0000000000..7dd9b4823b --- /dev/null +++ b/changelog.d/7542.misc @@ -0,0 +1 @@ +Add ability to wait for replication streams. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bb03cc9add..e354c803db 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -126,6 +126,7 @@ class FederationHandler(BaseHandler): self._server_notices_mxid = hs.config.server_notices_mxid self.config = hs.config self.http_client = hs.get_simple_http_client() + self._replication = hs.get_replication_data_handler() self._send_events_to_master = ReplicationFederationSendEventsRestServlet.make_client( hs @@ -1221,7 +1222,7 @@ class FederationHandler(BaseHandler): async def do_invite_join( self, target_hosts: Iterable[str], room_id: str, joinee: str, content: JsonDict - ) -> None: + ) -> Tuple[str, int]: """ Attempts to join the `joinee` to the room `room_id` via the servers contained in `target_hosts`. @@ -1304,15 +1305,23 @@ class FederationHandler(BaseHandler): room_id=room_id, room_version=room_version_obj, ) - await self._persist_auth_tree( + max_stream_id = await self._persist_auth_tree( origin, auth_chain, state, event, room_version_obj ) + # We wait here until this instance has seen the events come down + # replication (if we're using replication) as the below uses caches. + # + # TODO: Currently the events stream is written to from master + await self._replication.wait_for_stream_position( + "master", "events", max_stream_id + ) + # Check whether this room is the result of an upgrade of a room we already know # about. 
If so, migrate over user information predecessor = await self.store.get_room_predecessor(room_id) if not predecessor or not isinstance(predecessor.get("room_id"), str): - return + return event.event_id, max_stream_id old_room_id = predecessor["room_id"] logger.debug( "Found predecessor for %s during remote join: %s", room_id, old_room_id @@ -1325,6 +1334,7 @@ class FederationHandler(BaseHandler): ) logger.debug("Finished joining %s to %s", joinee, room_id) + return event.event_id, max_stream_id finally: room_queue = self.room_queues[room_id] del self.room_queues[room_id] @@ -1554,7 +1564,7 @@ class FederationHandler(BaseHandler): async def do_remotely_reject_invite( self, target_hosts: Iterable[str], room_id: str, user_id: str, content: JsonDict - ) -> EventBase: + ) -> Tuple[EventBase, int]: origin, event, room_version = await self._make_and_verify_event( target_hosts, room_id, user_id, "leave", content=content ) @@ -1574,9 +1584,9 @@ class FederationHandler(BaseHandler): await self.federation_client.send_leave(target_hosts, event) context = await self.state_handler.compute_event_context(event) - await self.persist_events_and_notify([(event, context)]) + stream_id = await self.persist_events_and_notify([(event, context)]) - return event + return event, stream_id async def _make_and_verify_event( self, @@ -1888,7 +1898,7 @@ class FederationHandler(BaseHandler): state: List[EventBase], event: EventBase, room_version: RoomVersion, - ) -> None: + ) -> int: """Checks the auth chain is valid (and passes auth checks) for the state and event. Then persists the auth chain and state atomically. Persists the event separately. Notifies about the persisted events @@ -1982,7 +1992,7 @@ class FederationHandler(BaseHandler): event, old_state=state ) - await self.persist_events_and_notify([(event, new_event_context)]) + return await self.persist_events_and_notify([(event, new_event_context)]) async def _prep_event( self, @@ -2835,7 +2845,7 @@ class FederationHandler(BaseHandler): self, event_and_contexts: Sequence[Tuple[EventBase, EventContext]], backfilled: bool = False, - ) -> None: + ) -> int: """Persists events and tells the notifier/pushers about them, if necessary. @@ -2845,11 +2855,12 @@ class FederationHandler(BaseHandler): backfilling or not """ if self.config.worker_app: - await self._send_events_to_master( + result = await self._send_events_to_master( store=self.store, event_and_contexts=event_and_contexts, backfilled=backfilled, ) + return result["max_stream_id"] else: max_stream_id = await self.storage.persistence.persist_events( event_and_contexts, backfilled=backfilled @@ -2864,6 +2875,8 @@ class FederationHandler(BaseHandler): for event, _ in event_and_contexts: await self._notify_persisted_event(event, max_stream_id) + return max_stream_id + async def _notify_persisted_event( self, event: EventBase, max_stream_id: int ) -> None: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f362896a2..f445e2aa2a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import Optional +from typing import Optional, Tuple from six import iteritems, itervalues, string_types @@ -42,6 +42,7 @@ from synapse.api.errors import ( ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions from synapse.api.urls import ConsentURIBuilder +from synapse.events import EventBase from synapse.events.validator import EventValidator from synapse.logging.context import run_in_background from synapse.metrics.background_process_metrics import run_as_background_process @@ -630,7 +631,9 @@ class EventCreationHandler(object): msg = self._block_events_without_consent_error % {"consent_uri": consent_uri} raise ConsentNotGivenError(msg=msg, consent_uri=consent_uri) - async def send_nonmember_event(self, requester, event, context, ratelimit=True): + async def send_nonmember_event( + self, requester, event, context, ratelimit=True + ) -> int: """ Persists and notifies local clients and federation of an event. @@ -639,6 +642,9 @@ class EventCreationHandler(object): context (Context) the context of the event. ratelimit (bool): Whether to rate limit this send. is_guest (bool): Whether the sender is a guest. + + Return: + The stream_id of the persisted event. """ if event.type == EventTypes.Member: raise SynapseError( @@ -659,7 +665,7 @@ class EventCreationHandler(object): ) return prev_state - await self.handle_new_client_event( + return await self.handle_new_client_event( requester=requester, event=event, context=context, ratelimit=ratelimit ) @@ -688,7 +694,7 @@ class EventCreationHandler(object): async def create_and_send_nonmember_event( self, requester, event_dict, ratelimit=True, txn_id=None - ): + ) -> Tuple[EventBase, int]: """ Creates an event, then sends it. @@ -711,10 +717,10 @@ class EventCreationHandler(object): spam_error = "Spam is not permitted here" raise SynapseError(403, spam_error, Codes.FORBIDDEN) - await self.send_nonmember_event( + stream_id = await self.send_nonmember_event( requester, event, context, ratelimit=ratelimit ) - return event + return event, stream_id @measure_func("create_new_client_event") @defer.inlineCallbacks @@ -774,7 +780,7 @@ class EventCreationHandler(object): @measure_func("handle_new_client_event") async def handle_new_client_event( self, requester, event, context, ratelimit=True, extra_users=[] - ): + ) -> int: """Processes a new event. This includes checking auth, persisting it, notifying users, sending to remote servers, etc. @@ -787,6 +793,9 @@ class EventCreationHandler(object): context (EventContext) ratelimit (bool) extra_users (list(UserID)): Any extra users to notify about event + + Return: + The stream_id of the persisted event. """ if event.is_state() and (event.type, event.state_key) == ( @@ -827,7 +836,7 @@ class EventCreationHandler(object): try: # If we're a worker we need to hit out to the master. 
             if self.config.worker_app:
-                await self.send_event_to_master(
+                result = await self.send_event_to_master(
                     event_id=event.event_id,
                     store=self.store,
                     requester=requester,
@@ -836,14 +845,17 @@ class EventCreationHandler(object):
                     ratelimit=ratelimit,
                     extra_users=extra_users,
                 )
+                stream_id = result["stream_id"]
+                event.internal_metadata.stream_ordering = stream_id
                 success = True
-                return
+                return stream_id
 
-            await self.persist_and_notify_client_event(
+            stream_id = await self.persist_and_notify_client_event(
                 requester, event, context, ratelimit=ratelimit, extra_users=extra_users
             )
             success = True
+            return stream_id
         finally:
             if not success:
                 # Ensure that we actually remove the entries in the push actions
@@ -886,7 +898,7 @@ class EventCreationHandler(object):
     async def persist_and_notify_client_event(
         self, requester, event, context, ratelimit=True, extra_users=[]
-    ):
+    ) -> int:
         """Called when we have fully built the event, have already
         calculated the push actions for the event, and checked auth.
@@ -1076,6 +1088,8 @@ class EventCreationHandler(object):
         # matters as sometimes presence code can take a while.
         run_in_background(self._bump_active_time, requester.user)
 
+        return event_stream_id
+
     async def _bump_active_time(self, user):
         try:
             presence = self.hs.get_presence_handler()
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 13850ba672..2698a129ca 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -22,6 +22,7 @@ import logging
 import math
 import string
 from collections import OrderedDict
+from typing import Tuple
 
 from six import iteritems, string_types
 
@@ -518,7 +519,7 @@ class RoomCreationHandler(BaseHandler):
 
     async def create_room(
         self, requester, config, ratelimit=True, creator_join_profile=None
-    ):
+    ) -> Tuple[dict, int]:
         """ Creates a new room.
 
         Args:
@@ -535,9 +536,9 @@ class RoomCreationHandler(BaseHandler):
                 `avatar_url` and/or `displayname`.
 
         Returns:
-            Deferred[dict]:
-                a dict containing the keys `room_id` and, if an alias was
-                requested, `room_alias`.
+            First, a dict containing the keys `room_id` and, if an alias
+            was requested, `room_alias`. Secondly, the stream_id of the
+            last persisted event.
 
         Raises:
             SynapseError if the room ID couldn't be stored, or something went
             horribly wrong.
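Since create_room now returns the stream ID of the last event it persisted, a caller can thread that value into the same waiting primitive before replying. A hedged sketch follows; the helper function and the replication lookup are illustrative, not taken from the patch.

    # Sketch only: consume create_room's new (result, stream_id) return value.
    async def create_room_and_wait(hs, requester, room_config):
        info, last_stream_id = await hs.get_room_creation_handler().create_room(
            requester, room_config
        )
        # Make sure local caches reflect the new room before using the result.
        await hs.get_replication_data_handler().wait_for_stream_position(
            "master", "events", last_stream_id
        )
        return info  # contains "room_id" and, if requested, "room_alias"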
@@ -669,7 +670,7 @@ class RoomCreationHandler(BaseHandler): # override any attempt to set room versions via the creation_content creation_content["room_version"] = room_version.identifier - await self._send_events_for_new_room( + last_stream_id = await self._send_events_for_new_room( requester, room_id, preset_config=preset_config, @@ -683,7 +684,10 @@ class RoomCreationHandler(BaseHandler): if "name" in config: name = config["name"] - await self.event_creation_handler.create_and_send_nonmember_event( + ( + _, + last_stream_id, + ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Name, @@ -697,7 +701,10 @@ class RoomCreationHandler(BaseHandler): if "topic" in config: topic = config["topic"] - await self.event_creation_handler.create_and_send_nonmember_event( + ( + _, + last_stream_id, + ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Topic, @@ -715,7 +722,7 @@ class RoomCreationHandler(BaseHandler): if is_direct: content["is_direct"] = is_direct - await self.room_member_handler.update_membership( + _, last_stream_id = await self.room_member_handler.update_membership( requester, UserID.from_string(invitee), room_id, @@ -729,7 +736,7 @@ class RoomCreationHandler(BaseHandler): id_access_token = invite_3pid.get("id_access_token") # optional address = invite_3pid["address"] medium = invite_3pid["medium"] - await self.hs.get_room_member_handler().do_3pid_invite( + last_stream_id = await self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, @@ -745,7 +752,7 @@ class RoomCreationHandler(BaseHandler): if room_alias: result["room_alias"] = room_alias.to_string() - return result + return result, last_stream_id async def _send_events_for_new_room( self, @@ -758,7 +765,13 @@ class RoomCreationHandler(BaseHandler): room_alias=None, power_level_content_override=None, # Doesn't apply when initial state has power level state event content creator_join_profile=None, - ): + ) -> int: + """Sends the initial events into a new room. + + Returns: + The stream_id of the last event persisted. + """ + def create(etype, content, **kwargs): e = {"type": etype, "content": content} @@ -767,12 +780,16 @@ class RoomCreationHandler(BaseHandler): return e - async def send(etype, content, **kwargs): + async def send(etype, content, **kwargs) -> int: event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) - await self.event_creation_handler.create_and_send_nonmember_event( + ( + _, + last_stream_id, + ) = await self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False ) + return last_stream_id config = RoomCreationHandler.PRESETS_DICT[preset_config] @@ -797,7 +814,9 @@ class RoomCreationHandler(BaseHandler): # of the first events that get sent into a room. 
pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None) if pl_content is not None: - await send(etype=EventTypes.PowerLevels, content=pl_content) + last_sent_stream_id = await send( + etype=EventTypes.PowerLevels, content=pl_content + ) else: power_level_content = { "users": {creator_id: 100}, @@ -830,33 +849,39 @@ class RoomCreationHandler(BaseHandler): if power_level_content_override: power_level_content.update(power_level_content_override) - await send(etype=EventTypes.PowerLevels, content=power_level_content) + last_sent_stream_id = await send( + etype=EventTypes.PowerLevels, content=power_level_content + ) if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state: - await send( + last_sent_stream_id = await send( etype=EventTypes.CanonicalAlias, content={"alias": room_alias.to_string()}, ) if (EventTypes.JoinRules, "") not in initial_state: - await send( + last_sent_stream_id = await send( etype=EventTypes.JoinRules, content={"join_rule": config["join_rules"]} ) if (EventTypes.RoomHistoryVisibility, "") not in initial_state: - await send( + last_sent_stream_id = await send( etype=EventTypes.RoomHistoryVisibility, content={"history_visibility": config["history_visibility"]}, ) if config["guest_can_join"]: if (EventTypes.GuestAccess, "") not in initial_state: - await send( + last_sent_stream_id = await send( etype=EventTypes.GuestAccess, content={"guest_access": "can_join"} ) for (etype, state_key), content in initial_state.items(): - await send(etype=etype, state_key=state_key, content=content) + last_sent_stream_id = await send( + etype=etype, state_key=state_key, content=content + ) + + return last_sent_stream_id async def _generate_room_id( self, creator_id: str, is_public: str, room_version: RoomVersion, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index e51e1c32fe..691b6705b2 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -17,7 +17,7 @@ import abc import logging -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import Dict, Iterable, List, Optional, Tuple from six.moves import http_client @@ -84,7 +84,7 @@ class RoomMemberHandler(object): room_id: str, user: UserID, content: dict, - ) -> Optional[dict]: + ) -> Tuple[str, int]: """Try and join a room that this server is not in Args: @@ -104,7 +104,7 @@ class RoomMemberHandler(object): room_id: str, target: UserID, content: dict, - ) -> dict: + ) -> Tuple[Optional[str], int]: """Attempt to reject an invite for a room this server is not in. If we fail to do so we locally mark the invite as rejected. @@ -154,7 +154,7 @@ class RoomMemberHandler(object): ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, - ) -> EventBase: + ) -> Tuple[str, int]: user_id = target.to_string() if content is None: @@ -187,9 +187,10 @@ class RoomMemberHandler(object): ) if duplicate is not None: # Discard the new event since this membership change is a no-op. 
- return duplicate + _, stream_id = await self.store.get_event_ordering(duplicate.event_id) + return duplicate.event_id, stream_id - await self.event_creation_handler.handle_new_client_event( + stream_id = await self.event_creation_handler.handle_new_client_event( requester, event, context, extra_users=[target], ratelimit=ratelimit ) @@ -213,7 +214,7 @@ class RoomMemberHandler(object): if prev_member_event.membership == Membership.JOIN: await self._user_left_room(target, room_id) - return event + return event.event_id, stream_id async def copy_room_tags_and_direct_to_room( self, old_room_id, new_room_id, user_id @@ -263,7 +264,7 @@ class RoomMemberHandler(object): ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, - ) -> Union[EventBase, Optional[dict]]: + ) -> Tuple[Optional[str], int]: key = (room_id,) with (await self.member_linearizer.queue(key)): @@ -294,7 +295,7 @@ class RoomMemberHandler(object): ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, - ) -> Union[EventBase, Optional[dict]]: + ) -> Tuple[Optional[str], int]: content_specified = bool(content) if content is None: content = {} @@ -398,7 +399,13 @@ class RoomMemberHandler(object): same_membership = old_membership == effective_membership_state same_sender = requester.user.to_string() == old_state.sender if same_sender and same_membership and same_content: - return old_state + _, stream_id = await self.store.get_event_ordering( + old_state.event_id + ) + return ( + old_state.event_id, + stream_id, + ) if old_membership in ["ban", "leave"] and action == "kick": raise AuthError(403, "The target user is not in the room") @@ -705,7 +712,7 @@ class RoomMemberHandler(object): requester: Requester, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> None: + ) -> int: if self.config.block_non_admin_invites: is_requester_admin = await self.auth.is_server_admin(requester.user) if not is_requester_admin: @@ -737,11 +744,11 @@ class RoomMemberHandler(object): ) if invitee: - await self.update_membership( + _, stream_id = await self.update_membership( requester, UserID.from_string(invitee), room_id, "invite", txn_id=txn_id ) else: - await self._make_and_store_3pid_invite( + stream_id = await self._make_and_store_3pid_invite( requester, id_server, medium, @@ -752,6 +759,8 @@ class RoomMemberHandler(object): id_access_token=id_access_token, ) + return stream_id + async def _make_and_store_3pid_invite( self, requester: Requester, @@ -762,7 +771,7 @@ class RoomMemberHandler(object): user: UserID, txn_id: Optional[str], id_access_token: Optional[str] = None, - ) -> None: + ) -> int: room_state = await self.state_handler.get_current_state(room_id) inviter_display_name = "" @@ -817,7 +826,10 @@ class RoomMemberHandler(object): id_access_token=id_access_token, ) - await self.event_creation_handler.create_and_send_nonmember_event( + ( + event, + stream_id, + ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, @@ -835,6 +847,7 @@ class RoomMemberHandler(object): ratelimit=False, txn_id=txn_id, ) + return stream_id async def _is_host_in_room( self, current_state_ids: Dict[Tuple[str, str], str] @@ -916,7 +929,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): room_id: str, user: UserID, content: dict, - ) -> None: + ) -> Tuple[str, int]: """Implements RoomMemberHandler._remote_join """ # filter ourselves out of remote_room_hosts: do_invite_join ignores it @@ -945,7 +958,7 @@ class 
RoomMemberMasterHandler(RoomMemberHandler): # join dance for now, since we're kinda implicitly checking # that we are allowed to join when we decide whether or not we # need to do the invite/join dance. - await self.federation_handler.do_invite_join( + event_id, stream_id = await self.federation_handler.do_invite_join( remote_room_hosts, room_id, user.to_string(), content ) await self._user_joined_room(user, room_id) @@ -955,14 +968,14 @@ class RoomMemberMasterHandler(RoomMemberHandler): if self.hs.config.limit_remote_rooms.enabled: if too_complex is False: # We checked, and we're under the limit. - return + return event_id, stream_id # Check again, but with the local state events too_complex = await self._is_local_room_too_complex(room_id) if too_complex is False: # We're under the limit. - return + return event_id, stream_id # The room is too large. Leave. requester = types.create_requester(user, None, False, None) @@ -975,6 +988,8 @@ class RoomMemberMasterHandler(RoomMemberHandler): errcode=Codes.RESOURCE_LIMIT_EXCEEDED, ) + return event_id, stream_id + async def _remote_reject_invite( self, requester: Requester, @@ -982,15 +997,15 @@ class RoomMemberMasterHandler(RoomMemberHandler): room_id: str, target: UserID, content: dict, - ) -> dict: + ) -> Tuple[Optional[str], int]: """Implements RoomMemberHandler._remote_reject_invite """ fed_handler = self.federation_handler try: - ret = await fed_handler.do_remotely_reject_invite( + event, stream_id = await fed_handler.do_remotely_reject_invite( remote_room_hosts, room_id, target.to_string(), content=content, ) - return ret + return event.event_id, stream_id except Exception as e: # if we were unable to reject the exception, just mark # it as rejected on our end and plough ahead. @@ -1000,8 +1015,10 @@ class RoomMemberMasterHandler(RoomMemberHandler): # logger.warning("Failed to reject invite: %s", e) - await self.store.locally_reject_invite(target.to_string(), room_id) - return {} + stream_id = await self.store.locally_reject_invite( + target.to_string(), room_id + ) + return None, stream_id async def _user_joined_room(self, target: UserID, room_id: str) -> None: """Implements RoomMemberHandler._user_joined_room diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index 5c776cc0be..02e0c4103d 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -14,7 +14,7 @@ # limitations under the License. 
import logging -from typing import List, Optional +from typing import List, Optional, Tuple from synapse.api.errors import SynapseError from synapse.handlers.room_member import RoomMemberHandler @@ -43,7 +43,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): room_id: str, user: UserID, content: dict, - ) -> Optional[dict]: + ) -> Tuple[str, int]: """Implements RoomMemberHandler._remote_join """ if len(remote_room_hosts) == 0: @@ -59,7 +59,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): await self._user_joined_room(user, room_id) - return ret + return ret["event_id"], ret["stream_id"] async def _remote_reject_invite( self, @@ -68,16 +68,17 @@ class RoomMemberWorkerHandler(RoomMemberHandler): room_id: str, target: UserID, content: dict, - ) -> dict: + ) -> Tuple[Optional[str], int]: """Implements RoomMemberHandler._remote_reject_invite """ - return await self._remote_reject_client( + ret = await self._remote_reject_client( requester=requester, remote_room_hosts=remote_room_hosts, room_id=room_id, user_id=target.to_string(), content=content, ) + return ret["event_id"], ret["stream_id"] async def _user_joined_room(self, target: UserID, room_id: str) -> None: """Implements RoomMemberHandler._user_joined_room diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index 7e23b565b9..c287c4e269 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): """Handles events newly received from federation, including persisting and - notifying. + notifying. Returns the maximum stream ID of the persisted events. The API looks like: @@ -46,6 +46,13 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): "context": { .. serialized event context .. 
}, }], "backfilled": false + } + + 200 OK + + { + "max_stream_id": 32443, + } """ NAME = "fed_send_events" @@ -115,11 +122,11 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): logger.info("Got %d events from federation", len(event_and_contexts)) - await self.federation_handler.persist_events_and_notify( + max_stream_id = await self.federation_handler.persist_events_and_notify( event_and_contexts, backfilled ) - return 200, {} + return 200, {"max_stream_id": max_stream_id} class ReplicationFederationSendEduRestServlet(ReplicationEndpoint): diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index 3577611fd7..050fd34562 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -76,11 +76,11 @@ class ReplicationRemoteJoinRestServlet(ReplicationEndpoint): logger.info("remote_join: %s into room: %s", user_id, room_id) - await self.federation_handler.do_invite_join( + event_id, stream_id = await self.federation_handler.do_invite_join( remote_room_hosts, room_id, user_id, event_content ) - return 200, {} + return 200, {"event_id": event_id, "stream_id": stream_id} class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint): @@ -136,10 +136,10 @@ class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint): logger.info("remote_reject_invite: %s out of room: %s", user_id, room_id) try: - event = await self.federation_handler.do_remotely_reject_invite( + event, stream_id = await self.federation_handler.do_remotely_reject_invite( remote_room_hosts, room_id, user_id, event_content, ) - ret = event.get_pdu_json() + event_id = event.event_id except Exception as e: # if we were unable to reject the exception, just mark # it as rejected on our end and plough ahead. @@ -149,10 +149,10 @@ class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint): # logger.warning("Failed to reject invite: %s", e) - await self.store.locally_reject_invite(user_id, room_id) - ret = {} + stream_id = await self.store.locally_reject_invite(user_id, room_id) + event_id = None - return 200, ret + return 200, {"event_id": event_id, "stream_id": stream_id} class ReplicationUserJoinedLeftRoomRestServlet(ReplicationEndpoint): diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index b74b088ff4..c981723c1a 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -119,11 +119,11 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): "Got event to send with ID: %s into room: %s", event.event_id, event.room_id ) - await self.event_creation_handler.persist_and_notify_client_event( + stream_id = await self.event_creation_handler.persist_and_notify_client_event( requester, event, context, ratelimit=ratelimit, extra_users=extra_users ) - return 200, {} + return 200, {"stream_id": stream_id} def register_servlets(hs, http_server): diff --git a/synapse/replication/http/streams.py b/synapse/replication/http/streams.py index b705a8e16c..bde97eef32 100644 --- a/synapse/replication/http/streams.py +++ b/synapse/replication/http/streams.py @@ -51,10 +51,7 @@ class ReplicationGetStreamUpdates(ReplicationEndpoint): super().__init__(hs) self._instance_name = hs.get_instance_name() - - # We pull the streams from the replication handler (if we try and make - # them ourselves we end up in an import loop). 
-        self.streams = hs.get_tcp_replication().get_streams()
+        self.streams = hs.get_replication_streams()
 
     @staticmethod
     def _serialize_payload(stream_name, from_token, upto_token):
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 28826302f5..508ad1b720 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -14,19 +14,23 @@
 # limitations under the License.
 """A replication client for use by synapse workers.
 """
-
+import heapq
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Dict, List, Tuple
 
+from twisted.internet.defer import Deferred
 from twisted.internet.protocol import ReconnectingClientFactory
 
 from synapse.api.constants import EventTypes
+from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
 from synapse.replication.tcp.streams.events import (
     EventsStream,
     EventsStreamEventRow,
     EventsStreamRow,
 )
+from synapse.util.async_helpers import timeout_deferred
+from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -35,6 +39,10 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
+# How long we allow callers to wait for replication updates before timing out.
+_WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 30
+
+
 class DirectTcpReplicationClientFactory(ReconnectingClientFactory):
     """Factory for building connections to the master. Will reconnect if the
     connection is lost.
@@ -92,6 +100,16 @@ class ReplicationDataHandler:
         self.store = hs.get_datastore()
         self.pusher_pool = hs.get_pusherpool()
         self.notifier = hs.get_notifier()
+        self._reactor = hs.get_reactor()
+        self._clock = hs.get_clock()
+        self._streams = hs.get_replication_streams()
+        self._instance_name = hs.get_instance_name()
+
+        # Map from stream to list of deferreds waiting for the stream to
+        # arrive at a particular position. The lists are sorted by stream position.
+        self._streams_to_waiters = (
+            {}
+        )  # type: Dict[str, List[Tuple[int, Deferred[None]]]]
 
     async def on_rdata(
         self, stream_name: str, instance_name: str, token: int, rows: list
@@ -131,8 +149,76 @@ class ReplicationDataHandler:
 
             await self.pusher_pool.on_new_notifications(token, token)
 
+        # Notify any waiting deferreds. The list is ordered by position so we
+        # just iterate through the list until we reach a position that is
+        # greater than the received row position.
+        waiting_list = self._streams_to_waiters.get(stream_name, [])
+
+        # Index of the first item with a position after the current token, i.e.
+        # we have called all deferreds before this index. If the loop below does
+        # not overwrite it, then either a) there are no items in the list so this
+        # is a no-op, or b) every item in the list was called and the list should
+        # be cleared. Setting it to `len(list)` works for both cases.
+        index_of_first_deferred_not_called = len(waiting_list)
+
+        for idx, (position, deferred) in enumerate(waiting_list):
+            if position <= token:
+                try:
+                    with PreserveLoggingContext():
+                        deferred.callback(None)
+                except Exception:
+                    # The deferred has been cancelled or timed out.
+                    pass
+            else:
+                # The list is sorted by position so we don't need to continue
+                # checking any further entries in the list.
+                index_of_first_deferred_not_called = idx
+                break
+
+        # Drop all entries in the waiting list that were called in the above
+        # loop. (This maintains the order, so there is no need to re-sort.)
+        waiting_list[:] = waiting_list[index_of_first_deferred_not_called:]
+
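The waiter bookkeeping above is easier to see in isolation. A stripped-down sketch of the same idea, using plain callables instead of Twisted deferreds (illustrative only, not code from this change; note that heapq compares tuples element-wise, so, as with the real (position, deferred) pairs, equal positions would need a tie-breaker):

    import heapq

    class StreamWaiters:
        def __init__(self):
            self._waiting = []  # (position, callback), kept sorted by position

        def wait_for(self, position, callback):
            # heapq keeps the list ordered by position on insert.
            heapq.heappush(self._waiting, (position, callback))

        def on_token(self, token):
            # Fire everything at or below the token; the list is sorted, so
            # we can stop at the first entry beyond it.
            first_not_called = len(self._waiting)
            for idx, (position, callback) in enumerate(self._waiting):
                if position <= token:
                    callback()
                else:
                    first_not_called = idx
                    break
            # Drop the called entries; slicing preserves the sort order.
            self._waiting[:] = self._waiting[first_not_called:]

    waiters = StreamWaiters()
    waiters.wait_for(5, lambda: print("reached 5"))
    waiters.wait_for(9, lambda: print("reached 9"))
    waiters.on_token(7)  # fires only the position-5 waiter
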
     async def on_position(self, stream_name: str, instance_name: str, token: int):
         self.store.process_replication_rows(stream_name, instance_name, token, [])
 
     def on_remote_server_up(self, server: str):
         """Called when get a new REMOTE_SERVER_UP command."""
+
+    async def wait_for_stream_position(
+        self, instance_name: str, stream_name: str, position: int
+    ):
+        """Wait until this instance has received updates up to and including
+        the given stream position.
+        """
+
+        if instance_name == self._instance_name:
+            # We don't get told about updates written by this process, and
+            # anyway in that case we don't need to wait.
+            return
+
+        current_position = self._streams[stream_name].current_token(self._instance_name)
+        if position <= current_position:
+            # We're already past the position
+            return
+
+        # Create a new deferred that times out after N seconds, as we don't want
+        # to wedge here forever.
+        deferred = Deferred()
+        deferred = timeout_deferred(
+            deferred, _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS, self._reactor
+        )
+
+        waiting_list = self._streams_to_waiters.setdefault(stream_name, [])
+
+        # We insert into the list using heapq as it is more efficient than
+        # pushing and then re-sorting each time.
+        heapq.heappush(waiting_list, (position, deferred))
+
+        # We measure here to get in-flight counts and the average waiting time.
+        with Measure(self._clock, "repl.wait_for_stream_position"):
+            logger.info("Waiting for repl stream %r to reach %s", stream_name, position)
+            await make_deferred_yieldable(deferred)
+            logger.info(
+                "Finished waiting for repl stream %r to reach %s", stream_name, position
+            )
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 7d40001988..0a13e1ed34 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -59,6 +59,7 @@ class ShutdownRoomRestServlet(RestServlet):
         self.event_creation_handler = hs.get_event_creation_handler()
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()
+        self._replication = hs.get_replication_data_handler()
 
     async def on_POST(self, request, room_id):
         requester = await self.auth.get_user_by_req(request)
@@ -73,7 +74,7 @@ class ShutdownRoomRestServlet(RestServlet):
         message = content.get("message", self.DEFAULT_MESSAGE)
         room_name = content.get("room_name", "Content Violation Notification")
 
-        info = await self._room_creation_handler.create_room(
+        info, stream_id = await self._room_creation_handler.create_room(
             room_creator_requester,
             config={
                 "preset": "public_chat",
@@ -94,6 +95,13 @@ class ShutdownRoomRestServlet(RestServlet):
         # desirable in case the first attempt at blocking the room failed below.
         await self.store.block_room(room_id, requester_user_id)
 
+        # We now wait for the create room event to come back in via replication so
+        # that we can assume that all the joins/invites have propagated before
+        # we try and auto join below.
+ # + # TODO: Currently the events stream is written to from master + await self._replication.wait_for_stream_position("master", "events", stream_id) + users = await self.state.get_current_users_in_room(room_id) kicked_users = [] failed_to_kick_users = [] diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6b5830cc3f..105e0cf4d2 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -93,7 +93,7 @@ class RoomCreateRestServlet(TransactionRestServlet): async def on_POST(self, request): requester = await self.auth.get_user_by_req(request) - info = await self._room_creation_handler.create_room( + info, _ = await self._room_creation_handler.create_room( requester, self.get_room_config(request) ) @@ -202,7 +202,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): if event_type == EventTypes.Member: membership = content.get("membership", None) - event = await self.room_member_handler.update_membership( + event_id, _ = await self.room_member_handler.update_membership( requester, target=UserID.from_string(state_key), room_id=room_id, @@ -210,14 +210,18 @@ class RoomStateEventRestServlet(TransactionRestServlet): content=content, ) else: - event = await self.event_creation_handler.create_and_send_nonmember_event( + ( + event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id ) + event_id = event.event_id ret = {} # type: dict - if event: - set_tag("event_id", event.event_id) - ret = {"event_id": event.event_id} + if event_id: + set_tag("event_id", event_id) + ret = {"event_id": event_id} return 200, ret @@ -247,7 +251,7 @@ class RoomSendEventRestServlet(TransactionRestServlet): if b"ts" in request.args and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) - event = await self.event_creation_handler.create_and_send_nonmember_event( + event, _ = await self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id ) @@ -781,7 +785,7 @@ class RoomRedactEventRestServlet(TransactionRestServlet): requester = await self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) - event = await self.event_creation_handler.create_and_send_nonmember_event( + event, _ = await self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Redaction, diff --git a/synapse/rest/client/v2_alpha/relations.py b/synapse/rest/client/v2_alpha/relations.py index 63f07b63da..89002ffbff 100644 --- a/synapse/rest/client/v2_alpha/relations.py +++ b/synapse/rest/client/v2_alpha/relations.py @@ -111,7 +111,7 @@ class RelationSendServlet(RestServlet): "sender": requester.user.to_string(), } - event = await self.event_creation_handler.create_and_send_nonmember_event( + event, _ = await self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict=event_dict, txn_id=txn_id ) diff --git a/synapse/server.py b/synapse/server.py index c530f1aa1a..ca2deb49bb 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -90,6 +90,7 @@ from synapse.push.pusherpool import PusherPool from synapse.replication.tcp.client import ReplicationDataHandler from synapse.replication.tcp.handler import ReplicationCommandHandler from synapse.replication.tcp.resource import ReplicationStreamer +from synapse.replication.tcp.streams import STREAMS_MAP from synapse.rest.media.v1.media_repository import ( MediaRepository, MediaRepositoryResource, @@ -210,6 +211,7 @@ class 
HomeServer(object): "storage", "replication_streamer", "replication_data_handler", + "replication_streams", ] REQUIRED_ON_MASTER_STARTUP = ["user_directory_handler", "stats_handler"] @@ -583,6 +585,9 @@ class HomeServer(object): def build_replication_data_handler(self): return ReplicationDataHandler(self) + def build_replication_streams(self): + return {stream.NAME: stream(self) for stream in STREAMS_MAP.values()} + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/synapse/server.pyi b/synapse/server.pyi index 9e7fad7e6e..fe8024d2d4 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -1,3 +1,5 @@ +from typing import Dict + import twisted.internet import synapse.api.auth @@ -28,6 +30,7 @@ import synapse.server_notices.server_notices_sender import synapse.state import synapse.storage from synapse.events.builder import EventBuilderFactory +from synapse.replication.tcp.streams import Stream class HomeServer(object): @property @@ -136,3 +139,5 @@ class HomeServer(object): pass def get_pusherpool(self) -> synapse.push.pusherpool.PusherPool: pass + def get_replication_streams(self) -> Dict[str, Stream]: + pass diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 999c621b92..bf2454c01c 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -83,10 +83,10 @@ class ServerNoticesManager(object): if state_key is not None: event_dict["state_key"] = state_key - res = await self._event_creation_handler.create_and_send_nonmember_event( + event, _ = await self._event_creation_handler.create_and_send_nonmember_event( requester, event_dict, ratelimit=False ) - return res + return event @cached() async def get_or_create_notice_room_for_user(self, user_id): @@ -143,7 +143,7 @@ class ServerNoticesManager(object): } requester = create_requester(self.server_notices_mxid) - info = await self._room_creation_handler.create_room( + info, _ = await self._room_creation_handler.create_room( requester, config={ "preset": RoomCreationPreset.PRIVATE_CHAT, diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py index 9130b74eb5..b880a71782 100644 --- a/synapse/storage/data_stores/main/events_worker.py +++ b/synapse/storage/data_stores/main/events_worker.py @@ -1289,12 +1289,12 @@ class EventsWorkerStore(SQLBaseStore): async def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream """ - to_1, so_1 = await self._get_event_ordering(event_id1) - to_2, so_2 = await self._get_event_ordering(event_id2) + to_1, so_1 = await self.get_event_ordering(event_id1) + to_2, so_2 = await self.get_event_ordering(event_id2) return (to_1, so_1) > (to_2, so_2) @cachedInlineCallbacks(max_entries=5000) - def _get_event_ordering(self, event_id): + def get_event_ordering(self, event_id): res = yield self.db.simple_select_one( table="events", retcols=["topological_ordering", "stream_ordering"], diff --git a/synapse/storage/data_stores/main/roommember.py b/synapse/storage/data_stores/main/roommember.py index 1e9c850152..7c5ca81ae0 100644 --- a/synapse/storage/data_stores/main/roommember.py +++ b/synapse/storage/data_stores/main/roommember.py @@ -1069,6 +1069,8 @@ class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): with self._stream_id_gen.get_next() as stream_ordering: yield 
self.db.runInteraction("locally_reject_invite", f, stream_ordering) + return stream_ordering + def forget(self, user_id, room_id): """Indicate that user_id wishes to discard history for room_id.""" diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py index 94980733c4..0c9987be54 100644 --- a/tests/federation/test_complexity.py +++ b/tests/federation/test_complexity.py @@ -79,7 +79,9 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): # Mock out some things, because we don't want to test the whole join fed_transport.client.get_json = Mock(return_value=defer.succeed({"v1": 9999})) - handler.federation_handler.do_invite_join = Mock(return_value=defer.succeed(1)) + handler.federation_handler.do_invite_join = Mock( + return_value=defer.succeed(("", 1)) + ) d = handler._remote_join( None, @@ -115,7 +117,9 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase): # Mock out some things, because we don't want to test the whole join fed_transport.client.get_json = Mock(return_value=defer.succeed(None)) - handler.federation_handler.do_invite_join = Mock(return_value=defer.succeed(1)) + handler.federation_handler.do_invite_join = Mock( + return_value=defer.succeed(("", 1)) + ) # Artificially raise the complexity self.hs.get_datastore().get_current_state_event_counts = lambda x: defer.succeed( diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 51e2b37218..2fa8d4739b 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -86,7 +86,10 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): reactor.pump((1000,)) hs = self.setup_test_homeserver( - notifier=Mock(), http_client=mock_federation_client, keyring=mock_keyring + notifier=Mock(), + http_client=mock_federation_client, + keyring=mock_keyring, + replication_streams={}, ) hs.datastores = datastores diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py index 0e04b2cf92..43425c969a 100644 --- a/tests/storage/test_cleanup_extrems.py +++ b/tests/storage/test_cleanup_extrems.py @@ -39,7 +39,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase): # Create a test user and room self.user = UserID("alice", "test") self.requester = Requester(self.user, None, False, None, None) - info = self.get_success(self.room_creator.create_room(self.requester, {})) + info, _ = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] def run_background_update(self): @@ -261,7 +261,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase): self.user = UserID.from_string(self.register_user("user1", "password")) self.token1 = self.login("user1", "password") self.requester = Requester(self.user, None, False, None, None) - info = self.get_success(self.room_creator.create_room(self.requester, {})) + info, _ = self.get_success(self.room_creator.create_room(self.requester, {})) self.room_id = info["room_id"] self.event_creator = homeserver.get_event_creation_handler() homeserver.config.user_consent_version = self.CONSENT_VERSION diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py index a7b7fd36d3..a7b85004e5 100644 --- a/tests/storage/test_event_metrics.py +++ b/tests/storage/test_event_metrics.py @@ -33,7 +33,7 @@ class ExtremStatisticsTestCase(HomeserverTestCase): events = [(3, 2), (6, 2), (4, 6)] for event_count, extrems in events: - info = self.get_success(room_creator.create_room(requester, {})) + info, _ = 
self.get_success(room_creator.create_room(requester, {})) room_id = info["room_id"] last_event = None diff --git a/tests/test_federation.py b/tests/test_federation.py index 13ff14863e..c5099dd039 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -28,13 +28,13 @@ class MessageAcceptTests(unittest.HomeserverTestCase): user_id = UserID("us", "test") our_user = Requester(user_id, None, False, None, None) room_creator = self.homeserver.get_room_creation_handler() - room = ensureDeferred( + room_deferred = ensureDeferred( room_creator.create_room( our_user, room_creator.PRESETS_DICT["public_chat"], ratelimit=False ) ) self.reactor.advance(0.1) - self.room_id = self.successResultOf(room)["room_id"] + self.room_id = self.successResultOf(room_deferred)[0]["room_id"] self.store = self.homeserver.get_datastore() -- cgit 1.5.1 From f4e6495b5d3267976f34088fa7459b388b801eb6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 5 Jun 2020 10:47:20 +0100 Subject: Performance improvements and refactor of Ratelimiter (#7595) While working on https://github.com/matrix-org/synapse/issues/5665 I found myself digging into the `Ratelimiter` class and seeing that it was both: * Rather undocumented, and * causing a *lot* of config checks This PR attempts to refactor and comment the `Ratelimiter` class, as well as encourage config file accesses to only be done at instantiation. Best to be reviewed commit-by-commit. --- changelog.d/7595.misc | 1 + synapse/api/ratelimiting.py | 153 +++++++++++++++++++++------- synapse/config/ratelimiting.py | 8 +- synapse/handlers/_base.py | 60 ++++++----- synapse/handlers/auth.py | 24 ++--- synapse/handlers/message.py | 1 - synapse/handlers/register.py | 9 +- synapse/rest/client/v1/login.py | 65 ++++-------- synapse/rest/client/v2_alpha/register.py | 16 +-- synapse/server.py | 17 ++-- synapse/util/ratelimitutils.py | 2 +- tests/api/test_ratelimiting.py | 96 +++++++++++++---- tests/handlers/test_profile.py | 6 +- tests/replication/slave/storage/_base.py | 9 +- tests/rest/client/v1/test_events.py | 8 +- tests/rest/client/v1/test_login.py | 49 +++++++-- tests/rest/client/v1/test_rooms.py | 9 +- tests/rest/client/v1/test_typing.py | 10 +- tests/rest/client/v2_alpha/test_register.py | 9 +- 19 files changed, 322 insertions(+), 230 deletions(-) create mode 100644 changelog.d/7595.misc (limited to 'synapse/server.py') diff --git a/changelog.d/7595.misc b/changelog.d/7595.misc new file mode 100644 index 0000000000..7a0646b1a3 --- /dev/null +++ b/changelog.d/7595.misc @@ -0,0 +1 @@ +Refactor `Ratelimiter` to limit the amount of expensive config value accesses. diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 7a049b3af7..ec6b3a69a2 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,75 +17,157 @@ from collections import OrderedDict from typing import Any, Optional, Tuple from synapse.api.errors import LimitExceededError +from synapse.util import Clock class Ratelimiter(object): """ - Ratelimit message sending by user. + Ratelimit actions marked by arbitrary keys. + + Args: + clock: A homeserver clock, for retrieving the current time + rate_hz: The long term number of actions that can be performed in a second. 
+        burst_count: How many actions can be performed before being limited.
     """
 
-    def __init__(self):
-        self.message_counts = (
-            OrderedDict()
-        )  # type: OrderedDict[Any, Tuple[float, int, Optional[float]]]
+    def __init__(self, clock: Clock, rate_hz: float, burst_count: int):
+        self.clock = clock
+        self.rate_hz = rate_hz
+        self.burst_count = burst_count
+
+        # An ordered dictionary keeping track of actions, when they were last
+        # performed and how often. Each entry is a mapping from a key of arbitrary type
+        # to a tuple representing:
+        #   * How many times an action has occurred since a point in time
+        #   * The point in time
+        #   * The rate_hz of this particular entry. This can vary per request
+        self.actions = OrderedDict()  # type: OrderedDict[Any, Tuple[float, int, float]]
 
-    def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True):
+    def can_do_action(
+        self,
+        key: Any,
+        rate_hz: Optional[float] = None,
+        burst_count: Optional[int] = None,
+        update: bool = True,
+        _time_now_s: Optional[int] = None,
+    ) -> Tuple[bool, float]:
         """Can the entity (e.g. user or IP address) perform the action?
+
         Args:
             key: The key we should use when rate limiting. Can be a user ID
                 (when sending events), an IP address, etc.
-            time_now_s: The time now.
-            rate_hz: The long term number of messages a user can send in a
-                second.
-            burst_count: How many messages the user can send before being
-                limited.
-            update (bool): Whether to update the message rates or not. This is
-                useful to check if a message would be allowed to be sent before
-                its ready to be actually sent.
+            rate_hz: The long term number of actions that can be performed in a second.
+                Overrides the value set during instantiation if set.
+            burst_count: How many actions can be performed before being limited.
+                Overrides the value set during instantiation if set.
+            update: Whether to count this check as performing the action.
+            _time_now_s: The current time. Optional, defaults to the current time
+                according to self.clock. Only used by tests.
+
         Returns:
-            A pair of a bool indicating if they can send a message now and a
-                time in seconds of when they can next send a message.
+            A tuple containing:
+                * A bool indicating if they can perform the action now
+                * The reactor timestamp for when the action can be performed next.
+                  -1 if rate_hz is less than or equal to zero
         """
-        self.prune_message_counts(time_now_s)
-        message_count, time_start, _ignored = self.message_counts.get(
-            key, (0.0, time_now_s, None)
-        )
+        # Override default values if set
+        time_now_s = _time_now_s if _time_now_s is not None else self.clock.time()
+        rate_hz = rate_hz if rate_hz is not None else self.rate_hz
+        burst_count = burst_count if burst_count is not None else self.burst_count
+
+        # Remove any expired entries
+        self._prune_message_counts(time_now_s)
+
+        # Check if there is an existing count entry for this key
+        action_count, time_start, _ = self.actions.get(key, (0.0, time_now_s, 0.0))
+
+        # Check whether performing another action is allowed
         time_delta = time_now_s - time_start
-        sent_count = message_count - time_delta * rate_hz
-        if sent_count < 0:
+        performed_count = action_count - time_delta * rate_hz
+        if performed_count < 0:
+            # Allow, reset back to count 1
             allowed = True
             time_start = time_now_s
-            message_count = 1.0
-        elif sent_count > burst_count - 1.0:
+            action_count = 1.0
+        elif performed_count > burst_count - 1.0:
+            # Deny, we have exceeded our burst count
             allowed = False
         else:
+            # We haven't reached our limit yet
             allowed = True
-            message_count += 1
+            action_count += 1.0
 
         if update:
-            self.message_counts[key] = (message_count, time_start, rate_hz)
+            self.actions[key] = (action_count, time_start, rate_hz)
 
         if rate_hz > 0:
-            time_allowed = time_start + (message_count - burst_count + 1) / rate_hz
+            # Find out when the count of existing actions expires
+            time_allowed = time_start + (action_count - burst_count + 1) / rate_hz
+
+            # Don't give back a time in the past
             if time_allowed < time_now_s:
                 time_allowed = time_now_s
+
         else:
+            # XXX: Why is this -1? This seems to only be used in
+            # self.ratelimit. I guess so that clients get a time in the past and don't
+            # feel afraid to try again immediately
             time_allowed = -1
 
         return allowed, time_allowed
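For concreteness, here is the arithmetic above traced with rate_hz=0.1 and burst_count=1, the same numbers the updated unit tests further down use:

    # t=0:  no entry yet -> performed_count = 0.0, which is not > burst_count - 1,
    #       so allow; store (1.0, 0); time_allowed = 0 + (1 - 1 + 1) / 0.1 = 10.0
    # t=5:  performed_count = 1.0 - 5 * 0.1 = 0.5 > 0.0 -> deny; time_allowed is
    #       still 10.0
    # t=10: performed_count = 1.0 - 10 * 0.1 = 0.0 -> allow; store (2.0, 0);
    #       time_allowed = 0 + (2 - 1 + 1) / 0.1 = 20.0
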
-    def prune_message_counts(self, time_now_s):
-        for key in list(self.message_counts.keys()):
-            message_count, time_start, rate_hz = self.message_counts[key]
+    def _prune_message_counts(self, time_now_s: int):
+        """Remove message count entries that have not exceeded their defined
+        rate_hz limit
+
+        Args:
+            time_now_s: The current time
+        """
+        # We create a copy of the key list here as the dictionary is modified during
+        # the loop
+        for key in list(self.actions.keys()):
+            action_count, time_start, rate_hz = self.actions[key]
+
+            # Rate limit = "seconds since we started limiting this action" * rate_hz
+            # If this limit has not been exceeded, wipe our record of this action
             time_delta = time_now_s - time_start
-            if message_count - time_delta * rate_hz > 0:
-                break
+            if action_count - time_delta * rate_hz > 0:
+                continue
             else:
-                del self.message_counts[key]
+                del self.actions[key]
+
+    def ratelimit(
+        self,
+        key: Any,
+        rate_hz: Optional[float] = None,
+        burst_count: Optional[int] = None,
+        update: bool = True,
+        _time_now_s: Optional[int] = None,
+    ):
+        """Checks if an action can be performed. If not, raises a LimitExceededError
+
+        Args:
+            key: An arbitrary key used to classify an action
+            rate_hz: The long term number of actions that can be performed in a second.
+                Overrides the value set during instantiation if set.
+            burst_count: How many actions can be performed before being limited.
+                Overrides the value set during instantiation if set.
+            update: Whether to count this check as performing the action
+            _time_now_s: The current time.
Optional, defaults to the current time according + to self.clock. Only used by tests. + + Raises: + LimitExceededError: If an action could not be performed, along with the time in + milliseconds until the action can be performed again + """ + time_now_s = _time_now_s if _time_now_s is not None else self.clock.time() - def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True): allowed, time_allowed = self.can_do_action( - key, time_now_s, rate_hz, burst_count, update + key, + rate_hz=rate_hz, + burst_count=burst_count, + update=update, + _time_now_s=time_now_s, ) if not allowed: diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 4a3bfc4354..2dd94bae2b 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -12,11 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict + from ._base import Config class RateLimitConfig(object): - def __init__(self, config, defaults={"per_second": 0.17, "burst_count": 3.0}): + def __init__( + self, + config: Dict[str, float], + defaults={"per_second": 0.17, "burst_count": 3.0}, + ): self.per_second = config.get("per_second", defaults["per_second"]) self.burst_count = config.get("burst_count", defaults["burst_count"]) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 3b781d9836..61dc4beafe 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -19,7 +19,7 @@ from twisted.internet import defer import synapse.types from synapse.api.constants import EventTypes, Membership -from synapse.api.errors import LimitExceededError +from synapse.api.ratelimiting import Ratelimiter from synapse.types import UserID logger = logging.getLogger(__name__) @@ -44,11 +44,26 @@ class BaseHandler(object): self.notifier = hs.get_notifier() self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() - self.ratelimiter = hs.get_ratelimiter() - self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter() self.clock = hs.get_clock() self.hs = hs + # The rate_hz and burst_count are overridden on a per-user basis + self.request_ratelimiter = Ratelimiter( + clock=self.clock, rate_hz=0, burst_count=0 + ) + self._rc_message = self.hs.config.rc_message + + # Check whether ratelimiting room admin message redaction is enabled + # by the presence of rate limits in the config + if self.hs.config.rc_admin_redaction: + self.admin_redaction_ratelimiter = Ratelimiter( + clock=self.clock, + rate_hz=self.hs.config.rc_admin_redaction.per_second, + burst_count=self.hs.config.rc_admin_redaction.burst_count, + ) + else: + self.admin_redaction_ratelimiter = None + self.server_name = hs.hostname self.event_builder_factory = hs.get_event_builder_factory() @@ -70,7 +85,6 @@ class BaseHandler(object): Raises: LimitExceededError if the request should be ratelimited """ - time_now = self.clock.time() user_id = requester.user.to_string() # The AS user itself is never rate limited. @@ -83,48 +97,32 @@ class BaseHandler(object): if requester.app_service and not requester.app_service.is_rate_limited(): return + messages_per_second = self._rc_message.per_second + burst_count = self._rc_message.burst_count + # Check if there is a per user override in the DB. 
override = yield self.store.get_ratelimit_for_user(user_id) if override: - # If overriden with a null Hz then ratelimiting has been entirely + # If overridden with a null Hz then ratelimiting has been entirely # disabled for the user if not override.messages_per_second: return messages_per_second = override.messages_per_second burst_count = override.burst_count + + if is_admin_redaction and self.admin_redaction_ratelimiter: + # If we have separate config for admin redactions, use a separate + # ratelimiter as to not have user_ids clash + self.admin_redaction_ratelimiter.ratelimit(user_id, update=update) else: - # We default to different values if this is an admin redaction and - # the config is set - if is_admin_redaction and self.hs.config.rc_admin_redaction: - messages_per_second = self.hs.config.rc_admin_redaction.per_second - burst_count = self.hs.config.rc_admin_redaction.burst_count - else: - messages_per_second = self.hs.config.rc_message.per_second - burst_count = self.hs.config.rc_message.burst_count - - if is_admin_redaction and self.hs.config.rc_admin_redaction: - # If we have separate config for admin redactions we use a separate - # ratelimiter - allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action( - user_id, - time_now, - rate_hz=messages_per_second, - burst_count=burst_count, - update=update, - ) - else: - allowed, time_allowed = self.ratelimiter.can_do_action( + # Override rate and burst count per-user + self.request_ratelimiter.ratelimit( user_id, - time_now, rate_hz=messages_per_second, burst_count=burst_count, update=update, ) - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now)) - ) async def maybe_kick_guest_users(self, event, context=None): # Technically this function invalidates current_state by changing it. diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 75b39e878c..119678e67b 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -108,7 +108,11 @@ class AuthHandler(BaseHandler): # Ratelimiter for failed auth during UIA. Uses same ratelimit config # as per `rc_login.failed_attempts`. - self._failed_uia_attempts_ratelimiter = Ratelimiter() + self._failed_uia_attempts_ratelimiter = Ratelimiter( + clock=self.clock, + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + ) self._clock = self.hs.get_clock() @@ -196,13 +200,7 @@ class AuthHandler(BaseHandler): user_id = requester.user.to_string() # Check if we should be ratelimited due to too many previous failed attempts - self._failed_uia_attempts_ratelimiter.ratelimit( - user_id, - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=False, - ) + self._failed_uia_attempts_ratelimiter.ratelimit(user_id, update=False) # build a list of supported flows flows = [[login_type] for login_type in self._supported_ui_auth_types] @@ -212,14 +210,8 @@ class AuthHandler(BaseHandler): flows, request, request_body, clientip, description ) except LoginError: - # Update the ratelimite to say we failed (`can_do_action` doesn't raise). 
- self._failed_uia_attempts_ratelimiter.can_do_action( - user_id, - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=True, - ) + # Update the ratelimiter to say we failed (`can_do_action` doesn't raise). + self._failed_uia_attempts_ratelimiter.can_do_action(user_id) raise # find the completed login type diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 681f92cafd..649ca1f08a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -362,7 +362,6 @@ class EventCreationHandler(object): self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname - self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() self.config = hs.config self.require_membership_for_aliases = hs.config.require_membership_for_aliases diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 55a03e53ea..cd746be7c8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -425,14 +425,7 @@ class RegistrationHandler(BaseHandler): if not address: return - time_now = self.clock.time() - - self.ratelimiter.ratelimit( - address, - time_now_s=time_now, - rate_hz=self.hs.config.rc_registration.per_second, - burst_count=self.hs.config.rc_registration.burst_count, - ) + self.ratelimiter.ratelimit(address) def register_with_store( self, diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 6ac7c5142b..dceb2792fa 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -87,11 +87,22 @@ class LoginRestServlet(RestServlet): self.auth_handler = self.hs.get_auth_handler() self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() - self._clock = hs.get_clock() self._well_known_builder = WellKnownBuilder(hs) - self._address_ratelimiter = Ratelimiter() - self._account_ratelimiter = Ratelimiter() - self._failed_attempts_ratelimiter = Ratelimiter() + self._address_ratelimiter = Ratelimiter( + clock=hs.get_clock(), + rate_hz=self.hs.config.rc_login_address.per_second, + burst_count=self.hs.config.rc_login_address.burst_count, + ) + self._account_ratelimiter = Ratelimiter( + clock=hs.get_clock(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + ) + self._failed_attempts_ratelimiter = Ratelimiter( + clock=hs.get_clock(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + ) def on_GET(self, request): flows = [] @@ -124,13 +135,7 @@ class LoginRestServlet(RestServlet): return 200, {} async def on_POST(self, request): - self._address_ratelimiter.ratelimit( - request.getClientIP(), - time_now_s=self.hs.clock.time(), - rate_hz=self.hs.config.rc_login_address.per_second, - burst_count=self.hs.config.rc_login_address.burst_count, - update=True, - ) + self._address_ratelimiter.ratelimit(request.getClientIP()) login_submission = parse_json_object_from_request(request) try: @@ -198,13 +203,7 @@ class LoginRestServlet(RestServlet): # We also apply account rate limiting using the 3PID as a key, as # otherwise using 3PID bypasses the ratelimiting based on user ID. 
- self._failed_attempts_ratelimiter.ratelimit( - (medium, address), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=False, - ) + self._failed_attempts_ratelimiter.ratelimit((medium, address), update=False) # Check for login providers that support 3pid login types ( @@ -238,13 +237,7 @@ class LoginRestServlet(RestServlet): # If it returned None but the 3PID was bound then we won't hit # this code path, which is fine as then the per-user ratelimit # will kick in below. - self._failed_attempts_ratelimiter.can_do_action( - (medium, address), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=True, - ) + self._failed_attempts_ratelimiter.can_do_action((medium, address)) raise LoginError(403, "", errcode=Codes.FORBIDDEN) identifier = {"type": "m.id.user", "user": user_id} @@ -263,11 +256,7 @@ class LoginRestServlet(RestServlet): # Check if we've hit the failed ratelimit (but don't update it) self._failed_attempts_ratelimiter.ratelimit( - qualified_user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=False, + qualified_user_id.lower(), update=False ) try: @@ -279,13 +268,7 @@ class LoginRestServlet(RestServlet): # limiter. Using `can_do_action` avoids us raising a ratelimit # exception and masking the LoginError. The actual ratelimiting # should have happened above. - self._failed_attempts_ratelimiter.can_do_action( - qualified_user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=True, - ) + self._failed_attempts_ratelimiter.can_do_action(qualified_user_id.lower()) raise result = await self._complete_login( @@ -318,13 +301,7 @@ class LoginRestServlet(RestServlet): # Before we actually log them in we check if they've already logged in # too often. This happens here rather than before as we don't # necessarily know the user before now. 
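Taken together, the three limiters constructed in __init__ now divide the login path roughly like this (a schematic of the flow after this change, not verbatim code; the per-user check is the call rewritten just below):

    # on_POST:
    #     _address_ratelimiter.ratelimit(ip)                         # every attempt, per IP
    #     _failed_attempts_ratelimiter.ratelimit(key, update=False)  # check only, don't count
    #     try:
    #         ... validate the credentials ...
    #     except LoginError:
    #         # count the failure without raising a second error
    #         _failed_attempts_ratelimiter.can_do_action(key)
    #         raise
    #     _account_ratelimiter.ratelimit(user_id.lower())            # count successful logins
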
- self._account_ratelimiter.ratelimit( - user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_account.per_second, - burst_count=self.hs.config.rc_login_account.burst_count, - update=True, - ) + self._account_ratelimiter.ratelimit(user_id.lower()) if create_non_existent_users: canonical_uid = await self.auth_handler.check_user_exists(user_id) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index addd4cae19..b9ffe86b2a 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -26,7 +26,6 @@ import synapse.types from synapse.api.constants import LoginType from synapse.api.errors import ( Codes, - LimitExceededError, SynapseError, ThreepidValidationError, UnrecognizedRequestError, @@ -396,20 +395,7 @@ class RegisterRestServlet(RestServlet): client_addr = request.getClientIP() - time_now = self.clock.time() - - allowed, time_allowed = self.ratelimiter.can_do_action( - client_addr, - time_now_s=time_now, - rate_hz=self.hs.config.rc_registration.per_second, - burst_count=self.hs.config.rc_registration.burst_count, - update=False, - ) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now)) - ) + self.ratelimiter.ratelimit(client_addr, update=False) kind = b"user" if b"kind" in request.args: diff --git a/synapse/server.py b/synapse/server.py index ca2deb49bb..fe94836a2c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -242,9 +242,12 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() - self.ratelimiter = Ratelimiter() - self.admin_redaction_ratelimiter = Ratelimiter() - self.registration_ratelimiter = Ratelimiter() + + self.registration_ratelimiter = Ratelimiter( + clock=self.clock, + rate_hz=config.rc_registration.per_second, + burst_count=config.rc_registration.burst_count, + ) self.datastores = None @@ -314,15 +317,9 @@ class HomeServer(object): def get_distributor(self): return self.distributor - def get_ratelimiter(self): - return self.ratelimiter - - def get_registration_ratelimiter(self): + def get_registration_ratelimiter(self) -> Ratelimiter: return self.registration_ratelimiter - def get_admin_redaction_ratelimiter(self): - return self.admin_redaction_ratelimiter - def build_federation_client(self): return FederationClient(self) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 5ca4521ce3..e5efdfcd02 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -43,7 +43,7 @@ class FederationRateLimiter(object): self.ratelimiters = collections.defaultdict(new_limiter) def ratelimit(self, host): - """Used to ratelimit an incoming request from given host + """Used to ratelimit an incoming request from a given host Example usage: diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index dbdd427cac..d580e729c5 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -1,39 +1,97 @@ -from synapse.api.ratelimiting import Ratelimiter +from synapse.api.ratelimiting import LimitExceededError, Ratelimiter from tests import unittest class TestRatelimiter(unittest.TestCase): - def test_allowed(self): - limiter = Ratelimiter() - allowed, time_allowed = limiter.can_do_action( - key="test_id", time_now_s=0, rate_hz=0.1, burst_count=1 - ) + def test_allowed_via_can_do_action(self): + limiter = Ratelimiter(clock=None, rate_hz=0.1, burst_count=1) + allowed, time_allowed = 
limiter.can_do_action(key="test_id", _time_now_s=0) self.assertTrue(allowed) self.assertEquals(10.0, time_allowed) - allowed, time_allowed = limiter.can_do_action( - key="test_id", time_now_s=5, rate_hz=0.1, burst_count=1 - ) + allowed, time_allowed = limiter.can_do_action(key="test_id", _time_now_s=5) self.assertFalse(allowed) self.assertEquals(10.0, time_allowed) - allowed, time_allowed = limiter.can_do_action( - key="test_id", time_now_s=10, rate_hz=0.1, burst_count=1 - ) + allowed, time_allowed = limiter.can_do_action(key="test_id", _time_now_s=10) self.assertTrue(allowed) self.assertEquals(20.0, time_allowed) - def test_pruning(self): - limiter = Ratelimiter() + def test_allowed_via_ratelimit(self): + limiter = Ratelimiter(clock=None, rate_hz=0.1, burst_count=1) + + # Shouldn't raise + limiter.ratelimit(key="test_id", _time_now_s=0) + + # Should raise + with self.assertRaises(LimitExceededError) as context: + limiter.ratelimit(key="test_id", _time_now_s=5) + self.assertEqual(context.exception.retry_after_ms, 5000) + + # Shouldn't raise + limiter.ratelimit(key="test_id", _time_now_s=10) + + def test_allowed_via_can_do_action_and_overriding_parameters(self): + """Test that we can override options of can_do_action that would otherwise fail + an action + """ + # Create a Ratelimiter with a very low allowed rate_hz and burst_count + limiter = Ratelimiter(clock=None, rate_hz=0.1, burst_count=1) + + # First attempt should be allowed + allowed, time_allowed = limiter.can_do_action(("test_id",), _time_now_s=0,) + self.assertTrue(allowed) + self.assertEqual(10.0, time_allowed) + + # Second attempt, 1s later, will fail + allowed, time_allowed = limiter.can_do_action(("test_id",), _time_now_s=1,) + self.assertFalse(allowed) + self.assertEqual(10.0, time_allowed) + + # But, if we allow 10 actions/sec for this request, we should be allowed + # to continue. allowed, time_allowed = limiter.can_do_action( - key="test_id_1", time_now_s=0, rate_hz=0.1, burst_count=1 + ("test_id",), _time_now_s=1, rate_hz=10.0 ) + self.assertTrue(allowed) + self.assertEqual(1.1, time_allowed) - self.assertIn("test_id_1", limiter.message_counts) - + # Similarly if we allow a burst of 10 actions allowed, time_allowed = limiter.can_do_action( - key="test_id_2", time_now_s=10, rate_hz=0.1, burst_count=1 + ("test_id",), _time_now_s=1, burst_count=10 ) + self.assertTrue(allowed) + self.assertEqual(1.0, time_allowed) + + def test_allowed_via_ratelimit_and_overriding_parameters(self): + """Test that we can override options of the ratelimit method that would otherwise + fail an action + """ + # Create a Ratelimiter with a very low allowed rate_hz and burst_count + limiter = Ratelimiter(clock=None, rate_hz=0.1, burst_count=1) + + # First attempt should be allowed + limiter.ratelimit(key=("test_id",), _time_now_s=0) + + # Second attempt, 1s later, will fail + with self.assertRaises(LimitExceededError) as context: + limiter.ratelimit(key=("test_id",), _time_now_s=1) + self.assertEqual(context.exception.retry_after_ms, 9000) + + # But, if we allow 10 actions/sec for this request, we should be allowed + # to continue. 
+ limiter.ratelimit(key=("test_id",), _time_now_s=1, rate_hz=10.0) + + # Similarly if we allow a burst of 10 actions + limiter.ratelimit(key=("test_id",), _time_now_s=1, burst_count=10) + + def test_pruning(self): + limiter = Ratelimiter(clock=None, rate_hz=0.1, burst_count=1) + limiter.can_do_action(key="test_id_1", _time_now_s=0) + + self.assertIn("test_id_1", limiter.actions) + + limiter.can_do_action(key="test_id_2", _time_now_s=10) - self.assertNotIn("test_id_1", limiter.message_counts) + self.assertNotIn("test_id_1", limiter.actions) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 8aa56f1496..29dd7d9c6e 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -14,7 +14,7 @@ # limitations under the License. -from mock import Mock, NonCallableMock +from mock import Mock from twisted.internet import defer @@ -55,12 +55,8 @@ class ProfileTestCase(unittest.TestCase): federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, - ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.can_do_action.return_value = (True, 0) - self.store = hs.get_datastore() self.frank = UserID.from_string("@1234ABCD:test") diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 32cb04645f..56497b8476 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from mock import Mock, NonCallableMock +from mock import Mock from tests.replication._base import BaseStreamTestCase @@ -21,12 +21,7 @@ from tests.replication._base import BaseStreamTestCase class BaseSlavedStoreTestCase(BaseStreamTestCase): def make_homeserver(self, reactor, clock): - hs = self.setup_test_homeserver( - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["can_do_action"]), - ) - - hs.get_ratelimiter().can_do_action.return_value = (True, 0) + hs = self.setup_test_homeserver(federation_client=Mock()) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index b54b06482b..f75520877f 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -15,7 +15,7 @@ """ Tests REST events for /events paths.""" -from mock import Mock, NonCallableMock +from mock import Mock import synapse.rest.admin from synapse.rest.client.v1 import events, login, room @@ -40,11 +40,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): config["enable_registration"] = True config["auto_join_rooms"] = [] - hs = self.setup_test_homeserver( - config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) - ) - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.can_do_action.return_value = (True, 0) + hs = self.setup_test_homeserver(config=config) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index 0f0f7ca72d..9033f09fd2 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -29,7 +29,6 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): ] def make_homeserver(self, reactor, clock): - self.hs = self.setup_test_homeserver() self.hs.config.enable_registration = True self.hs.config.registrations_require_3pid = [] @@ -38,10 +37,20 @@ class 
LoginRestServletTestCase(unittest.HomeserverTestCase): return self.hs + @override_config( + { + "rc_login": { + "address": {"per_second": 0.17, "burst_count": 5}, + # Prevent the account login ratelimiter from raising first + # + # This is normally covered by the default test homeserver config + # which sets these values to 10000, but as we're overriding the entire + # rc_login dict here, we need to set this manually as well + "account": {"per_second": 10000, "burst_count": 10000}, + } + } + ) def test_POST_ratelimiting_per_address(self): - self.hs.config.rc_login_address.burst_count = 5 - self.hs.config.rc_login_address.per_second = 0.17 - # Create different users so we're sure not to be bothered by the per-user # ratelimiter. for i in range(0, 6): @@ -80,10 +89,20 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) + @override_config( + { + "rc_login": { + "account": {"per_second": 0.17, "burst_count": 5}, + # Prevent the address login ratelimiter from raising first + # + # This is normally covered by the default test homeserver config + # which sets these values to 10000, but as we're overriding the entire + # rc_login dict here, we need to set this manually as well + "address": {"per_second": 10000, "burst_count": 10000}, + } + } + ) def test_POST_ratelimiting_per_account(self): - self.hs.config.rc_login_account.burst_count = 5 - self.hs.config.rc_login_account.per_second = 0.17 - self.register_user("kermit", "monkey") for i in range(0, 6): @@ -119,10 +138,20 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) + @override_config( + { + "rc_login": { + # Prevent the address login ratelimiter from raising first + # + # This is normally covered by the default test homeserver config + # which sets these values to 10000, but as we're overriding the entire + # rc_login dict here, we need to set this manually as well + "address": {"per_second": 10000, "burst_count": 10000}, + "failed_attempts": {"per_second": 0.17, "burst_count": 5}, + } + } + ) def test_POST_ratelimiting_per_account_failed_attempts(self): - self.hs.config.rc_login_failed_attempts.burst_count = 5 - self.hs.config.rc_login_failed_attempts.per_second = 0.17 - self.register_user("kermit", "monkey") for i in range(0, 6): diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 7dd86d0c27..4886bbb401 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -20,7 +20,7 @@ import json -from mock import Mock, NonCallableMock +from mock import Mock from six.moves.urllib import parse as urlparse from twisted.internet import defer @@ -46,13 +46,8 @@ class RoomBase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.hs = self.setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["can_do_action"]), + "red", http_client=None, federation_client=Mock(), ) - self.ratelimiter = self.hs.get_ratelimiter() - self.ratelimiter.can_do_action.return_value = (True, 0) self.hs.get_federation_handler = Mock(return_value=Mock()) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 4bc3aaf02d..18260bb90e 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -16,7 +16,7 @@ """Tests REST events for /rooms paths.""" -from mock import Mock, NonCallableMock +from mock 
import Mock from twisted.internet import defer @@ -39,17 +39,11 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): hs = self.setup_test_homeserver( - "red", - http_client=None, - federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["can_do_action"]), + "red", http_client=None, federation_client=Mock(), ) self.event_source = hs.get_event_sources().sources["typing"] - self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.can_do_action.return_value = (True, 0) - hs.get_handlers().federation_handler = Mock() def get_user_by_access_token(token=None, allow_guest=False): diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 5637ce2090..7deaf5b24a 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -29,6 +29,7 @@ from synapse.rest.client.v1 import login, logout from synapse.rest.client.v2_alpha import account, account_validity, register, sync from tests import unittest +from tests.unittest import override_config class RegisterRestServletTestCase(unittest.HomeserverTestCase): @@ -146,10 +147,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Guest access is disabled") + @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}}) def test_POST_ratelimiting_guest(self): - self.hs.config.rc_registration.burst_count = 5 - self.hs.config.rc_registration.per_second = 0.17 - for i in range(0, 6): url = self.url + b"?kind=guest" request, channel = self.make_request(b"POST", url, b"{}") @@ -168,10 +167,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) + @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}}) def test_POST_ratelimiting(self): - self.hs.config.rc_registration.burst_count = 5 - self.hs.config.rc_registration.per_second = 0.17 - for i in range(0, 6): params = { "username": "kermit" + str(i), -- cgit 1.5.1
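
Taken as a whole, the refactored Ratelimiter binds its limits at construction and allows per-call overrides. A closing sketch of the post-refactor API (assuming an `hs` and `user_id` in scope; the values are illustrative):

    from synapse.api.ratelimiting import LimitExceededError, Ratelimiter

    # Config is read once, at construction, rather than on every check.
    limiter = Ratelimiter(clock=hs.get_clock(), rate_hz=0.17, burst_count=3)

    try:
        # Counts the action and raises once the burst is exhausted.
        limiter.ratelimit(key=user_id)
    except LimitExceededError as e:
        print("retry after %d ms" % e.retry_after_ms)

    # Peek without counting, or override the instance limits per call.
    allowed, next_allowed = limiter.can_do_action(key=user_id, update=False)
    limiter.ratelimit(key=user_id, rate_hz=10.0, burst_count=10)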