summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--docs/admin_api/purge_history_api.rst2
-rw-r--r--docs/workers.rst39
-rw-r--r--synapse/app/event_creator.py170
-rwxr-xr-xsynapse/app/homeserver.py4
-rw-r--r--synapse/config/workers.py8
-rw-r--r--synapse/events/snapshot.py72
-rw-r--r--synapse/handlers/message.py28
-rw-r--r--synapse/python_dependencies.py2
-rw-r--r--synapse/replication/http/__init__.py31
-rw-r--r--synapse/replication/http/send_event.py108
-rw-r--r--synapse/replication/slave/storage/events.py20
-rw-r--r--synapse/rest/client/v1/admin.py4
-rw-r--r--synapse/rest/client/v1/room.py1
-rw-r--r--synapse/state.py34
-rw-r--r--synapse/storage/appservice.py13
-rw-r--r--synapse/storage/events.py99
-rw-r--r--synapse/types.py65
17 files changed, 608 insertions, 92 deletions
diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst
index b4e5bd9d75..a3a17e9f9f 100644
--- a/docs/admin_api/purge_history_api.rst
+++ b/docs/admin_api/purge_history_api.rst
@@ -23,5 +23,5 @@ To delete local events as well, set ``delete_local_events`` in the body:
 .. code:: json
 
    {
-       "delete_local_events": True,
+       "delete_local_events": true
    }
diff --git a/docs/workers.rst b/docs/workers.rst
index b39f79058e..dee04bbf3e 100644
--- a/docs/workers.rst
+++ b/docs/workers.rst
@@ -30,17 +30,29 @@ requests made to the federation port. The caveats regarding running a
 reverse-proxy on the federation port still apply (see
 https://github.com/matrix-org/synapse/blob/master/README.rst#reverse-proxying-the-federation-port).
 
-To enable workers, you need to add a replication listener to the master synapse, e.g.::
+To enable workers, you need to add two replication listeners to the master
+synapse, e.g.::
 
     listeners:
+      # The TCP replication port
       - port: 9092
         bind_address: '127.0.0.1'
         type: replication
+      # The HTTP replication port
+      - port: 9093
+        bind_address: '127.0.0.1'
+        type: http
+        resources:
+         - names: [replication]
 
-Under **no circumstances** should this replication API listener be exposed to the
-public internet; it currently implements no authentication whatsoever and is
+Under **no circumstances** should these replication API listeners be exposed to
+the public internet; it currently implements no authentication whatsoever and is
 unencrypted.
 
+(Roughly, the TCP port is used for streaming data from the master to the
+workers, and the HTTP port for the workers to send data to the main
+synapse process.)
+
 You then create a set of configs for the various worker processes.  These
 should be worker configuration files, and should be stored in a dedicated
 subdirectory, to allow synctl to manipulate them.
@@ -52,8 +64,13 @@ You should minimise the number of overrides though to maintain a usable config.
 
 You must specify the type of worker application (``worker_app``). The currently
 available worker applications are listed below. You must also specify the
-replication endpoint that it's talking to on the main synapse process
-(``worker_replication_host`` and ``worker_replication_port``).
+replication endpoints that it's talking to on the main synapse process.
+``worker_replication_host`` should specify the host of the main synapse,
+``worker_replication_port`` should point to the TCP replication listener port and
+``worker_replication_http_port`` should point to the HTTP replication port.
+
+Currently, only the ``event_creator`` worker requires specifying
+``worker_replication_http_port``.
 
 For instance::
 
@@ -62,6 +79,7 @@ For instance::
     # The replication listener on the synapse to talk to.
     worker_replication_host: 127.0.0.1
     worker_replication_port: 9092
+    worker_replication_http_port: 9093
 
     worker_listeners:
      - type: http
@@ -207,3 +225,14 @@ the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration
 file. For example::
 
     worker_main_http_uri: http://127.0.0.1:8008
+
+
+``synapse.app.event_creator``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Handles non-state event creation. It can handle REST endpoints matching:
+
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
+
+It will create events locally and then send them on to the main synapse
+instance to be persisted and handled.
diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py
new file mode 100644
index 0000000000..b2ce399258
--- /dev/null
+++ b/synapse/app/event_creator.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import sys
+
+import synapse
+from synapse import events
+from synapse.app import _base
+from synapse.config._base import ConfigError
+from synapse.config.homeserver import HomeServerConfig
+from synapse.config.logger import setup_logging
+from synapse.crypto import context_factory
+from synapse.http.server import JsonResource
+from synapse.http.site import SynapseSite
+from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
+from synapse.replication.slave.storage._base import BaseSlavedStore
+from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
+from synapse.replication.slave.storage.client_ips import SlavedClientIpStore
+from synapse.replication.slave.storage.devices import SlavedDeviceStore
+from synapse.replication.slave.storage.events import SlavedEventStore
+from synapse.replication.slave.storage.registration import SlavedRegistrationStore
+from synapse.replication.slave.storage.room import RoomStore
+from synapse.replication.tcp.client import ReplicationClientHandler
+from synapse.rest.client.v1.room import RoomSendEventRestServlet
+from synapse.server import HomeServer
+from synapse.storage.engines import create_engine
+from synapse.util.httpresourcetree import create_resource_tree
+from synapse.util.logcontext import LoggingContext
+from synapse.util.manhole import manhole
+from synapse.util.versionstring import get_version_string
+from twisted.internet import reactor
+from twisted.web.resource import Resource
+
+logger = logging.getLogger("synapse.app.event_creator")
+
+
+class EventCreatorSlavedStore(
+    SlavedDeviceStore,
+    SlavedClientIpStore,
+    SlavedApplicationServiceStore,
+    SlavedEventStore,
+    SlavedRegistrationStore,
+    RoomStore,
+    BaseSlavedStore,
+):
+    pass
+
+
+class EventCreatorServer(HomeServer):
+    def setup(self):
+        logger.info("Setting up.")
+        self.datastore = EventCreatorSlavedStore(self.get_db_conn(), self)
+        logger.info("Finished setting up.")
+
+    def _listen_http(self, listener_config):
+        port = listener_config["port"]
+        bind_addresses = listener_config["bind_addresses"]
+        site_tag = listener_config.get("tag", port)
+        resources = {}
+        for res in listener_config["resources"]:
+            for name in res["names"]:
+                if name == "metrics":
+                    resources[METRICS_PREFIX] = MetricsResource(self)
+                elif name == "client":
+                    resource = JsonResource(self, canonical_json=False)
+                    RoomSendEventRestServlet(self).register(resource)
+                    resources.update({
+                        "/_matrix/client/r0": resource,
+                        "/_matrix/client/unstable": resource,
+                        "/_matrix/client/v2_alpha": resource,
+                        "/_matrix/client/api/v1": resource,
+                    })
+
+        root_resource = create_resource_tree(resources, Resource())
+
+        _base.listen_tcp(
+            bind_addresses,
+            port,
+            SynapseSite(
+                "synapse.access.http.%s" % (site_tag,),
+                site_tag,
+                listener_config,
+                root_resource,
+            )
+        )
+
+        logger.info("Synapse event creator now listening on port %d", port)
+
+    def start_listening(self, listeners):
+        for listener in listeners:
+            if listener["type"] == "http":
+                self._listen_http(listener)
+            elif listener["type"] == "manhole":
+                _base.listen_tcp(
+                    listener["bind_addresses"],
+                    listener["port"],
+                    manhole(
+                        username="matrix",
+                        password="rabbithole",
+                        globals={"hs": self},
+                    )
+                )
+            else:
+                logger.warn("Unrecognized listener type: %s", listener["type"])
+
+        self.get_tcp_replication().start_replication(self)
+
+    def build_tcp_replication(self):
+        return ReplicationClientHandler(self.get_datastore())
+
+
+def start(config_options):
+    try:
+        config = HomeServerConfig.load_config(
+            "Synapse event creator", config_options
+        )
+    except ConfigError as e:
+        sys.stderr.write("\n" + e.message + "\n")
+        sys.exit(1)
+
+    assert config.worker_app == "synapse.app.event_creator"
+
+    assert config.worker_replication_http_port is not None
+
+    setup_logging(config, use_worker_options=True)
+
+    events.USE_FROZEN_DICTS = config.use_frozen_dicts
+
+    database_engine = create_engine(config.database_config)
+
+    tls_server_context_factory = context_factory.ServerContextFactory(config)
+
+    ss = EventCreatorServer(
+        config.server_name,
+        db_config=config.database_config,
+        tls_server_context_factory=tls_server_context_factory,
+        config=config,
+        version_string="Synapse/" + get_version_string(synapse),
+        database_engine=database_engine,
+    )
+
+    ss.setup()
+    ss.get_handlers()
+    ss.start_listening(config.worker_listeners)
+
+    def start():
+        ss.get_state_handler().start_caching()
+        ss.get_datastore().start_profiling()
+
+    reactor.callWhenRunning(start)
+
+    _base.start_worker_reactor("synapse-event-creator", config)
+
+
+if __name__ == '__main__':
+    with LoggingContext("main"):
+        start(sys.argv[1:])
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index cb82a415a6..e375f2bbcf 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -38,6 +38,7 @@ from synapse.metrics import register_memory_metrics
 from synapse.metrics.resource import METRICS_PREFIX, MetricsResource
 from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \
     check_requirements
+from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX
 from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
 from synapse.rest import ClientRestResource
 from synapse.rest.key.v1.server_key_resource import LocalKey
@@ -219,6 +220,9 @@ class SynapseHomeServer(HomeServer):
         if name == "metrics" and self.get_config().enable_metrics:
             resources[METRICS_PREFIX] = MetricsResource(self)
 
+        if name == "replication":
+            resources[REPLICATION_PREFIX] = ReplicationRestResource(self)
+
         return resources
 
     def start_listening(self):
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 4b6884918d..80baf0ce0e 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -33,8 +33,16 @@ class WorkerConfig(Config):
         self.worker_pid_file = config.get("worker_pid_file")
         self.worker_log_file = config.get("worker_log_file")
         self.worker_log_config = config.get("worker_log_config")
+
+        # The host used to connect to the main synapse
         self.worker_replication_host = config.get("worker_replication_host", None)
+
+        # The port on the main synapse for TCP replication
         self.worker_replication_port = config.get("worker_replication_port", None)
+
+        # The port on the main synapse for HTTP replication endpoint
+        self.worker_replication_http_port = config.get("worker_replication_http_port")
+
         self.worker_name = config.get("worker_name", self.worker_app)
 
         self.worker_main_http_uri = config.get("worker_main_http_uri", None)
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index 87e3fe7b97..7b80444f73 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -14,6 +14,9 @@
 # limitations under the License.
 
 
+from frozendict import frozendict
+
+
 class EventContext(object):
     """
     Attributes:
@@ -73,3 +76,72 @@ class EventContext(object):
         self.prev_state_events = None
 
         self.app_service = None
+
+    def serialize(self):
+        """Converts self to a type that can be serialized as JSON, and then
+        deserialized by `deserialize`
+
+        Returns:
+            dict
+        """
+        return {
+            "current_state_ids": _encode_state_dict(self.current_state_ids),
+            "prev_state_ids": _encode_state_dict(self.prev_state_ids),
+            "state_group": self.state_group,
+            "rejected": self.rejected,
+            "push_actions": self.push_actions,
+            "prev_group": self.prev_group,
+            "delta_ids": _encode_state_dict(self.delta_ids),
+            "prev_state_events": self.prev_state_events,
+            "app_service_id": self.app_service.id if self.app_service else None
+        }
+
+    @staticmethod
+    def deserialize(store, input):
+        """Converts a dict that was produced by `serialize` back into a
+        EventContext.
+
+        Args:
+            store (DataStore): Used to convert AS ID to AS object
+            input (dict): A dict produced by `serialize`
+
+        Returns:
+            EventContext
+        """
+        context = EventContext()
+        context.current_state_ids = _decode_state_dict(input["current_state_ids"])
+        context.prev_state_ids = _decode_state_dict(input["prev_state_ids"])
+        context.state_group = input["state_group"]
+        context.rejected = input["rejected"]
+        context.push_actions = input["push_actions"]
+        context.prev_group = input["prev_group"]
+        context.delta_ids = _decode_state_dict(input["delta_ids"])
+        context.prev_state_events = input["prev_state_events"]
+
+        app_service_id = input["app_service_id"]
+        if app_service_id:
+            context.app_service = store.get_app_service_by_id(app_service_id)
+
+        return context
+
+
+def _encode_state_dict(state_dict):
+    """Since dicts of (type, state_key) -> event_id cannot be serialized in
+    JSON we need to convert them to a form that can.
+    """
+    if state_dict is None:
+        return None
+
+    return [
+        (etype, state_key, v)
+        for (etype, state_key), v in state_dict.iteritems()
+    ]
+
+
+def _decode_state_dict(input):
+    """Decodes a state dict encoded using `_encode_state_dict` above
+    """
+    if input is None:
+        return None
+
+    return frozendict({(etype, state_key,): v for etype, state_key, v in input})
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4e9752ccbd..1c3ac03f20 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -28,6 +28,7 @@ from synapse.util.logcontext import preserve_fn
 from synapse.util.metrics import measure_func
 from synapse.util.frozenutils import unfreeze
 from synapse.visibility import filter_events_for_client
+from synapse.replication.http.send_event import send_event_to_master
 
 from ._base import BaseHandler
 
@@ -312,6 +313,9 @@ class EventCreationHandler(object):
         self.server_name = hs.hostname
         self.ratelimiter = hs.get_ratelimiter()
         self.notifier = hs.get_notifier()
+        self.config = hs.config
+
+        self.http_client = hs.get_simple_http_client()
 
         # This is only used to get at ratelimit function, and maybe_kick_guest_users
         self.base_handler = BaseHandler(hs)
@@ -419,12 +423,6 @@ class EventCreationHandler(object):
             ratelimit=ratelimit,
         )
 
-        if event.type == EventTypes.Message:
-            presence = self.hs.get_presence_handler()
-            # We don't want to block sending messages on any presence code. This
-            # matters as sometimes presence code can take a while.
-            preserve_fn(presence.bump_presence_active_time)(user)
-
     @defer.inlineCallbacks
     def deduplicate_state_event(self, event, context):
         """
@@ -559,6 +557,18 @@ class EventCreationHandler(object):
     ):
         # We now need to go and hit out to wherever we need to hit out to.
 
+        # If we're a worker we need to hit out to the master.
+        if self.config.worker_app:
+            yield send_event_to_master(
+                self.http_client,
+                host=self.config.worker_replication_host,
+                port=self.config.worker_replication_http_port,
+                requester=requester,
+                event=event,
+                context=context,
+            )
+            return
+
         if ratelimit:
             yield self.base_handler.ratelimit(requester)
 
@@ -692,3 +702,9 @@ class EventCreationHandler(object):
             )
 
         preserve_fn(_notify)()
+
+        if event.type == EventTypes.Message:
+            presence = self.hs.get_presence_handler()
+            # We don't want to block sending messages on any presence code. This
+            # matters as sometimes presence code can take a while.
+            preserve_fn(presence.bump_presence_active_time)(requester.user)
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 7052333c19..97b631e60d 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -36,7 +36,7 @@ REQUIREMENTS = {
     "pydenticon": ["pydenticon"],
     "ujson": ["ujson"],
     "blist": ["blist"],
-    "pysaml2>=3.0.0,<4.0.0": ["saml2>=3.0.0,<4.0.0"],
+    "pysaml2>=3.0.0": ["saml2>=3.0.0"],
     "pymacaroons-pynacl": ["pymacaroons"],
     "msgpack-python>=0.3.0": ["msgpack"],
     "phonenumbers>=8.2.0": ["phonenumbers"],
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
new file mode 100644
index 0000000000..b378b41646
--- /dev/null
+++ b/synapse/replication/http/__init__.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import send_event
+
+from synapse.http.server import JsonResource
+
+
+REPLICATION_PREFIX = "/_synapse/replication"
+
+
+class ReplicationRestResource(JsonResource):
+    def __init__(self, hs):
+        JsonResource.__init__(self, hs, canonical_json=False)
+        self.register_servlets(hs)
+
+    def register_servlets(self, hs):
+        send_event.register_servlets(hs, self)
diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py
new file mode 100644
index 0000000000..ff9b9d2f10
--- /dev/null
+++ b/synapse/replication/http/send_event.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+
+from synapse.events import FrozenEvent
+from synapse.events.snapshot import EventContext
+from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.util.metrics import Measure
+from synapse.types import Requester
+
+import logging
+import re
+
+logger = logging.getLogger(__name__)
+
+
+def send_event_to_master(client, host, port, requester, event, context):
+    """Send event to be handled on the master
+
+    Args:
+        client (SimpleHttpClient)
+        host (str): host of master
+        port (int): port on master listening for HTTP replication
+        requester (Requester)
+        event (FrozenEvent)
+        context (EventContext)
+    """
+    uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,)
+
+    payload = {
+        "event": event.get_pdu_json(),
+        "internal_metadata": event.internal_metadata.get_dict(),
+        "rejected_reason": event.rejected_reason,
+        "context": context.serialize(),
+        "requester": requester.serialize(),
+    }
+
+    return client.post_json_get_json(uri, payload)
+
+
+class ReplicationSendEventRestServlet(RestServlet):
+    """Handles events newly created on workers, including persisting and
+    notifying.
+
+    The API looks like:
+
+        POST /_synapse/replication/send_event
+
+        {
+            "event": { .. serialized event .. },
+            "internal_metadata": { .. serialized internal_metadata .. },
+            "rejected_reason": ..,   // The event.rejected_reason field
+            "context": { .. serialized event context .. },
+            "requester": { .. serialized requester .. },
+        }
+    """
+    PATTERNS = [re.compile("^/_synapse/replication/send_event$")]
+
+    def __init__(self, hs):
+        super(ReplicationSendEventRestServlet, self).__init__()
+
+        self.event_creation_handler = hs.get_event_creation_handler()
+        self.store = hs.get_datastore()
+        self.clock = hs.get_clock()
+
+    @defer.inlineCallbacks
+    def on_POST(self, request):
+        with Measure(self.clock, "repl_send_event_parse"):
+            content = parse_json_object_from_request(request)
+
+            event_dict = content["event"]
+            internal_metadata = content["internal_metadata"]
+            rejected_reason = content["rejected_reason"]
+            event = FrozenEvent(event_dict, internal_metadata, rejected_reason)
+
+            requester = Requester.deserialize(self.store, content["requester"])
+            context = EventContext.deserialize(self.store, content["context"])
+
+        if requester.user:
+            request.authenticated_entity = requester.user.to_string()
+
+        logger.info(
+            "Got event to send with ID: %s into room: %s",
+            event.event_id, event.room_id,
+        )
+
+        yield self.event_creation_handler.handle_new_client_event(
+            requester, event, context,
+        )
+
+        defer.returnValue((200, {}))
+
+
+def register_servlets(hs, http_server):
+    ReplicationSendEventRestServlet(hs).register(http_server)
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
index 8acb5df0f3..f8c164b48b 100644
--- a/synapse/replication/slave/storage/events.py
+++ b/synapse/replication/slave/storage/events.py
@@ -21,6 +21,7 @@ from synapse.storage.event_push_actions import EventPushActionsStore
 from synapse.storage.roommember import RoomMemberStore
 from synapse.storage.state import StateGroupWorkerStore
 from synapse.storage.stream import StreamStore
+from synapse.storage.signatures import SignatureStore
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 from ._base import BaseSlavedStore
 from ._slaved_id_tracker import SlavedIdTracker
@@ -170,6 +171,25 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore):
     get_federation_out_pos = DataStore.get_federation_out_pos.__func__
     update_federation_out_pos = DataStore.update_federation_out_pos.__func__
 
+    get_latest_event_ids_and_hashes_in_room = (
+        DataStore.get_latest_event_ids_and_hashes_in_room.__func__
+    )
+    _get_latest_event_ids_and_hashes_in_room = (
+        DataStore._get_latest_event_ids_and_hashes_in_room.__func__
+    )
+    _get_event_reference_hashes_txn = (
+        DataStore._get_event_reference_hashes_txn.__func__
+    )
+    add_event_hashes = (
+        DataStore.add_event_hashes.__func__
+    )
+    get_event_reference_hashes = (
+        SignatureStore.__dict__["get_event_reference_hashes"]
+    )
+    get_event_reference_hash = (
+        SignatureStore.__dict__["get_event_reference_hash"]
+    )
+
     def stream_positions(self):
         result = super(SlavedEventStore, self).stream_positions()
         result["events"] = self._stream_id_gen.get_current_token()
diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py
index 2ad486c67d..6073cc6fa2 100644
--- a/synapse/rest/client/v1/admin.py
+++ b/synapse/rest/client/v1/admin.py
@@ -131,9 +131,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet):
 
         body = parse_json_object_from_request(request, allow_empty_body=True)
 
-        delete_local_events = bool(
-            body.get("delete_local_history", False)
-        )
+        delete_local_events = bool(body.get("delete_local_events", False))
 
         yield self.handlers.message_handler.purge_history(
             room_id, event_id,
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index fbb2fc36e4..817fd47842 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -186,7 +186,6 @@ class RoomSendEventRestServlet(ClientV1RestServlet):
 
     def __init__(self, hs):
         super(RoomSendEventRestServlet, self).__init__(hs)
-        self.handlers = hs.get_handlers()
         self.event_creation_hander = hs.get_event_creation_handler()
 
     def register(self, http_server):
diff --git a/synapse/state.py b/synapse/state.py
index cc93bbcb6b..932f602508 100644
--- a/synapse/state.py
+++ b/synapse/state.py
@@ -350,7 +350,7 @@ class StateHandler(object):
             ))
 
         result = yield self._state_resolution_handler.resolve_state_groups(
-            room_id, state_groups_ids, self._state_map_factory,
+            room_id, state_groups_ids, None, self._state_map_factory,
         )
         defer.returnValue(result)
 
@@ -413,7 +413,9 @@ class StateResolutionHandler(object):
 
     @defer.inlineCallbacks
     @log_function
-    def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory):
+    def resolve_state_groups(
+        self, room_id, state_groups_ids, event_map, state_map_factory,
+    ):
         """Resolves conflicts between a set of state groups
 
         Always generates a new state group (unless we hit the cache), so should
@@ -425,6 +427,14 @@ class StateResolutionHandler(object):
                  map from state group id to the state in that state group
                 (where 'state' is a map from state key to event id)
 
+            event_map(dict[str,FrozenEvent]|None):
+                a dict from event_id to event, for any events that we happen to
+                have in flight (eg, those currently being persisted). This will be
+                used as a starting point fof finding the state we need; any missing
+                events will be requested via state_map_factory.
+
+                If None, all events will be fetched via state_map_factory.
+
         Returns:
             Deferred[_StateCacheEntry]: resolved state
         """
@@ -465,6 +475,7 @@ class StateResolutionHandler(object):
                 with Measure(self.clock, "state._resolve_events"):
                     new_state = yield resolve_events_with_factory(
                         state_groups_ids.values(),
+                        event_map=event_map,
                         state_map_factory=state_map_factory,
                     )
             else:
@@ -597,11 +608,20 @@ def _seperate(state_sets):
 
 
 @defer.inlineCallbacks
-def resolve_events_with_factory(state_sets, state_map_factory):
+def resolve_events_with_factory(state_sets, event_map, state_map_factory):
     """
     Args:
         state_sets(list): List of dicts of (type, state_key) -> event_id,
             which are the different state groups to resolve.
+
+        event_map(dict[str,FrozenEvent]|None):
+            a dict from event_id to event, for any events that we happen to
+            have in flight (eg, those currently being persisted). This will be
+            used as a starting point fof finding the state we need; any missing
+            events will be requested via state_map_factory.
+
+            If None, all events will be fetched via state_map_factory.
+
         state_map_factory(func): will be called
             with a list of event_ids that are needed, and should return with
             a Deferred of dict of event_id to event.
@@ -622,12 +642,16 @@ def resolve_events_with_factory(state_sets, state_map_factory):
         for event_ids in conflicted_state.itervalues()
         for event_id in event_ids
     )
+    if event_map is not None:
+        needed_events -= set(event_map.iterkeys())
 
     logger.info("Asking for %d conflicted events", len(needed_events))
 
     # dict[str, FrozenEvent]: a map from state event id to event. Only includes
-    # the state events which are in conflict.
+    # the state events which are in conflict (and those in event_map)
     state_map = yield state_map_factory(needed_events)
+    if event_map is not None:
+        state_map.update(event_map)
 
     # get the ids of the auth events which allow us to authenticate the
     # conflicted state, picking only from the unconflicting state.
@@ -639,6 +663,8 @@ def resolve_events_with_factory(state_sets, state_map_factory):
 
     new_needed_events = set(auth_events.itervalues())
     new_needed_events -= needed_events
+    if event_map is not None:
+        new_needed_events -= set(event_map.iterkeys())
 
     logger.info("Asking for %d auth events", len(new_needed_events))
 
diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py
index d8c84b7141..79673b4273 100644
--- a/synapse/storage/appservice.py
+++ b/synapse/storage/appservice.py
@@ -99,6 +99,19 @@ class ApplicationServiceStore(SQLBaseStore):
                 return service
         return None
 
+    def get_app_service_by_id(self, as_id):
+        """Get the application service with the given appservice ID.
+
+        Args:
+            as_id (str): The application service ID.
+        Returns:
+            synapse.appservice.ApplicationService or None.
+        """
+        for service in self.services_cache:
+            if service.id == as_id:
+                return service
+        return None
+
     def get_app_service_rooms(self, service):
         """Get a list of RoomsForUser for this application service.
 
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 86a7c5920d..bbb6aa992c 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -27,7 +27,6 @@ from synapse.util.logutils import log_function
 from synapse.util.metrics import Measure
 from synapse.api.constants import EventTypes
 from synapse.api.errors import SynapseError
-from synapse.state import resolve_events_with_factory
 from synapse.util.caches.descriptors import cached
 from synapse.types import get_domain_from_id
 
@@ -237,6 +236,8 @@ class EventsStore(SQLBaseStore):
 
         self._event_persist_queue = _EventPeristenceQueue()
 
+        self._state_resolution_handler = hs.get_state_resolution_handler()
+
     def persist_events(self, events_and_contexts, backfilled=False):
         """
         Write events to the database
@@ -402,6 +403,7 @@ class EventsStore(SQLBaseStore):
                                 "Calculating state delta for room %s", room_id,
                             )
                             current_state = yield self._get_new_state_after_events(
+                                room_id,
                                 ev_ctx_rm, new_latest_event_ids,
                             )
                             if current_state is not None:
@@ -487,11 +489,14 @@ class EventsStore(SQLBaseStore):
         defer.returnValue(new_latest_event_ids)
 
     @defer.inlineCallbacks
-    def _get_new_state_after_events(self, events_context, new_latest_event_ids):
+    def _get_new_state_after_events(self, room_id, events_context, new_latest_event_ids):
         """Calculate the current state dict after adding some new events to
         a room
 
         Args:
+            room_id (str):
+                room to which the events are being added. Used for logging etc
+
             events_context (list[(EventBase, EventContext)]):
                 events and contexts which are being added to the room
 
@@ -503,8 +508,12 @@ class EventsStore(SQLBaseStore):
                 None if there are no changes to the room state, or
                 a dict of (type, state_key) -> event_id].
         """
-        state_sets = []
-        state_groups = set()
+
+        if not new_latest_event_ids:
+            defer.returnValue({})
+
+        # map from state_group to ((type, key) -> event_id) state map
+        state_groups = {}
         missing_event_ids = []
         was_updated = False
         for event_id in new_latest_event_ids:
@@ -515,16 +524,19 @@ class EventsStore(SQLBaseStore):
                     if ctx.current_state_ids is None:
                         raise Exception("Unknown current state")
 
+                    if ctx.state_group is None:
+                        # I don't think this can happen, but let's double-check
+                        raise Exception(
+                            "Context for new extremity event %s has no state "
+                            "group" % (event_id, ),
+                        )
+
                     # If we've already seen the state group don't bother adding
                     # it to the state sets again
                     if ctx.state_group not in state_groups:
-                        state_sets.append(ctx.current_state_ids)
+                        state_groups[ctx.state_group] = ctx.current_state_ids
                         if ctx.delta_ids or hasattr(ev, "state_key"):
                             was_updated = True
-                        if ctx.state_group:
-                            # Add this as a seen state group (if it has a state
-                            # group)
-                            state_groups.add(ctx.state_group)
                     break
             else:
                 # If we couldn't find it, then we'll need to pull
@@ -532,65 +544,37 @@ class EventsStore(SQLBaseStore):
                 was_updated = True
                 missing_event_ids.append(event_id)
 
+        if not was_updated:
+            return
+
         if missing_event_ids:
             # Now pull out the state for any missing events from DB
             event_to_groups = yield self._get_state_group_for_events(
                 missing_event_ids,
             )
 
-            groups = set(event_to_groups.itervalues()) - state_groups
+            groups = set(event_to_groups.itervalues()) - set(state_groups.iterkeys())
 
             if groups:
                 group_to_state = yield self._get_state_for_groups(groups)
-                state_sets.extend(group_to_state.itervalues())
+                state_groups.update(group_to_state)
 
-        if not new_latest_event_ids:
-            defer.returnValue({})
-        elif was_updated:
-            if len(state_sets) == 1:
-                # If there is only one state set, then we know what the current
-                # state is.
-                defer.returnValue(state_sets[0])
-            else:
-                # We work out the current state by passing the state sets to the
-                # state resolution algorithm. It may ask for some events, including
-                # the events we have yet to persist, so we need a slightly more
-                # complicated event lookup function than simply looking the events
-                # up in the db.
-
-                logger.info(
-                    "Resolving state with %i state sets", len(state_sets),
-                )
+        if len(state_groups) == 1:
+            # If there is only one state group, then we know what the current
+            # state is.
+            defer.returnValue(state_groups.values()[0])
 
-                events_map = {ev.event_id: ev for ev, _ in events_context}
-
-                @defer.inlineCallbacks
-                def get_events(ev_ids):
-                    # We get the events by first looking at the list of events we
-                    # are trying to persist, and then fetching the rest from the DB.
-                    db = []
-                    to_return = {}
-                    for ev_id in ev_ids:
-                        ev = events_map.get(ev_id, None)
-                        if ev:
-                            to_return[ev_id] = ev
-                        else:
-                            db.append(ev_id)
-
-                    if db:
-                        evs = yield self.get_events(
-                            ev_ids, get_prev_content=False, check_redacted=False,
-                        )
-                        to_return.update(evs)
-                    defer.returnValue(to_return)
+        def get_events(ev_ids):
+            return self.get_events(
+                ev_ids, get_prev_content=False, check_redacted=False,
+            )
+        events_map = {ev.event_id: ev for ev, _ in events_context}
+        logger.debug("calling resolve_state_groups from preserve_events")
+        res = yield self._state_resolution_handler.resolve_state_groups(
+            room_id, state_groups, events_map, get_events
+        )
 
-                current_state = yield resolve_events_with_factory(
-                    state_sets,
-                    state_map_factory=get_events,
-                )
-                defer.returnValue(current_state)
-        else:
-            return
+        defer.returnValue(res.state)
 
     @defer.inlineCallbacks
     def _calculate_state_delta(self, room_id, current_state):
@@ -2303,8 +2287,7 @@ class EventsStore(SQLBaseStore):
             "event_signatures",
             "rejections",
         ):
-            logger.info("[purge] removing remote non-state events from %s",
-                        table)
+            logger.info("[purge] removing events from %s", table)
 
             txn.executemany(
                 "DELETE FROM %s WHERE event_id = ?" % (table,),
diff --git a/synapse/types.py b/synapse/types.py
index 6e76c016d9..7cb24cecb2 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -19,20 +19,59 @@ from synapse.api.errors import SynapseError
 from collections import namedtuple
 
 
-Requester = namedtuple("Requester", [
+class Requester(namedtuple("Requester", [
     "user", "access_token_id", "is_guest", "device_id", "app_service",
-])
-"""
-Represents the user making a request
-
-Attributes:
-    user (UserID):  id of the user making the request
-    access_token_id (int|None):  *ID* of the access token used for this
-        request, or None if it came via the appservice API or similar
-    is_guest (bool):  True if the user making this request is a guest user
-    device_id (str|None):  device_id which was set at authentication time
-    app_service (ApplicationService|None):  the AS requesting on behalf of the user
-"""
+])):
+    """
+    Represents the user making a request
+
+    Attributes:
+        user (UserID):  id of the user making the request
+        access_token_id (int|None):  *ID* of the access token used for this
+            request, or None if it came via the appservice API or similar
+        is_guest (bool):  True if the user making this request is a guest user
+        device_id (str|None):  device_id which was set at authentication time
+        app_service (ApplicationService|None):  the AS requesting on behalf of the user
+    """
+
+    def serialize(self):
+        """Converts self to a type that can be serialized as JSON, and then
+        deserialized by `deserialize`
+
+        Returns:
+            dict
+        """
+        return {
+            "user_id": self.user.to_string(),
+            "access_token_id": self.access_token_id,
+            "is_guest": self.is_guest,
+            "device_id": self.device_id,
+            "app_server_id": self.app_service.id if self.app_service else None,
+        }
+
+    @staticmethod
+    def deserialize(store, input):
+        """Converts a dict that was produced by `serialize` back into a
+        Requester.
+
+        Args:
+            store (DataStore): Used to convert AS ID to AS object
+            input (dict): A dict produced by `serialize`
+
+        Returns:
+            Requester
+        """
+        appservice = None
+        if input["app_server_id"]:
+            appservice = store.get_app_service_by_id(input["app_server_id"])
+
+        return Requester(
+            user=UserID.from_string(input["user_id"]),
+            access_token_id=input["access_token_id"],
+            is_guest=input["is_guest"],
+            device_id=input["device_id"],
+            app_service=appservice,
+        )
 
 
 def create_requester(user_id, access_token_id=None, is_guest=False,