diff options
-rw-r--r-- | docs/admin_api/purge_history_api.rst | 2 | ||||
-rw-r--r-- | docs/workers.rst | 39 | ||||
-rw-r--r-- | synapse/app/event_creator.py | 170 | ||||
-rwxr-xr-x | synapse/app/homeserver.py | 4 | ||||
-rw-r--r-- | synapse/config/workers.py | 8 | ||||
-rw-r--r-- | synapse/events/snapshot.py | 72 | ||||
-rw-r--r-- | synapse/handlers/message.py | 28 | ||||
-rw-r--r-- | synapse/python_dependencies.py | 2 | ||||
-rw-r--r-- | synapse/replication/http/__init__.py | 31 | ||||
-rw-r--r-- | synapse/replication/http/send_event.py | 108 | ||||
-rw-r--r-- | synapse/replication/slave/storage/events.py | 20 | ||||
-rw-r--r-- | synapse/rest/client/v1/admin.py | 4 | ||||
-rw-r--r-- | synapse/rest/client/v1/room.py | 1 | ||||
-rw-r--r-- | synapse/state.py | 34 | ||||
-rw-r--r-- | synapse/storage/appservice.py | 13 | ||||
-rw-r--r-- | synapse/storage/events.py | 99 | ||||
-rw-r--r-- | synapse/types.py | 65 |
17 files changed, 608 insertions, 92 deletions
diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index b4e5bd9d75..a3a17e9f9f 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -23,5 +23,5 @@ To delete local events as well, set ``delete_local_events`` in the body: .. code:: json { - "delete_local_events": True, + "delete_local_events": true } diff --git a/docs/workers.rst b/docs/workers.rst index b39f79058e..dee04bbf3e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -30,17 +30,29 @@ requests made to the federation port. The caveats regarding running a reverse-proxy on the federation port still apply (see https://github.com/matrix-org/synapse/blob/master/README.rst#reverse-proxying-the-federation-port). -To enable workers, you need to add a replication listener to the master synapse, e.g.:: +To enable workers, you need to add two replication listeners to the master +synapse, e.g.:: listeners: + # The TCP replication port - port: 9092 bind_address: '127.0.0.1' type: replication + # The HTTP replication port + - port: 9093 + bind_address: '127.0.0.1' + type: http + resources: + - names: [replication] -Under **no circumstances** should this replication API listener be exposed to the -public internet; it currently implements no authentication whatsoever and is +Under **no circumstances** should these replication API listeners be exposed to +the public internet; it currently implements no authentication whatsoever and is unencrypted. +(Roughly, the TCP port is used for streaming data from the master to the +workers, and the HTTP port for the workers to send data to the main +synapse process.) + You then create a set of configs for the various worker processes. These should be worker configuration files, and should be stored in a dedicated subdirectory, to allow synctl to manipulate them. @@ -52,8 +64,13 @@ You should minimise the number of overrides though to maintain a usable config. You must specify the type of worker application (``worker_app``). The currently available worker applications are listed below. You must also specify the -replication endpoint that it's talking to on the main synapse process -(``worker_replication_host`` and ``worker_replication_port``). +replication endpoints that it's talking to on the main synapse process. +``worker_replication_host`` should specify the host of the main synapse, +``worker_replication_port`` should point to the TCP replication listener port and +``worker_replication_http_port`` should point to the HTTP replication port. + +Currently, only the ``event_creator`` worker requires specifying +``worker_replication_http_port``. For instance:: @@ -62,6 +79,7 @@ For instance:: # The replication listener on the synapse to talk to. worker_replication_host: 127.0.0.1 worker_replication_port: 9092 + worker_replication_http_port: 9093 worker_listeners: - type: http @@ -207,3 +225,14 @@ the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration file. For example:: worker_main_http_uri: http://127.0.0.1:8008 + + +``synapse.app.event_creator`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Handles non-state event creation. It can handle REST endpoints matching: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + +It will create events locally and then send them on to the main synapse +instance to be persisted and handled. diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py new file mode 100644 index 0000000000..b2ce399258 --- /dev/null +++ b/synapse/app/event_creator.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import sys + +import synapse +from synapse import events +from synapse.app import _base +from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig +from synapse.config.logger import setup_logging +from synapse.crypto import context_factory +from synapse.http.server import JsonResource +from synapse.http.site import SynapseSite +from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v1.room import RoomSendEventRestServlet +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.util.httpresourcetree import create_resource_tree +from synapse.util.logcontext import LoggingContext +from synapse.util.manhole import manhole +from synapse.util.versionstring import get_version_string +from twisted.internet import reactor +from twisted.web.resource import Resource + +logger = logging.getLogger("synapse.app.event_creator") + + +class EventCreatorSlavedStore( + SlavedDeviceStore, + SlavedClientIpStore, + SlavedApplicationServiceStore, + SlavedEventStore, + SlavedRegistrationStore, + RoomStore, + BaseSlavedStore, +): + pass + + +class EventCreatorServer(HomeServer): + def setup(self): + logger.info("Setting up.") + self.datastore = EventCreatorSlavedStore(self.get_db_conn(), self) + logger.info("Finished setting up.") + + def _listen_http(self, listener_config): + port = listener_config["port"] + bind_addresses = listener_config["bind_addresses"] + site_tag = listener_config.get("tag", port) + resources = {} + for res in listener_config["resources"]: + for name in res["names"]: + if name == "metrics": + resources[METRICS_PREFIX] = MetricsResource(self) + elif name == "client": + resource = JsonResource(self, canonical_json=False) + RoomSendEventRestServlet(self).register(resource) + resources.update({ + "/_matrix/client/r0": resource, + "/_matrix/client/unstable": resource, + "/_matrix/client/v2_alpha": resource, + "/_matrix/client/api/v1": resource, + }) + + root_resource = create_resource_tree(resources, Resource()) + + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ) + ) + + logger.info("Synapse event creator now listening on port %d", port) + + def start_listening(self, listeners): + for listener in listeners: + if listener["type"] == "http": + self._listen_http(listener) + elif listener["type"] == "manhole": + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ) + ) + else: + logger.warn("Unrecognized listener type: %s", listener["type"]) + + self.get_tcp_replication().start_replication(self) + + def build_tcp_replication(self): + return ReplicationClientHandler(self.get_datastore()) + + +def start(config_options): + try: + config = HomeServerConfig.load_config( + "Synapse event creator", config_options + ) + except ConfigError as e: + sys.stderr.write("\n" + e.message + "\n") + sys.exit(1) + + assert config.worker_app == "synapse.app.event_creator" + + assert config.worker_replication_http_port is not None + + setup_logging(config, use_worker_options=True) + + events.USE_FROZEN_DICTS = config.use_frozen_dicts + + database_engine = create_engine(config.database_config) + + tls_server_context_factory = context_factory.ServerContextFactory(config) + + ss = EventCreatorServer( + config.server_name, + db_config=config.database_config, + tls_server_context_factory=tls_server_context_factory, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ss.setup() + ss.get_handlers() + ss.start_listening(config.worker_listeners) + + def start(): + ss.get_state_handler().start_caching() + ss.get_datastore().start_profiling() + + reactor.callWhenRunning(start) + + _base.start_worker_reactor("synapse-event-creator", config) + + +if __name__ == '__main__': + with LoggingContext("main"): + start(sys.argv[1:]) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index cb82a415a6..e375f2bbcf 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -38,6 +38,7 @@ from synapse.metrics import register_memory_metrics from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ check_requirements +from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource from synapse.rest.key.v1.server_key_resource import LocalKey @@ -219,6 +220,9 @@ class SynapseHomeServer(HomeServer): if name == "metrics" and self.get_config().enable_metrics: resources[METRICS_PREFIX] = MetricsResource(self) + if name == "replication": + resources[REPLICATION_PREFIX] = ReplicationRestResource(self) + return resources def start_listening(self): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 4b6884918d..80baf0ce0e 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -33,8 +33,16 @@ class WorkerConfig(Config): self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") self.worker_log_config = config.get("worker_log_config") + + # The host used to connect to the main synapse self.worker_replication_host = config.get("worker_replication_host", None) + + # The port on the main synapse for TCP replication self.worker_replication_port = config.get("worker_replication_port", None) + + # The port on the main synapse for HTTP replication endpoint + self.worker_replication_http_port = config.get("worker_replication_http_port") + self.worker_name = config.get("worker_name", self.worker_app) self.worker_main_http_uri = config.get("worker_main_http_uri", None) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 87e3fe7b97..7b80444f73 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -14,6 +14,9 @@ # limitations under the License. +from frozendict import frozendict + + class EventContext(object): """ Attributes: @@ -73,3 +76,72 @@ class EventContext(object): self.prev_state_events = None self.app_service = None + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "current_state_ids": _encode_state_dict(self.current_state_ids), + "prev_state_ids": _encode_state_dict(self.prev_state_ids), + "state_group": self.state_group, + "rejected": self.rejected, + "push_actions": self.push_actions, + "prev_group": self.prev_group, + "delta_ids": _encode_state_dict(self.delta_ids), + "prev_state_events": self.prev_state_events, + "app_service_id": self.app_service.id if self.app_service else None + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + EventContext. + + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + EventContext + """ + context = EventContext() + context.current_state_ids = _decode_state_dict(input["current_state_ids"]) + context.prev_state_ids = _decode_state_dict(input["prev_state_ids"]) + context.state_group = input["state_group"] + context.rejected = input["rejected"] + context.push_actions = input["push_actions"] + context.prev_group = input["prev_group"] + context.delta_ids = _decode_state_dict(input["delta_ids"]) + context.prev_state_events = input["prev_state_events"] + + app_service_id = input["app_service_id"] + if app_service_id: + context.app_service = store.get_app_service_by_id(app_service_id) + + return context + + +def _encode_state_dict(state_dict): + """Since dicts of (type, state_key) -> event_id cannot be serialized in + JSON we need to convert them to a form that can. + """ + if state_dict is None: + return None + + return [ + (etype, state_key, v) + for (etype, state_key), v in state_dict.iteritems() + ] + + +def _decode_state_dict(input): + """Decodes a state dict encoded using `_encode_state_dict` above + """ + if input is None: + return None + + return frozendict({(etype, state_key,): v for etype, state_key, v in input}) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4e9752ccbd..1c3ac03f20 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -28,6 +28,7 @@ from synapse.util.logcontext import preserve_fn from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze from synapse.visibility import filter_events_for_client +from synapse.replication.http.send_event import send_event_to_master from ._base import BaseHandler @@ -312,6 +313,9 @@ class EventCreationHandler(object): self.server_name = hs.hostname self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() + self.config = hs.config + + self.http_client = hs.get_simple_http_client() # This is only used to get at ratelimit function, and maybe_kick_guest_users self.base_handler = BaseHandler(hs) @@ -419,12 +423,6 @@ class EventCreationHandler(object): ratelimit=ratelimit, ) - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - @defer.inlineCallbacks def deduplicate_state_event(self, event, context): """ @@ -559,6 +557,18 @@ class EventCreationHandler(object): ): # We now need to go and hit out to wherever we need to hit out to. + # If we're a worker we need to hit out to the master. + if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + if ratelimit: yield self.base_handler.ratelimit(requester) @@ -692,3 +702,9 @@ class EventCreationHandler(object): ) preserve_fn(_notify)() + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(requester.user) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 7052333c19..97b631e60d 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -36,7 +36,7 @@ REQUIREMENTS = { "pydenticon": ["pydenticon"], "ujson": ["ujson"], "blist": ["blist"], - "pysaml2>=3.0.0,<4.0.0": ["saml2>=3.0.0,<4.0.0"], + "pysaml2>=3.0.0": ["saml2>=3.0.0"], "pymacaroons-pynacl": ["pymacaroons"], "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py new file mode 100644 index 0000000000..b378b41646 --- /dev/null +++ b/synapse/replication/http/__init__.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import send_event + +from synapse.http.server import JsonResource + + +REPLICATION_PREFIX = "/_synapse/replication" + + +class ReplicationRestResource(JsonResource): + def __init__(self, hs): + JsonResource.__init__(self, hs, canonical_json=False) + self.register_servlets(hs) + + def register_servlets(self, hs): + send_event.register_servlets(hs, self) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py new file mode 100644 index 0000000000..ff9b9d2f10 --- /dev/null +++ b/synapse/replication/http/send_event.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.events import FrozenEvent +from synapse.events.snapshot import EventContext +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.metrics import Measure +from synapse.types import Requester + +import logging +import re + +logger = logging.getLogger(__name__) + + +def send_event_to_master(client, host, port, requester, event, context): + """Send event to be handled on the master + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + event (FrozenEvent) + context (EventContext) + """ + uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + + payload = { + "event": event.get_pdu_json(), + "internal_metadata": event.internal_metadata.get_dict(), + "rejected_reason": event.rejected_reason, + "context": context.serialize(), + "requester": requester.serialize(), + } + + return client.post_json_get_json(uri, payload) + + +class ReplicationSendEventRestServlet(RestServlet): + """Handles events newly created on workers, including persisting and + notifying. + + The API looks like: + + POST /_synapse/replication/send_event + + { + "event": { .. serialized event .. }, + "internal_metadata": { .. serialized internal_metadata .. }, + "rejected_reason": .., // The event.rejected_reason field + "context": { .. serialized event context .. }, + "requester": { .. serialized requester .. }, + } + """ + PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + + def __init__(self, hs): + super(ReplicationSendEventRestServlet, self).__init__() + + self.event_creation_handler = hs.get_event_creation_handler() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + with Measure(self.clock, "repl_send_event_parse"): + content = parse_json_object_from_request(request) + + event_dict = content["event"] + internal_metadata = content["internal_metadata"] + rejected_reason = content["rejected_reason"] + event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + requester = Requester.deserialize(self.store, content["requester"]) + context = EventContext.deserialize(self.store, content["context"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "Got event to send with ID: %s into room: %s", + event.event_id, event.room_id, + ) + + yield self.event_creation_handler.handle_new_client_event( + requester, event, context, + ) + + defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + ReplicationSendEventRestServlet(hs).register(http_server) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 8acb5df0f3..f8c164b48b 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -21,6 +21,7 @@ from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore +from synapse.storage.signatures import SignatureStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -170,6 +171,25 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + get_latest_event_ids_and_hashes_in_room = ( + DataStore.get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_latest_event_ids_and_hashes_in_room = ( + DataStore._get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_event_reference_hashes_txn = ( + DataStore._get_event_reference_hashes_txn.__func__ + ) + add_event_hashes = ( + DataStore.add_event_hashes.__func__ + ) + get_event_reference_hashes = ( + SignatureStore.__dict__["get_event_reference_hashes"] + ) + get_event_reference_hash = ( + SignatureStore.__dict__["get_event_reference_hash"] + ) + def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = self._stream_id_gen.get_current_token() diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2ad486c67d..6073cc6fa2 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -131,9 +131,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): body = parse_json_object_from_request(request, allow_empty_body=True) - delete_local_events = bool( - body.get("delete_local_history", False) - ) + delete_local_events = bool(body.get("delete_local_events", False)) yield self.handlers.message_handler.purge_history( room_id, event_id, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index fbb2fc36e4..817fd47842 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -186,7 +186,6 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): diff --git a/synapse/state.py b/synapse/state.py index cc93bbcb6b..932f602508 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -350,7 +350,7 @@ class StateHandler(object): )) result = yield self._state_resolution_handler.resolve_state_groups( - room_id, state_groups_ids, self._state_map_factory, + room_id, state_groups_ids, None, self._state_map_factory, ) defer.returnValue(result) @@ -413,7 +413,9 @@ class StateResolutionHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + def resolve_state_groups( + self, room_id, state_groups_ids, event_map, state_map_factory, + ): """Resolves conflicts between a set of state groups Always generates a new state group (unless we hit the cache), so should @@ -425,6 +427,14 @@ class StateResolutionHandler(object): map from state group id to the state in that state group (where 'state' is a map from state key to event id) + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + Returns: Deferred[_StateCacheEntry]: resolved state """ @@ -465,6 +475,7 @@ class StateResolutionHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), + event_map=event_map, state_map_factory=state_map_factory, ) else: @@ -597,11 +608,20 @@ def _seperate(state_sets): @defer.inlineCallbacks -def resolve_events_with_factory(state_sets, state_map_factory): +def resolve_events_with_factory(state_sets, event_map, state_map_factory): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. + + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + state_map_factory(func): will be called with a list of event_ids that are needed, and should return with a Deferred of dict of event_id to event. @@ -622,12 +642,16 @@ def resolve_events_with_factory(state_sets, state_map_factory): for event_ids in conflicted_state.itervalues() for event_id in event_ids ) + if event_map is not None: + needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d conflicted events", len(needed_events)) # dict[str, FrozenEvent]: a map from state event id to event. Only includes - # the state events which are in conflict. + # the state events which are in conflict (and those in event_map) state_map = yield state_map_factory(needed_events) + if event_map is not None: + state_map.update(event_map) # get the ids of the auth events which allow us to authenticate the # conflicted state, picking only from the unconflicting state. @@ -639,6 +663,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): new_needed_events = set(auth_events.itervalues()) new_needed_events -= needed_events + if event_map is not None: + new_needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d auth events", len(new_needed_events)) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index d8c84b7141..79673b4273 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -99,6 +99,19 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + def get_app_service_by_id(self, as_id): + """Get the application service with the given appservice ID. + + Args: + as_id (str): The application service ID. + Returns: + synapse.appservice.ApplicationService or None. + """ + for service in self.services_cache: + if service.id == as_id: + return service + return None + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 86a7c5920d..bbb6aa992c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,6 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -237,6 +236,8 @@ class EventsStore(SQLBaseStore): self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() + def persist_events(self, events_and_contexts, backfilled=False): """ Write events to the database @@ -402,6 +403,7 @@ class EventsStore(SQLBaseStore): "Calculating state delta for room %s", room_id, ) current_state = yield self._get_new_state_after_events( + room_id, ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: @@ -487,11 +489,14 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _get_new_state_after_events(self, events_context, new_latest_event_ids): + def _get_new_state_after_events(self, room_id, events_context, new_latest_event_ids): """Calculate the current state dict after adding some new events to a room Args: + room_id (str): + room to which the events are being added. Used for logging etc + events_context (list[(EventBase, EventContext)]): events and contexts which are being added to the room @@ -503,8 +508,12 @@ class EventsStore(SQLBaseStore): None if there are no changes to the room state, or a dict of (type, state_key) -> event_id]. """ - state_sets = [] - state_groups = set() + + if not new_latest_event_ids: + defer.returnValue({}) + + # map from state_group to ((type, key) -> event_id) state map + state_groups = {} missing_event_ids = [] was_updated = False for event_id in new_latest_event_ids: @@ -515,16 +524,19 @@ class EventsStore(SQLBaseStore): if ctx.current_state_ids is None: raise Exception("Unknown current state") + if ctx.state_group is None: + # I don't think this can happen, but let's double-check + raise Exception( + "Context for new extremity event %s has no state " + "group" % (event_id, ), + ) + # If we've already seen the state group don't bother adding # it to the state sets again if ctx.state_group not in state_groups: - state_sets.append(ctx.current_state_ids) + state_groups[ctx.state_group] = ctx.current_state_ids if ctx.delta_ids or hasattr(ev, "state_key"): was_updated = True - if ctx.state_group: - # Add this as a seen state group (if it has a state - # group) - state_groups.add(ctx.state_group) break else: # If we couldn't find it, then we'll need to pull @@ -532,65 +544,37 @@ class EventsStore(SQLBaseStore): was_updated = True missing_event_ids.append(event_id) + if not was_updated: + return + if missing_event_ids: # Now pull out the state for any missing events from DB event_to_groups = yield self._get_state_group_for_events( missing_event_ids, ) - groups = set(event_to_groups.itervalues()) - state_groups + groups = set(event_to_groups.itervalues()) - set(state_groups.iterkeys()) if groups: group_to_state = yield self._get_state_for_groups(groups) - state_sets.extend(group_to_state.itervalues()) + state_groups.update(group_to_state) - if not new_latest_event_ids: - defer.returnValue({}) - elif was_updated: - if len(state_sets) == 1: - # If there is only one state set, then we know what the current - # state is. - defer.returnValue(state_sets[0]) - else: - # We work out the current state by passing the state sets to the - # state resolution algorithm. It may ask for some events, including - # the events we have yet to persist, so we need a slightly more - # complicated event lookup function than simply looking the events - # up in the db. - - logger.info( - "Resolving state with %i state sets", len(state_sets), - ) + if len(state_groups) == 1: + # If there is only one state group, then we know what the current + # state is. + defer.returnValue(state_groups.values()[0]) - events_map = {ev.event_id: ev for ev, _ in events_context} - - @defer.inlineCallbacks - def get_events(ev_ids): - # We get the events by first looking at the list of events we - # are trying to persist, and then fetching the rest from the DB. - db = [] - to_return = {} - for ev_id in ev_ids: - ev = events_map.get(ev_id, None) - if ev: - to_return[ev_id] = ev - else: - db.append(ev_id) - - if db: - evs = yield self.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - to_return.update(evs) - defer.returnValue(to_return) + def get_events(ev_ids): + return self.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + events_map = {ev.event_id: ev for ev, _ in events_context} + logger.debug("calling resolve_state_groups from preserve_events") + res = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups, events_map, get_events + ) - current_state = yield resolve_events_with_factory( - state_sets, - state_map_factory=get_events, - ) - defer.returnValue(current_state) - else: - return + defer.returnValue(res.state) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): @@ -2303,8 +2287,7 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.info("[purge] removing remote non-state events from %s", - table) + logger.info("[purge] removing events from %s", table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), diff --git a/synapse/types.py b/synapse/types.py index 6e76c016d9..7cb24cecb2 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -19,20 +19,59 @@ from synapse.api.errors import SynapseError from collections import namedtuple -Requester = namedtuple("Requester", [ +class Requester(namedtuple("Requester", [ "user", "access_token_id", "is_guest", "device_id", "app_service", -]) -""" -Represents the user making a request - -Attributes: - user (UserID): id of the user making the request - access_token_id (int|None): *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest (bool): True if the user making this request is a guest user - device_id (str|None): device_id which was set at authentication time - app_service (ApplicationService|None): the AS requesting on behalf of the user -""" +])): + """ + Represents the user making a request + + Attributes: + user (UserID): id of the user making the request + access_token_id (int|None): *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest (bool): True if the user making this request is a guest user + device_id (str|None): device_id which was set at authentication time + app_service (ApplicationService|None): the AS requesting on behalf of the user + """ + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "user_id": self.user.to_string(), + "access_token_id": self.access_token_id, + "is_guest": self.is_guest, + "device_id": self.device_id, + "app_server_id": self.app_service.id if self.app_service else None, + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + Requester. + + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + Requester + """ + appservice = None + if input["app_server_id"]: + appservice = store.get_app_service_by_id(input["app_server_id"]) + + return Requester( + user=UserID.from_string(input["user_id"]), + access_token_id=input["access_token_id"], + is_guest=input["is_guest"], + device_id=input["device_id"], + app_service=appservice, + ) def create_requester(user_id, access_token_id=None, is_guest=False, |