From e0b6db29ed260f4b54298c4d079f7fc6ce98a591 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Sep 2016 14:42:51 +0100 Subject: Split out public room list into a worker process --- synapse/app/client_reader.py | 215 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 synapse/app/client_reader.py (limited to 'synapse/app') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py new file mode 100644 index 0000000000..f356f5fbd4 --- /dev/null +++ b/synapse/app/client_reader.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import synapse + +from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig +from synapse.config.logger import setup_logging +from synapse.http.site import SynapseSite +from synapse.http.server import JsonResource +from synapse.metrics.resource import MetricsResource, METRICS_PREFIX +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.keys import SlavedKeyStore +from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.slave.storage.directory import DirectoryStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.rest.client.v1.room import PublicRoomListRestServlet +from synapse.server import HomeServer +from synapse.storage.client_ips import ClientIpStore +from synapse.storage.engines import create_engine +from synapse.util.async import sleep +from synapse.util.httpresourcetree import create_resource_tree +from synapse.util.logcontext import LoggingContext +from synapse.util.manhole import manhole +from synapse.util.rlimit import change_resource_limit +from synapse.util.versionstring import get_version_string +from synapse.crypto import context_factory + + +from twisted.internet import reactor, defer +from twisted.web.resource import Resource + +from daemonize import Daemonize + +import sys +import logging +import gc + +logger = logging.getLogger("synapse.app.federation_reader") + + +class ClientReaderSlavedStore( + SlavedEventStore, + SlavedKeyStore, + RoomStore, + DirectoryStore, + SlavedApplicationServiceStore, + SlavedRegistrationStore, + BaseSlavedStore, + ClientIpStore, # After BaseSlavedStore because the constructor is different +): + pass + + +class ClientReaderServer(HomeServer): + def get_db_conn(self, run_new_connection=True): + # Any param beginning with cp_ is a parameter for adbapi, and should + # not be passed to the database engine. + db_params = { + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = self.database_engine.module.connect(**db_params) + + if run_new_connection: + self.database_engine.on_new_connection(db_conn) + return db_conn + + def setup(self): + logger.info("Setting up.") + self.datastore = ClientReaderSlavedStore(self.get_db_conn(), self) + logger.info("Finished setting up.") + + def _listen_http(self, listener_config): + port = listener_config["port"] + bind_address = listener_config.get("bind_address", "") + site_tag = listener_config.get("tag", port) + resources = {} + for res in listener_config["resources"]: + for name in res["names"]: + if name == "metrics": + resources[METRICS_PREFIX] = MetricsResource(self) + elif name == "client": + resource = JsonResource(self, canonical_json=False) + PublicRoomListRestServlet(self).register(resource) + resources.update({ + "/_matrix/client/r0": resource, + "/_matrix/client/unstable": resource, + "/_matrix/client/v2_alpha": resource, + "/_matrix/client/api/v1": resource, + }) + + root_resource = create_resource_tree(resources, Resource()) + reactor.listenTCP( + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ), + interface=bind_address + ) + logger.info("Synapse client reader now listening on port %d", port) + + def start_listening(self, listeners): + for listener in listeners: + if listener["type"] == "http": + self._listen_http(listener) + elif listener["type"] == "manhole": + reactor.listenTCP( + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ), + interface=listener.get("bind_address", '127.0.0.1') + ) + else: + logger.warn("Unrecognized listener type: %s", listener["type"]) + + @defer.inlineCallbacks + def replicate(self): + http_client = self.get_simple_http_client() + store = self.get_datastore() + replication_url = self.config.worker_replication_url + + while True: + try: + args = store.stream_positions() + args["timeout"] = 30000 + result = yield http_client.get_json(replication_url, args=args) + yield store.process_replication(result) + except: + logger.exception("Error replicating from %r", replication_url) + yield sleep(5) + + +def start(config_options): + try: + config = HomeServerConfig.load_config( + "Synapse client reader", config_options + ) + except ConfigError as e: + sys.stderr.write("\n" + e.message + "\n") + sys.exit(1) + + assert config.worker_app == "synapse.app.client_reader" + + setup_logging(config.worker_log_config, config.worker_log_file) + + database_engine = create_engine(config.database_config) + + tls_server_context_factory = context_factory.ServerContextFactory(config) + + ss = ClientReaderServer( + config.server_name, + db_config=config.database_config, + tls_server_context_factory=tls_server_context_factory, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ss.setup() + ss.get_handlers() + ss.start_listening(config.worker_listeners) + + def run(): + with LoggingContext("run"): + logger.info("Running") + change_resource_limit(config.soft_file_limit) + if config.gc_thresholds: + gc.set_threshold(*config.gc_thresholds) + reactor.run() + + def start(): + ss.get_datastore().start_profiling() + ss.replicate() + + reactor.callWhenRunning(start) + + if config.worker_daemonize: + daemon = Daemonize( + app="synapse-client-reader", + pid=config.worker_pid_file, + action=run, + auto_close_fds=False, + verbose=True, + logger=logger, + ) + daemon.start() + else: + run() + + +if __name__ == '__main__': + with LoggingContext("main"): + start(sys.argv[1:]) -- cgit 1.5.1 From 4be85281f9aa34871e8c04f8d34f734fbc20840b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Sep 2016 14:31:22 +0100 Subject: Enable state caches on workers --- synapse/app/appservice.py | 1 + synapse/app/client_reader.py | 1 + synapse/app/federation_reader.py | 1 + synapse/app/media_repository.py | 1 + synapse/app/pusher.py | 1 + synapse/app/synchrotron.py | 1 + 6 files changed, 6 insertions(+) (limited to 'synapse/app') diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 57587aed25..4e62a84b28 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -187,6 +187,7 @@ def start(config_options): def start(): ps.replicate() ps.get_datastore().start_profiling() + ps.get_state_handler().start_caching() reactor.callWhenRunning(start) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index f356f5fbd4..9aaa3bace2 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -191,6 +191,7 @@ def start(config_options): reactor.run() def start(): + ss.get_state_handler().start_caching() ss.get_datastore().start_profiling() ss.replicate() diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 7355499ae2..1f5ae1937e 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -182,6 +182,7 @@ def start(config_options): reactor.run() def start(): + ss.get_state_handler().start_caching() ss.get_datastore().start_profiling() ss.replicate() diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 9d4c4a0750..6e5ec01c6c 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -188,6 +188,7 @@ def start(config_options): reactor.run() def start(): + ss.get_state_handler().start_caching() ss.get_datastore().start_profiling() ss.replicate() diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 8d755a4b33..d59f4a571c 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -276,6 +276,7 @@ def start(config_options): ps.replicate() ps.get_pusherpool().start() ps.get_datastore().start_profiling() + ps.get_state_handler().start_caching() reactor.callWhenRunning(start) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index dbaa48035d..46d390fd0f 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -465,6 +465,7 @@ def start(config_options): def start(): ss.get_datastore().start_profiling() ss.replicate() + ss.get_state_handler().start_caching() reactor.callWhenRunning(start) -- cgit 1.5.1 From 883df2e9831a98d3f2598e52e6369e40c6f14ec5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sat, 17 Sep 2016 14:12:04 +0100 Subject: fix logger for client_reader worker --- synapse/app/client_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app') diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 9aaa3bace2..9fccc73db3 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -51,7 +51,7 @@ import sys import logging import gc -logger = logging.getLogger("synapse.app.federation_reader") +logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( -- cgit 1.5.1 From 87528f07561d16dbf35aeebdfcecb111ed385b4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Sep 2016 11:46:28 +0100 Subject: Support /initialSync in synchrotron worker --- synapse/app/synchrotron.py | 6 + synapse/handlers/initial_sync.py | 443 +++++++++++++++++++++++++++++++++ synapse/handlers/message.py | 381 +--------------------------- synapse/rest/client/v1/initial_sync.py | 5 +- synapse/rest/client/v1/room.py | 4 +- synapse/server.py | 5 + 6 files changed, 461 insertions(+), 383 deletions(-) create mode 100644 synapse/handlers/initial_sync.py (limited to 'synapse/app') diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 46d390fd0f..64b209ffe6 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -27,6 +27,8 @@ from synapse.http.server import JsonResource from synapse.metrics.resource import MetricsResource, METRICS_PREFIX from synapse.rest.client.v2_alpha import sync from synapse.rest.client.v1 import events +from synapse.rest.client.v1.room import RoomInitialSyncRestServlet +from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore @@ -37,6 +39,7 @@ from synapse.replication.slave.storage.filtering import SlavedFilteringStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.presence import SlavedPresenceStore from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.room import RoomStore from synapse.server import HomeServer from synapse.storage.client_ips import ClientIpStore from synapse.storage.engines import create_engine @@ -74,6 +77,7 @@ class SynchrotronSlavedStore( SlavedFilteringStore, SlavedPresenceStore, SlavedDeviceInboxStore, + RoomStore, BaseSlavedStore, ClientIpStore, # After BaseSlavedStore because the constructor is different ): @@ -296,6 +300,8 @@ class SynchrotronServer(HomeServer): resource = JsonResource(self, canonical_json=False) sync.register_servlets(self, resource) events.register_servlets(self, resource) + InitialSyncRestServlet(self).register(resource) + RoomInitialSyncRestServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, "/_matrix/client/unstable": resource, diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py new file mode 100644 index 0000000000..fbfa5a0281 --- /dev/null +++ b/synapse/handlers/initial_sync.py @@ -0,0 +1,443 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.api.errors import AuthError, Codes +from synapse.events.utils import serialize_event +from synapse.events.validator import EventValidator +from synapse.streams.config import PaginationConfig +from synapse.types import ( + UserID, StreamToken, +) +from synapse.util import unwrapFirstError +from synapse.util.async import concurrently_execute +from synapse.util.caches.snapshot_cache import SnapshotCache +from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred +from synapse.visibility import filter_events_for_client + +from ._base import BaseHandler + +import logging + + +logger = logging.getLogger(__name__) + + +class InitialSyncHandler(BaseHandler): + def __init__(self, hs): + super(InitialSyncHandler, self).__init__(hs) + self.hs = hs + self.state = hs.get_state_handler() + self.clock = hs.get_clock() + self.validator = EventValidator() + self.snapshot_cache = SnapshotCache() + + def snapshot_all_rooms(self, user_id=None, pagin_config=None, + as_client_event=True, include_archived=False): + """Retrieve a snapshot of all rooms the user is invited or has joined. + + This snapshot may include messages for all rooms where the user is + joined, depending on the pagination config. + + Args: + user_id (str): The ID of the user making the request. + pagin_config (synapse.api.streams.PaginationConfig): The pagination + config used to determine how many messages *PER ROOM* to return. + as_client_event (bool): True to get events in client-server format. + include_archived (bool): True to get rooms that the user has left + Returns: + A list of dicts with "room_id" and "membership" keys for all rooms + the user is currently invited or joined in on. Rooms where the user + is joined on, may return a "messages" key with messages, depending + on the specified PaginationConfig. + """ + key = ( + user_id, + pagin_config.from_token, + pagin_config.to_token, + pagin_config.direction, + pagin_config.limit, + as_client_event, + include_archived, + ) + now_ms = self.clock.time_msec() + result = self.snapshot_cache.get(now_ms, key) + if result is not None: + return result + + return self.snapshot_cache.set(now_ms, key, self._snapshot_all_rooms( + user_id, pagin_config, as_client_event, include_archived + )) + + @defer.inlineCallbacks + def _snapshot_all_rooms(self, user_id=None, pagin_config=None, + as_client_event=True, include_archived=False): + + memberships = [Membership.INVITE, Membership.JOIN] + if include_archived: + memberships.append(Membership.LEAVE) + + room_list = yield self.store.get_rooms_for_user_where_membership_is( + user_id=user_id, membership_list=memberships + ) + + user = UserID.from_string(user_id) + + rooms_ret = [] + + now_token = yield self.hs.get_event_sources().get_current_token() + + presence_stream = self.hs.get_event_sources().sources["presence"] + pagination_config = PaginationConfig(from_token=now_token) + presence, _ = yield presence_stream.get_pagination_rows( + user, pagination_config.get_source_config("presence"), None + ) + + receipt_stream = self.hs.get_event_sources().sources["receipt"] + receipt, _ = yield receipt_stream.get_pagination_rows( + user, pagination_config.get_source_config("receipt"), None + ) + + tags_by_room = yield self.store.get_tags_for_user(user_id) + + account_data, account_data_by_room = ( + yield self.store.get_account_data_for_user(user_id) + ) + + public_room_ids = yield self.store.get_public_room_ids() + + limit = pagin_config.limit + if limit is None: + limit = 10 + + @defer.inlineCallbacks + def handle_room(event): + d = { + "room_id": event.room_id, + "membership": event.membership, + "visibility": ( + "public" if event.room_id in public_room_ids + else "private" + ), + } + + if event.membership == Membership.INVITE: + time_now = self.clock.time_msec() + d["inviter"] = event.sender + + invite_event = yield self.store.get_event(event.event_id) + d["invite"] = serialize_event(invite_event, time_now, as_client_event) + + rooms_ret.append(d) + + if event.membership not in (Membership.JOIN, Membership.LEAVE): + return + + try: + if event.membership == Membership.JOIN: + room_end_token = now_token.room_key + deferred_room_state = self.state_handler.get_current_state( + event.room_id + ) + elif event.membership == Membership.LEAVE: + room_end_token = "s%d" % (event.stream_ordering,) + deferred_room_state = self.store.get_state_for_events( + [event.event_id], None + ) + deferred_room_state.addCallback( + lambda states: states[event.event_id] + ) + + (messages, token), current_state = yield preserve_context_over_deferred( + defer.gatherResults( + [ + preserve_fn(self.store.get_recent_events_for_room)( + event.room_id, + limit=limit, + end_token=room_end_token, + ), + deferred_room_state, + ] + ) + ).addErrback(unwrapFirstError) + + messages = yield filter_events_for_client( + self.store, user_id, messages + ) + + start_token = now_token.copy_and_replace("room_key", token[0]) + end_token = now_token.copy_and_replace("room_key", token[1]) + time_now = self.clock.time_msec() + + d["messages"] = { + "chunk": [ + serialize_event(m, time_now, as_client_event) + for m in messages + ], + "start": start_token.to_string(), + "end": end_token.to_string(), + } + + d["state"] = [ + serialize_event(c, time_now, as_client_event) + for c in current_state.values() + ] + + account_data_events = [] + tags = tags_by_room.get(event.room_id) + if tags: + account_data_events.append({ + "type": "m.tag", + "content": {"tags": tags}, + }) + + account_data = account_data_by_room.get(event.room_id, {}) + for account_data_type, content in account_data.items(): + account_data_events.append({ + "type": account_data_type, + "content": content, + }) + + d["account_data"] = account_data_events + except: + logger.exception("Failed to get snapshot") + + yield concurrently_execute(handle_room, room_list, 10) + + account_data_events = [] + for account_data_type, content in account_data.items(): + account_data_events.append({ + "type": account_data_type, + "content": content, + }) + + ret = { + "rooms": rooms_ret, + "presence": presence, + "account_data": account_data_events, + "receipts": receipt, + "end": now_token.to_string(), + } + + defer.returnValue(ret) + + @defer.inlineCallbacks + def room_initial_sync(self, requester, room_id, pagin_config=None): + """Capture the a snapshot of a room. If user is currently a member of + the room this will be what is currently in the room. If the user left + the room this will be what was in the room when they left. + + Args: + requester(Requester): The user to get a snapshot for. + room_id(str): The room to get a snapshot of. + pagin_config(synapse.streams.config.PaginationConfig): + The pagination config used to determine how many messages to + return. + Raises: + AuthError if the user wasn't in the room. + Returns: + A JSON serialisable dict with the snapshot of the room. + """ + + user_id = requester.user.to_string() + + membership, member_event_id = yield self._check_in_room_or_world_readable( + room_id, user_id, + ) + is_peeking = member_event_id is None + + if membership == Membership.JOIN: + result = yield self._room_initial_sync_joined( + user_id, room_id, pagin_config, membership, is_peeking + ) + elif membership == Membership.LEAVE: + result = yield self._room_initial_sync_parted( + user_id, room_id, pagin_config, membership, member_event_id, is_peeking + ) + + account_data_events = [] + tags = yield self.store.get_tags_for_room(user_id, room_id) + if tags: + account_data_events.append({ + "type": "m.tag", + "content": {"tags": tags}, + }) + + account_data = yield self.store.get_account_data_for_room(user_id, room_id) + for account_data_type, content in account_data.items(): + account_data_events.append({ + "type": account_data_type, + "content": content, + }) + + result["account_data"] = account_data_events + + defer.returnValue(result) + + @defer.inlineCallbacks + def _room_initial_sync_parted(self, user_id, room_id, pagin_config, + membership, member_event_id, is_peeking): + room_state = yield self.store.get_state_for_events( + [member_event_id], None + ) + + room_state = room_state[member_event_id] + + limit = pagin_config.limit if pagin_config else None + if limit is None: + limit = 10 + + stream_token = yield self.store.get_stream_token_for_event( + member_event_id + ) + + messages, token = yield self.store.get_recent_events_for_room( + room_id, + limit=limit, + end_token=stream_token + ) + + messages = yield filter_events_for_client( + self.store, user_id, messages, is_peeking=is_peeking + ) + + start_token = StreamToken.START.copy_and_replace("room_key", token[0]) + end_token = StreamToken.START.copy_and_replace("room_key", token[1]) + + time_now = self.clock.time_msec() + + defer.returnValue({ + "membership": membership, + "room_id": room_id, + "messages": { + "chunk": [serialize_event(m, time_now) for m in messages], + "start": start_token.to_string(), + "end": end_token.to_string(), + }, + "state": [serialize_event(s, time_now) for s in room_state.values()], + "presence": [], + "receipts": [], + }) + + @defer.inlineCallbacks + def _room_initial_sync_joined(self, user_id, room_id, pagin_config, + membership, is_peeking): + current_state = yield self.state.get_current_state( + room_id=room_id, + ) + + # TODO: These concurrently + time_now = self.clock.time_msec() + state = [ + serialize_event(x, time_now) + for x in current_state.values() + ] + + now_token = yield self.hs.get_event_sources().get_current_token() + + limit = pagin_config.limit if pagin_config else None + if limit is None: + limit = 10 + + room_members = [ + m for m in current_state.values() + if m.type == EventTypes.Member + and m.content["membership"] == Membership.JOIN + ] + + presence_handler = self.hs.get_presence_handler() + + @defer.inlineCallbacks + def get_presence(): + states = yield presence_handler.get_states( + [m.user_id for m in room_members], + as_event=True, + ) + + defer.returnValue(states) + + @defer.inlineCallbacks + def get_receipts(): + receipts_handler = self.hs.get_handlers().receipts_handler + receipts = yield receipts_handler.get_receipts_for_room( + room_id, + now_token.receipt_key + ) + defer.returnValue(receipts) + + presence, receipts, (messages, token) = yield defer.gatherResults( + [ + preserve_fn(get_presence)(), + preserve_fn(get_receipts)(), + preserve_fn(self.store.get_recent_events_for_room)( + room_id, + limit=limit, + end_token=now_token.room_key, + ) + ], + consumeErrors=True, + ).addErrback(unwrapFirstError) + + messages = yield filter_events_for_client( + self.store, user_id, messages, is_peeking=is_peeking, + ) + + start_token = now_token.copy_and_replace("room_key", token[0]) + end_token = now_token.copy_and_replace("room_key", token[1]) + + time_now = self.clock.time_msec() + + ret = { + "room_id": room_id, + "messages": { + "chunk": [serialize_event(m, time_now) for m in messages], + "start": start_token.to_string(), + "end": end_token.to_string(), + }, + "state": state, + "presence": presence, + "receipts": receipts, + } + if not is_peeking: + ret["membership"] = membership + + defer.returnValue(ret) + + @defer.inlineCallbacks + def _check_in_room_or_world_readable(self, room_id, user_id): + try: + # check_user_was_in_room will return the most recent membership + # event for the user if: + # * The user is a non-guest user, and was ever in the room + # * The user is a guest user, and has joined the room + # else it will throw. + member_event = yield self.auth.check_user_was_in_room(room_id, user_id) + defer.returnValue((member_event.membership, member_event.event_id)) + return + except AuthError: + visibility = yield self.state_handler.get_current_state( + room_id, EventTypes.RoomHistoryVisibility, "" + ) + if ( + visibility and + visibility.content["history_visibility"] == "world_readable" + ): + defer.returnValue((Membership.JOIN, None)) + return + raise AuthError( + 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN + ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 178209a209..30ea9630f7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -21,14 +21,11 @@ from synapse.crypto.event_signing import add_hashes_and_signatures from synapse.events.utils import serialize_event from synapse.events.validator import EventValidator from synapse.push.action_generator import ActionGenerator -from synapse.streams.config import PaginationConfig from synapse.types import ( - UserID, RoomAlias, RoomStreamToken, StreamToken, get_domain_from_id + UserID, RoomAlias, RoomStreamToken, get_domain_from_id ) -from synapse.util import unwrapFirstError -from synapse.util.async import concurrently_execute, run_on_reactor, ReadWriteLock -from synapse.util.caches.snapshot_cache import SnapshotCache -from synapse.util.logcontext import preserve_fn, preserve_context_over_deferred +from synapse.util.async import run_on_reactor, ReadWriteLock +from synapse.util.logcontext import preserve_fn from synapse.util.metrics import measure_func from synapse.visibility import filter_events_for_client @@ -49,7 +46,6 @@ class MessageHandler(BaseHandler): self.state = hs.get_state_handler() self.clock = hs.get_clock() self.validator = EventValidator() - self.snapshot_cache = SnapshotCache() self.pagination_lock = ReadWriteLock() @@ -392,377 +388,6 @@ class MessageHandler(BaseHandler): [serialize_event(c, now) for c in room_state.values()] ) - def snapshot_all_rooms(self, user_id=None, pagin_config=None, - as_client_event=True, include_archived=False): - """Retrieve a snapshot of all rooms the user is invited or has joined. - - This snapshot may include messages for all rooms where the user is - joined, depending on the pagination config. - - Args: - user_id (str): The ID of the user making the request. - pagin_config (synapse.api.streams.PaginationConfig): The pagination - config used to determine how many messages *PER ROOM* to return. - as_client_event (bool): True to get events in client-server format. - include_archived (bool): True to get rooms that the user has left - Returns: - A list of dicts with "room_id" and "membership" keys for all rooms - the user is currently invited or joined in on. Rooms where the user - is joined on, may return a "messages" key with messages, depending - on the specified PaginationConfig. - """ - key = ( - user_id, - pagin_config.from_token, - pagin_config.to_token, - pagin_config.direction, - pagin_config.limit, - as_client_event, - include_archived, - ) - now_ms = self.clock.time_msec() - result = self.snapshot_cache.get(now_ms, key) - if result is not None: - return result - - return self.snapshot_cache.set(now_ms, key, self._snapshot_all_rooms( - user_id, pagin_config, as_client_event, include_archived - )) - - @defer.inlineCallbacks - def _snapshot_all_rooms(self, user_id=None, pagin_config=None, - as_client_event=True, include_archived=False): - - memberships = [Membership.INVITE, Membership.JOIN] - if include_archived: - memberships.append(Membership.LEAVE) - - room_list = yield self.store.get_rooms_for_user_where_membership_is( - user_id=user_id, membership_list=memberships - ) - - user = UserID.from_string(user_id) - - rooms_ret = [] - - now_token = yield self.hs.get_event_sources().get_current_token() - - presence_stream = self.hs.get_event_sources().sources["presence"] - pagination_config = PaginationConfig(from_token=now_token) - presence, _ = yield presence_stream.get_pagination_rows( - user, pagination_config.get_source_config("presence"), None - ) - - receipt_stream = self.hs.get_event_sources().sources["receipt"] - receipt, _ = yield receipt_stream.get_pagination_rows( - user, pagination_config.get_source_config("receipt"), None - ) - - tags_by_room = yield self.store.get_tags_for_user(user_id) - - account_data, account_data_by_room = ( - yield self.store.get_account_data_for_user(user_id) - ) - - public_room_ids = yield self.store.get_public_room_ids() - - limit = pagin_config.limit - if limit is None: - limit = 10 - - @defer.inlineCallbacks - def handle_room(event): - d = { - "room_id": event.room_id, - "membership": event.membership, - "visibility": ( - "public" if event.room_id in public_room_ids - else "private" - ), - } - - if event.membership == Membership.INVITE: - time_now = self.clock.time_msec() - d["inviter"] = event.sender - - invite_event = yield self.store.get_event(event.event_id) - d["invite"] = serialize_event(invite_event, time_now, as_client_event) - - rooms_ret.append(d) - - if event.membership not in (Membership.JOIN, Membership.LEAVE): - return - - try: - if event.membership == Membership.JOIN: - room_end_token = now_token.room_key - deferred_room_state = self.state_handler.get_current_state( - event.room_id - ) - elif event.membership == Membership.LEAVE: - room_end_token = "s%d" % (event.stream_ordering,) - deferred_room_state = self.store.get_state_for_events( - [event.event_id], None - ) - deferred_room_state.addCallback( - lambda states: states[event.event_id] - ) - - (messages, token), current_state = yield preserve_context_over_deferred( - defer.gatherResults( - [ - preserve_fn(self.store.get_recent_events_for_room)( - event.room_id, - limit=limit, - end_token=room_end_token, - ), - deferred_room_state, - ] - ) - ).addErrback(unwrapFirstError) - - messages = yield filter_events_for_client( - self.store, user_id, messages - ) - - start_token = now_token.copy_and_replace("room_key", token[0]) - end_token = now_token.copy_and_replace("room_key", token[1]) - time_now = self.clock.time_msec() - - d["messages"] = { - "chunk": [ - serialize_event(m, time_now, as_client_event) - for m in messages - ], - "start": start_token.to_string(), - "end": end_token.to_string(), - } - - d["state"] = [ - serialize_event(c, time_now, as_client_event) - for c in current_state.values() - ] - - account_data_events = [] - tags = tags_by_room.get(event.room_id) - if tags: - account_data_events.append({ - "type": "m.tag", - "content": {"tags": tags}, - }) - - account_data = account_data_by_room.get(event.room_id, {}) - for account_data_type, content in account_data.items(): - account_data_events.append({ - "type": account_data_type, - "content": content, - }) - - d["account_data"] = account_data_events - except: - logger.exception("Failed to get snapshot") - - yield concurrently_execute(handle_room, room_list, 10) - - account_data_events = [] - for account_data_type, content in account_data.items(): - account_data_events.append({ - "type": account_data_type, - "content": content, - }) - - ret = { - "rooms": rooms_ret, - "presence": presence, - "account_data": account_data_events, - "receipts": receipt, - "end": now_token.to_string(), - } - - defer.returnValue(ret) - - @defer.inlineCallbacks - def room_initial_sync(self, requester, room_id, pagin_config=None): - """Capture the a snapshot of a room. If user is currently a member of - the room this will be what is currently in the room. If the user left - the room this will be what was in the room when they left. - - Args: - requester(Requester): The user to get a snapshot for. - room_id(str): The room to get a snapshot of. - pagin_config(synapse.streams.config.PaginationConfig): - The pagination config used to determine how many messages to - return. - Raises: - AuthError if the user wasn't in the room. - Returns: - A JSON serialisable dict with the snapshot of the room. - """ - - user_id = requester.user.to_string() - - membership, member_event_id = yield self._check_in_room_or_world_readable( - room_id, user_id, - ) - is_peeking = member_event_id is None - - if membership == Membership.JOIN: - result = yield self._room_initial_sync_joined( - user_id, room_id, pagin_config, membership, is_peeking - ) - elif membership == Membership.LEAVE: - result = yield self._room_initial_sync_parted( - user_id, room_id, pagin_config, membership, member_event_id, is_peeking - ) - - account_data_events = [] - tags = yield self.store.get_tags_for_room(user_id, room_id) - if tags: - account_data_events.append({ - "type": "m.tag", - "content": {"tags": tags}, - }) - - account_data = yield self.store.get_account_data_for_room(user_id, room_id) - for account_data_type, content in account_data.items(): - account_data_events.append({ - "type": account_data_type, - "content": content, - }) - - result["account_data"] = account_data_events - - defer.returnValue(result) - - @defer.inlineCallbacks - def _room_initial_sync_parted(self, user_id, room_id, pagin_config, - membership, member_event_id, is_peeking): - room_state = yield self.store.get_state_for_events( - [member_event_id], None - ) - - room_state = room_state[member_event_id] - - limit = pagin_config.limit if pagin_config else None - if limit is None: - limit = 10 - - stream_token = yield self.store.get_stream_token_for_event( - member_event_id - ) - - messages, token = yield self.store.get_recent_events_for_room( - room_id, - limit=limit, - end_token=stream_token - ) - - messages = yield filter_events_for_client( - self.store, user_id, messages, is_peeking=is_peeking - ) - - start_token = StreamToken.START.copy_and_replace("room_key", token[0]) - end_token = StreamToken.START.copy_and_replace("room_key", token[1]) - - time_now = self.clock.time_msec() - - defer.returnValue({ - "membership": membership, - "room_id": room_id, - "messages": { - "chunk": [serialize_event(m, time_now) for m in messages], - "start": start_token.to_string(), - "end": end_token.to_string(), - }, - "state": [serialize_event(s, time_now) for s in room_state.values()], - "presence": [], - "receipts": [], - }) - - @defer.inlineCallbacks - def _room_initial_sync_joined(self, user_id, room_id, pagin_config, - membership, is_peeking): - current_state = yield self.state.get_current_state( - room_id=room_id, - ) - - # TODO: These concurrently - time_now = self.clock.time_msec() - state = [ - serialize_event(x, time_now) - for x in current_state.values() - ] - - now_token = yield self.hs.get_event_sources().get_current_token() - - limit = pagin_config.limit if pagin_config else None - if limit is None: - limit = 10 - - room_members = [ - m for m in current_state.values() - if m.type == EventTypes.Member - and m.content["membership"] == Membership.JOIN - ] - - presence_handler = self.hs.get_presence_handler() - - @defer.inlineCallbacks - def get_presence(): - states = yield presence_handler.get_states( - [m.user_id for m in room_members], - as_event=True, - ) - - defer.returnValue(states) - - @defer.inlineCallbacks - def get_receipts(): - receipts_handler = self.hs.get_handlers().receipts_handler - receipts = yield receipts_handler.get_receipts_for_room( - room_id, - now_token.receipt_key - ) - defer.returnValue(receipts) - - presence, receipts, (messages, token) = yield defer.gatherResults( - [ - preserve_fn(get_presence)(), - preserve_fn(get_receipts)(), - preserve_fn(self.store.get_recent_events_for_room)( - room_id, - limit=limit, - end_token=now_token.room_key, - ) - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) - - messages = yield filter_events_for_client( - self.store, user_id, messages, is_peeking=is_peeking, - ) - - start_token = now_token.copy_and_replace("room_key", token[0]) - end_token = now_token.copy_and_replace("room_key", token[1]) - - time_now = self.clock.time_msec() - - ret = { - "room_id": room_id, - "messages": { - "chunk": [serialize_event(m, time_now) for m in messages], - "start": start_token.to_string(), - "end": end_token.to_string(), - }, - "state": state, - "presence": presence, - "receipts": receipts, - } - if not is_peeking: - ret["membership"] = membership - - defer.returnValue(ret) - @measure_func("_create_new_client_event") @defer.inlineCallbacks def _create_new_client_event(self, builder, prev_event_ids=None): diff --git a/synapse/rest/client/v1/initial_sync.py b/synapse/rest/client/v1/initial_sync.py index 113a49e539..478e21eea8 100644 --- a/synapse/rest/client/v1/initial_sync.py +++ b/synapse/rest/client/v1/initial_sync.py @@ -25,16 +25,15 @@ class InitialSyncRestServlet(ClientV1RestServlet): def __init__(self, hs): super(InitialSyncRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.initial_sync_handler = hs.get_initial_sync_handler() @defer.inlineCallbacks def on_GET(self, request): requester = yield self.auth.get_user_by_req(request) as_client_event = "raw" not in request.args pagination_config = PaginationConfig.from_request(request) - handler = self.handlers.message_handler include_archived = request.args.get("archived", None) == ["true"] - content = yield handler.snapshot_all_rooms( + content = yield self.initial_sync_handler.snapshot_all_rooms( user_id=requester.user.to_string(), pagin_config=pagination_config, as_client_event=as_client_event, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 45287bf05b..20889e4af0 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -456,13 +456,13 @@ class RoomInitialSyncRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomInitialSyncRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.initial_sync_handler = hs.get_initial_sync_handler() @defer.inlineCallbacks def on_GET(self, request, room_id): requester = yield self.auth.get_user_by_req(request, allow_guest=True) pagination_config = PaginationConfig.from_request(request) - content = yield self.handlers.message_handler.room_initial_sync( + content = yield self.initial_sync_handler.room_initial_sync( room_id=room_id, requester=requester, pagin_config=pagination_config, diff --git a/synapse/server.py b/synapse/server.py index 69860f3d82..374124a147 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -43,6 +43,7 @@ from synapse.handlers.room_list import RoomListHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler from synapse.handlers.events import EventHandler, EventStreamHandler +from synapse.handlers.initial_sync import InitialSyncHandler from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.notifier import Notifier @@ -98,6 +99,7 @@ class HomeServer(object): 'e2e_keys_handler', 'event_handler', 'event_stream_handler', + 'initial_sync_handler', 'application_service_api', 'application_service_scheduler', 'application_service_handler', @@ -228,6 +230,9 @@ class HomeServer(object): def build_event_stream_handler(self): return EventStreamHandler(self) + def build_initial_sync_handler(self): + return InitialSyncHandler(self) + def build_event_sources(self): return EventSources(self) -- cgit 1.5.1 From 748d8fdc7bcdb43719e99a48cc74bf078f22396f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Sep 2016 15:31:47 +0100 Subject: Reduce DB hits for replication Some streams will occaisonally advance their positions without actually having any new rows to send over federation. Currently this means that the token will not advance on the workers, leading to them repeatedly sending a slightly out of date token. This in turns requires the master to hit the DB to check if there are any new rows, rather than hitting the no op logic where we check if the given token matches the current token. This commit changes the API to always return an entry if the position for a stream has changed, allowing workers to advance their tokens correctly. --- .gitignore | 8 +- synapse/app/pusher.py | 4 +- synapse/replication/resource.py | 139 +++++++++++++++++++++++-------- synapse/storage/room.py | 3 + tests/replication/slave/storage/_base.py | 3 +- tests/replication/test_resource.py | 3 +- 6 files changed, 115 insertions(+), 45 deletions(-) (limited to 'synapse/app') diff --git a/.gitignore b/.gitignore index f8c4000134..491047c352 100644 --- a/.gitignore +++ b/.gitignore @@ -24,10 +24,10 @@ homeserver*.yaml .coverage htmlcov -demo/*.db -demo/*.log -demo/*.log.* -demo/*.pid +demo/*/*.db +demo/*/*.log +demo/*/*.log.* +demo/*/*.pid demo/media_store.* demo/etc diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index d59f4a571c..1a6f5507a9 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -197,7 +197,7 @@ class PusherServer(HomeServer): yield start_pusher(user_id, app_id, pushkey) stream = results.get("events") - if stream: + if stream and stream["rows"]: min_stream_id = stream["rows"][0][0] max_stream_id = stream["position"] preserve_fn(pusher_pool.on_new_notifications)( @@ -205,7 +205,7 @@ class PusherServer(HomeServer): ) stream = results.get("receipts") - if stream: + if stream and stream["rows"]: rows = stream["rows"] affected_room_ids = set(row[1] for row in rows) min_stream_id = rows[0][0] diff --git a/synapse/replication/resource.py b/synapse/replication/resource.py index 9aab3ce23c..585bd1c4ad 100644 --- a/synapse/replication/resource.py +++ b/synapse/replication/resource.py @@ -17,6 +17,7 @@ from synapse.http.servlet import parse_integer, parse_string from synapse.http.server import request_handler, finish_request from synapse.replication.pusher_resource import PusherResource from synapse.replication.presence_resource import PresenceResource +from synapse.api.errors import SynapseError from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET @@ -166,7 +167,8 @@ class ReplicationResource(Resource): def replicate(): return self.replicate(request_streams, limit) - result = yield self.notifier.wait_for_replication(replicate, timeout) + writer = yield self.notifier.wait_for_replication(replicate, timeout) + result = writer.finish() for stream_name, stream_content in result.items(): logger.info( @@ -186,6 +188,9 @@ class ReplicationResource(Resource): current_token = yield self.current_replication_token() logger.debug("Replicating up to %r", current_token) + if limit == 0: + raise SynapseError(400, "Limit cannot be 0") + yield self.account_data(writer, current_token, limit, request_streams) yield self.events(writer, current_token, limit, request_streams) # TODO: implement limit @@ -200,7 +205,7 @@ class ReplicationResource(Resource): self.streams(writer, current_token, request_streams) logger.debug("Replicated %d rows", writer.total) - defer.returnValue(writer.finish()) + defer.returnValue(writer) def streams(self, writer, current_token, request_streams): request_token = request_streams.get("streams") @@ -233,31 +238,52 @@ class ReplicationResource(Resource): request_backfill = request_streams.get("backfill") if request_events is not None or request_backfill is not None: - if request_events is None: + if request_backfill is None: request_events = current_token.events if request_backfill is None: request_backfill = current_token.backfill + + no_new_tokens = ( + request_events == current_token.events + and request_backfill == current_token.backfill + ) + if no_new_tokens: + return + res = yield self.store.get_all_new_events( request_backfill, request_events, current_token.backfill, current_token.events, limit ) - writer.write_header_and_rows("events", res.new_forward_events, ( - "position", "internal", "json", "state_group" - )) - writer.write_header_and_rows("backfill", res.new_backfill_events, ( - "position", "internal", "json", "state_group" - )) + + upto_events_token = _position_from_rows( + res.new_forward_events, current_token.events + ) + + upto_backfill_token = _position_from_rows( + res.new_backfill_events, current_token.backfill + ) + + if request_events != upto_events_token: + writer.write_header_and_rows("events", res.new_forward_events, ( + "position", "internal", "json", "state_group" + ), position=upto_events_token) + + if request_backfill != upto_backfill_token: + writer.write_header_and_rows("backfill", res.new_backfill_events, ( + "position", "internal", "json", "state_group", + ), position=upto_backfill_token) + writer.write_header_and_rows( "forward_ex_outliers", res.forward_ex_outliers, - ("position", "event_id", "state_group") + ("position", "event_id", "state_group"), ) writer.write_header_and_rows( "backward_ex_outliers", res.backward_ex_outliers, - ("position", "event_id", "state_group") + ("position", "event_id", "state_group"), ) writer.write_header_and_rows( - "state_resets", res.state_resets, ("position",) + "state_resets", res.state_resets, ("position",), ) @defer.inlineCallbacks @@ -266,15 +292,16 @@ class ReplicationResource(Resource): request_presence = request_streams.get("presence") - if request_presence is not None: + if request_presence is not None and request_presence != current_position: presence_rows = yield self.presence_handler.get_all_presence_updates( request_presence, current_position ) + upto_token = _position_from_rows(presence_rows, current_position) writer.write_header_and_rows("presence", presence_rows, ( "position", "user_id", "state", "last_active_ts", "last_federation_update_ts", "last_user_sync_ts", "status_msg", "currently_active", - )) + ), position=upto_token) @defer.inlineCallbacks def typing(self, writer, current_token, request_streams): @@ -282,7 +309,7 @@ class ReplicationResource(Resource): request_typing = request_streams.get("typing") - if request_typing is not None: + if request_typing is not None and request_typing != current_position: # If they have a higher token than current max, we can assume that # they had been talking to a previous instance of the master. Since # we reset the token on restart, the best (but hacky) thing we can @@ -293,9 +320,10 @@ class ReplicationResource(Resource): typing_rows = yield self.typing_handler.get_all_typing_updates( request_typing, current_position ) + upto_token = _position_from_rows(typing_rows, current_position) writer.write_header_and_rows("typing", typing_rows, ( "position", "room_id", "typing" - )) + ), position=upto_token) @defer.inlineCallbacks def receipts(self, writer, current_token, limit, request_streams): @@ -303,13 +331,14 @@ class ReplicationResource(Resource): request_receipts = request_streams.get("receipts") - if request_receipts is not None: + if request_receipts is not None and request_receipts != current_position: receipts_rows = yield self.store.get_all_updated_receipts( request_receipts, current_position, limit ) + upto_token = _position_from_rows(receipts_rows, current_position) writer.write_header_and_rows("receipts", receipts_rows, ( "position", "room_id", "receipt_type", "user_id", "event_id", "data" - )) + ), position=upto_token) @defer.inlineCallbacks def account_data(self, writer, current_token, limit, request_streams): @@ -324,23 +353,36 @@ class ReplicationResource(Resource): user_account_data = current_position if room_account_data is None: room_account_data = current_position + + no_new_tokens = ( + user_account_data == current_position + and room_account_data == current_position + ) + if no_new_tokens: + return + user_rows, room_rows = yield self.store.get_all_updated_account_data( user_account_data, room_account_data, current_position, limit ) + + upto_users_token = _position_from_rows(user_rows, current_position) + upto_rooms_token = _position_from_rows(room_rows, current_position) + writer.write_header_and_rows("user_account_data", user_rows, ( "position", "user_id", "type", "content" - )) + ), position=upto_users_token) writer.write_header_and_rows("room_account_data", room_rows, ( "position", "user_id", "room_id", "type", "content" - )) + ), position=upto_rooms_token) if tag_account_data is not None: tag_rows = yield self.store.get_all_updated_tags( tag_account_data, current_position, limit ) + upto_tag_token = _position_from_rows(tag_rows, current_position) writer.write_header_and_rows("tag_account_data", tag_rows, ( "position", "user_id", "room_id", "tags" - )) + ), position=upto_tag_token) @defer.inlineCallbacks def push_rules(self, writer, current_token, limit, request_streams): @@ -348,14 +390,15 @@ class ReplicationResource(Resource): push_rules = request_streams.get("push_rules") - if push_rules is not None: + if push_rules is not None and push_rules != current_position: rows = yield self.store.get_all_push_rule_updates( push_rules, current_position, limit ) + upto_token = _position_from_rows(rows, current_position) writer.write_header_and_rows("push_rules", rows, ( "position", "event_stream_ordering", "user_id", "rule_id", "op", "priority_class", "priority", "conditions", "actions" - )) + ), position=upto_token) @defer.inlineCallbacks def pushers(self, writer, current_token, limit, request_streams): @@ -363,18 +406,19 @@ class ReplicationResource(Resource): pushers = request_streams.get("pushers") - if pushers is not None: + if pushers is not None and pushers != current_position: updated, deleted = yield self.store.get_all_updated_pushers( pushers, current_position, limit ) + upto_token = _position_from_rows(updated, current_position) writer.write_header_and_rows("pushers", updated, ( "position", "user_id", "access_token", "profile_tag", "kind", "app_id", "app_display_name", "device_display_name", "pushkey", "ts", "lang", "data" - )) + ), position=upto_token) writer.write_header_and_rows("deleted_pushers", deleted, ( "position", "user_id", "app_id", "pushkey" - )) + ), position=upto_token) @defer.inlineCallbacks def caches(self, writer, current_token, limit, request_streams): @@ -382,13 +426,14 @@ class ReplicationResource(Resource): caches = request_streams.get("caches") - if caches is not None: + if caches is not None and caches != current_position: updated_caches = yield self.store.get_all_updated_caches( caches, current_position, limit ) + upto_token = _position_from_rows(updated_caches, current_position) writer.write_header_and_rows("caches", updated_caches, ( "position", "cache_func", "keys", "invalidation_ts" - )) + ), position=upto_token) @defer.inlineCallbacks def to_device(self, writer, current_token, limit, request_streams): @@ -396,13 +441,14 @@ class ReplicationResource(Resource): to_device = request_streams.get("to_device") - if to_device is not None: + if to_device is not None and to_device != current_position: to_device_rows = yield self.store.get_all_new_device_messages( to_device, current_position, limit ) + upto_token = _position_from_rows(to_device_rows, current_position) writer.write_header_and_rows("to_device", to_device_rows, ( "position", "user_id", "device_id", "message_json" - )) + ), position=upto_token) @defer.inlineCallbacks def public_rooms(self, writer, current_token, limit, request_streams): @@ -410,13 +456,14 @@ class ReplicationResource(Resource): public_rooms = request_streams.get("public_rooms") - if public_rooms is not None: + if public_rooms is not None and public_rooms != current_position: public_rooms_rows = yield self.store.get_all_new_public_rooms( public_rooms, current_position, limit ) + upto_token = _position_from_rows(public_rooms_rows, current_position) writer.write_header_and_rows("public_rooms", public_rooms_rows, ( "position", "room_id", "visibility" - )) + ), position=upto_token) class _Writer(object): @@ -426,11 +473,11 @@ class _Writer(object): self.total = 0 def write_header_and_rows(self, name, rows, fields, position=None): - if not rows: - return - if position is None: - position = rows[-1][0] + if rows: + position = rows[-1][0] + else: + return self.streams[name] = { "position": position if type(position) is int else str(position), @@ -440,6 +487,9 @@ class _Writer(object): self.total += len(rows) + def __nonzero__(self): + return bool(self.total) + def finish(self): return self.streams @@ -461,3 +511,20 @@ class _ReplicationToken(collections.namedtuple("_ReplicationToken", ( def __str__(self): return "_".join(str(value) for value in self) + + +def _position_from_rows(rows, current_position): + """Calculates a position to return for a stream. Ideally we want to return the + position of the last row, as that will be the most correct. However, if there + are no rows we fall back to using the current position to stop us from + repeatedly hitting the storage layer unncessarily thinking there are updates. + (Not all advances of the token correspond to an actual update) + + We can't just always return the current position, as we often limit the + number of rows we replicate, and so the stream may lag. The assumption is + that if the storage layer returns no new rows then we are not lagging and + we are at the `current_position`. + """ + if rows: + return rows[-1][0] + return current_position diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 2ef13d7403..11813b44f6 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -320,6 +320,9 @@ class RoomStore(SQLBaseStore): txn.execute(sql, (prev_id, current_id, limit,)) return txn.fetchall() + if prev_id == current_id: + return defer.succeed([]) + return self.runInteraction( "get_all_new_public_rooms", get_all_new_public_rooms ) diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 1f13cd0bc0..b82868054d 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -42,7 +42,8 @@ class BaseSlavedStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def replicate(self): streams = self.slaved_store.stream_positions() - result = yield self.replication.replicate(streams, 100) + writer = yield self.replication.replicate(streams, 100) + result = writer.finish() yield self.slaved_store.process_replication(result) @defer.inlineCallbacks diff --git a/tests/replication/test_resource.py b/tests/replication/test_resource.py index b69832cc1b..f406934a62 100644 --- a/tests/replication/test_resource.py +++ b/tests/replication/test_resource.py @@ -120,7 +120,7 @@ class ReplicationResourceCase(unittest.TestCase): self.hs.clock.advance_time_msec(1) code, body = yield get self.assertEquals(code, 200) - self.assertEquals(body, {}) + self.assertEquals(body.get("rows", []), []) test_timeout.__name__ = "test_timeout_%s" % (stream) return test_timeout @@ -195,7 +195,6 @@ class ReplicationResourceCase(unittest.TestCase): self.assertIn("field_names", stream) field_names = stream["field_names"] self.assertIn("rows", stream) - self.assertTrue(stream["rows"]) for row in stream["rows"]: self.assertEquals( len(row), len(field_names), -- cgit 1.5.1 From c57f871184cf669374c07faa6a95eceb8a9321b2 Mon Sep 17 00:00:00 2001 From: Pedro Larroy Date: Sat, 8 Oct 2016 23:55:20 +0200 Subject: Use sys.executable instead of hardcoded python. fixes #1161 --- synapse/app/synctl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/app') diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index bb41962d47..c045588866 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -24,7 +24,7 @@ import subprocess import sys import yaml -SYNAPSE = ["python", "-B", "-m", "synapse.app.homeserver"] +SYNAPSE = [sys.executable, "-B", "-m", "synapse.app.homeserver"] GREEN = "\x1b[1;32m" RED = "\x1b[1;31m" -- cgit 1.5.1