From 6870fc496ff3da5075fec74e40515c03c929915f Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Feb 2019 10:22:52 +0000
Subject: Move connecting logic into ClientReplicationStreamProtocol

---
 synapse/replication/tcp/protocol.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'synapse/replication/tcp/protocol.py')

diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf5..6123c995b9 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -511,6 +511,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         self.server_name = server_name
         self.handler = handler
 
+        # Set of stream names that have been subscribe to, but haven't yet
+        # caught up with. This is used to track when the client has been fully
+        # connected to the remote.
+        self.streams_connecting = set()
+
         # Map of stream to batched updates. See RdataCommand for info on how
         # batching works.
         self.pending_batches = {}
@@ -533,6 +538,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         # We've now finished connecting to so inform the client handler
         self.handler.update_connection(self)
 
+        # This will happen if we don't actually subscribe to any streams
+        if not self.streams_connecting:
+            self.handler.finished_connecting()
+
     def on_SERVER(self, cmd):
         if cmd.data != self.server_name:
             logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data)
@@ -562,6 +571,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         return self.handler.on_rdata(stream_name, cmd.token, rows)
 
     def on_POSITION(self, cmd):
+        # When we get a `POSITION` command it means we've finished getting
+        # missing updates for the given stream, and are now up to date.
+        self.streams_connecting.discard(cmd.stream_name)
+        if not self.streams_connecting:
+            self.handler.finished_connecting()
+
         return self.handler.on_position(cmd.stream_name, cmd.token)
 
     def on_SYNC(self, cmd):
@@ -578,6 +593,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
             self.id(), stream_name, token
         )
 
+        self.streams_connecting.add(stream_name)
+
         self.send_command(ReplicateCommand(stream_name, token))
 
     def on_connection_closed(self):
--
cgit 1.5.1

From 6bb1c028f190d8ba561a6deaa474c060efb7c502 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Feb 2019 10:28:37 +0000
Subject: Limit cache invalidation replication line length (#4748)

---
 changelog.d/4748.misc               |  1 +
 synapse/replication/tcp/protocol.py | 17 ++++++++++++++++-
 synapse/storage/_base.py            | 15 +++++++++++----
 3 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/4748.misc

(limited to 'synapse/replication/tcp/protocol.py')

diff --git a/changelog.d/4748.misc b/changelog.d/4748.misc
new file mode 100644
index 0000000000..4dc18378e7
--- /dev/null
+++ b/changelog.d/4748.misc
@@ -0,0 +1 @@
+Improve replication performance by reducing cache invalidation traffic.
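For readers skimming the first patch above, the following is a minimal, self-contained sketch of the bookkeeping that `streams_connecting` introduces: a stream name is recorded when a subscription is requested, discarded when the corresponding `POSITION` command arrives, and `finished_connecting()` fires once the set is empty (or immediately after the handshake if nothing was subscribed). Only `streams_connecting` and `finished_connecting` mirror the real names; the class and the handler interface are illustrative stand-ins, not Synapse's actual API.

```python
# Hypothetical, simplified model of the connection-tracking state added by
# the first patch. Everything except streams_connecting / finished_connecting
# is made up for illustration.
class ConnectingTracker:
    def __init__(self, handler):
        self.handler = handler
        self.streams_connecting = set()

    def subscribe_to_stream(self, stream_name):
        # Remember that this stream still has to catch up before the
        # connection counts as fully established.
        self.streams_connecting.add(stream_name)

    def handshake_finished(self):
        # If we never subscribed to anything there is nothing to wait for.
        if not self.streams_connecting:
            self.handler.finished_connecting()

    def on_position(self, stream_name):
        # A POSITION command means the named stream is now up to date.
        self.streams_connecting.discard(stream_name)
        if not self.streams_connecting:
            self.handler.finished_connecting()


class _PrintHandler:
    def finished_connecting(self):
        print("fully connected")


tracker = ConnectingTracker(_PrintHandler())
tracker.subscribe_to_stream("events")
tracker.handshake_finished()   # nothing printed yet, still catching up
tracker.on_position("events")  # prints "fully connected"
```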
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf5..429471c345 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -268,7 +268,17 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         if "\n" in string:
             raise Exception("Unexpected newline in command: %r", string)
 
-        self.sendLine(string.encode("utf-8"))
+        encoded_string = string.encode("utf-8")
+
+        if len(encoded_string) > self.MAX_LENGTH:
+            raise Exception(
+                "Failed to send command %s as too long (%d > %d)" % (
+                    cmd.NAME,
+                    len(encoded_string), self.MAX_LENGTH,
+                )
+            )
+
+        self.sendLine(encoded_string)
 
         self.last_sent_command = self.clock.time_msec()
 
@@ -361,6 +371,11 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
     def id(self):
         return "%s-%s" % (self.name, self.conn_id)
 
+    def lineLengthExceeded(self, line):
+        """Called when we receive a line that is above the maximum line length
+        """
+        self.send_error("Line length exceeded")
+
 
 class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
     VALID_INBOUND_COMMANDS = VALID_CLIENT_COMMANDS
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 190be34fb1..a0333d5309 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -30,6 +30,7 @@ from synapse.api.errors import StoreError
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import get_domain_from_id
+from synapse.util import batch_iter
 from synapse.util.caches.descriptors import Cache
 from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
 from synapse.util.stringutils import exception_to_unicode
@@ -1327,10 +1328,16 @@ class SQLBaseStore(object):
         """
         txn.call_after(self._invalidate_state_caches, room_id, members_changed)
 
-        keys = itertools.chain([room_id], members_changed)
-        self._send_invalidation_to_replication(
-            txn, _CURRENT_STATE_CACHE_NAME, keys,
-        )
+        # We need to be careful that the size of the `members_changed` list
+        # isn't so large that it causes problems sending over replication, so we
+        # send them in chunks.
+        # Max line length is 16K, and max user ID length is 255, so 50 should
+        # be safe.
+        for chunk in batch_iter(members_changed, 50):
+            keys = itertools.chain([room_id], chunk)
+            self._send_invalidation_to_replication(
+                txn, _CURRENT_STATE_CACHE_NAME, keys,
+            )
 
     def _invalidate_state_caches(self, room_id, members_changed):
         """Invalidates caches that are based on the current state, but does
--
cgit 1.5.1
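Both halves of the second patch revolve around Twisted's `LineOnlyReceiver.MAX_LENGTH`, which defaults to 16384 bytes and is where the "16K" figure in the storage-layer comment comes from. A rough sketch of the receive-side hook being overridden, with an illustrative protocol name; Synapse's real override calls its own `send_error()` so the peer is told why it is being disconnected, rather than just closing silently:

```python
# Minimal illustration of the Twisted hook the second patch overrides.
# LineOnlyReceiver.MAX_LENGTH defaults to 16384 bytes; the class name and
# print statements here are made up for demonstration.
from twisted.protocols.basic import LineOnlyReceiver


class ExampleLineProtocol(LineOnlyReceiver):
    delimiter = b"\n"

    def lineReceived(self, line):
        print("received a %d byte line" % len(line))

    def lineLengthExceeded(self, line):
        # Called by Twisted when an incoming line is longer than MAX_LENGTH.
        # The default implementation simply drops the connection.
        print("peer sent an over-long line; closing the connection")
        self.transport.loseConnection()
```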
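On the sending side, the storage change keeps each cache-invalidation command under that limit by splitting `members_changed` into chunks of 50: with user IDs capped at 255 bytes, a chunk is at most roughly 50 × 255 ≈ 12.8 KB, safely below 16 KB even with the room ID and command overhead. A sketch of how a `batch_iter`-style helper can be written and used follows; this is an illustrative reimplementation, not necessarily identical to `synapse.util.batch_iter`:

```python
from itertools import islice


def batch_iter(iterable, size):
    """Yield successive tuples of at most `size` items from `iterable`.

    Illustrative only; the real synapse.util.batch_iter may differ in detail.
    """
    iterator = iter(iterable)
    while True:
        batch = tuple(islice(iterator, size))
        if not batch:
            return
        yield batch


# Example: 120 changed members become three invalidation commands of
# 50, 50 and 20 keys instead of a single line that could exceed MAX_LENGTH.
members_changed = ["@user%d:example.com" % i for i in range(120)]
for chunk in batch_iter(members_changed, 50):
    print(len(chunk))  # 50, 50, 20
```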