From 6870fc496ff3da5075fec74e40515c03c929915f Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Feb 2019 10:22:52 +0000
Subject: Move connecting logic into ClientReplicationStreamProtocol

---
 synapse/replication/tcp/protocol.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'synapse/replication/tcp/protocol.py')

diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf5..6123c995b9 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -511,6 +511,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         self.server_name = server_name
         self.handler = handler
 
+        # Set of stream names that have been subscribe to, but haven't yet
+        # caught up with. This is used to track when the client has been fully
+        # connected to the remote.
+        self.streams_connecting = set()
+
         # Map of stream to batched updates. See RdataCommand for info on how
         # batching works.
         self.pending_batches = {}
@@ -533,6 +538,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         # We've now finished connecting to so inform the client handler
         self.handler.update_connection(self)
 
+        # This will happen if we don't actually subscribe to any streams
+        if not self.streams_connecting:
+            self.handler.finished_connecting()
+
     def on_SERVER(self, cmd):
         if cmd.data != self.server_name:
             logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data)
@@ -562,6 +571,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
         return self.handler.on_rdata(stream_name, cmd.token, rows)
 
     def on_POSITION(self, cmd):
+        # When we get a `POSITION` command it means we've finished getting
+        # missing updates for the given stream, and are now up to date.
+        self.streams_connecting.discard(cmd.stream_name)
+        if not self.streams_connecting:
+            self.handler.finished_connecting()
+
         return self.handler.on_position(cmd.stream_name, cmd.token)
 
     def on_SYNC(self, cmd):
@@ -578,6 +593,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
             self.id(), stream_name, token
         )
 
+        self.streams_connecting.add(stream_name)
+
         self.send_command(ReplicateCommand(stream_name, token))
 
     def on_connection_closed(self):
--
cgit 1.5.1

From 6bb1c028f190d8ba561a6deaa474c060efb7c502 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 27 Feb 2019 10:28:37 +0000
Subject: Limit cache invalidation replication line length (#4748)

---
 changelog.d/4748.misc               |  1 +
 synapse/replication/tcp/protocol.py | 17 ++++++++++++++++-
 synapse/storage/_base.py            | 15 +++++++++++----
 3 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/4748.misc

(limited to 'synapse/replication/tcp/protocol.py')

diff --git a/changelog.d/4748.misc b/changelog.d/4748.misc
new file mode 100644
index 0000000000..4dc18378e7
--- /dev/null
+++ b/changelog.d/4748.misc
@@ -0,0 +1 @@
+Improve replication performance by reducing cache invalidation traffic.
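For readers skimming the first patch above, the following is a minimal, self-contained sketch of the bookkeeping that `streams_connecting` introduces: a stream name is recorded when a subscription is requested, discarded when the corresponding `POSITION` command arrives, and `finished_connecting()` fires once the set is empty (or immediately after the handshake if nothing was subscribed). Only `streams_connecting` and `finished_connecting` mirror the real names; the class and the handler interface are illustrative stand-ins, not Synapse's actual API.

```python
# Hypothetical, simplified model of the connection-tracking state added by
# the first patch. Everything except streams_connecting / finished_connecting
# is made up for illustration.
class ConnectingTracker:
    def __init__(self, handler):
        self.handler = handler
        self.streams_connecting = set()

    def subscribe_to_stream(self, stream_name):
        # Remember that this stream still has to catch up before the
        # connection counts as fully established.
        self.streams_connecting.add(stream_name)

    def handshake_finished(self):
        # If we never subscribed to anything there is nothing to wait for.
        if not self.streams_connecting:
            self.handler.finished_connecting()

    def on_position(self, stream_name):
        # A POSITION command means the named stream is now up to date.
        self.streams_connecting.discard(stream_name)
        if not self.streams_connecting:
            self.handler.finished_connecting()


class _PrintHandler:
    def finished_connecting(self):
        print("fully connected")


tracker = ConnectingTracker(_PrintHandler())
tracker.subscribe_to_stream("events")
tracker.handshake_finished()   # nothing printed yet, still catching up
tracker.on_position("events")  # prints "fully connected"
```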
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf5..429471c345 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -268,7 +268,17 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         if "\n" in string:
             raise Exception("Unexpected newline in command: %r", string)
 
-        self.sendLine(string.encode("utf-8"))
+        encoded_string = string.encode("utf-8")
+
+        if len(encoded_string) > self.MAX_LENGTH:
+            raise Exception(
+                "Failed to send command %s as too long (%d > %d)" % (
+                    cmd.NAME,
+                    len(encoded_string), self.MAX_LENGTH,
+                )
+            )
+
+        self.sendLine(encoded_string)
 
         self.last_sent_command = self.clock.time_msec()
 
@@ -361,6 +371,11 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
     def id(self):
         return "%s-%s" % (self.name, self.conn_id)
 
+    def lineLengthExceeded(self, line):
+        """Called when we receive a line that is above the maximum line length
+        """
+        self.send_error("Line length exceeded")
+
 
 class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
     VALID_INBOUND_COMMANDS = VALID_CLIENT_COMMANDS
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 190be34fb1..a0333d5309 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -30,6 +30,7 @@ from synapse.api.errors import StoreError
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import get_domain_from_id
+from synapse.util import batch_iter
 from synapse.util.caches.descriptors import Cache
 from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
 from synapse.util.stringutils import exception_to_unicode
@@ -1327,10 +1328,16 @@ class SQLBaseStore(object):
         """
         txn.call_after(self._invalidate_state_caches, room_id, members_changed)
 
-        keys = itertools.chain([room_id], members_changed)
-        self._send_invalidation_to_replication(
-            txn, _CURRENT_STATE_CACHE_NAME, keys,
-        )
+        # We need to be careful that the size of the `members_changed` list
+        # isn't so large that it causes problems sending over replication, so we
+        # send them in chunks.
+        # Max line length is 16K, and max user ID length is 255, so 50 should
+        # be safe.
+        for chunk in batch_iter(members_changed, 50):
+            keys = itertools.chain([room_id], chunk)
+            self._send_invalidation_to_replication(
+                txn, _CURRENT_STATE_CACHE_NAME, keys,
+            )
 
     def _invalidate_state_caches(self, room_id, members_changed):
         """Invalidates caches that are based on the current state, but does
--
cgit 1.5.1
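Both halves of the second patch revolve around Twisted's `LineOnlyReceiver.MAX_LENGTH`, which defaults to 16384 bytes and is where the "16K" figure in the storage-layer comment comes from. A rough sketch of the receive-side hook being overridden, with an illustrative protocol name; Synapse's real override calls its own `send_error()` so the peer is told why it is being disconnected, rather than just closing silently:

```python
# Minimal illustration of the Twisted hook the second patch overrides.
# LineOnlyReceiver.MAX_LENGTH defaults to 16384 bytes; the class name and
# print statements here are made up for demonstration.
from twisted.protocols.basic import LineOnlyReceiver


class ExampleLineProtocol(LineOnlyReceiver):
    delimiter = b"\n"

    def lineReceived(self, line):
        print("received a %d byte line" % len(line))

    def lineLengthExceeded(self, line):
        # Called by Twisted when an incoming line is longer than MAX_LENGTH.
        # The default implementation simply drops the connection.
        print("peer sent an over-long line; closing the connection")
        self.transport.loseConnection()
```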
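On the sending side, the storage change keeps each cache-invalidation command under that limit by splitting `members_changed` into chunks of 50: with user IDs capped at 255 bytes, a chunk is at most roughly 50 × 255 ≈ 12.8 KB, safely below 16 KB even with the room ID and command overhead. A sketch of how a `batch_iter`-style helper can be written and used follows; this is an illustrative reimplementation, not necessarily identical to `synapse.util.batch_iter`:

```python
from itertools import islice


def batch_iter(iterable, size):
    """Yield successive tuples of at most `size` items from `iterable`.

    Illustrative only; the real synapse.util.batch_iter may differ in detail.
    """
    iterator = iter(iterable)
    while True:
        batch = tuple(islice(iterator, size))
        if not batch:
            return
        yield batch


# Example: 120 changed members become three invalidation commands of
# 50, 50 and 20 keys instead of a single line that could exceed MAX_LENGTH.
members_changed = ["@user%d:example.com" % i for i in range(120)]
for chunk in batch_iter(members_changed, 50):
    print(len(chunk))  # 50, 50, 20
```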