diff --git a/synapse/replication/slave/storage/_base.py b/synapse/replication/slave/storage/_base.py
index 1353a32d00..817d1f67f9 100644
--- a/synapse/replication/slave/storage/_base.py
+++ b/synapse/replication/slave/storage/_base.py
@@ -59,12 +59,7 @@ class BaseSlavedStore(SQLBaseStore):
members_changed = set(row.keys[1:])
self._invalidate_state_caches(room_id, members_changed)
else:
- try:
- getattr(self, row.cache_func).invalidate(tuple(row.keys))
- except AttributeError:
- # We probably haven't pulled in the cache in this worker,
- # which is fine.
- pass
+ self._attempt_to_invalidate_cache(row.cache_func, tuple(row.keys))
def _invalidate_cache_and_stream(self, txn, cache_func, keys):
txn.call_after(cache_func.invalidate, keys)
diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py
index 92447b00d4..9e530defe0 100644
--- a/synapse/replication/slave/storage/presence.py
+++ b/synapse/replication/slave/storage/presence.py
@@ -54,8 +54,11 @@ class SlavedPresenceStore(BaseSlavedStore):
def stream_positions(self):
result = super(SlavedPresenceStore, self).stream_positions()
- position = self._presence_id_gen.get_current_token()
- result["presence"] = position
+
+ if self.hs.config.use_presence:
+ position = self._presence_id_gen.get_current_token()
+ result["presence"] = position
+
return result
def process_replication_rows(self, stream_name, token, rows):
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 586dddb40b..e558f90e1a 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -39,7 +39,7 @@ class ReplicationClientFactory(ReconnectingClientFactory):
Accepts a handler that will be called when new data is available or data
is required.
"""
- maxDelay = 5 # Try at least once every N seconds
+ maxDelay = 30 # Try at least once every N seconds
def __init__(self, hs, client_name, handler):
self.client_name = client_name
@@ -54,7 +54,6 @@ class ReplicationClientFactory(ReconnectingClientFactory):
def buildProtocol(self, addr):
logger.info("Connected to replication: %r", addr)
- self.resetDelay()
return ClientReplicationStreamProtocol(
self.client_name, self.server_name, self._clock, self.handler
)
@@ -90,15 +89,18 @@ class ReplicationClientHandler(object):
# Used for tests.
self.awaiting_syncs = {}
+ # The factory used to create connections.
+ self.factory = None
+
def start_replication(self, hs):
"""Helper method to start a replication connection to the remote server
using TCP.
"""
client_name = hs.config.worker_name
- factory = ReplicationClientFactory(hs, client_name, self)
+ self.factory = ReplicationClientFactory(hs, client_name, self)
host = hs.config.worker_replication_host
port = hs.config.worker_replication_port
- hs.get_reactor().connectTCP(host, port, factory)
+ hs.get_reactor().connectTCP(host, port, self.factory)
def on_rdata(self, stream_name, token, rows):
"""Called when we get new replication data. By default this just pokes
@@ -140,6 +142,7 @@ class ReplicationClientHandler(object):
args["account_data"] = user_account_data
elif room_account_data:
args["account_data"] = room_account_data
+
return args
def get_currently_syncing_users(self):
@@ -204,3 +207,14 @@ class ReplicationClientHandler(object):
for cmd in self.pending_commands:
connection.send_command(cmd)
self.pending_commands = []
+
+ def finished_connecting(self):
+ """Called when we have successfully subscribed and caught up to all
+ streams we're interested in.
+ """
+ logger.info("Finished connecting to server")
+
+ # We don't reset the delay any earlier as otherwise if there is a
+ # problem during start up we'll end up tight looping connecting to the
+ # server.
+ self.factory.resetDelay()
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 327556f6a1..2098c32a77 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -127,8 +127,11 @@ class RdataCommand(Command):
class PositionCommand(Command):
- """Sent by the client to tell the client the stream postition without
+ """Sent by the server to tell the client the stream position without
needing to send an RDATA.
+
+ Sent to the client after all missing updates for a stream have been sent
+ to the client and they're now up to date.
"""
NAME = "POSITION"
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf5..49ae5b3355 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -268,7 +268,17 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
if "\n" in string:
raise Exception("Unexpected newline in command: %r", string)
- self.sendLine(string.encode("utf-8"))
+ encoded_string = string.encode("utf-8")
+
+ if len(encoded_string) > self.MAX_LENGTH:
+ raise Exception(
+ "Failed to send command %s as too long (%d > %d)" % (
+ cmd.NAME,
+ len(encoded_string), self.MAX_LENGTH,
+ )
+ )
+
+ self.sendLine(encoded_string)
self.last_sent_command = self.clock.time_msec()
@@ -361,6 +371,11 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
def id(self):
return "%s-%s" % (self.name, self.conn_id)
+ def lineLengthExceeded(self, line):
+ """Called when we receive a line that is above the maximum line length
+ """
+ self.send_error("Line length exceeded")
+
class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol):
VALID_INBOUND_COMMANDS = VALID_CLIENT_COMMANDS
@@ -511,6 +526,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
self.server_name = server_name
self.handler = handler
+ # Set of stream names that have been subscribed to, but haven't yet
+ # caught up with. This is used to track when the client has been fully
+ # connected to the remote.
+ self.streams_connecting = set()
+
# Map of stream to batched updates. See RdataCommand for info on how
# batching works.
self.pending_batches = {}
@@ -533,6 +553,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
# We've now finished connecting to so inform the client handler
self.handler.update_connection(self)
+ # This will happen if we don't actually subscribe to any streams
+ if not self.streams_connecting:
+ self.handler.finished_connecting()
+
def on_SERVER(self, cmd):
if cmd.data != self.server_name:
logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data)
@@ -562,6 +586,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
return self.handler.on_rdata(stream_name, cmd.token, rows)
def on_POSITION(self, cmd):
+ # When we get a `POSITION` command it means we've finished getting
+ # missing updates for the given stream, and are now up to date.
+ self.streams_connecting.discard(cmd.stream_name)
+ if not self.streams_connecting:
+ self.handler.finished_connecting()
+
return self.handler.on_position(cmd.stream_name, cmd.token)
def on_SYNC(self, cmd):
@@ -578,6 +608,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
self.id(), stream_name, token
)
+ self.streams_connecting.add(stream_name)
+
self.send_command(ReplicateCommand(stream_name, token))
def on_connection_closed(self):
|