diff --git a/changelog.d/4954.misc b/changelog.d/4954.misc
new file mode 100644
index 0000000000..91f145950d
--- /dev/null
+++ b/changelog.d/4954.misc
@@ -0,0 +1 @@
+Refactor replication row generation/parsing.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 150975608f..206dc3b397 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -105,13 +105,14 @@ class ReplicationClientHandler(object):
def on_rdata(self, stream_name, token, rows):
"""Called to handle a batch of replication data with a given stream token.
- By default this just pokes the slave store. Can be overriden in subclasses to
+ By default this just pokes the slave store. Can be overridden in subclasses to
handle more.
Args:
stream_name (str): name of the replication stream for this batch of rows
token (int): stream token for this batch of rows
- rows (list): a list of Stream.ROW_TYPE objects.
+ rows (list): a list of Stream.ROW_TYPE objects as returned by
+ Stream.parse_row.
Returns:
Deferred|None
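The docstring above describes the contract for handler subclasses. A minimal sketch of a handler that overrides on_rdata to inspect the parsed rows before delegating to the default behaviour might look like the following; the subclass name and the logging are illustrative and not part of this change:

```python
import logging

from synapse.replication.tcp.client import ReplicationClientHandler

logger = logging.getLogger(__name__)


class LoggingReplicationHandler(ReplicationClientHandler):
    """Hypothetical handler subclass, for illustration only."""

    def on_rdata(self, stream_name, token, rows):
        # `rows` are already ROW_TYPE instances produced by Stream.parse_row,
        # so they can be inspected directly before delegating to the default
        # behaviour (which pokes the slave store).
        for row in rows:
            logger.info("replication %s@%s: %r", stream_name, token, row)
        return super(LoggingReplicationHandler, self).on_rdata(
            stream_name, token, rows,
        )
```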
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 02e5bf6cc8..9daec2c995 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -605,7 +605,7 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol):
inbound_rdata_count.labels(stream_name).inc()
try:
- row = STREAMS_MAP[stream_name].ROW_TYPE(*cmd.row)
+ row = STREAMS_MAP[stream_name].parse_row(cmd.row)
except Exception:
logger.exception(
"[%s] Failed to parse RDATA: %r %r",
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index 18df89deed..13ab1bee05 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -112,9 +112,24 @@ class Stream(object):
time it was called up until the point `advance_current_token` was called.
"""
NAME = None # The name of the stream
- ROW_TYPE = None # The type of the row
+ ROW_TYPE = None # The type of the row. Used by the default impl of parse_row.
_LIMITED = True # Whether the update function takes a limit
+ @classmethod
+ def parse_row(cls, row):
+ """Parse a row received over replication
+
+ By default, assumes that the row data is an array object and passes its contents
+ to the constructor of the ROW_TYPE for this stream.
+
+ Args:
+ row: row data from the incoming RDATA command, after json decoding
+
+ Returns:
+ ROW_TYPE object for this stream
+ """
+ return cls.ROW_TYPE(*row)
+
def __init__(self, hs):
# The token from which we last asked for updates
self.last_token = self.current_token()
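Making parse_row a classmethod is what allows individual streams to replace the default positional parsing. A hedged sketch of such an override, using an invented stream whose rows arrive as JSON objects rather than arrays (none of these names exist in the codebase):

```python
from collections import namedtuple

from synapse.replication.tcp.streams._base import Stream

ExampleStreamRow = namedtuple("ExampleStreamRow", ("entity", "payload"))


class ExampleStream(Stream):
    """Hypothetical stream, for illustration only."""

    NAME = "example"
    ROW_TYPE = ExampleStreamRow

    @classmethod
    def parse_row(cls, row):
        # This stream's rows are json objects, so the default ROW_TYPE(*row)
        # splat would not apply; pick the fields out by key instead.
        return cls.ROW_TYPE(entity=row["entity"], payload=row["payload"])
```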
@@ -186,7 +201,7 @@ class Stream(object):
from_token, current_token,
)
- updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows]
+ updates = [(row[0], row[1:]) for row in rows]
# check we didn't get more rows than the limit.
# doing it like this allows the update_function to be a generator.
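For reference, the effect of the last hunk is that get_updates_since now pairs each stream token with the raw remainder of the row instead of eagerly building ROW_TYPE objects; parsing happens on the receiving side via parse_row. A small worked illustration with invented values:

```python
# rows as yielded by an update_function: (stream_id, *row_fields) tuples.
# The values here are invented purely to show the shape of the data.
rows = [
    (101, "@alice:example.com", "DEV1"),
    (102, "@bob:example.com", "DEV2"),
]

# After this change the stream keeps the row fields raw...
updates = [(row[0], row[1:]) for row in rows]
# -> [(101, ("@alice:example.com", "DEV1")), (102, ("@bob:example.com", "DEV2"))]

# ...and the client reconstructs typed rows with Stream.parse_row when the
# RDATA arrives (by default, by splatting the fields into ROW_TYPE).
```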