summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/4927.feature1
-rw-r--r--synapse/replication/tcp/protocol.py27
-rw-r--r--synapse/replication/tcp/streams.py11
-rw-r--r--synapse/storage/receipts.py4
-rw-r--r--tests/replication/tcp/__init__.py14
-rw-r--r--tests/replication/tcp/streams/__init__.py14
-rw-r--r--tests/replication/tcp/streams/_base.py74
-rw-r--r--tests/replication/tcp/streams/test_receipts.py46
8 files changed, 179 insertions, 12 deletions
diff --git a/changelog.d/4927.feature b/changelog.d/4927.feature
new file mode 100644
index 0000000000..8d74262250
--- /dev/null
+++ b/changelog.d/4927.feature
@@ -0,0 +1 @@
+Batch up outgoing read-receipts to reduce federation traffic.
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 55630ba9a7..e16fad5261 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -223,14 +223,25 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
             return
 
         # Now lets try and call on_<CMD_NAME> function
-        try:
-            run_as_background_process(
-                "replication-" + cmd.get_logcontext_id(),
-                getattr(self, "on_%s" % (cmd_name,)),
-                cmd,
-            )
-        except Exception:
-            logger.exception("[%s] Failed to handle line: %r", self.id(), line)
+        run_as_background_process(
+            "replication-" + cmd.get_logcontext_id(),
+            self.handle_command,
+            cmd,
+        )
+
+    def handle_command(self, cmd):
+        """Handle a command we have received over the replication stream.
+
+        By default delegates to on_<COMMAND>
+
+        Args:
+            cmd (synapse.replication.tcp.commands.Command): received command
+
+        Returns:
+            Deferred
+        """
+        handler = getattr(self, "on_%s" % (cmd.NAME,))
+        return handler(cmd)
 
     def close(self):
         logger.warn("[%s] Closing connection", self.id())
diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
index c1e626be3f..e23084baae 100644
--- a/synapse/replication/tcp/streams.py
+++ b/synapse/replication/tcp/streams.py
@@ -23,7 +23,7 @@ Each stream is defined by the following information:
     current_token:      The function that returns the current token for the stream
     update_function:    The function that returns a list of updates between two tokens
 """
-
+import itertools
 import logging
 from collections import namedtuple
 
@@ -195,8 +195,8 @@ class Stream(object):
                 limit=MAX_EVENTS_BEHIND + 1,
             )
 
-            if len(rows) >= MAX_EVENTS_BEHIND:
-                raise Exception("stream %s has fallen behind" % (self.NAME))
+            # never turn more than MAX_EVENTS_BEHIND + 1 into updates.
+            rows = itertools.islice(rows, MAX_EVENTS_BEHIND + 1)
         else:
             rows = yield self.update_function(
                 from_token, current_token,
@@ -204,6 +204,11 @@ class Stream(object):
 
         updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows]
 
+        # check we didn't get more rows than the limit.
+        # doing it like this allows the update_function to be a generator.
+        if self._LIMITED and len(updates) >= MAX_EVENTS_BEHIND:
+            raise Exception("stream %s has fallen behind" % (self.NAME))
+
         defer.returnValue((updates, current_token))
 
     def current_token(self):
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py
index 0fd1ccc40a..89a1f7e3d7 100644
--- a/synapse/storage/receipts.py
+++ b/synapse/storage/receipts.py
@@ -301,7 +301,9 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 args.append(limit)
             txn.execute(sql, args)
 
-            return txn.fetchall()
+            return (
+                r[0:5] + (json.loads(r[5]), ) for r in txn
+            )
         return self.runInteraction(
             "get_all_updated_receipts", get_all_updated_receipts_txn
         )
diff --git a/tests/replication/tcp/__init__.py b/tests/replication/tcp/__init__.py
new file mode 100644
index 0000000000..1453d04571
--- /dev/null
+++ b/tests/replication/tcp/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/replication/tcp/streams/__init__.py b/tests/replication/tcp/streams/__init__.py
new file mode 100644
index 0000000000..1453d04571
--- /dev/null
+++ b/tests/replication/tcp/streams/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/replication/tcp/streams/_base.py b/tests/replication/tcp/streams/_base.py
new file mode 100644
index 0000000000..38b368a972
--- /dev/null
+++ b/tests/replication/tcp/streams/_base.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.replication.tcp.commands import ReplicateCommand
+from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
+from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
+
+from tests import unittest
+from tests.server import FakeTransport
+
+
+class BaseStreamTestCase(unittest.HomeserverTestCase):
+    """Base class for tests of the replication streams"""
+    def prepare(self, reactor, clock, hs):
+        # build a replication server
+        server_factory = ReplicationStreamProtocolFactory(self.hs)
+        self.streamer = server_factory.streamer
+        server = server_factory.buildProtocol(None)
+
+        # build a replication client, with a dummy handler
+        self.test_handler = TestReplicationClientHandler()
+        self.client = ClientReplicationStreamProtocol(
+            "client", "test", clock, self.test_handler
+        )
+
+        # wire them together
+        self.client.makeConnection(FakeTransport(server, reactor))
+        server.makeConnection(FakeTransport(self.client, reactor))
+
+    def replicate(self):
+        """Tell the master side of replication that something has happened, and then
+        wait for the replication to occur.
+        """
+        self.streamer.on_notifier_poke()
+        self.pump(0.1)
+
+    def replicate_stream(self, stream, token="NOW"):
+        """Make the client end a REPLICATE command to set up a subscription to a stream"""
+        self.client.send_command(ReplicateCommand(stream, token))
+
+
+class TestReplicationClientHandler(object):
+    """Drop-in for ReplicationClientHandler which just collects RDATA rows"""
+    def __init__(self):
+        self.received_rdata_rows = []
+
+    def get_streams_to_replicate(self):
+        return {}
+
+    def get_currently_syncing_users(self):
+        return []
+
+    def update_connection(self, connection):
+        pass
+
+    def finished_connecting(self):
+        pass
+
+    def on_rdata(self, stream_name, token, rows):
+        for r in rows:
+            self.received_rdata_rows.append(
+                (stream_name, token, r)
+            )
diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py
new file mode 100644
index 0000000000..9aa9dfe82e
--- /dev/null
+++ b/tests/replication/tcp/streams/test_receipts.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.replication.tcp.streams import ReceiptsStreamRow
+
+from tests.replication.tcp.streams._base import BaseStreamTestCase
+
+USER_ID = "@feeling:blue"
+ROOM_ID = "!room:blue"
+EVENT_ID = "$event:blue"
+
+
+class ReceiptsStreamTestCase(BaseStreamTestCase):
+    def test_receipt(self):
+        # make the client subscribe to the receipts stream
+        self.replicate_stream("receipts", "NOW")
+
+        # tell the master to send a new receipt
+        self.get_success(
+            self.hs.get_datastore().insert_receipt(
+                ROOM_ID, "m.read", USER_ID, [EVENT_ID], {"a": 1}
+            )
+        )
+        self.replicate()
+
+        # there should be one RDATA command
+        rdata_rows = self.test_handler.received_rdata_rows
+        self.assertEqual(1, len(rdata_rows))
+        self.assertEqual(rdata_rows[0][0], "receipts")
+        row = rdata_rows[0][2]  # type: ReceiptsStreamRow
+        self.assertEqual(ROOM_ID, row.room_id)
+        self.assertEqual("m.read", row.receipt_type)
+        self.assertEqual(USER_ID, row.user_id)
+        self.assertEqual(EVENT_ID, row.event_id)
+        self.assertEqual({"a": 1}, row.data)