From 1b238e88371516bfedb62d010e156820ab164b94 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 16 Nov 2023 14:25:35 +0000
Subject: Speed up persisting large number of outliers (#16649)

Recalculating the roots tuple every iteration could be very expensive, so instead let's do a topological sort.
---
 tests/util/test_itertools.py | 76 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

(limited to 'tests/util')

diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py
index 406c16cdcf..fabb05c7e4 100644
--- a/tests/util/test_itertools.py
+++ b/tests/util/test_itertools.py
@@ -13,7 +13,11 @@
 # limitations under the License.
 from typing import Dict, Iterable, List, Sequence
 
-from synapse.util.iterutils import chunk_seq, sorted_topologically
+from synapse.util.iterutils import (
+    chunk_seq,
+    sorted_topologically,
+    sorted_topologically_batched,
+)
 
 from tests.unittest import TestCase
 
@@ -107,3 +111,73 @@ class SortTopologically(TestCase):
         graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3, 2, 1]}
 
         self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4])
+
+
+class SortTopologicallyBatched(TestCase):
+    "Test cases for `sorted_topologically_batched`"
+
+    def test_empty(self) -> None:
+        "Test that an empty node list with an empty graph yields no batches"
+
+        graph: Dict[int, List[int]] = {}
+        self.assertEqual(list(sorted_topologically_batched([], graph)), [])
+
+    def test_handle_empty_graph(self) -> None:
+        "Test that a node with no entry in the graph is treated as having no edges"
+
+        graph: Dict[int, List[int]] = {}
+
+        # Neither node appears in the graph, so both are expected in one batch.
+        self.assertEqual(list(sorted_topologically_batched([1, 2], graph)), [[1, 2]])
+
+    def test_disconnected(self) -> None:
+        "Test that a graph with no edges works"
+
+        graph: Dict[int, List[int]] = {1: [], 2: []}
+
+        # With no edges between them, both nodes are expected in a single batch.
+        self.assertEqual(list(sorted_topologically_batched([1, 2], graph)), [[1, 2]])
+
+    def test_linear(self) -> None:
+        "Test that a simple `4 -> 3 -> 2 -> 1` graph works"
+
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3]}
+
+        self.assertEqual(
+            list(sorted_topologically_batched([4, 3, 2, 1], graph)),
+            [[1], [2], [3], [4]],
+        )
+
+    def test_subset(self) -> None:
+        "Test that sorting only a subset of the graph's nodes works"
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3]}
+
+        self.assertEqual(list(sorted_topologically_batched([4, 3], graph)), [[3], [4]])
+
+    def test_fork(self) -> None:
+        "Test that a forked graph works"
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [1], 4: [2, 3]}
+
+        # 2 and 3 both only point at 1, so the expected output puts them
+        # together in a single batch between `[1]` and `[4]`.
+        self.assertEqual(
+            list(sorted_topologically_batched([4, 3, 2, 1], graph)), [[1], [2, 3], [4]]
+        )
+
+    def test_duplicates(self) -> None:
+        "Test that a graph with duplicate edges works"
+        graph: Dict[int, List[int]] = {1: [], 2: [1, 1], 3: [2, 2], 4: [3]}
+
+        self.assertEqual(
+            list(sorted_topologically_batched([4, 3, 2, 1], graph)),
+            [[1], [2], [3], [4]],
+        )
+
+    def test_multiple_paths(self) -> None:
+        "Test that a graph with multiple paths between two nodes works"
+        graph: Dict[int, List[int]] = {1: [], 2: [1], 3: [2], 4: [3, 2, 1]}
+
+        self.assertEqual(
+            list(sorted_topologically_batched([4, 3, 2, 1], graph)),
+            [[1], [2], [3], [4]],
+        )
-- 
cgit 1.5.1