From 1315a2e8be702a513d49c1142e9e52b642286635 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jan 2021 16:09:22 +0000 Subject: Use a chain cover index to efficiently calculate auth chain difference (#8868) --- synapse/util/iterutils.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'synapse/util/iterutils.py') diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index 06faeebe7f..f7b4857a84 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -13,8 +13,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import heapq from itertools import islice -from typing import Iterable, Iterator, Sequence, Tuple, TypeVar +from typing import ( + Dict, + Generator, + Iterable, + Iterator, + Mapping, + Sequence, + Set, + Tuple, + TypeVar, +) + +from synapse.types import Collection T = TypeVar("T") @@ -46,3 +59,41 @@ def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]: If the input is empty, no chunks are returned. """ return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen)) + + +def sorted_topologically( + nodes: Iterable[T], graph: Mapping[T, Collection[T]], +) -> Generator[T, None, None]: + """Given a set of nodes and a graph, yield the nodes in toplogical order. + + For example `sorted_topologically([1, 2], {1: [2]})` will yield `2, 1`. + """ + + # This is implemented by Kahn's algorithm. + + degree_map = {node: 0 for node in nodes} + reverse_graph = {} # type: Dict[T, Set[T]] + + for node, edges in graph.items(): + if node not in degree_map: + continue + + for edge in edges: + if edge in degree_map: + degree_map[node] += 1 + + reverse_graph.setdefault(edge, set()).add(node) + reverse_graph.setdefault(node, set()) + + zero_degree = [node for node, degree in degree_map.items() if degree == 0] + heapq.heapify(zero_degree) + + while zero_degree: + node = heapq.heappop(zero_degree) + yield node + + for edge in reverse_graph[node]: + if edge in degree_map: + degree_map[edge] -= 1 + if degree_map[edge] == 0: + heapq.heappush(zero_degree, edge) -- cgit 1.5.1 From 1a08e0cdab0b3475fd4189aa1e3b6f9aaa823ccf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 Jan 2021 18:57:32 +0000 Subject: Fix event chain bg update. (#9118) We passed in a graph to `sorted_topologically` which didn't have an entry for each node (as we dropped nodes with no edges). --- changelog.d/9118.misc | 1 + synapse/util/iterutils.py | 2 +- tests/util/test_itertools.py | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9118.misc (limited to 'synapse/util/iterutils.py') diff --git a/changelog.d/9118.misc b/changelog.d/9118.misc new file mode 100644 index 0000000000..346741d982 --- /dev/null +++ b/changelog.d/9118.misc @@ -0,0 +1 @@ +Improve efficiency of large state resolutions. diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index f7b4857a84..6ef2b008a4 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -92,7 +92,7 @@ def sorted_topologically( node = heapq.heappop(zero_degree) yield node - for edge in reverse_graph[node]: + for edge in reverse_graph.get(node, []): if edge in degree_map: degree_map[edge] -= 1 if degree_map[edge] == 0: diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py index 1184cea5a3..522c8061f9 100644 --- a/tests/util/test_itertools.py +++ b/tests/util/test_itertools.py @@ -56,6 +56,14 @@ class SortTopologically(TestCase): graph = {} # type: Dict[int, List[int]] self.assertEqual(list(sorted_topologically([], graph)), []) + def test_handle_empty_graph(self): + "Test that a graph where a node doesn't have an entry is treated as empty" + + graph = {} # type: Dict[int, List[int]] + + # For disconnected nodes the output is simply sorted. + self.assertEqual(list(sorted_topologically([1, 2], graph)), [1, 2]) + def test_disconnected(self): "Test that a graph with no edges work" -- cgit 1.5.1 From 056327457ff471495741a539e99c840ed54afccd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 22 Jan 2021 19:44:08 +0000 Subject: Fix chain cover update to handle events with duplicate auth events (#9210) --- changelog.d/9210.bugfix | 1 + synapse/util/iterutils.py | 2 +- tests/util/test_itertools.py | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9210.bugfix (limited to 'synapse/util/iterutils.py') diff --git a/changelog.d/9210.bugfix b/changelog.d/9210.bugfix new file mode 100644 index 0000000000..f9e0765570 --- /dev/null +++ b/changelog.d/9210.bugfix @@ -0,0 +1 @@ +Fix chain cover update to handle events with duplicate auth events. Introduced in v1.26.0rc1. diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index 6ef2b008a4..8d2411513f 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -78,7 +78,7 @@ def sorted_topologically( if node not in degree_map: continue - for edge in edges: + for edge in set(edges): if edge in degree_map: degree_map[node] += 1 diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py index 522c8061f9..1ef0af8e8f 100644 --- a/tests/util/test_itertools.py +++ b/tests/util/test_itertools.py @@ -92,3 +92,15 @@ class SortTopologically(TestCase): # Valid orderings are `[1, 3, 2, 4]` or `[1, 2, 3, 4]`, but we should # always get the same one. self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4]) + + def test_duplicates(self): + "Test that a graph with duplicate edges work" + graph = {1: [], 2: [1, 1], 3: [2, 2], 4: [3]} # type: Dict[int, List[int]] + + self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4]) + + def test_multiple_paths(self): + "Test that a graph with multiple paths between two nodes work" + graph = {1: [], 2: [1], 3: [2], 4: [3, 2, 1]} # type: Dict[int, List[int]] + + self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4]) -- cgit 1.5.1