diff options
author | Erik Johnston <erikj@matrix.org> | 2023-11-16 14:25:35 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-16 14:25:35 +0000 |
commit | 1b238e88371516bfedb62d010e156820ab164b94 (patch) | |
tree | e7e73a8b70a47651d4d7a5cfbff379ff36828816 /synapse | |
parent | Fix sending out of order `POSITION` over replication (#16639) (diff) | |
download | synapse-1b238e88371516bfedb62d010e156820ab164b94.tar.xz |
Speed up persisting large number of outliers (#16649)
Recalculating the roots tuple every iteration could be very expensive, so instead let's do a topological sort.
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/handlers/federation_event.py | 18 | ||||
-rw-r--r-- | synapse/util/iterutils.py | 51 |
2 files changed, 58 insertions, 11 deletions
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index ba6b94a8b7..f4c17894aa 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -88,7 +88,7 @@ from synapse.types import ( ) from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter, partition +from synapse.util.iterutils import batch_iter, partition, sorted_topologically_batched from synapse.util.retryutils import NotRetryingDestination from synapse.util.stringutils import shortstr @@ -1669,14 +1669,13 @@ class FederationEventHandler: # XXX: it might be possible to kick this process off in parallel with fetching # the events. - while event_map: - # build a list of events whose auth events are not in the queue. - roots = tuple( - ev - for ev in event_map.values() - if not any(aid in event_map for aid in ev.auth_event_ids()) - ) + # We need to persist an event's auth events before the event. + auth_graph = { + ev: [event_map[e_id] for e_id in ev.auth_event_ids() if e_id in event_map] + for ev in event_map.values() + } + for roots in sorted_topologically_batched(event_map.values(), auth_graph): if not roots: # if *none* of the remaining events are ready, that means # we have a loop. This either means a bug in our logic, or that @@ -1698,9 +1697,6 @@ class FederationEventHandler: await self._auth_and_persist_outliers_inner(room_id, roots) - for ev in roots: - del event_map[ev.event_id] - async def _auth_and_persist_outliers_inner( self, room_id: str, fetched_events: Collection[EventBase] ) -> None: diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index a0efb96d3b..f4c0194af0 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -135,3 +135,54 @@ def sorted_topologically( degree_map[edge] -= 1 if degree_map[edge] == 0: heapq.heappush(zero_degree, edge) + + +def sorted_topologically_batched( + nodes: Iterable[T], + graph: Mapping[T, Collection[T]], +) -> Generator[Collection[T], None, None]: + r"""Walk the graph topologically, returning batches of nodes where all nodes + that references it have been previously returned. + + For example, given the following graph: + + A + / \ + B C + \ / + D + + This function will return: `[[A], [B, C], [D]]`. + + This function is useful for e.g. batch persisting events in an auth chain, + where we can only persist an event if all its auth events have already been + persisted. + """ + + degree_map = {node: 0 for node in nodes} + reverse_graph: Dict[T, Set[T]] = {} + + for node, edges in graph.items(): + if node not in degree_map: + continue + + for edge in set(edges): + if edge in degree_map: + degree_map[node] += 1 + + reverse_graph.setdefault(edge, set()).add(node) + reverse_graph.setdefault(node, set()) + + zero_degree = [node for node, degree in degree_map.items() if degree == 0] + + while zero_degree: + new_zero_degree = [] + for node in zero_degree: + for edge in reverse_graph.get(node, []): + if edge in degree_map: + degree_map[edge] -= 1 + if degree_map[edge] == 0: + new_zero_degree.append(edge) + + yield zero_degree + zero_degree = new_zero_degree |