From 501c928c50a0386f55627b6f8c4178a6e565e8c6 Mon Sep 17 00:00:00 2001 From: Bernardo Meurer Costa Date: Tue, 28 Oct 2025 02:48:55 +0000 Subject: [PATCH] refactor(libstore): add BGL-based dependency graph for path analysis Introduces a reusable directed graph template built on Boost Graph Library (BGL) to provide graph operations for store path dependency analysis. This will be used by `nix why-depends` and future cycle detection. --- src/libstore-tests/dependency-graph.cc | 97 ++++++++ src/libstore-tests/meson.build | 1 + src/libstore/dependency-graph.cc | 13 + .../nix/store/dependency-graph-impl.hh | 232 ++++++++++++++++++ .../include/nix/store/dependency-graph.hh | 173 +++++++++++++ src/libstore/include/nix/store/meson.build | 2 + src/libstore/meson.build | 1 + 7 files changed, 519 insertions(+) create mode 100644 src/libstore-tests/dependency-graph.cc create mode 100644 src/libstore/dependency-graph.cc create mode 100644 src/libstore/include/nix/store/dependency-graph-impl.hh create mode 100644 src/libstore/include/nix/store/dependency-graph.hh diff --git a/src/libstore-tests/dependency-graph.cc b/src/libstore-tests/dependency-graph.cc new file mode 100644 index 00000000000..d951b03bd1e --- /dev/null +++ b/src/libstore-tests/dependency-graph.cc @@ -0,0 +1,97 @@ +#include "nix/store/dependency-graph-impl.hh" + +#include + +namespace nix { + +TEST(DependencyGraph, BasicAddEdge) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + depGraph.addEdge("b", "c"); + + EXPECT_TRUE(depGraph.hasNode("a")); + EXPECT_TRUE(depGraph.hasNode("b")); + EXPECT_TRUE(depGraph.hasNode("c")); + EXPECT_FALSE(depGraph.hasNode("d")); + + // Verify edges using high-level API + auto successors = depGraph.getSuccessors("a"); + EXPECT_EQ(successors.size(), 1); + EXPECT_EQ(successors[0], "b"); +} + +TEST(DependencyGraph, DfsTraversalOrder) +{ + // Build a graph: A->B->D, A->C->D + // Successors are ordered by distance to the target; traversal is depth-first, so a successor's subtree (D) is visited before its sibling + FilePathGraph depGraph; + 
depGraph.addEdge("a", "b"); + depGraph.addEdge("a", "c"); + depGraph.addEdge("b", "d"); + depGraph.addEdge("c", "d"); + + std::vector visitedNodes; + std::vector> visitedEdges; + + depGraph.dfsFromTarget( + "a", + "d", + [&](const std::string & node, size_t depth) { + visitedNodes.push_back(node); + return true; + }, + [&](const std::string & from, const std::string & to, bool isLast, size_t depth) { + visitedEdges.emplace_back(from, to); + }, + [](const std::string &) { return false; }); + + EXPECT_EQ(visitedNodes[0], "a"); + // B and C both at distance 1, could be in either order + EXPECT_TRUE( + (visitedNodes[1] == "b" && visitedNodes[2] == "d") || (visitedNodes[1] == "c" && visitedNodes[2] == "d")); +} + +TEST(DependencyGraph, GetSuccessors) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + depGraph.addEdge("a", "c"); + + auto successors = depGraph.getSuccessors("a"); + EXPECT_EQ(successors.size(), 2); + EXPECT_TRUE(std::ranges::contains(successors, "b")); + EXPECT_TRUE(std::ranges::contains(successors, "c")); +} + +TEST(DependencyGraph, GetAllNodes) +{ + FilePathGraph depGraph; + depGraph.addEdge("foo", "bar"); + depGraph.addEdge("bar", "baz"); + + auto nodes = depGraph.getAllNodes(); + EXPECT_EQ(nodes.size(), 3); + EXPECT_TRUE(std::ranges::contains(nodes, "foo")); + EXPECT_TRUE(std::ranges::contains(nodes, "bar")); + EXPECT_TRUE(std::ranges::contains(nodes, "baz")); +} + +TEST(DependencyGraph, ThrowsOnMissingNode) +{ + FilePathGraph depGraph; + depGraph.addEdge("a", "b"); + + EXPECT_THROW((void) depGraph.getSuccessors("nonexistent"), nix::Error); +} + +TEST(DependencyGraph, EmptyGraph) +{ + FilePathGraph depGraph; + + EXPECT_FALSE(depGraph.hasNode("anything")); + EXPECT_EQ(depGraph.numVertices(), 0); + EXPECT_EQ(depGraph.getAllNodes().size(), 0); +} + +} // namespace nix diff --git a/src/libstore-tests/meson.build b/src/libstore-tests/meson.build index 4d464ad8917..b29eddb43c0 100644 --- a/src/libstore-tests/meson.build +++ 
b/src/libstore-tests/meson.build @@ -56,6 +56,7 @@ subdir('nix-meson-build-support/common') sources = files( 'common-protocol.cc', 'content-address.cc', + 'dependency-graph.cc', 'derivation-advanced-attrs.cc', 'derivation.cc', 'derived-path.cc', diff --git a/src/libstore/dependency-graph.cc b/src/libstore/dependency-graph.cc new file mode 100644 index 00000000000..f3af6790355 --- /dev/null +++ b/src/libstore/dependency-graph.cc @@ -0,0 +1,13 @@ +#include "nix/store/dependency-graph.hh" +#include "nix/store/dependency-graph-impl.hh" + +#include + +namespace nix { + +// Explicit instantiations for common types +template class DependencyGraph; +template class DependencyGraph; +template class DependencyGraph; + +} // namespace nix diff --git a/src/libstore/include/nix/store/dependency-graph-impl.hh b/src/libstore/include/nix/store/dependency-graph-impl.hh new file mode 100644 index 00000000000..13777b9bf25 --- /dev/null +++ b/src/libstore/include/nix/store/dependency-graph-impl.hh @@ -0,0 +1,232 @@ +#pragma once +/** + * @file + * + * Template implementations (as opposed to mere declarations). + * + * This file is an example of the "impl.hh" pattern. See the + * contributing guide. + * + * One only needs to include this when instantiating DependencyGraph + * with custom NodeId or EdgeProperty types beyond the pre-instantiated + * common types (StorePath, std::string). 
+ */ + +#include "nix/store/dependency-graph.hh" +#include "nix/store/store-api.hh" +#include "nix/util/error.hh" + +#include +#include +#include + +#include +#include + +namespace nix { + +template +DependencyGraph::DependencyGraph(Store & store, const StorePathSet & closure) + requires std::same_as +{ + for (auto & path : closure) { + for (auto & ref : store.queryPathInfo(path)->references) { + addEdge(path, ref); + } + } +} + +template +typename DependencyGraph::vertex_descriptor +DependencyGraph::addOrGetVertex(const NodeId & id) +{ + auto it = nodeToVertex.find(id); + if (it != nodeToVertex.end()) { + return it->second; + } + + auto v = boost::add_vertex(VertexProperty{std::make_optional(id)}, graph); + nodeToVertex[id] = v; + return v; +} + +template +void DependencyGraph::addEdge(const NodeId & from, const NodeId & to) +{ + auto vFrom = addOrGetVertex(from); + auto vTo = addOrGetVertex(to); + + // Check for existing edge to prevent duplicates (idempotent) + auto [existingEdge, found] = boost::edge(vFrom, vTo, graph); + if (!found) { + boost::add_edge(vFrom, vTo, graph); + } + // If edge exists, this is a no-op (idempotent) +} + +template +void DependencyGraph::addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop) + requires(!std::same_as) +{ + auto vFrom = addOrGetVertex(from); + auto vTo = addOrGetVertex(to); + + auto [existingEdge, found] = boost::edge(vFrom, vTo, graph); + if (found) { + // Merge properties for existing edge + if constexpr (std::same_as) { + // Set handles deduplication automatically + auto & edgeFiles = graph[existingEdge].files; + edgeFiles.insert(prop.files.begin(), prop.files.end()); + } else { + // For other property types, overwrite with new value + graph[existingEdge] = prop; + } + } else { + // New edge + boost::add_edge(vFrom, vTo, prop, graph); + } +} + +template +std::optional::vertex_descriptor> +DependencyGraph::getVertex(const NodeId & id) const +{ + auto it = nodeToVertex.find(id); + if (it == 
nodeToVertex.end()) { + return std::nullopt; + } + return it->second; +} + +template +const NodeId & DependencyGraph::getNodeId(vertex_descriptor v) const +{ + return *graph[v].id; +} + +template +bool DependencyGraph::hasNode(const NodeId & id) const +{ + return nodeToVertex.contains(id); +} + +template +typename DependencyGraph::vertex_descriptor +DependencyGraph::getVertexOrThrow(const NodeId & id) const +{ + auto opt = getVertex(id); + if (!opt.has_value()) { + // Note: NodeId is not included as it may not be formattable in all instantiations + throw Error("node not found in graph"); + } + return *opt; +} + +template +template +void DependencyGraph::dfsFromTarget( + const NodeId & start, + const NodeId & target, + NodeVisitor && visitNode, + EdgeVisitor && visitEdge, + StopPredicate && shouldStop) const +{ + // Compute distances locally for this traversal + auto targetVertex = getVertexOrThrow(target); + size_t n = boost::num_vertices(graph); + + std::vector distances(n, std::numeric_limits::max()); + distances[targetVertex] = 0; + + // Use reverse_graph to follow incoming edges + auto reversedGraph = boost::make_reverse_graph(graph); + + // Create uniform weight map (all edges have weight 1) + auto weightMap = + boost::make_constant_property::edge_descriptor>(1); + + // Run Dijkstra on reversed graph with uniform weights (distance = number of edges to target) + boost::dijkstra_shortest_paths( + reversedGraph, + targetVertex, + boost::weight_map(weightMap).distance_map( + boost::make_iterator_property_map(distances.begin(), boost::get(boost::vertex_index, reversedGraph)))); + + // DFS with distance-based ordering (no visited set is kept; callers prune via visitNode) + std::function dfs = [&](const NodeId & node, size_t depth) -> bool { + // Visit node - if returns false, skip this subtree + if (!visitNode(node, depth)) { + return false; + } + + // Check if we should stop the entire traversal + if (shouldStop(node)) { + return true; // Signal to stop + } + + // Get 
and sort successors by distance (ties fall back to NodeId order via pair comparison) + auto successors = getSuccessors(node); + auto 
sortedSuccessors = successors | std::views::transform([&](const auto & ref) -> std::pair { + auto v = getVertexOrThrow(ref); + return {distances[v], ref}; // Use local distances + }) + | std::views::filter([](const auto & p) { + // Filter successors that cannot reach the target + return p.first != std::numeric_limits::max(); + }) + | std::ranges::to(); + + std::ranges::sort(sortedSuccessors); + + // Visit each edge and recurse + for (size_t i = 0; i < sortedSuccessors.size(); ++i) { + const auto & [dist, successor] = sortedSuccessors[i]; + bool isLast = (i == sortedSuccessors.size() - 1); + + visitEdge(node, successor, isLast, depth); + + if (dfs(successor, depth + 1)) { + return true; // Propagate stop signal + } + } + + return false; // Continue traversal + }; + + dfs(start, 0); +} + +template +std::vector DependencyGraph::getSuccessors(const NodeId & node) const +{ + auto v = getVertexOrThrow(node); + auto [adjBegin, adjEnd] = boost::adjacent_vertices(v, graph); + + return std::ranges::subrange(adjBegin, adjEnd) | std::views::transform([&](auto v) { return getNodeId(v); }) + | std::ranges::to(); +} + +template +std::optional +DependencyGraph::getEdgeProperty(const NodeId & from, const NodeId & to) const + requires(!std::same_as) +{ + auto vFrom = getVertexOrThrow(from); + auto vTo = getVertexOrThrow(to); + + auto [edge, found] = boost::edge(vFrom, vTo, graph); + if (!found) { + return std::nullopt; + } + + return graph[edge]; +} + +template +std::vector DependencyGraph::getAllNodes() const +{ + return nodeToVertex | std::views::keys | std::ranges::to(); +} + +} // namespace nix diff --git a/src/libstore/include/nix/store/dependency-graph.hh b/src/libstore/include/nix/store/dependency-graph.hh new file mode 100644 index 00000000000..e28fd74b3ca --- /dev/null +++ b/src/libstore/include/nix/store/dependency-graph.hh @@ -0,0 +1,173 @@ +#pragma once +///@file + +#include "nix/store/path.hh" +#include "nix/util/canon-path.hh" + +#include +#include +#include + +#include +#include +#include 
+#include +#include + +namespace nix { + +class Store; + +/** + * Concept for types usable as graph node IDs. + */ +template +concept GraphNodeId = std::copyable && std::totally_ordered; + +/** + * Directed graph for dependency analysis using Boost Graph Library. + * + * The graph is fully mutable - edges can be added at any time. + * Query methods compute what they need on-demand without caching. + * + * **Edge Semantics:** + * - addEdge() is idempotent - calling twice with same nodes is safe + * - For FileListEdgeProperty: files are automatically merged and deduplicated + * - For other property types: later addEdge() calls overwrite earlier values + * + * @tparam NodeId Node identifier type (e.g., StorePath, std::string) + * @tparam EdgeProperty Optional edge metadata type + */ +template +class DependencyGraph +{ +public: + /** + * Bundled vertex property. Uses optional for default constructibility. + */ + struct VertexProperty + { + std::optional id; + }; + + /** + * BGL adjacency_list: bidirectional, vector storage. + */ + using Graph = boost::adjacency_list; + + using vertex_descriptor = typename boost::graph_traits::vertex_descriptor; + using edge_descriptor = typename boost::graph_traits::edge_descriptor; + +private: + Graph graph; + std::map nodeToVertex; + + // Internal helpers + vertex_descriptor addOrGetVertex(const NodeId & id); + std::optional getVertex(const NodeId & id) const; + const NodeId & getNodeId(vertex_descriptor v) const; + vertex_descriptor getVertexOrThrow(const NodeId & id) const; + +public: + DependencyGraph() = default; + + /** + * Build graph from Store closure (StorePath graphs only). + * + * @param store Store to query for references + * @param closure Store paths to include + */ + DependencyGraph(Store & store, const StorePathSet & closure) + requires std::same_as; + + /** + * Add edge, creating vertices if needed. + */ + void addEdge(const NodeId & from, const NodeId & to); + + /** + * Add edge with property. 
If edge exists, merges properties: + * - FileListEdgeProperty: files are merged and automatically deduplicated + * - Other properties: later value overwrites earlier value + */ + void addEdge(const NodeId & from, const NodeId & to, const EdgeProperty & prop) + requires(!std::same_as); + + [[nodiscard]] bool hasNode(const NodeId & id) const; + + /** + * DFS traversal with distance-based successor ordering. + * Successors are visited in order of increasing hop distance to target; successors that cannot reach target are skipped. + * Distances are recomputed on every call (nothing is cached), and no visited set is kept: a node reachable via several paths may be visited once per path. + * + * Example traversal from A to D: + * + * A (dist=2) + * ├─→ B (dist=1) + * │ └─→ D (dist=0) [target] + * └─→ C (dist=1) + * └─→ D (dist=0) + * + * Callbacks invoked: + * visitNode(A, depth=0) -> true + * visitEdge(A, B, isLast=false, depth=0) + * visitNode(B, depth=1) -> true + * visitEdge(B, D, isLast=true, depth=1) + * visitNode(D, depth=2) -> true + * shouldStop(D) -> true [stops traversal] + * + * @param start Starting node for traversal + * @param target Target node (used for distance-based sorting); Error is thrown if it is not in the graph + * @param visitNode Called when entering node: (node, depth) -> bool. Return false to skip subtree; this is also the only way to cut cycles, which the traversal does not detect itself. + * @param visitEdge Called for each edge: (from, to, isLastEdge, depth) -> void + * @param shouldStop Called after visiting node: (node) -> bool. Return true to stop entire traversal. + */ + template + void dfsFromTarget( + const NodeId & start, + const NodeId & target, + NodeVisitor && visitNode, + EdgeVisitor && visitEdge, + StopPredicate && shouldStop) const; + + /** + * Get successor nodes (outgoing edges). Throws Error if node is not in the graph. + */ + [[nodiscard]] std::vector getSuccessors(const NodeId & node) const; + + /** + * Get edge property. 
Returns nullopt if edge doesn't exist; throws Error if either node is not in the graph. 
+ */ + [[nodiscard]] std::optional getEdgeProperty(const NodeId & from, const NodeId & to) const + requires(!std::same_as); + + [[nodiscard]] std::vector getAllNodes() const; + + [[nodiscard]] size_t numVertices() const + { + return boost::num_vertices(graph); + } +}; + +/** + * Edge property storing which files created a dependency. + * Files are stored in a std::set, guaranteeing: + * - Automatic deduplication + * - Deterministic ordering (sorted by CanonPath comparison) + */ +struct FileListEdgeProperty +{ + std::set files; +}; + +// Convenience typedefs +using StorePathGraph = DependencyGraph; +using FilePathGraph = DependencyGraph; +using StorePathGraphWithFiles = DependencyGraph; + +// Provided by src/libstore/dependency-graph.cc +extern template class DependencyGraph; +extern template class DependencyGraph; +extern template class DependencyGraph; + +} // namespace nix diff --git a/src/libstore/include/nix/store/meson.build b/src/libstore/include/nix/store/meson.build index 5d6626ff838..6e4431da728 100644 --- a/src/libstore/include/nix/store/meson.build +++ b/src/libstore/include/nix/store/meson.build @@ -31,6 +31,8 @@ headers = [ config_pub_h ] + files( 'common-ssh-store-config.hh', 'content-address.hh', 'daemon.hh', + 'dependency-graph-impl.hh', + 'dependency-graph.hh', 'derivation-options.hh', 'derivations.hh', 'derived-path-map.hh', diff --git a/src/libstore/meson.build b/src/libstore/meson.build index d1b3666cc34..bdc1c629541 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -277,6 +277,7 @@ sources = files( 'common-ssh-store-config.cc', 'content-address.cc', 'daemon.cc', + 'dependency-graph.cc', 'derivation-options.cc', 'derivations.cc', 'derived-path-map.cc',