From 1e208da062d59021e722b31ffbad05cba5524d59 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 3 Oct 2025 21:01:00 +0200 Subject: [PATCH] Parallel Branch-and-Bound (#412) This PR implement a parallel branch-and-bound procedure, which is split into two phases. In the first phase, the algorithm will greedily expand the search tree until a certain depth and then add the bottom nodes to a global heap. The parallel expansion is implemented using `omp task`. In the second phase, some threads will explore the tree using best first search with plunging, i.e., they take the first node from the global heap and then explore the entire branch that starts on this node. Any unexplored node are insert into the heap. The remaining threads will perform deep dives in order to find feasible solutions. The solver keep a small heap contains the most promising nodes to perform the dives, which is keep in sync with the global heap. This PR also - Replace the `std::thread`-based parallelization in the strong branching with OpenMP in order to use dynamic scheduling. This ensures that all threads have similar amount of work and improve parallel performance. - Fixed invalid memory access when trying to access the status of a fathomed node. - Replaced `std::mutex` with `omp atomic` whatever applicable. - Added dedicated classes `dive_queue_t` and `search_tree_t` to store the diving heap and the search tree, respectively. This is an extension of #305. Closes #320. Closes #417. ## Benchmark results (MIPLIB2017): master branch (53d6e74) ``` Average Gap: 0.2174861712 ``` This PR: ``` Average Gap: 0.1989485546 ``` i.e., a `1.8%` improvement. In terms of the geomean of the gap ratio, this is equal to `1.62x`. Authors: - Nicolas L. Guidotti (https://github.com/nguidotti) Approvers: - Rajesh Gandham (https://github.com/rg20) - Chris Maes (https://github.com/chris-maes) URL: https://github.com/NVIDIA/cuopt/pull/412 --- .../linear_programming/cuopt/run_mip.cpp | 3 + cpp/src/dual_simplex/branch_and_bound.cpp | 916 ++++++++++-------- cpp/src/dual_simplex/branch_and_bound.hpp | 204 ++-- cpp/src/dual_simplex/mip_node.cpp | 2 +- cpp/src/dual_simplex/mip_node.hpp | 116 ++- cpp/src/dual_simplex/phase2.cpp | 4 +- cpp/src/dual_simplex/pseudo_costs.cpp | 152 +-- cpp/src/dual_simplex/pseudo_costs.hpp | 11 +- .../dual_simplex/simplex_solver_settings.hpp | 28 +- cpp/src/math_optimization/solver_settings.cu | 2 +- cpp/src/mip/diversity/diversity_manager.cu | 6 +- cpp/src/mip/diversity/recombiners/sub_mip.cuh | 7 +- .../rounding/lb_constraint_prop.cu | 1 - cpp/src/mip/solver.cu | 10 +- cpp/src/utilities/omp_helpers.hpp | 138 +++ 15 files changed, 1002 insertions(+), 598 deletions(-) create mode 100644 cpp/src/utilities/omp_helpers.hpp diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 64b1bb4bb..a4f52cb4e 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -382,6 +383,8 @@ int main(int argc, char* argv[]) double memory_limit = program.get("--memory-limit"); bool track_allocations = program.get("--track-allocations")[0] == 't'; + if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } + if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); result_file = out_dir + "/final_result.csv"; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 1a7ea7d82..5870a1b46 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -15,9 +15,9 @@ * limitations under the License. */ -#include +#include +#include #include - #include #include #include @@ -29,10 +29,12 @@ #include #include +#include #include #include -#include +#include #include +#include #include #include @@ -133,37 +135,6 @@ void set_uninitialized_steepest_edge_norms(std::vector& edge_norms) } } -constexpr int write_graphviz = false; - -template -void graphviz_node(const simplex_solver_settings_t& settings, - mip_node_t* node_ptr, - std::string label, - f_t val) -{ - if (write_graphviz) { - settings.log.printf("Node%d [label=\"%s %.16e\"]\n", node_ptr->node_id, label.c_str(), val); - } -} - -template -void graphviz_edge(const simplex_solver_settings_t& settings, - mip_node_t* origin_ptr, - mip_node_t* dest_ptr, - i_t branch_var, - i_t branch_dir, - f_t bound) -{ - if (write_graphviz) { - settings.log.printf("Node%d -> Node%d [label=\"x%d %s %e\"]\n", - origin_ptr->node_id, - dest_ptr->node_id, - branch_var, - branch_dir == 0 ? "<=" : ">=", - bound); - } -} - dual::status_t convert_lp_status_to_dual_status(lp_status_t status) { if (status == lp_status_t::OPTIMAL) { @@ -199,7 +170,18 @@ f_t relative_gap(f_t obj_value, f_t lower_bound) f_t user_mip_gap = obj_value == 0.0 ? (lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) : std::abs(obj_value - lower_bound) / std::abs(obj_value); - // Handle NaNs (i.e., NaN != NaN) + if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } + return user_mip_gap; +} + +template +f_t user_relative_gap(const lp_problem_t& lp, f_t obj_value, f_t lower_bound) +{ + f_t user_obj = compute_user_objective(lp, obj_value); + f_t user_lower_bound = compute_user_objective(lp, lower_bound); + f_t user_mip_gap = user_obj == 0.0 + ? (user_lower_bound == 0.0 ? 0.0 : std::numeric_limits::infinity()) + : std::abs(user_obj - user_lower_bound) / std::abs(user_obj); if (std::isnan(user_mip_gap)) { return std::numeric_limits::infinity(); } return user_mip_gap; } @@ -229,7 +211,8 @@ branch_and_bound_t::branch_and_bound_t( original_lp_(1, 1, 1), incumbent_(1), root_relax_soln_(1, 1), - pc_(1) + pc_(1), + status_(mip_exploration_status_t::UNSET) { stats_.start_time = tic(); convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_); @@ -238,14 +221,6 @@ branch_and_bound_t::branch_and_bound_t( mutex_upper_.lock(); upper_bound_ = inf; mutex_upper_.unlock(); - - mutex_lower_.lock(); - lower_bound_ = -inf; - mutex_lower_.unlock(); - - mutex_branching_.lock(); - currently_branching_ = false; - mutex_branching_.unlock(); } template @@ -260,12 +235,27 @@ f_t branch_and_bound_t::get_upper_bound() template f_t branch_and_bound_t::get_lower_bound() { - mutex_lower_.lock(); - const f_t lower_bound = lower_bound_; - mutex_lower_.unlock(); + f_t lower_bound = lower_bound_ceiling_.load(); + mutex_heap_.lock(); + if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } + mutex_heap_.unlock(); + + for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { + lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); + } + return lower_bound; } +template +i_t branch_and_bound_t::get_heap_size() +{ + mutex_heap_.lock(); + i_t size = heap_.size(); + mutex_heap_.unlock(); + return size; +} + template void branch_and_bound_t::set_new_solution(const std::vector& solution) { @@ -304,16 +294,13 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu mutex_upper_.unlock(); if (is_feasible) { - mutex_branching_.lock(); - bool currently_branching = currently_branching_; - mutex_branching_.unlock(); - if (currently_branching) { + if (status_ == mip_exploration_status_t::RUNNING) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string gap = user_mip_gap(user_obj, user_lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", user_obj, user_lower, gap.c_str(), @@ -424,6 +411,7 @@ void branch_and_bound_t::repair_heuristic_solutions() f_t obj = compute_user_objective(original_lp_, repaired_obj); f_t lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(obj, lower); + settings_.log.printf( "H %+13.6e %+10.6e %s %9.2f\n", obj, @@ -445,58 +433,83 @@ void branch_and_bound_t::repair_heuristic_solutions() } template -void branch_and_bound_t::branch(mip_node_t* parent_node, - i_t branch_var, - f_t branch_var_val, - const std::vector& parent_vstatus) +mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t& solution, + f_t lower_bound) { - // down child - auto down_child = std::make_unique>( - original_lp_, parent_node, ++stats_.num_nodes, branch_var, 0, branch_var_val, parent_vstatus); + mip_status_t mip_status = mip_status_t::UNSET; - graphviz_edge( - settings_, parent_node, down_child.get(), branch_var, 0, std::floor(branch_var_val)); + if (status_ == mip_exploration_status_t::NUMERICAL) { + settings_.log.printf("Numerical issue encountered. Stopping the solver...\n"); + mip_status = mip_status_t::NUMERICAL; + } - // up child - auto up_child = std::make_unique>( - original_lp_, parent_node, ++stats_.num_nodes, branch_var, 1, branch_var_val, parent_vstatus); + if (status_ == mip_exploration_status_t::TIME_LIMIT) { + settings_.log.printf("Time limit reached. Stopping the solver...\n"); + mip_status = mip_status_t::TIME_LIMIT; + } - graphviz_edge(settings_, parent_node, up_child.get(), branch_var, 1, std::ceil(branch_var_val)); + f_t upper_bound = get_upper_bound(); + f_t gap = upper_bound - lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, lower_bound); + f_t gap_rel = user_relative_gap(original_lp_, upper_bound, lower_bound); - assert(parent_vstatus.size() == original_lp_.num_cols); - parent_node->add_children(std::move(down_child), - std::move(up_child)); // child pointers moved into the tree -} + settings_.log.printf( + "Explored %d nodes in %.2fs.\n", stats_.nodes_explored, toc(stats_.start_time)); + settings_.log.printf("Absolute Gap %e Objective %.16e Lower Bound %.16e\n", gap, obj, user_lower); -template -void branch_and_bound_t::update_tree(mip_node_t* node_ptr, node_status_t status) -{ - std::vector*> stack; - node_ptr->set_status(status, stack); - remove_fathomed_nodes(stack); + if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { + mip_status = mip_status_t::OPTIMAL; + if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) { + settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", + settings_.absolute_mip_gap_tol); + } else if (gap > 0 && gap_rel <= settings_.relative_mip_gap_tol) { + settings_.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", + settings_.relative_mip_gap_tol); + } else { + settings_.log.printf("Optimal solution found.\n"); + } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + } + + if (stats_.nodes_explored > 0 && stats_.nodes_unexplored == 0 && upper_bound == inf) { + settings_.log.printf("Integer infeasible.\n"); + mip_status = mip_status_t::INFEASIBLE; + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + } + + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = lower_bound; + solution.nodes_explored = stats_.nodes_explored; + solution.simplex_iterations = stats_.total_lp_iters; + + return mip_status; } template void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - char symbol) + char thread_type) { bool send_solution = false; i_t nodes_explored = stats_.nodes_explored; i_t nodes_unexplored = stats_.nodes_unexplored; - f_t gap; mutex_upper_.lock(); if (leaf_objective < upper_bound_) { incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); upper_bound_ = leaf_objective; f_t lower_bound = get_lower_bound(); - gap = upper_bound_ - lower_bound; f_t obj = compute_user_objective(original_lp_, upper_bound_); f_t lower = compute_user_objective(original_lp_, lower_bound); - settings_.log.printf("%c%8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - symbol, + settings_.log.printf("%c%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + thread_type, nodes_explored, nodes_unexplored, obj, @@ -515,29 +528,57 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, settings_.solution_callback(original_x, upper_bound_); } mutex_upper_.unlock(); +} - if (send_solution) { - mutex_gap_.lock(); - gap_ = gap; - mutex_gap_.unlock(); +template +std::pair*, mip_node_t*> +branch_and_bound_t::child_selection(mip_node_t* node_ptr) +{ + const i_t branch_var = node_ptr->get_down_child()->branch_var; + const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; + const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); + const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); + const f_t down_dist = branch_var_val - down_val; + const f_t up_dist = up_val - branch_var_val; + constexpr f_t eps = 1e-6; + + if (down_dist < up_dist + eps) { + return std::make_pair(node_ptr->get_down_child(), node_ptr->get_up_child()); + + } else { + return std::make_pair(node_ptr->get_up_child(), node_ptr->get_down_child()); } } template -dual::status_t branch_and_bound_t::node_dual_simplex( - i_t leaf_id, - lp_problem_t& leaf_problem, - std::vector& leaf_vstatus, - lp_solution_t& leaf_solution, - std::vector& bounds_changed, - csc_matrix_t& Arow, - f_t upper_bound, - logger_t& log) +node_status_t branch_and_bound_t::solve_node(search_tree_t& search_tree, + mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log, + char thread_type) { - i_t node_iter = 0; + f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + + std::vector& leaf_vstatus = node_ptr->vstatus; + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); assert(leaf_vstatus.size() == leaf_problem.num_cols); - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + + // Set the correct bounds for the leaf problem + leaf_problem.lower = original_lp_.lower; + leaf_problem.upper = original_lp_.upper; + + std::vector bounds_changed(leaf_problem.num_cols, false); + // Technically, we can get the already strengthened bounds from the node/parent instead of + // getting it from the original problem and re-strengthening. But this requires storing + // two vectors at each node and potentially cause memory issues + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); lp_settings.cut_off = upper_bound + settings_.dual_tol; @@ -562,307 +603,410 @@ dual::status_t branch_and_bound_t::node_dual_simplex( leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { - log.printf("Numerical issue node %d. Resolving from scratch.\n", leaf_id); + log.printf("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); lp_status_t second_status = solve_linear_program_advanced( leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } } - mutex_stats_.lock(); stats_.total_lp_solve_time += toc(lp_start_time); stats_.total_lp_iters += node_iter; - mutex_stats_.unlock(); - - return lp_status; -} - -template -mip_status_t branch_and_bound_t::solve_node_lp( - mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - csc_matrix_t& Arow, - const std::vector& var_types, - f_t upper_bound) -{ - logger_t log; - log.log = false; - std::vector& leaf_vstatus = node_ptr->vstatus; - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - - // Set the correct bounds for the leaf problem - leaf_problem.lower = original_lp_.lower; - leaf_problem.upper = original_lp_.upper; - - std::vector bounds_changed(original_lp_.num_cols, false); - // Technically, we can get the already strengthened bounds from the node/parent instead of - // getting it from the original problem and re-strengthening. But this requires storing - // two vectors at each node and potentially cause memory issues - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - dual::status_t lp_status = node_dual_simplex(node_ptr->node_id, - leaf_problem, - leaf_vstatus, - leaf_solution, - bounds_changed, - Arow, - upper_bound, - settings_.log); if (lp_status == dual::status_t::DUAL_UNBOUNDED) { - node_ptr->lower_bound = inf; - graphviz_node(settings_, node_ptr, "infeasible", 0.0); - update_tree(node_ptr, node_status_t::INFEASIBLE); // Node was infeasible. Do not branch + node_ptr->lower_bound = inf; + search_tree.graphviz_node(log, node_ptr, "infeasible", 0.0); + search_tree.update_tree(node_ptr, node_status_t::INFEASIBLE); + return node_status_t::INFEASIBLE; } else if (lp_status == dual::status_t::CUTOFF) { + // Node was cut off. Do not branch node_ptr->lower_bound = upper_bound; f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings_, node_ptr, "cut off", leaf_objective); - update_tree(node_ptr, node_status_t::FATHOMED); - // Node was cut off. Do not branch + search_tree.graphviz_node(log, node_ptr, "cut off", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + return node_status_t::FATHOMED; + } else if (lp_status == dual::status_t::OPTIMAL) { // LP was feasible - std::vector fractional; - const i_t leaf_fractional = - fractional_variables(settings_, leaf_solution.x, var_types_, fractional); - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - graphviz_node(settings_, node_ptr, "lower bound", leaf_objective); - - mutex_pc_.lock(); - pc_.update_pseudo_costs(node_ptr, leaf_objective); - mutex_pc_.unlock(); + std::vector leaf_fractional; + i_t leaf_num_fractional = + fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional); + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); node_ptr->lower_bound = leaf_objective; + search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); + pc_.update_pseudo_costs(node_ptr, leaf_objective); - constexpr f_t fathom_tol = 1e-5; - if (leaf_fractional == 0) { - add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, 'B'); - graphviz_node(settings_, node_ptr, "integer feasible", leaf_objective); - update_tree(node_ptr, node_status_t::INTEGER_FEASIBLE); + if (leaf_num_fractional == 0) { + // Found a integer feasible solution + add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); + search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::INTEGER_FEASIBLE); + return node_status_t::INTEGER_FEASIBLE; - } else if (leaf_objective <= upper_bound + fathom_tol) { + } else if (leaf_objective <= upper_bound + abs_fathom_tol) { // Choose fractional variable to branch on - mutex_pc_.lock(); - const i_t branch_var = pc_.variable_selection( - fractional, leaf_solution.x, leaf_problem.lower, leaf_problem.upper, log); - mutex_pc_.unlock(); + const i_t branch_var = + pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); assert(leaf_vstatus.size() == leaf_problem.num_cols); - branch(node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus); + search_tree.branch( + node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, original_lp_, log); node_ptr->status = node_status_t::HAS_CHILDREN; + return node_status_t::HAS_CHILDREN; } else { - graphviz_node(settings_, node_ptr, "fathomed", leaf_objective); - update_tree(node_ptr, node_status_t::FATHOMED); + search_tree.graphviz_node(log, node_ptr, "fathomed", leaf_objective); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + return node_status_t::FATHOMED; } + } else if (lp_status == dual::status_t::TIME_LIMIT) { + search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); + search_tree.update_tree(node_ptr, node_status_t::TIME_LIMIT); + return node_status_t::TIME_LIMIT; + } else { - graphviz_node(settings_, node_ptr, "numerical", 0.0); - settings_.log.printf("Encountered LP status %d. This indicates a numerical issue.\n", - lp_status); - return mip_status_t::NUMERICAL; - } + if (thread_type == 'B') { + lower_bound_ceiling_.fetch_min(node_ptr->lower_bound); + log.printf( + "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " + "to " + "%+10.6e.\n", + lp_status, + node_ptr->node_id, + compute_user_objective(original_lp_, lower_bound_ceiling_.load())); + } - return mip_status_t::UNSET; + search_tree.graphviz_node(log, node_ptr, "numerical", 0.0); + search_tree.update_tree(node_ptr, node_status_t::NUMERICAL); + return node_status_t::NUMERICAL; + } } template -mip_status_t branch_and_bound_t::explore_tree(i_t branch_var, - mip_solution_t& solution) +void branch_and_bound_t::exploration_ramp_up(search_tree_t* search_tree, + mip_node_t* node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + i_t initial_heap_size) { - mip_status_t status = mip_status_t::UNSET; - logger_t log; - log.log = false; - auto compare = [](mip_node_t* a, mip_node_t* b) { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - }; + if (status_ != mip_exploration_status_t::RUNNING) { return; } + if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } + + f_t lower_bound = node->lower_bound; + f_t upper_bound = get_upper_bound(); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t abs_gap = upper_bound - lower_bound; + i_t nodes_explored = 0; + i_t nodes_unexplored = 0; + + nodes_explored = (stats_.nodes_explored++); + nodes_unexplored = (stats_.nodes_unexplored--); + stats_.nodes_since_last_log++; + + if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); + search_tree->update_tree(node, node_status_t::FATHOMED); + return; + } - std::priority_queue*, std::vector*>, decltype(compare)> - heap(compare); + f_t now = toc(stats_.start_time); - mip_node_t root_node(root_objective_, root_vstatus_); - graphviz_node(settings_, &root_node, "lower bound", root_objective_); + if (omp_get_thread_num() == 0) { + f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); - branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + (time_since_last_log >= 1)) || + (time_since_last_log > 30) || now > settings_.time_limit) { + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, root_objective_); + std::string gap_user = user_mip_gap(obj, user_lower); - // the stack does not own the unique_ptr the tree does - heap.push(root_node.get_down_child()); - heap.push(root_node.get_up_child()); + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + nodes_explored, + nodes_unexplored, + obj, + user_lower, + node->depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + gap_user.c_str(), + now); - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - csc_matrix_t Arow(1, 1, 1); - original_lp_.A.transpose(Arow); + stats_.nodes_since_last_log = 0; + } + } + + if (toc(stats_.start_time) > settings_.time_limit) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return; + } + node_status_t node_status = + solve_node(*search_tree, node, leaf_problem, Arow, upper_bound, settings_.log, 'B'); + + if (node_status == node_status_t::TIME_LIMIT) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + stats_.nodes_unexplored += 2; + + // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase + if (stats_.nodes_unexplored < initial_heap_size) { +#pragma omp task + exploration_ramp_up( + search_tree, node->get_down_child(), leaf_problem, Arow, initial_heap_size); + +#pragma omp task + exploration_ramp_up(search_tree, node->get_up_child(), leaf_problem, Arow, initial_heap_size); + + } else { + // We've generated enough nodes, push further nodes onto the heap + mutex_heap_.lock(); + heap_.push(node->get_down_child()); + heap_.push(node->get_up_child()); + mutex_heap_.unlock(); + } + } +} - f_t lower_bound = get_lower_bound(); - f_t gap = get_upper_bound() - lower_bound; - f_t last_log = 0; +template +void branch_and_bound_t::explore_subtree(i_t id, + search_tree_t& search_tree, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) +{ + std::deque*> stack; + stack.push_front(start_node); - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound); - f_t rel_gap = relative_gap(user_upper_bound, user_lower_bound); - while (gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - heap.size() > 0) { - repair_heuristic_solutions(); + while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) { + if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } - // Get a node off the heap - mip_node_t* node_ptr = heap.top(); - heap.pop(); - stats_.nodes_explored++; + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + f_t lower_bound = node_ptr->lower_bound; f_t upper_bound = get_upper_bound(); - if (upper_bound < node_ptr->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the current - // upper bound - update_tree(node_ptr, node_status_t::FATHOMED); - graphviz_node(settings_, node_ptr, "cutoff", node_ptr->lower_bound); + f_t abs_gap = upper_bound - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + + // This is based on three assumptions: + // - The stack only contains sibling nodes, i.e., the current node and it sibling (if + // applicable) + // - The current node and its siblings uses the lower bound of the parent before solving the LP + // relaxation + // - The lower bound of the parent is lower or equal to its children + assert(id < local_lower_bounds_.size()); + local_lower_bounds_[id] = lower_bound; + i_t nodes_explored = stats_.nodes_explored++; + i_t nodes_unexplored = stats_.nodes_unexplored--; + stats_.nodes_since_last_log++; + + if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); continue; } - mutex_lower_.lock(); - lower_bound = lower_bound_ = node_ptr->lower_bound; - mutex_lower_.unlock(); - - mutex_gap_.lock(); - gap_ = gap = upper_bound - lower_bound; - mutex_gap_.unlock(); - - i_t nodes_explored = stats_.nodes_explored; - f_t now = toc(stats_.start_time); - f_t time_since_log = last_log == 0 ? 1.0 : toc(last_log); - if ((nodes_explored % 1000 == 0 || gap < 10 * settings_.absolute_mip_gap_tol || - nodes_explored < 1000) && - (time_since_log >= 1) || - (time_since_log > 60) || now > settings_.time_limit) { - f_t user_obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, lower_bound); - std::string user_gap = user_mip_gap(user_obj, user_lower); - - settings_.log.printf(" %8d %8lu %+13.6e %+10.6e %4d %7.1e %s %9.2f\n", - nodes_explored, - heap.size(), - user_obj, - user_lower, - node_ptr->depth, - nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, - user_gap.c_str(), - now); - last_log = tic(); + + f_t now = toc(stats_.start_time); + + if (id == 0) { + f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); + + if (((stats_.nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string gap_user = user_mip_gap(obj, user_lower); + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + nodes_explored, + nodes_unexplored, + obj, + user_lower, + node_ptr->depth, + nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, + gap_user.c_str(), + now); + stats_.last_log = tic(); + stats_.nodes_since_last_log = 0; + } } if (toc(stats_.start_time) > settings_.time_limit) { - settings_.log.printf("Hit time limit. Stopping\n"); - status = mip_status_t::TIME_LIMIT; + status_ = mip_exploration_status_t::TIME_LIMIT; break; } - status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); + node_status_t node_status = + solve_node(search_tree, node_ptr, leaf_problem, Arow, upper_bound, settings_.log, 'B'); + + if (node_status == node_status_t::TIME_LIMIT) { + status_ = mip_exploration_status_t::TIME_LIMIT; + break; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + // The stack should only contain the children of the current parent. + // If the stack size is greater than 0, + // we pop the current node from the stack and place it in the global heap, + // since we are about to add the two children to the stack + if (stack.size() > 0) { + mip_node_t* node = stack.back(); + stack.pop_back(); + + // The order here matters. We want to create a copy of the node + // before adding to the global heap. Otherwise, + // some thread may consume the node (possibly fathoming it) + // before we had the chance to add to the diving queue. + // This lead to a SIGSEGV. Although, in this case, it + // would be better if we discard the node instead. + if (get_heap_size() > settings_.num_bfs_threads) { + mutex_dive_queue_.lock(); + dive_queue_.push(node->detach_copy()); + mutex_dive_queue_.unlock(); + } + + mutex_heap_.lock(); + heap_.push(node); + mutex_heap_.unlock(); + } - if (status == mip_status_t::NUMERICAL) { break; } + stats_.nodes_unexplored += 2; - if (node_ptr->status == node_status_t::HAS_CHILDREN) { - // the heap does not own the unique_ptr the tree does - heap.push(node_ptr->get_down_child()); - heap.push(node_ptr->get_up_child()); + auto [first, second] = child_selection(node_ptr); + stack.push_front(second); + stack.push_front(first); } } +} - stats_.nodes_unexplored = heap.size(); +template +void branch_and_bound_t::best_first_thread(i_t id, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) +{ + f_t lower_bound = -inf; + f_t upper_bound = inf; + f_t abs_gap = inf; + f_t rel_gap = inf; + + while (status_ == mip_exploration_status_t::RUNNING && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + mip_node_t* node_ptr = nullptr; + + // If there any node left in the heap, we pop the top node and explore it. + mutex_heap_.lock(); + if (heap_.size() > 0) { + node_ptr = heap_.top(); + heap_.pop(); + active_subtrees_++; + } + mutex_heap_.unlock(); + + if (node_ptr != nullptr) { + if (get_upper_bound() < node_ptr->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree.update_tree(node_ptr, node_status_t::FATHOMED); + active_subtrees_--; + continue; + } - if (stats_.nodes_unexplored == 0) { - mutex_lower_.lock(); - lower_bound = lower_bound_ = root_node.lower_bound; - mutex_lower_.unlock(); + // Best-first search with plunging + explore_subtree(id, search_tree, node_ptr, leaf_problem, Arow); + active_subtrees_--; + } - mutex_gap_.lock(); - gap_ = gap = get_upper_bound() - lower_bound; - mutex_gap_.unlock(); + lower_bound = get_lower_bound(); + upper_bound = get_upper_bound(); + abs_gap = upper_bound - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); } - return status; + // Check if it is the last thread that exited the loop and no + // timeout or numerical error has happen. + if (status_ == mip_exploration_status_t::RUNNING) { + if (active_subtrees_ == 0) { + status_ = mip_exploration_status_t::COMPLETED; + } else { + local_lower_bounds_[id] = inf; + } + } } template -mip_status_t branch_and_bound_t::dive(i_t branch_var, mip_solution_t& solution) +void branch_and_bound_t::diving_thread(lp_problem_t& leaf_problem, + const csc_matrix_t& Arow) { - mip_status_t status = mip_status_t::UNSET; - logger_t log; log.log = false; - std::vector*> node_stack; - - mip_node_t root_node(root_objective_, root_vstatus_); - graphviz_node(settings_, &root_node, "lower bound", root_objective_); - - branch(&root_node, branch_var, root_relax_soln_.x[branch_var], root_vstatus_); + while (status_ == mip_exploration_status_t::RUNNING && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + std::optional> start_node; - // the stack does not own the unique_ptr the tree does - node_stack.push_back(root_node.get_down_child()); - node_stack.push_back(root_node.get_up_child()); + mutex_dive_queue_.lock(); + if (dive_queue_.size() > 0) { start_node = dive_queue_.pop(); } + mutex_dive_queue_.unlock(); - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; + if (start_node.has_value()) { + if (get_upper_bound() < start_node->lower_bound) { continue; } - csc_matrix_t Arow(1, 1, 1); - original_lp_.A.transpose(Arow); + search_tree_t subtree(std::move(start_node.value())); + std::deque*> stack; + stack.push_front(&subtree.root); - f_t lower_bound = get_lower_bound(); - f_t gap = get_upper_bound() - lower_bound; - i_t nodes_explored = 0; - - while (node_stack.size() > 0) { - // Get a node off the stack - mip_node_t* node_ptr = node_stack.back(); - node_stack.pop_back(); - nodes_explored++; - - f_t upper_bound = get_upper_bound(); - lower_bound = get_lower_bound(); - gap = upper_bound - lower_bound; - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound); - f_t rel_gap = relative_gap(user_upper_bound, user_lower_bound); - if (gap < settings_.absolute_mip_gap_tol && rel_gap < settings_.relative_mip_gap_tol) { - update_tree(node_ptr, node_status_t::FATHOMED); - continue; - } + while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) { + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + f_t upper_bound = get_upper_bound(); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); - if (toc(stats_.start_time) > settings_.time_limit) { - status = mip_status_t::TIME_LIMIT; - break; - } + if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + continue; + } - status = solve_node_lp(node_ptr, leaf_problem, Arow, var_types_, upper_bound); - - if (status == mip_status_t::NUMERICAL) { continue; } - - if (node_ptr->status == node_status_t::HAS_CHILDREN) { - // Martin's child selection - const i_t branch_var = node_ptr->get_down_child()->branch_var; - const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; - const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); - const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); - const f_t down_dist = branch_var_val - down_val; - const f_t up_dist = up_val - branch_var_val; - - if (down_dist < up_dist) { - node_stack.push_back(node_ptr->get_up_child()); - node_stack.push_back(node_ptr->get_down_child()); - } else { - node_stack.push_back(node_ptr->get_down_child()); - node_stack.push_back(node_ptr->get_up_child()); + node_status_t node_status = + solve_node(subtree, node_ptr, leaf_problem, Arow, upper_bound, log, 'D'); + + if (node_status == node_status_t::TIME_LIMIT) { + break; + + } else if (node_status == node_status_t::HAS_CHILDREN) { + auto [first, second] = child_selection(node_ptr); + stack.push_front(second); + stack.push_front(first); + + // If the diving thread is consuming the nodes faster than the + // best first search, then we split the current subtree at the + // lowest possible point and move to the queue, so it can + // be picked by another thread. + if (dive_queue_.size() < min_diving_queue_size_) { + mutex_dive_queue_.lock(); + mip_node_t* new_node = stack.back(); + stack.pop_back(); + dive_queue_.push(new_node->detach_copy()); + mutex_dive_queue_.unlock(); + } + } } } } - - return status; } template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { - mip_status_t status = mip_status_t::UNSET; + logger_t log; + log.log = false; + status_ = mip_exploration_status_t::UNSET; + stats_.nodes_unexplored = 0; + stats_.nodes_explored = 0; if (guess_.size() != 0) { std::vector crushed_guess; @@ -888,6 +1032,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lp_settings.inside_mip = 1; lp_status_t root_status = solve_linear_program_advanced( original_lp_, stats_.start_time, lp_settings, root_relax_soln_, root_vstatus_, edge_norms_); + stats_.total_lp_iters = root_relax_soln_.iterations; stats_.total_lp_solve_time = toc(stats_.start_time); assert(root_vstatus_.size() == original_lp_.num_cols); if (root_status == lp_status_t::INFEASIBLE) { @@ -907,13 +1052,17 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } return mip_status_t::UNBOUNDED; } + if (root_status == lp_status_t::TIME_LIMIT) { - settings_.log.printf("Hit time limit\n"); - return mip_status_t::TIME_LIMIT; + status_ = mip_exploration_status_t::TIME_LIMIT; + return set_final_solution(solution, -inf); } + set_uninitialized_steepest_edge_norms(edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem_, original_lp_, root_relax_soln_.x, original_x); @@ -928,9 +1077,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.set_simplex_solution_callback( original_x, original_dual, compute_user_objective(original_lp_, root_objective_)); } - mutex_lower_.lock(); - lower_bound_ = root_objective_; - mutex_lower_.unlock(); std::vector fractional; const i_t num_fractional = @@ -944,12 +1090,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // We should be done here uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); solution.objective = incumbent_.objective; - solution.lower_bound = lower_bound_; + solution.lower_bound = root_objective_; solution.nodes_explored = 0; solution.simplex_iterations = root_relax_soln_.iterations; settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", compute_user_objective(original_lp_, root_objective_), toc(stats_.start_time)); + if (settings_.solution_callback != nullptr) { settings_.solution_callback(solution.x, solution.objective); } @@ -971,81 +1118,84 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut edge_norms_, pc_); + if (toc(stats_.start_time) > settings_.time_limit) { + status_ = mip_exploration_status_t::TIME_LIMIT; + return set_final_solution(solution, root_objective_); + } + // Choose variable to branch on - logger_t log; - log.log = false; - i_t branch_var = pc_.variable_selection( - fractional, root_relax_soln_.x, original_lp_.lower, original_lp_.upper, log); + i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log); - stats_.total_lp_iters = 0; - stats_.nodes_explored = 0; - stats_.nodes_unexplored = 0; - stats_.num_nodes = 1; + search_tree_t search_tree(root_objective_, root_vstatus_); + search_tree.graphviz_node(settings_.log, &search_tree.root, "lower bound", root_objective_); + search_tree.branch(&search_tree.root, + branch_var, + root_relax_soln_.x[branch_var], + root_vstatus_, + original_lp_, + log); settings_.log.printf( - "| Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap | " - " Time \n"); + "Exploring the B&B tree using %d best-first threads and %d diving threads (%d threads)\n", + settings_.num_bfs_threads, + settings_.num_diving_threads, + settings_.num_threads); - mutex_branching_.lock(); - currently_branching_ = true; - mutex_branching_.unlock(); - - std::future diving_thread; - - if (settings_.num_threads > 0) { - diving_thread = std::async(std::launch::async, [&]() { return dive(branch_var, solution); }); - } + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " + "| Time |\n"); - status = explore_tree(branch_var, solution); + csc_matrix_t Arow(1, 1, 1); + original_lp_.A.transpose(Arow); - if (settings_.num_threads > 0) { mip_status_t diving_status = diving_thread.get(); } + stats_.nodes_explored = 0; + stats_.nodes_unexplored = 2; + stats_.nodes_since_last_log = 0; + stats_.last_log = tic(); + active_subtrees_ = 0; + min_diving_queue_size_ = 4 * settings_.num_diving_threads; + status_ = mip_exploration_status_t::RUNNING; + lower_bound_ceiling_ = inf; + +#pragma omp parallel num_threads(settings_.num_threads) + { + // Make a copy of the original LP. We will modify its bounds at each leaf + lp_problem_t leaf_problem = original_lp_; + +#pragma omp master + { + auto down_child = search_tree.root.get_down_child(); + auto up_child = search_tree.root.get_up_child(); + i_t initial_size = 2 * settings_.num_threads; + +#pragma omp task + exploration_ramp_up(&search_tree, down_child, leaf_problem, Arow, initial_size); + +#pragma omp task + exploration_ramp_up(&search_tree, up_child, leaf_problem, Arow, initial_size); + } - mutex_branching_.lock(); - currently_branching_ = false; - mutex_branching_.unlock(); +#pragma omp barrier - f_t user_upper_bound = compute_user_objective(original_lp_, get_upper_bound()); - f_t user_lower_bound = compute_user_objective(original_lp_, lower_bound_); - settings_.log.printf( - "Explored %d nodes in %.2fs.\nAbsolute Gap %e Objective %.16e Lower Bound %.16e\n", - stats_.nodes_explored.load(), - toc(stats_.start_time), - gap_, - user_upper_bound, - user_lower_bound); - - if (gap_ <= settings_.absolute_mip_gap_tol || - relative_gap(user_upper_bound, user_lower_bound) <= settings_.relative_mip_gap_tol) { - status = mip_status_t::OPTIMAL; - if (gap_ > 0 && gap_ <= settings_.absolute_mip_gap_tol) { - settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", - settings_.absolute_mip_gap_tol); - } else if (gap_ > 0 && - relative_gap(user_upper_bound, user_lower_bound) <= settings_.relative_mip_gap_tol) { - settings_.log.printf("Optimal solution found within relative MIP gap tolerance (%.1e)\n", - settings_.relative_mip_gap_tol); - } else { - settings_.log.printf("Optimal solution found.\n"); - } - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); - } - } +#pragma omp master + { + if (status_ == mip_exploration_status_t::RUNNING && + (active_subtrees_ > 0 || get_heap_size() > 0)) { + for (i_t i = 0; i < settings_.num_bfs_threads; i++) { +#pragma omp task + best_first_thread(i, search_tree, leaf_problem, Arow); + } - if (stats_.nodes_unexplored == 0 && get_upper_bound() == inf) { - settings_.log.printf("Integer infeasible.\n"); - status = mip_status_t::INFEASIBLE; - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); + for (i_t i = 0; i < settings_.num_diving_threads; i++) { +#pragma omp task + diving_thread(leaf_problem, Arow); + } + } } } - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); - solution.objective = incumbent_.objective; - solution.lower_bound = get_lower_bound(); - solution.nodes_explored = stats_.nodes_explored; - solution.simplex_iterations = stats_.total_lp_iters; - return status; + f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree.root.lower_bound; + return set_final_solution(solution, lower_bound); } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index efe28000b..7b80f88fa 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -18,40 +18,82 @@ #pragma once #include +#include #include #include #include #include #include #include +#include -#include -#include -#include +#include #include -#include #include -#include "cuopt/linear_programming/mip/solver_settings.hpp" -#include "dual_simplex/mip_node.hpp" namespace cuopt::linear_programming::dual_simplex { enum class mip_status_t { - OPTIMAL = 0, - UNBOUNDED = 1, - INFEASIBLE = 2, - TIME_LIMIT = 3, - NODE_LIMIT = 4, - NUMERICAL = 5, - UNSET = 6 + OPTIMAL = 0, // The optimal integer solution was found + UNBOUNDED = 1, // The problem is unbounded + INFEASIBLE = 2, // The problem is infeasible + TIME_LIMIT = 3, // The solver reached a time limit + NODE_LIMIT = 4, // The maximum number of nodes was reached (not implemented) + NUMERICAL = 5, // The solver encountered a numerical error + UNSET = 6, // The status is not set +}; + +enum class mip_exploration_status_t { + UNSET = 0, // The status is not set + TIME_LIMIT = 1, // The solver reached a time limit + NODE_LIMIT = 2, // The maximum number of nodes was reached (not implemented) + NUMERICAL = 3, // The solver encountered a numerical error + RUNNING = 4, // The solver is currently exploring the tree + COMPLETED = 5, // The solver finished exploring the tree }; template void upper_bound_callback(f_t upper_bound); +// A min-heap for storing the starting nodes for the dives. +// This has a maximum size of 8192, such that the container +// will discard the least promising node if the queue is full. +template +class dive_queue_t { + private: + std::vector> buffer; + static constexpr i_t max_size_ = 2048; + + public: + dive_queue_t() { buffer.reserve(max_size_); } + + void push(mip_node_t&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), node_compare_t()); + if (buffer.size() > max_size()) { buffer.pop_back(); } + } + + mip_node_t pop() + { + std::pop_heap(buffer.begin(), buffer.end(), node_compare_t()); + mip_node_t node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + i_t size() const { return buffer.size(); } + constexpr i_t max_size() const { return max_size_; } + const mip_node_t& top() const { return buffer.front(); } + void clear() { buffer.clear(); } +}; + template class branch_and_bound_t { public: + template + using mip_node_heap_t = std::priority_queue, node_compare_t>; + branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings); @@ -68,8 +110,8 @@ class branch_and_bound_t { std::vector& repaired_solution) const; f_t get_upper_bound(); - f_t get_lower_bound(); + i_t get_heap_size(); // The main entry routine. Returns the solver status and populates solution with the incumbent. mip_status_t solve(mip_solution_t& solution); @@ -81,18 +123,16 @@ class branch_and_bound_t { // Initial guess. std::vector guess_; + // LP relaxation lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; - // Mutex for lower bound - std::mutex mutex_lower_; - - // Global variable for lower bound - f_t lower_bound_; + // Local lower bounds for each thread + std::vector> local_lower_bounds_; // Mutex for upper bound - std::mutex mutex_upper_; + omp_mutex_t mutex_upper_; // Global variable for upper bound f_t upper_bound_; @@ -100,31 +140,21 @@ class branch_and_bound_t { // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; - // Mutex for gap - std::mutex mutex_gap_; - - // Global variable for gap - f_t gap_; - - // Mutex for branching - std::mutex mutex_branching_; - bool currently_branching_; - - // Global variable for stats - std::mutex mutex_stats_; - - // Note that floating point atomics are only supported in C++20. + // Structure with the general info of the solver. struct stats_t { - f_t start_time = 0.0; - f_t total_lp_solve_time = 0.0; - std::atomic nodes_explored = 0; - std::atomic nodes_unexplored = 0; - f_t total_lp_iters = 0; - std::atomic num_nodes = 0; + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + + // This should only be used by the main thread + f_t last_log = 0.0; + omp_atomic_t nodes_since_last_log = 0; } stats_; // Mutex for repair - std::mutex mutex_repair_; + omp_mutex_t mutex_repair_; std::vector> repair_queue_; // Variables for the root node in the search tree. @@ -135,49 +165,77 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - std::mutex mutex_pc_; - // Update the status of the nodes in the search tree. - void update_tree(mip_node_t* node_ptr, node_status_t status); + // Heap storing the nodes to be explored. + omp_mutex_t mutex_heap_; + mip_node_heap_t*> heap_; + + // Count the number of subtrees that are currently being explored. + omp_atomic_t active_subtrees_; + + // Queue for storing the promising node for performing dives. + omp_mutex_t mutex_dive_queue_; + dive_queue_t dive_queue_; + i_t min_diving_queue_size_; + + // Global status of the solver. + omp_atomic_t status_; + + // In case, a best-first thread encounters a numerical issue when solving a node, + // its blocks the progression of the lower bound. + omp_atomic_t lower_bound_ceiling_; + + // Set the final solution. + mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); // Update the incumbent solution with the new feasible solution. // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - char symbol); + char thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - // Explore the search tree using the best-first search strategy. - mip_status_t explore_tree(i_t branch_var, mip_solution_t& solution); - - // Explore the search tree using the depth-first search strategy. - mip_status_t dive(i_t branch_var, mip_solution_t& solution); - - // Branch the current node, creating two children. - void branch(mip_node_t* parent_node, - i_t branch_var, - f_t branch_var_val, - const std::vector& parent_vstatus); - - // Solve the LP relaxation of a leaf node. - mip_status_t solve_node_lp(mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - csc_matrix_t& Arow, - const std::vector& var_types, - f_t upper_bound); - - // Solve the LP relaxation of a leaf node using the dual simplex method. - dual::status_t node_dual_simplex(i_t leaf_id, - lp_problem_t& leaf_problem, - std::vector& leaf_vstatus, - lp_solution_t& leaf_solution, - std::vector& bounds_changed, - csc_matrix_t& Arow, - f_t upper_bound, - logger_t& log); + // Ramp-up phase of the solver, where we greedily expand the tree until + // there is enough unexplored nodes. This is done recursively using OpenMP tasks. + void exploration_ramp_up(search_tree_t* search_tree, + mip_node_t* node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + i_t initial_heap_size); + + // Explore the search tree using the best-first search with plunging strategy. + void explore_subtree(i_t id, + search_tree_t& search_tree, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow); + + // Each "main" thread pops a node from the global heap and then performs a plunge + // (i.e., a shallow dive) into the subtree determined by the node. + void best_first_thread(i_t id, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow); + + // Each diving thread pops the first node from the dive queue and then performs + // a deep dive into the subtree determined by the node. + void diving_thread(lp_problem_t& leaf_problem, const csc_matrix_t& Arow); + + // Solve the LP relaxation of a leaf node and update the tree. + node_status_t solve_node(search_tree_t& search_tree, + mip_node_t* node_ptr, + lp_problem_t& leaf_problem, + const csc_matrix_t& Arow, + f_t upper_bound, + logger_t& log, + char thread_type); + + // Sort the children based on the Martin's criteria. + std::pair*, mip_node_t*> child_selection( + mip_node_t* node_ptr); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.cpp b/cpp/src/dual_simplex/mip_node.cpp index f0b275d0a..2a907c402 100644 --- a/cpp/src/dual_simplex/mip_node.cpp +++ b/cpp/src/dual_simplex/mip_node.cpp @@ -22,7 +22,7 @@ namespace cuopt::linear_programming::dual_simplex { bool inactive_status(node_status_t status) { return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || - status == node_status_t::INFEASIBLE); + status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); } } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 2d315fe0e..a3a34eb81 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -19,8 +19,8 @@ #include #include +#include -#include #include #include #include @@ -30,11 +30,12 @@ namespace cuopt::linear_programming::dual_simplex { enum class node_status_t : int { ACTIVE = 0, // Node still in the tree - IN_PROGRESS = 1, // Node is currently being solved - INTEGER_FEASIBLE = 2, // Node has an integer feasible solution - INFEASIBLE = 3, // Node is infeasible - FATHOMED = 4, // Node objective is greater than the upper bound - HAS_CHILDREN = 5, // Node has children to explore + INTEGER_FEASIBLE = 1, // Node has an integer feasible solution + INFEASIBLE = 2, // Node is infeasible + FATHOMED = 3, // Node objective is greater than the upper bound + HAS_CHILDREN = 4, // Node has children to explore + NUMERICAL = 5, // Encountered numerical issue when solving the LP relaxation + TIME_LIMIT = 6 // Time out during the LP relaxation }; bool inactive_status(node_status_t status); @@ -201,6 +202,22 @@ class mip_node_t { } } + // This method creates a copy of the current node + // with its parent set to `nullptr`, `node_id = 0` + // and `depth = 0` such that it is the root + // of a separated tree. + mip_node_t detach_copy() const + { + mip_node_t copy(lower_bound, vstatus); + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.node_id = node_id; + return copy; + } + node_status_t status; f_t lower_bound; i_t depth; @@ -230,11 +247,96 @@ void remove_fathomed_nodes(std::vector*>& stack) template class node_compare_t { public: - bool operator()(mip_node_t& a, mip_node_t& b) + bool operator()(const mip_node_t& a, const mip_node_t& b) const { return a.lower_bound > b.lower_bound; // True if a comes before b, elements that come before are output last } + + bool operator()(const mip_node_t* a, const mip_node_t* b) const + { + return a->lower_bound > + b->lower_bound; // True if a comes before b, elements that come before are output last + } +}; + +template +class search_tree_t { + public: + search_tree_t(f_t root_lower_bound, const std::vector& basis) + : root(root_lower_bound, basis), num_nodes(0) + { + } + + search_tree_t(mip_node_t&& node) : root(std::move(node)), num_nodes(0) {} + + void update_tree(mip_node_t* node_ptr, node_status_t status) + { + mutex.lock(); + std::vector*> stack; + node_ptr->set_status(status, stack); + remove_fathomed_nodes(stack); + mutex.unlock(); + } + + void branch(mip_node_t* parent_node, + const i_t branch_var, + const f_t fractional_val, + const std::vector& parent_vstatus, + const lp_problem_t& original_lp, + logger_t& log) + { + i_t id = num_nodes.fetch_add(2); + + // down child + auto down_child = std::make_unique>( + original_lp, parent_node, ++id, branch_var, 0, fractional_val, parent_vstatus); + + graphviz_edge(log, parent_node, down_child.get(), branch_var, 0, std::floor(fractional_val)); + + // up child + auto up_child = std::make_unique>( + original_lp, parent_node, ++id, branch_var, 1, fractional_val, parent_vstatus); + + graphviz_edge(log, parent_node, up_child.get(), branch_var, 1, std::ceil(fractional_val)); + + assert(parent_vstatus.size() == original_lp.num_cols); + parent_node->add_children(std::move(down_child), + std::move(up_child)); // child pointers moved into the tree + } + + void graphviz_node(logger_t& log, + const mip_node_t* node_ptr, + const std::string label, + const f_t val) + { + if (write_graphviz) { + log.printf("Node%d [label=\"%s %.16e\"]\n", node_ptr->node_id, label.c_str(), val); + } + } + + void graphviz_edge(logger_t& log, + const mip_node_t* origin_ptr, + const mip_node_t* dest_ptr, + const i_t branch_var, + const i_t branch_dir, + const f_t bound) + { + if (write_graphviz) { + log.printf("Node%d -> Node%d [label=\"x%d %s %e\"]\n", + origin_ptr->node_id, + dest_ptr->node_id, + branch_var, + branch_dir == 0 ? "<=" : ">=", + bound); + } + } + + mip_node_t root; + omp_mutex_t mutex; + omp_atomic_t num_nodes; + + static constexpr bool write_graphviz = false; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 370badf33..01d995ba6 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -29,9 +29,7 @@ #include #include #include -#include -#include -#include +#include namespace cuopt::linear_programming::dual_simplex { diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 95c96f859..4bd9590e1 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -21,15 +21,14 @@ #include #include -#include +#include namespace cuopt::linear_programming::dual_simplex { namespace { template -void strong_branch_helper(i_t thread_id, - i_t start, +void strong_branch_helper(i_t start, i_t end, f_t start_time, const lp_problem_t& original_lp, @@ -46,6 +45,7 @@ void strong_branch_helper(i_t thread_id, constexpr bool verbose = false; f_t last_log = tic(); + i_t thread_id = omp_get_thread_num(); for (i_t k = start; k < end; ++k) { const i_t j = fractional[k]; @@ -79,7 +79,6 @@ void strong_branch_helper(i_t thread_id, solution, iter, child_edge_norms); - const f_t lp_solve_time = toc(lp_start_time); f_t obj = std::numeric_limits::infinity(); if (status == dual::status_t::DUAL_UNBOUNDED) { @@ -126,15 +125,10 @@ void strong_branch_helper(i_t thread_id, } if (toc(start_time) > settings.time_limit) { break; } - pc.strong_branches_completed.lock(); - pc.num_strong_branches_completed++; - pc.strong_branches_completed.unlock(); + const i_t completed = pc.num_strong_branches_completed++; if (thread_id == 0 && toc(last_log) > 10) { last_log = tic(); - pc.strong_branches_completed.lock(); - const i_t completed = pc.num_strong_branches_completed; - pc.strong_branches_completed.unlock(); settings.log.printf("%d of %ld strong branches completed in %.1fs\n", completed, fractional.size(), @@ -148,58 +142,6 @@ void strong_branch_helper(i_t thread_id, } } -template -void get_partitioning(i_t N, i_t num_threads, i_t tid, i_t& begin, i_t& end) -{ - int base_tasks = N / num_threads; - int remaining_tasks = N % num_threads; - - if (tid < remaining_tasks) { - begin = tid * (base_tasks + 1); - end = begin + base_tasks + 1; - } else { - begin = tid * base_tasks + remaining_tasks; - end = begin + base_tasks; - } -} - -template -void thread_strong_branch(i_t thread_id, - i_t num_threads, - f_t start_time, - const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings, - const std::vector& var_types, - const std::vector& fractional, - f_t root_obj, - const std::vector& root_soln, - const std::vector& root_vstatus, - const std::vector& edge_norms, - pseudo_costs_t& pc) -{ - i_t start, end; - get_partitioning(static_cast(fractional.size()), num_threads, thread_id, start, end); - constexpr bool verbose = false; - if (verbose) { - settings.log.printf( - "Thread id %d start %d end %d. size %d\n", thread_id, start, end, end - start); - } - - strong_branch_helper(thread_id, - start, - end, - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - pc); -} - } // namespace template @@ -214,51 +156,49 @@ void strong_branching(const lp_problem_t& original_lp, const std::vector& edge_norms, pseudo_costs_t& pc) { + pc.resize(original_lp.num_cols); pc.strong_branch_down.resize(fractional.size()); pc.strong_branch_up.resize(fractional.size()); - pc.num_strong_branches_completed = 0; - if (fractional.size() > settings.num_threads) { - int num_threads = settings.num_threads; - std::thread threads[num_threads]; - settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", - num_threads, - fractional.size()); - for (int thread_id = 0; thread_id < num_threads; thread_id++) { - threads[thread_id] = std::thread(thread_strong_branch, - thread_id, - num_threads, - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - std::ref(pc)); - } + settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", + settings.num_threads, + fractional.size()); + +#pragma omp parallel num_threads(settings.num_threads) + { + i_t n = std::min(4 * settings.num_threads, fractional.size()); + + // Here we are creating more tasks than the number of threads + // such that they can be scheduled dynamically to the threads. +#pragma omp for schedule(dynamic, 1) + for (i_t k = 0; k < n; k++) { + i_t start = std::floor(k * fractional.size() / n); + i_t end = std::floor((k + 1) * fractional.size() / n); + + constexpr bool verbose = false; + if (verbose) { + settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n", + omp_get_thread_num(), + k, + start, + end, + end - start); + } - for (int thread_id = 0; thread_id < num_threads; thread_id++) { - threads[thread_id].join(); + strong_branch_helper(start, + end, + start_time, + original_lp, + settings, + var_types, + fractional, + root_obj, + root_soln, + root_vstatus, + edge_norms, + pc); } - } else { - settings.log.printf("Strong branching on %ld fractional variables\n", fractional.size()); - strong_branch_helper(0, - 0, - static_cast(fractional.size()), - start_time, - original_lp, - settings, - var_types, - fractional, - root_obj, - root_soln, - root_vstatus, - edge_norms, - pc); } pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); @@ -268,6 +208,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { + mutex.lock(); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == 0 ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -279,6 +220,7 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } + mutex.unlock(); } template @@ -317,10 +259,10 @@ void pseudo_costs_t::initialized(i_t& num_initialized_down, template i_t pseudo_costs_t::variable_selection(const std::vector& fractional, const std::vector& solution, - const std::vector& lower, - const std::vector& upper, - logger_t& log) const + logger_t& log) { + mutex.lock(); + const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); std::vector pseudo_cost_down(num_fractional); @@ -373,6 +315,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio log.printf( "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); + mutex.unlock(); + return branch_var; } diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index d26fe8d48..5bd03e3fc 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -21,8 +21,9 @@ #include #include #include +#include -#include +#include namespace cuopt::linear_programming::dual_simplex { @@ -54,9 +55,7 @@ class pseudo_costs_t { i_t variable_selection(const std::vector& fractional, const std::vector& solution, - const std::vector& lower, - const std::vector& upper, - logger_t& log) const; + logger_t& log); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); @@ -67,8 +66,8 @@ class pseudo_costs_t { std::vector strong_branch_down; std::vector strong_branch_up; - std::mutex strong_branches_completed; - i_t num_strong_branches_completed = 0; + omp_mutex_t mutex; + omp_atomic_t num_strong_branches_completed = 0; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 5b7e8bf0f..9ff96ebbd 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -20,11 +20,11 @@ #include #include +#include #include #include #include #include -#include namespace cuopt::linear_programming::dual_simplex { @@ -59,9 +59,9 @@ struct simplex_solver_settings_t { refactor_frequency(100), iteration_log_frequency(1000), first_iteration_log(2), - num_threads(std::thread::hardware_concurrency() > 8 - ? (std::thread::hardware_concurrency() / 8) - : std::thread::hardware_concurrency()), + num_threads(omp_get_max_threads() - 1), + num_bfs_threads(std::min(num_threads / 4, 1)), + num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), random_seed(0), inside_mip(0), solution_callback(nullptr), @@ -97,15 +97,17 @@ struct simplex_solver_settings_t { bool use_bound_flip_ratio; // true if using the bound flip ratio test bool scale_columns; // true to scale the columns of A bool relaxation; // true to only solve the LP relaxation of a MIP - bool - use_left_looking_lu; // true to use left looking LU factorization, false to use right looking - bool eliminate_singletons; // true to eliminate singletons from the basis - bool print_presolve_stats; // true to print presolve stats - i_t refactor_frequency; // number of basis updates before refactorization - i_t iteration_log_frequency; // number of iterations between log updates - i_t first_iteration_log; // number of iterations to log at beginning of solve - i_t num_threads; // number of threads to use - i_t random_seed; // random seed + bool use_left_looking_lu; // true to use left looking LU factorization, + // false to use right looking + bool eliminate_singletons; // true to eliminate singletons from the basis + bool print_presolve_stats; // true to print presolve stats + i_t refactor_frequency; // number of basis updates before refactorization + i_t iteration_log_frequency; // number of iterations between log updates + i_t first_iteration_log; // number of iterations to log at beginning of solve + i_t num_threads; // number of threads to use + i_t num_bfs_threads; // number of threads dedicated to the best-first search + i_t num_diving_threads; // number of threads dedicated to diving + i_t random_seed; // random seed i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function heuristic_preemption_callback; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 14986df66..a8da9c325 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -91,7 +91,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_ITERATION_LIMIT, &pdlp_settings.iteration_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, {CUOPT_PDLP_SOLVER_MODE, reinterpret_cast(&pdlp_settings.pdlp_solver_mode), CUOPT_PDLP_SOLVER_MODE_STABLE1, CUOPT_PDLP_SOLVER_MODE_STABLE3, CUOPT_PDLP_SOLVER_MODE_STABLE3}, {CUOPT_METHOD, reinterpret_cast(&pdlp_settings.method), CUOPT_METHOD_CONCURRENT, CUOPT_METHOD_DUAL_SIMPLEX, CUOPT_METHOD_CONCURRENT}, - {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1} + {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1}, }; // Bool parameters diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 9df096655..a9cd335a9 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -312,9 +312,9 @@ void diversity_manager_t::run_fj_alone(solution_t& solution) template void diversity_manager_t::run_fp_alone(solution_t& solution) { - CUOPT_LOG_INFO("Running FP alone!"); + CUOPT_LOG_DEBUG("Running FP alone!"); ls.run_fp(solution, timer, &population); - CUOPT_LOG_INFO("FP alone finished!"); + CUOPT_LOG_DEBUG("FP alone finished!"); } template @@ -543,7 +543,7 @@ void diversity_manager_t::recombine_and_ls_with_all( { raft::common::nvtx::range fun_scope("recombine_and_ls_with_all"); if (solutions.size() > 0) { - CUOPT_LOG_INFO("Running recombiners on B&B solutions with size %lu", solutions.size()); + CUOPT_LOG_DEBUG("Running recombiners on B&B solutions with size %lu", solutions.size()); // add all solutions because time limit might have been consumed and we might have exited before for (auto& sol : solutions) { cuopt_func_call(sol.test_feasibility(true)); diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 1818092fc..fa5968c6f 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -111,8 +111,11 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 2; + branch_and_bound_settings.num_bfs_threads = 1; + branch_and_bound_settings.num_diving_threads = 1; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu index b9408755c..be7461106 100644 --- a/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/lb_constraint_prop.cu @@ -770,7 +770,6 @@ bool lb_constraint_prop_t::find_integer( { RAFT_CHECK_CUDA(problem.handle_ptr->get_stream()); if (orig_sol.problem_ptr->n_integer_vars == 0) { - CUOPT_LOG_ERROR("No integer variables provided in the bounds prop rounding"); cuopt_func_call(orig_sol.test_variable_bounds()); return orig_sol.compute_feasibility(); } diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index a9b7615ed..7c89e849d 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -172,11 +172,19 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - if (context.settings.num_cpu_threads != -1) { + if (context.settings.num_cpu_threads < 0) { + branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; + } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); + i_t num_threads = branch_and_bound_settings.num_threads; + i_t num_bfs_threads = std::max(1, num_threads / 4); + i_t num_diving_threads = std::max(1, num_threads - num_bfs_threads); + branch_and_bound_settings.num_bfs_threads = num_bfs_threads; + branch_and_bound_settings.num_diving_threads = num_diving_threads; + // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = std::bind(&branch_and_bound_solution_helper_t::solution_callback, diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp new file mode 100644 index 000000000..d5b91c6f9 --- /dev/null +++ b/cpp/src/utilities/omp_helpers.hpp @@ -0,0 +1,138 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef _OPENMP + +#include +#include + +namespace cuopt { + +// Wrapper of omp_lock_t. Optionally, you can provide a hint as defined in +// https://www.openmp.org/spec-html/5.1/openmpse39.html#x224-2570003.9 +class omp_mutex_t { + public: + omp_mutex_t() { omp_init_lock(&mutex); } + virtual ~omp_mutex_t() { omp_destroy_lock(&mutex); } + void lock() { omp_set_lock(&mutex); } + void unlock() { omp_unset_lock(&mutex); } + bool try_lock() { return omp_test_lock(&mutex); } + + private: + omp_lock_t mutex; +}; + +// Wrapper for omp atomic operations. See +// https://www.openmp.org/spec-html/5.1/openmpsu105.html. +template +class omp_atomic_t { + public: + omp_atomic_t() = default; + omp_atomic_t(T val) : val(val) {} + + T operator=(T new_val) + { + store(new_val); + return new_val; + } + + operator T() { return load(); } + T operator+=(T inc) { return fetch_add(inc) + inc; } + T operator-=(T inc) { return fetch_sub(inc) - inc; } + + // In theory, this should be enabled only for integers, + // but it works for any numerical types. + T operator++() { return fetch_add(T(1)) + 1; } + T operator++(int) { return fetch_add(T(1)); } + T operator--() { return fetch_sub(T(1)) - 1; } + T operator--(int) { return fetch_sub(T(1)); } + + T load() + { + T res; +#pragma omp atomic read + res = val; + return res; + } + + void store(T new_val) + { +#pragma omp atomic write + val = new_val; + } + + T exchange(T other) + { + T old; +#pragma omp atomic capture + { + old = val; + val = other; + } + return old; + } + + T fetch_add(T inc) + { + T old; +#pragma omp atomic capture + { + old = val; + val += inc; + } + return old; + } + + T fetch_sub(T inc) { return fetch_add(-inc); } + +// Atomic CAS are only supported in OpenMP v5.1 +// (gcc 12+ or clang 14+), however, nvcc (or the host compiler) cannot +// parse it correctly yet +#ifndef __NVCC__ + + T fetch_min(T other) + { + T old; +#pragma omp atomic compare capture + { + old = val; + val = other < val ? other : val; + } + return old; + } + + T fetch_max(T other) + { + T old; +#pragma omp atomic compare capture + { + old = val; + val = other > val ? other : val; + } + return old; + } +#endif + + private: + T val; +}; + +#endif + +} // namespace cuopt