diff --git a/ci/compute-sanitizer-suppressions.xml b/ci/compute-sanitizer-suppressions.xml new file mode 100644 index 000000000..254a8b6ce --- /dev/null +++ b/ci/compute-sanitizer-suppressions.xml @@ -0,0 +1,225 @@ + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + .* + + + + .*libcuda.so.* + + + cusparseCsr2cscEx2 + .*libcusparse.so.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + cudaLaunchKernel + + + .*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 2 bytes + 2 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + cudaLaunchKernel + + + .*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 8 bytes + 8 + + + DeviceSegmentedReduceKernel + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + libcudart.* + + + .*libcuopt.* + + + .*Device(Reduce|Scan)Kernel.* + + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + 16 + + + + cuMemcpyDtoHAsync.* + .*libcuda.so.* + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*librmm.so.* + + + rmm::device_buffer::device_buffer + .*librmm.so.* + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*librmm.so.* + + + .*librmm.so.* + + + rmm::device_uvector.*::device_uvector + .*libcuopt.so.* + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyDtoDAsync.* + .*libcuda.so.* + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + cudaMemcpyAsync + + + rmm::device_buffer::resize + .*librmm.so.* + + + + diff --git a/cpp/libmps_parser/src/mps_parser.cpp b/cpp/libmps_parser/src/mps_parser.cpp index 09c8bec64..b5ff94b18 100644 --- a/cpp/libmps_parser/src/mps_parser.cpp +++ b/cpp/libmps_parser/src/mps_parser.cpp @@ -1139,7 +1139,7 @@ void mps_parser_t::parse_bounds(std::string_view line) c_values.emplace_back(f_t(0)); variable_lower_bounds.emplace_back(0); variable_upper_bounds.emplace_back(+std::numeric_limits::infinity()); - var_types.resize(var_types.size() + 1); + var_types.emplace_back('C'); itr = var_names_map.find(std::string(var_name)); } i_t var_id = itr->second; diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index 475353078..644a3c52d 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -541,12 +542,54 @@ cusparse_view_t::cusparse_view_t( A_indices_(dummy_int) { } +template +cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle, + cusparseOperation_t opA, + const T* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnVecDescr_t vecX, + const T* beta, + const cusparseDnVecDescr_t vecY, + cusparseSpMVAlg_t alg, + T* externalBuffer, + cudaStream_t stream) +{ + void* dest_ptr; + int64_t dest_size; + cudaDataType valtype; + RAFT_CUSPARSE_TRY(cusparseDnVecGet(vecY, &dest_size, &dest_ptr, &valtype)); + // cusparse flags a false positive here on the destination tmp buffer, silence it + cuopt::mark_memory_as_initialized(dest_ptr, dest_size, stream); + + return raft::sparse::detail::cusparsespmv( + handle, opA, alpha, matA, vecX, beta, vecY, alg, externalBuffer, stream); +} #if MIP_INSTANTIATE_FLOAT template class cusparse_view_t; +template cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle, + cusparseOperation_t opA, + const float* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnVecDescr_t vecX, + const float* beta, + const cusparseDnVecDescr_t vecY, + cusparseSpMVAlg_t alg, + float* externalBuffer, + cudaStream_t stream); #endif #if MIP_INSTANTIATE_DOUBLE template class cusparse_view_t; +template cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnVecDescr_t vecX, + const double* beta, + const cusparseDnVecDescr_t vecY, + cusparseSpMVAlg_t alg, + double* externalBuffer, + cudaStream_t stream); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/linear_programming/cusparse_view.hpp b/cpp/src/linear_programming/cusparse_view.hpp index fc82725b5..139f235a5 100644 --- a/cpp/src/linear_programming/cusparse_view.hpp +++ b/cpp/src/linear_programming/cusparse_view.hpp @@ -103,4 +103,17 @@ class cusparse_view_t { const rmm::device_uvector& A_offsets_; const rmm::device_uvector& A_indices_; }; + +template +cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle, + cusparseOperation_t opA, + const T* alpha, + const cusparseSpMatDescr_t matA, + const cusparseDnVecDescr_t vecX, + const T* beta, + const cusparseDnVecDescr_t vecY, + cusparseSpMVAlg_t alg, + T* externalBuffer, + cudaStream_t stream); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index f932eeb8d..b658cfb82 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -85,17 +87,16 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_scalar // Done in previous function // K(x'+delta_x) - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 1 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), // 1 + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); // y - (sigma*dual_gradient) // max(min(0, sigma*constraint_upper+primal_product), sigma*constraint_lower+primal_product) @@ -122,16 +123,16 @@ void pdhg_solver_t::compute_At_y() { // A_t @ y - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); } template diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 84b04d43f..204304229 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include "cuopt/linear_programming/pdlp/solver_solution.hpp" #include @@ -1048,6 +1050,9 @@ optimization_problem_solution_t pdlp_solver_t::run_solver( primal_size_h_, clamp(), stream_view_); + // Triggers a false positive in compute-sanitizer otherwise (lack of initialization doesn't + // matter here) + cuopt::mark_span_as_initialized(make_span(unscaled_primal_avg_solution_), stream_view_); raft::linalg::ternaryOp(unscaled_primal_avg_solution_.data(), unscaled_primal_avg_solution_.data(), op_problem_scaled_.variable_lower_bounds.data(), diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 236ec373d..ac7ac571b 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -1731,16 +1731,16 @@ void pdlp_restart_strategy_t::compute_primal_gradient( primal_size_h_, stream_view_); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_neg_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, - reusable_device_scalar_value_1_.data(), - cusparse_view.primal_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_neg_1_.data(), + cusparse_view.A_T, + cusparse_view.dual_solution, + reusable_device_scalar_value_1_.data(), + cusparse_view.primal_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_transpose.data(), + stream_view_)); } template @@ -1798,17 +1798,16 @@ void pdlp_restart_strategy_t::compute_dual_gradient( // is changed with the introduction of constraint upper and lower bounds // gradient constains primal_product - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_non_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view.A, + cusparse_view.primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_non_transpose.data(), + stream_view_)); // tmp_dual will contain the subgradient i_t number_of_blocks = dual_size_h_ / block_size; @@ -1856,16 +1855,16 @@ void pdlp_restart_strategy_t::compute_lagrangian_value( stream_view_)); // third term, let beta be 0 to not add what is in tmp_primal, compute it and compute dot - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_primal, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view.A_T, + cusparse_view.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view.tmp_primal, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_transpose.data(), + stream_view_)); RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(handle_ptr_->get_cublas_handle(), primal_size_h_, diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index 156986918..bb40dcc67 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -276,17 +278,16 @@ void adaptive_step_size_strategy_t::compute_interaction_and_movement( // Compute A_t @ (y' - y) = A_t @ y' - 1 * current_AtY // First compute Ay' to be reused as Ay in next PDHG iteration (if found step size if valid) - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // alpha - cusparse_view.A_T, - cusparse_view.potential_next_dual_solution, - reusable_device_scalar_value_0_.data(), // beta - cusparse_view.next_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // alpha + cusparse_view.A_T, + cusparse_view.potential_next_dual_solution, + reusable_device_scalar_value_0_.data(), // beta + cusparse_view.next_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_transpose.data(), + stream_view_)); // Compute Ay' - Ay = next_Aty - current_Aty cub::DeviceTransform::Transform( diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index 2378b8b9b..479efc393 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include @@ -225,17 +227,16 @@ void convergence_information_t::compute_primal_residual( raft::common::nvtx::range fun_scope("compute_primal_residual"); // primal_product - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_dual, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_non_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view.A, + cusparse_view.primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view.tmp_dual, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_non_transpose.data(), + stream_view_)); // The constraint bound violations for the first part of the residual raft::linalg::ternaryOp>(primal_residual_.data(), @@ -298,16 +299,16 @@ void convergence_information_t::compute_dual_residual( raft::copy( tmp_primal.data(), problem_ptr->objective_coefficients.data(), primal_size_h_, stream_view_); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_neg_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, - reusable_device_scalar_value_1_.data(), - cusparse_view.tmp_primal, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_neg_1_.data(), + cusparse_view.A_T, + cusparse_view.dual_solution, + reusable_device_scalar_value_1_.data(), + cusparse_view.tmp_primal, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_transpose.data(), + stream_view_)); compute_reduced_cost_from_primal_gradient(tmp_primal, primal_solution); diff --git a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu index fd9651d9a..02250a4c4 100644 --- a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu +++ b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu @@ -226,17 +226,16 @@ template void infeasibility_information_t::compute_homogenous_primal_residual( cusparse_view_t& cusparse_view, rmm::device_uvector& tmp_dual) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view.A, - cusparse_view.primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_dual, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_non_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view.A, + cusparse_view.primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view.tmp_dual, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_non_transpose.data(), + stream_view_)); raft::linalg::ternaryOp(homogenous_primal_residual_.data(), tmp_dual.data(), @@ -299,16 +298,16 @@ void infeasibility_information_t::compute_homogenous_dual_residual( // need to recompute the primal gradient since c is the all zero vector in the homogenous case // this means that the primal gradient is computed as -A^T*y - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_neg_1_.data(), - cusparse_view.A_T, - cusparse_view.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view.tmp_primal, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_neg_1_.data(), + cusparse_view.A_T, + cusparse_view.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view.tmp_primal, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view.buffer_transpose.data(), + stream_view_)); compute_reduced_cost_from_primal_gradient(tmp_primal, primal_ray); // primal gradient is now in temp diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index d0fc6811b..4c33efcec 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index b406d56a3..6d336679e 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -450,6 +450,7 @@ solution_t diversity_manager_t::run_solver() lp_settings.return_first_feasible = false; lp_settings.save_state = true; lp_settings.concurrent_halt = &global_concurrent_halt; + lp_settings.has_initial_primal = false; rmm::device_uvector lp_optimal_solution_copy(lp_optimal_solution.size(), problem_ptr->handle_ptr->get_stream()); auto lp_result = diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index 6a42cb39a..6999a1f5f 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -745,6 +745,7 @@ void population_t::print() if (index.first == 0 && solutions[0].first) { CUOPT_LOG_DEBUG(" Best feasible: %f", solutions[index.first].second.get_user_objective()); } + if (index.first == 0 && !solutions[0].first) continue; CUOPT_LOG_DEBUG("%d : %f\t%f\t%f\t%d", i, index.second, diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh index f38cc5759..2be784fc4 100644 --- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh @@ -115,6 +115,7 @@ class bound_prop_recombiner_t : public recombiner_t { auto other_view = other.view(); auto offspring_view = offspring.view(); const f_t int_tol = guiding.problem_ptr->tolerances.integrality_tolerance; + cuopt_assert(variable_map.size() == probing_values.size(), "The number of vars should match!"); thrust::for_each( guiding.handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0lu), @@ -183,6 +184,7 @@ class bound_prop_recombiner_t : public recombiner_t { if (guiding_solution.get_feasible()) { this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); + probing_values.resize(fixed_problem.n_variables, a.handle_ptr->get_stream()); timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit); rmm::device_uvector old_assignment(offspring.assignment, offspring.handle_ptr->get_stream()); diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cu b/cpp/src/mip/feasibility_jump/feasibility_jump.cu index 8e864dcf2..7598f6b59 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cu @@ -506,6 +506,9 @@ void fj_t::climber_init(i_t climber_idx, const rmm::cuda_stream_view& view = climber->view(); + cuopt::mark_span_as_initialized(view.row_size_bin_prefix_sum, climber_stream); + cuopt::mark_span_as_initialized(view.row_size_nonbin_prefix_sum, climber_stream); + if (pb_ptr->related_variables.size() > 0) { // for each variable, compute the number of nnzs that would be examined during a FJ move update // pass to help determine whether to run load balancing or not diff --git a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu index 8286d8148..9fb1a7af6 100644 --- a/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip/local_search/feasibility_pump/feasibility_pump.cu @@ -67,6 +67,10 @@ feasibility_pump_t::feasibility_pump_t( rng(cuopt::seed_generator::get_seed()), timer(20.) { + thrust::fill(context.problem_ptr->handle_ptr->get_thrust_policy(), + last_projection.begin(), + last_projection.end(), + (f_t)0); } template diff --git a/cpp/src/mip/local_search/rounding/constraint_prop.cu b/cpp/src/mip/local_search/rounding/constraint_prop.cu index 6e6a5deb3..4f9b04f9f 100644 --- a/cpp/src/mip/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip/local_search/rounding/constraint_prop.cu @@ -816,6 +816,7 @@ bool constraint_prop_t::is_problem_ii(problem_t& problem) { bounds_update.calculate_activity_on_problem_bounds(problem); bounds_update.calculate_infeasible_redundant_constraints(problem); + multi_probe.calculate_activity(problem, problem.handle_ptr); bool problem_ii = bounds_update.infeas_constraints_count > 0; return problem_ii; } diff --git a/cpp/src/mip/presolve/bounds_update_data.cu b/cpp/src/mip/presolve/bounds_update_data.cu index a1616b1d9..acc2a1498 100644 --- a/cpp/src/mip/presolve/bounds_update_data.cu +++ b/cpp/src/mip/presolve/bounds_update_data.cu @@ -45,6 +45,35 @@ void bounds_update_data_t::resize(problem_t& problem) changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); next_changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); changed_variables.resize(problem.n_variables, problem.handle_ptr->get_stream()); + + thrust::fill(problem.handle_ptr->get_thrust_policy(), + min_activity.begin(), + min_activity.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + max_activity.begin(), + max_activity.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + lb.begin(), + lb.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + ub.begin(), + ub.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + changed_constraints.begin(), + changed_constraints.end(), + -1); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + next_changed_constraints.begin(), + next_changed_constraints.end(), + -1); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + changed_variables.begin(), + changed_variables.end(), + -1); } template diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh index f4940a62f..029227207 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cuh +++ b/cpp/src/mip/presolve/trivial_presolve.cuh @@ -248,6 +248,8 @@ void update_from_csr(problem_t& pb) // create renumbering maps rmm::device_uvector cnst_renum_ids(pb.n_constraints, handle_ptr->get_stream()); rmm::device_uvector var_renum_ids(pb.n_variables, handle_ptr->get_stream()); + cuopt_assert(cnst_map.size() == pb.n_constraints, "cnst_map size mismatch"); + cuopt_assert(var_map.size() == pb.n_variables, "var_map size mismatch"); thrust::inclusive_scan( handle_ptr->get_thrust_policy(), cnst_map.begin(), @@ -269,7 +271,6 @@ void update_from_csr(problem_t& pb) pb.variables.begin() + nnz_edge_count, pb.variables.begin(), apply_renumbering_t{make_span(var_renum_ids)}); - cuopt_func_call(test_renumbered_coo(make_span(cnst, 0, nnz_edge_count), pb)); auto updated_n_cnst = 1 + cnst_renum_ids.back_element(handle_ptr->get_stream()); diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 1a5f76b03..5c9ed365b 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -21,6 +21,7 @@ #include "problem_kernels.cuh" #include +#include #include #include @@ -328,6 +329,10 @@ void problem_t::compute_transpose_of_problem() n_variables, nnz, handle_ptr->get_stream()); + // cuSparse causes false positives + cuopt::mark_span_as_initialized(make_span(reverse_offsets), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(reverse_constraints), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(reverse_coefficients), handle_ptr->get_stream()); } template @@ -884,6 +889,8 @@ void problem_t::compute_related_variables(double time_limit) // generate the related var offsets from the prefix sum auto offset_it = related_variables_offsets.begin() + 1 + output_offset; + + // avoid initcheck false positive thrust::tabulate(handle_ptr->get_thrust_policy(), offset_it, offset_it + slice_size, diff --git a/cpp/src/mip/relaxed_lp/lp_state.cuh b/cpp/src/mip/relaxed_lp/lp_state.cuh index 0961a537f..3b50768f2 100644 --- a/cpp/src/mip/relaxed_lp/lp_state.cuh +++ b/cpp/src/mip/relaxed_lp/lp_state.cuh @@ -56,8 +56,25 @@ class lp_state_t { void resize(problem_t& problem, rmm::cuda_stream_view stream) { + i_t prev_primal_size = prev_primal.size(); + i_t prev_dual_size = prev_dual.size(); prev_primal.resize(problem.n_variables, stream); prev_dual.resize(problem.n_constraints, stream); + + // zero-fill the newly allocated space + if (prev_primal_size < problem.n_variables) { + thrust::fill(problem.handle_ptr->get_thrust_policy(), + prev_primal.data() + prev_primal_size, + prev_primal.data() + problem.n_variables, + 0); + clamp_within_var_bounds(prev_primal, &problem, problem.handle_ptr); + } + if (prev_dual_size < problem.n_constraints) { + thrust::fill(problem.handle_ptr->get_thrust_policy(), + prev_dual.data() + prev_dual_size, + prev_dual.data() + problem.n_constraints, + 0); + } } void set_state(const rmm::device_uvector& primal_solution, diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index d7a337dd2..8cb51cc67 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -68,19 +68,23 @@ optimization_problem_solution_t get_relaxed_lp_solution( op_problem.n_variables, op_problem.n_constraints); lp_state.resize(op_problem, op_problem.handle_ptr->get_stream()); - clamp_within_var_bounds(assignment, &op_problem, op_problem.handle_ptr); - // The previous dual sometimes contain invalid values w.r.t current problem - // Adjust better dual values when we use warm start - thrust::tabulate(op_problem.handle_ptr->get_thrust_policy(), - lp_state.prev_dual.data(), - lp_state.prev_dual.data() + op_problem.n_constraints, - [prev_size, dual = make_span(lp_state.prev_dual)] __device__(i_t i) { - f_t x = dual[i]; - if (!isfinite(x) || i >= prev_size) { return 0.0; } - return x; - }); - lp_solver.set_initial_primal_solution(assignment); - lp_solver.set_initial_dual_solution(lp_state.prev_dual); + + if (settings.has_initial_primal) { + // The previous dual sometimes contain invalid values w.r.t current problem + // Adjust better dual values when we use warm start + thrust::tabulate(op_problem.handle_ptr->get_thrust_policy(), + lp_state.prev_dual.data(), + lp_state.prev_dual.data() + op_problem.n_constraints, + [prev_size, dual = make_span(lp_state.prev_dual)] __device__(i_t i) { + f_t x = dual[i]; + if (!isfinite(x) || i >= prev_size) { return 0.0; } + return x; + }); + + clamp_within_var_bounds(assignment, &op_problem, op_problem.handle_ptr); + lp_solver.set_initial_primal_solution(assignment); + lp_solver.set_initial_dual_solution(lp_state.prev_dual); + } } CUOPT_LOG_DEBUG( "running LP with n_vars %d n_cstr %d", op_problem.n_variables, op_problem.n_constraints); diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh index a5fe23adb..b6ce2bb5a 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh @@ -33,6 +33,7 @@ struct relaxed_lp_settings_t { bool return_first_feasible = false; bool save_state = true; bool per_constraint_residual = false; + bool has_initial_primal = true; std::atomic* concurrent_halt = nullptr; }; diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index cabb0edda..f0c42982e 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,18 @@ void solution_t::copy_from(const solution_t& other_sol) h_user_obj = other_sol.h_user_obj; h_infeasibility_cost = other_sol.h_infeasibility_cost; expand_device_copy(assignment, other_sol.assignment, handle_ptr->get_stream()); + + // slack, excess, and constraint value may be uninitialized (and computed later). Mark them as + // such + cuopt::mark_span_as_initialized(make_span(other_sol.lower_excess), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.upper_excess), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.lower_slack), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.upper_slack), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.constraint_value), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.obj_val), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.n_feasible_constraints), + handle_ptr->get_stream()); + expand_device_copy(lower_excess, other_sol.lower_excess, handle_ptr->get_stream()); expand_device_copy(upper_excess, other_sol.upper_excess, handle_ptr->get_stream()); expand_device_copy(lower_slack, other_sol.lower_slack, handle_ptr->get_stream()); @@ -273,6 +286,7 @@ void solution_t::compute_constraints() { if (problem_ptr->n_constraints == 0) { return; } + n_feasible_constraints.set_value_to_zero_async(handle_ptr->get_stream()); i_t TPB = 64; compute_constraint_values <<n_constraints, TPB, 0, handle_ptr->get_stream()>>>(view()); @@ -304,7 +318,6 @@ f_t solution_t::compute_l2_residual() template bool solution_t::compute_feasibility() { - n_feasible_constraints.set_value_to_zero_async(handle_ptr->get_stream()); compute_constraints(); compute_objective(); compute_infeasibility(); @@ -476,6 +489,8 @@ template f_t solution_t::get_quality(const rmm::device_uvector& cstr_weights, const rmm::device_scalar& objective_weight) { + compute_constraints(); + // TODO we can as well keep the weights in the solution and compute this once f_t weighted_infeasibility = thrust::transform_reduce( handle_ptr->get_thrust_policy(), diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 78593d3e1..28262498b 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -234,6 +235,18 @@ raft::device_span make_span(rmm::device_uvector const& container) return raft::device_span(container.data(), container.size()); } +template +raft::device_span make_span(rmm::device_scalar& scalar) +{ + return raft::device_span(scalar.data(), 1); +} + +template +raft::device_span make_span(rmm::device_scalar const& scalar) +{ + return raft::device_span(scalar.data(), 1); +} + // resizes the device vector if it the std vector is larger template inline void expand_device_copy(rmm::device_uvector& device_vec, diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 3de820699..4bf127684 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -20,11 +20,24 @@ #include #include +#include #include #include #include #include +#if CUDART_VERSION >= 12080 +// TODO: investigate why this is necessary? dependency conflict? file NVBUG if necessary +#include +#ifndef NVTX_NULLPTR +#define NVTX_NULLPTR nullptr +#endif +#ifndef NVTX_REINTERPRET_CAST +#define NVTX_REINTERPRET_CAST(type, value) (reinterpret_cast(value)) +#endif +#include +#endif + namespace cuopt { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700) @@ -208,4 +221,48 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) array[0] = item; } +// NOTE: this marks a range of virtual memory as initialized. This is not tied to any object's +// lifetime As such, when using a pool for allocations, false negatives could occurs e.g. a range +// previously marked as initialized is now occupied by a new uninitialized object Unlikely to cause +// issues in practice - but worth noting (RAII? I'm not even sure the API allows to un-mark a range +// as initialized) +static inline void mark_memory_as_initialized(const void* ptr, size_t size, cudaStream_t stream = 0) +{ +#if CUDART_VERSION >= 12080 + + if (size == 0 || ptr == nullptr) return; + +#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) + constexpr auto PerThreadDefaultStream = true; +#else + constexpr auto PerThreadDefaultStream = false; +#endif + + nvtxMemVirtualRangeDesc_t nvtxRangeDesc = {}; + nvtxRangeDesc.size = size; + nvtxRangeDesc.ptr = ptr; + + nvtxMemMarkInitializedBatch_t nvtxRegionsDesc = {}; + nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; + nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); + nvtxRegionsDesc.regionType = NVTX_MEM_TYPE_VIRTUAL_ADDRESS; + nvtxRegionsDesc.regionDescCount = 1; + nvtxRegionsDesc.regionDescElementSize = sizeof(nvtxRangeDesc); + nvtxRegionsDesc.regionDescElements = &nvtxRangeDesc; + + nvtxMemCudaMarkInitialized( + raft::common::nvtx::detail::domain_store::value(), + stream, + PerThreadDefaultStream, + &nvtxRegionsDesc); +#endif +} + +template +static inline void mark_span_as_initialized(const raft::device_span span, + rmm::cuda_stream_view stream) +{ + mark_memory_as_initialized(span.data(), span.size() * sizeof(T), stream.value()); +} + } // namespace cuopt