diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index b251b3eaba..4c01f68384 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -46,6 +46,8 @@ #define CUOPT_DUALIZE "dualize" #define CUOPT_ORDERING "ordering" #define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_BARRIER_ITERATIVE_REFINEMENT "barrier_iterative_refinement" +#define CUOPT_BARRIER_STEP_SCALE "barrier_step_scale" #define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" #define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" #define CUOPT_PRESOLVE "presolve" @@ -74,6 +76,7 @@ #define CUOPT_MIP_BATCH_PDLP_RELIABILITY_BRANCHING "mip_batch_pdlp_reliability_branching" #define CUOPT_MIP_STRONG_BRANCHING_SIMPLEX_ITERATION_LIMIT \ "mip_strong_branching_simplex_iteration_limit" + #define CUOPT_SOLUTION_FILE "solution_file" #define CUOPT_NUM_CPU_THREADS "num_cpu_threads" #define CUOPT_NUM_GPUS "num_gpus" @@ -186,4 +189,7 @@ #define CUOPT_MIP_SCALING_ON 1 #define CUOPT_MIP_SCALING_NO_OBJECTIVE 2 +#define CUOPT_BARRIER_ITERATIVE_REFINEMENT_OFF 0 +#define CUOPT_BARRIER_ITERATIVE_REFINEMENT_ON 1 + #endif // CUOPT_CONSTANTS_H diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index a1cb787f09..b30286f9ce 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -282,6 +282,8 @@ class pdlp_solver_settings_t { i_t barrier_dual_initial_point{-1}; bool eliminate_dense_columns{true}; pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision}; + bool barrier_iterative_refinement{true}; + f_t barrier_step_scale{0.9}; bool save_best_primal_so_far{false}; /** * @brief Stop the solver as soon as a primal feasible iterate is encountered. diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 778038db1f..5e497f8ae7 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -93,6 +93,7 @@ class iteration_data_t { public: iteration_data_t(const lp_problem_t& lp, i_t num_upper_bounds, + const std::vector& free_variable_indices, const csc_matrix_t& Qin, const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), @@ -165,6 +166,8 @@ class iteration_data_t { d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), + n_free_vars(0), + d_is_free_(0, lp.handle_ptr->get_stream()), num_factorizations(0), has_solve_info(false), settings_(settings), @@ -220,6 +223,18 @@ class iteration_data_t { { raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); + // Set up free variable tracking for QPs + if (!free_variable_indices.empty()) { + n_free_vars = free_variable_indices.size(); + std::vector is_free_host(lp.num_cols, 0); + for (i_t j : free_variable_indices) { + is_free_host[j] = 1; + } + d_is_free_.resize(lp.num_cols, stream_view_); + raft::copy(d_is_free_.data(), is_free_host.data(), lp.num_cols, stream_view_); + settings.log.printf("Free variables (QP) : %d\n", n_free_vars); + } + bool has_Q = Q.x.size() > 0; indefinite_Q = false; if (has_Q) { @@ -288,9 +303,12 @@ class iteration_data_t { std::vector dense_columns_unordered; f_t start_column_density = tic(); - // Ignore Q matrix for now - find_dense_columns( - lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT); + + // Do not look for dense columns if Q is not diagonal + if (!has_Q || Q_diagonal) { + find_dense_columns( + lp.A, settings, dense_columns_unordered, n_dense_rows, max_row_nz, estimated_nz_AAT); + } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } #ifdef PRINT_INFO for (i_t j : dense_columns_unordered) { @@ -382,21 +400,25 @@ class iteration_data_t { A_dense.from_sparse(lp.A, j, k++); } } - original_A_values = AD.x; - d_original_A_values.resize(original_A_values.size(), handle_ptr->get_stream()); - raft::copy(d_original_A_values.data(), AD.x.data(), AD.x.size(), handle_ptr->get_stream()); + AD.transpose(AT); - device_AD.copy(AD, handle_ptr->get_stream()); - // For efficient scaling of AD col we form the col index array - device_AD.form_col_index(handle_ptr->get_stream()); - device_A_x_values.resize(original_A_values.size(), handle_ptr->get_stream()); - raft::copy( - device_A_x_values.data(), device_AD.x.data(), device_AD.x.size(), handle_ptr->get_stream()); - csr_matrix_t host_A_CSR(1, 1, 1); // Sizes will be set by to_compressed_row() - AD.to_compressed_row(host_A_CSR); - device_A.copy(host_A_CSR, lp.handle_ptr->get_stream()); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); + // device_AD / device_A / ADAT path is only used when forming ADAT (!use_augmented). + if (!use_augmented) { + device_AD.copy(AD, handle_ptr->get_stream()); + d_original_A_values.resize(device_AD.x.size(), handle_ptr->get_stream()); + raft::copy(d_original_A_values.data(), + device_AD.x.data(), + device_AD.x.size(), + handle_ptr->get_stream()); + // For efficient scaling of AD col we form the col index array + device_AD.form_col_index(handle_ptr->get_stream()); + device_A_x_values.resize(device_AD.x.size(), handle_ptr->get_stream()); + raft::copy( + device_A_x_values.data(), device_AD.x.data(), device_AD.x.size(), handle_ptr->get_stream()); + device_AD.to_compressed_row(device_A, handle_ptr->get_stream()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } i_t factorization_size = use_augmented ? lp.num_rows + lp.num_cols : lp.num_rows; @@ -1498,7 +1520,6 @@ class iteration_data_t { pinned_dense_vector_t inv_diag; pinned_dense_vector_t inv_sqrt_diag; - std::vector original_A_values; rmm::device_uvector d_original_A_values; csc_matrix_t AD; @@ -1538,6 +1559,8 @@ class iteration_data_t { bool use_augmented; i_t symbolic_status; + i_t n_free_vars{0}; + rmm::device_uvector d_is_free_; // 1 if variable is free (QP only), 0 otherwise std::unique_ptr> chol; @@ -1794,7 +1817,10 @@ int barrier_solver_t::initial_point(iteration_data_t& data) data_.chol->solve(b, x); } } op(data); - iterative_refinement(op, rhs, soln); + + if (settings.barrier_iterative_refinement) { + iterative_refinement(op, rhs, soln); + } for (i_t k = 0; k < lp.num_cols; k++) { data.x[k] = soln[k]; @@ -1908,6 +1934,12 @@ int barrier_solver_t::initial_point(iteration_data_t& data) } } } + // Free variables have z = 0 (no complementarity condition) + if (data.n_free_vars > 0) { + for (i_t j : presolve_info.free_variable_indices) { + data.z[j] = 0.0; + } + } } else if (use_augmented) { dense_vector_t dual_rhs(lp.num_cols + lp.num_rows); dual_rhs.set_scalar(0.0); @@ -1970,8 +2002,23 @@ int barrier_solver_t::initial_point(iteration_data_t& data) vector_norm2(data.dual_residual)); #endif // Make sure (w, x, v, z) > 0 + if (data.n_free_vars > 0) { + std::vector nonnegative_variables(data.w.size(), 1); + for (i_t j : presolve_info.free_variable_indices) { + nonnegative_variables[j] = 0; + } + + data.x.ensure_positive(epsilon_adjust, nonnegative_variables); + + for (i_t j : presolve_info.free_variable_indices) { + data.z[j] = 0.0; + } + + } else { + data.x.ensure_positive(epsilon_adjust); + } data.w.ensure_positive(epsilon_adjust); - data.x.ensure_positive(epsilon_adjust); + #ifdef PRINT_INFO settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum()); #endif @@ -2166,6 +2213,29 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& x, const rmm::device_uvector& dx) { + // For x-sized vectors with free variables, skip free vars in ratio test + const bool has_free = data.n_free_vars > 0 && static_cast(x.size()) == lp.num_cols; + + if (has_free) { + auto is_free_ptr = data.d_is_free_.data(); + auto ratio_test_free = [is_free_ptr] HD(const thrust::tuple t) { + const f_t dx_val = thrust::get<0>(t); + const f_t x_val = thrust::get<1>(t); + const i_t is_free = thrust::get<2>(t); + if (is_free) return f_t(1.0); + if (dx_val < f_t(0.0)) return -x_val / dx_val; + return f_t(1.0); + }; + + return data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator(dx.data(), x.data(), is_free_ptr), + thrust::minimum(), + ratio_test_free, + f_t(1.0), + x.size(), + stream_view_); + } + return data.transform_reduce_helper_.transform_reduce( thrust::make_zip_iterator(dx.data(), x.data()), thrust::minimum(), @@ -2248,11 +2318,37 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t{}, - stream_view_.value()); + // For native free variables (QP): use Q diagonal if available, otherwise a static regularizer + if (data.n_free_vars > 0) { + constexpr f_t free_var_reg = 1e-7; + if (data.Q.n > 0 && data.Q_diagonal) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data(), data.d_Q_diag_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free, f_t q_jj) { + if (!is_free) return z_j / x_j; + return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free) { + return is_free ? free_var_reg : (z_j / x_j); + }, + stream_view_.value()); + } + } else { + cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + cuda::std::divides<>{}, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); // diag = z ./ x + E * (v ./ w) * E' @@ -2358,20 +2454,40 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t thrust::tuple { - const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; - return {tmp, inv_diag * tmp}; - }, - stream_view_.value()); + // For free variables, skip the complementarity_xz_rhs / x term + if (data.n_free_vars > 0) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_inv_diag.data(), + data.d_tmp3_.data(), + data.d_complementarity_xz_rhs_.data(), + data.d_x_.data(), + data.d_dual_rhs_.data(), + data.d_is_free_.data()), + thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + lp.num_cols, + [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs, i_t is_free) + -> thrust::tuple { + const f_t xz_term = is_free ? f_t(0) : (complementarity_xz_rhs / x); + const f_t tmp = tmp3 - xz_term + dual_rhs; + return {tmp, inv_diag * tmp}; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_inv_diag.data(), + data.d_tmp3_.data(), + data.d_complementarity_xz_rhs_.data(), + data.d_x_.data(), + data.d_dual_rhs_.data()), + thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + lp.num_cols, + [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs) + -> thrust::tuple { + const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; + return {tmp, inv_diag * tmp}; + }, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); raft::copy(data.d_r1_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); raft::copy(data.d_r1_prime_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); @@ -2381,6 +2497,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_tsolve(b, x); } } op(data); - auto solve_err = iterative_refinement(op, d_augmented_rhs, d_augmented_soln); - if (solve_err > 1e-1) { - settings.log.printf("|| Aug (dx, dy) - aug_rhs || %e after IR\n", solve_err); + if (settings.barrier_iterative_refinement) { + auto solve_err = iterative_refinement(op, d_augmented_rhs, d_augmented_soln); + if (solve_err > 1e-1) { + settings.log.printf("|| Aug (dx, dy) - aug_rhs || %e after IR\n", solve_err); + } } raft::copy(data.d_dx_.data(), d_augmented_soln.data(), lp.num_cols, stream_view_); @@ -2656,17 +2775,33 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), + data.d_z_.data(), + data.d_dx_.data(), + data.d_x_.data(), + data.d_is_free_.data()), + data.d_dz_.data(), + data.d_dz_.size(), + [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x, i_t is_free) { + return is_free ? f_t(0) : ((complementarity_xz_rhs - z * dx) / x); + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), + data.d_z_.data(), + data.d_dx_.data(), + data.d_x_.data()), + data.d_dz_.data(), + data.d_dz_.size(), + [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) { + return (complementarity_xz_rhs - z * dx) / x; + }, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_); } @@ -2956,11 +3091,41 @@ void barrier_solver_t::compute_target_mu( stream_view_); complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; + f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); + mu_denom -= static_cast(data.n_free_vars); + mu_denom = std::max(mu_denom, f_t(1.0)); + mu_aff = complementarity_aff_sum / mu_denom; + sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); + new_mu = sigma * mu_aff; +} - mu_aff = (complementarity_aff_sum) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); - sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); - new_mu = sigma * mu_aff; +template +static void fill_linear_cc_rhs(iteration_data_t& data, + f_t new_mu, + raft::device_span out, + raft::device_span dx_aff, + raft::device_span dz_aff, + rmm::cuda_stream_view stream_view) +{ + if (out.empty()) return; + if (data.n_free_vars > 0) { + auto is_free_ptr = data.d_is_free_.data(); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), is_free_ptr), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_free) { + return is_free ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); + }, + stream_view.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, + stream_view.value()); + } } template @@ -2968,12 +3133,12 @@ void barrier_solver_t::compute_cc_rhs(iteration_data_t& data { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, - stream_view_.value()); + fill_linear_cc_rhs(data, + new_mu, + cuopt::make_span(data.d_complementarity_xz_rhs_), + cuopt::make_span(data.d_dx_aff_), + cuopt::make_span(data.d_dz_aff_), + stream_view_); RAFT_CHECK_CUDA(stream_view_); cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), @@ -3160,13 +3325,17 @@ void barrier_solver_t::compute_mu(iteration_data_t& data, f_ { raft::common::nvtx::range fun_scope("Barrier: compute_mu"); + f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); + mu_denom -= static_cast(data.n_free_vars); // free vars don't contribute to mu + mu_denom = std::max(mu_denom, f_t(1.0)); + mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), data.d_complementarity_xz_residual_.size(), stream_view_) + data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), data.d_complementarity_wv_residual_.size(), stream_view_)) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); + mu_denom; } template @@ -3320,7 +3489,6 @@ void barrier_solver_t::compute_primal_dual_objective(iteration_data_t< template lp_status_t barrier_solver_t::check_for_suboptimal_solution( - const barrier_solver_settings_t& options, iteration_data_t& data, f_t start_time, i_t iter, @@ -3333,6 +3501,7 @@ lp_status_t barrier_solver_t::check_for_suboptimal_solution( f_t& relative_complementarity_residual, lp_solution_t& solution) { + raft::common::nvtx::range fun_scope("Barrier: check_for_suboptimal_solution"); if (relative_primal_residual < settings.barrier_relaxed_feasibility_tol && relative_dual_residual < settings.barrier_relaxed_optimality_tol && relative_complementarity_residual < settings.barrier_relaxed_complementarity_tol && @@ -3411,10 +3580,9 @@ lp_status_t barrier_solver_t::check_for_suboptimal_solution( } template -lp_status_t barrier_solver_t::solve(f_t start_time, - const barrier_solver_settings_t& options, - lp_solution_t& solution) +lp_status_t barrier_solver_t::solve(f_t start_time, lp_solution_t& solution) { + settings.log.printf("Barrier solver started at %.2f seconds\n", toc(start_time)); try { raft::common::nvtx::range fun_scope("Barrier: solve"); @@ -3444,7 +3612,8 @@ lp_status_t barrier_solver_t::solve(f_t start_time, csc_matrix_t Q(lp.num_cols, 0, 0); if (lp.Q.n > 0) { create_Q(lp, Q); } - iteration_data_t data(lp, num_upper_bounds, Q, settings); + iteration_data_t data( + lp, num_upper_bounds, presolve_info.free_variable_indices, Q, settings); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; @@ -3594,8 +3763,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); if (status < 0) { - return check_for_suboptimal_solution(options, - data, + return check_for_suboptimal_solution(data, start_time, iter, primal_objective, @@ -3632,8 +3800,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, // Sync to make sure all the async copies to host done inside are finished RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); if (status < 0) { - return check_for_suboptimal_solution(options, - data, + return check_for_suboptimal_solution(data, start_time, iter, primal_objective, @@ -3658,9 +3825,9 @@ lp_status_t barrier_solver_t::solve(f_t start_time, compute_final_direction(data); f_t step_primal, step_dual; - compute_primal_dual_step_length(data, options.step_scale, step_primal, step_dual); + compute_primal_dual_step_length(data, settings.barrier_step_scale, step_primal, step_dual); - compute_next_iterate(data, options.step_scale, step_primal, step_dual); + compute_next_iterate(data, settings.barrier_step_scale, step_primal, step_dual); compute_residual_norms( data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm); @@ -3711,8 +3878,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, if (primal_objective != primal_objective || dual_objective != dual_objective) { settings.log.printf("Numerical error in objective\n"); - return check_for_suboptimal_solution(options, - data, + return check_for_suboptimal_solution(data, start_time, iter, primal_objective, @@ -3744,7 +3910,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, if (converged) { settings.log.printf("\n"); settings.log.printf( - "Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time)); + "Optimal solution found in %d iterations and %.3fs\n", iter, toc(start_time)); settings.log.printf("Objective %+.8e\n", compute_user_objective(lp, primal_objective)); settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n", primal_residual_norm, @@ -3778,8 +3944,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, data.relative_dual_residual_save < settings.barrier_relaxed_optimality_tol && data.relative_complementarity_residual_save < settings.barrier_relaxed_complementarity_tol) { - return check_for_suboptimal_solution(options, - data, + return check_for_suboptimal_solution(data, start_time, iter, primal_objective, @@ -3816,7 +3981,6 @@ template class barrier_solver_t; template class sparse_cholesky_base_t; template class sparse_cholesky_cudss_t; template class iteration_data_t; -template class barrier_solver_settings_t; #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/barrier/barrier.hpp b/cpp/src/barrier/barrier.hpp index 014a46db35..4d25fa930e 100644 --- a/cpp/src/barrier/barrier.hpp +++ b/cpp/src/barrier/barrier.hpp @@ -19,12 +19,6 @@ #include namespace cuopt::linear_programming::dual_simplex { -template -struct barrier_solver_settings_t { - i_t iteration_limit = 1000; - f_t step_scale = 0.9; -}; - template class iteration_data_t; // Forward declare @@ -34,15 +28,12 @@ class barrier_solver_t { barrier_solver_t(const lp_problem_t& lp, const presolve_info_t& presolve, const simplex_solver_settings_t& settings); - lp_status_t solve(f_t start_time, - const barrier_solver_settings_t& options, - lp_solution_t& solution); + lp_status_t solve(f_t start_time, lp_solution_t& solution); private: void my_pop_range(bool debug) const; void create_Q(const lp_problem_t& lp, csc_matrix_t& Q); int initial_point(iteration_data_t& data); - void compute_residual_norms(const dense_vector_t& w, const dense_vector_t& x, const dense_vector_t& y, @@ -113,8 +104,7 @@ class barrier_solver_t { f_t& max_residual); private: - lp_status_t check_for_suboptimal_solution(const barrier_solver_settings_t& options, - iteration_data_t& data, + lp_status_t check_for_suboptimal_solution(iteration_data_t& data, f_t start_time, i_t iter, f_t& primal_objective, diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp index f73a9a5fce..5d6f2b12ec 100644 --- a/cpp/src/barrier/dense_vector.hpp +++ b/cpp/src/barrier/dense_vector.hpp @@ -184,6 +184,21 @@ class dense_vector_t : public std::vector { } } + void ensure_positive(f_t epsilon_adjust, const std::vector& mask) + { + f_t min_x = inf; + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + if (mask[i]) { min_x = std::min(min_x, (*this)[i]); } + } + if (min_x <= 0.0) { + const f_t delta_x = -min_x + epsilon_adjust; + for (i_t i = 0; i < n; i++) { + if (mask[i]) { (*this)[i] += delta_x; } + } + } + } + void bound_away_from_zero(f_t epsilon_adjust) { const i_t n = this->size(); diff --git a/cpp/src/barrier/device_sparse_matrix.cuh b/cpp/src/barrier/device_sparse_matrix.cuh index 97352e2e78..29927c81b3 100644 --- a/cpp/src/barrier/device_sparse_matrix.cuh +++ b/cpp/src/barrier/device_sparse_matrix.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -16,8 +16,18 @@ #include #include +#include +#include +#include +#include +#include +#include + namespace cuopt::linear_programming::dual_simplex { +template +class device_csr_matrix_t; + template struct sum_reduce_helper_t { rmm::device_buffer buffer_data; @@ -158,6 +168,9 @@ class device_csc_matrix_t { raft::copy(x.data(), A.x.data(), A.x.size(), stream); } + /** Same semantics as csc_matrix_t::to_compressed_row, entirely on device. */ + void to_compressed_row(device_csr_matrix_t& Arow, rmm::cuda_stream_view stream) const; + void form_col_index(rmm::cuda_stream_view stream) { col_index.resize(x.size(), stream); @@ -293,4 +306,86 @@ class device_csr_matrix_t { // to avoid extra space / computation) }; +template +void device_csc_matrix_t::to_compressed_row(device_csr_matrix_t& Arow, + rmm::cuda_stream_view stream) const +{ + static_assert(std::is_signed_v); + + // Device CSC -> CSR: col_start[], i[], x[] (this) -> Arow.row_start[], j[], x[]. + // Nonzeros are reordered by sorting (row, col) so each CSR row segment is contiguous. + + i_t const nz = nz_max; + + Arow.m = m; + Arow.n = n; + Arow.nz_max = nz_max; + Arow.row_start.resize(m + 1, stream); + Arow.j.resize(nz, stream); + Arow.x.resize(nz, stream); + + auto exec = rmm::exec_policy(stream); + + if (nz == 0) { + // Empty matrix: row_start all zero; j/x unused. + RAFT_CUDA_TRY(cudaMemsetAsync(Arow.row_start.data(), 0, sizeof(i_t) * (m + 1), stream)); + return; + } + + // Per-row nnz from CSC row indices i[] (one atomic add per nonzero). + rmm::device_uvector row_counts(m, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, sizeof(i_t) * m, stream)); + + thrust::for_each(exec, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nz), + [row_ind = i.data(), counts = row_counts.data()] __device__(i_t p) { + atomicAdd(counts + row_ind[p], i_t(1)); + }); + + // CSR row pointers: exclusive prefix sum of row_counts; Arow.row_start[m] = nz. + rmm::device_buffer scan_tmp; + std::size_t scan_bytes = 0; + cub::DeviceScan::ExclusiveSum( + nullptr, scan_bytes, row_counts.data(), Arow.row_start.data(), m, stream); + scan_tmp.resize(scan_bytes, stream); + cub::DeviceScan::ExclusiveSum( + scan_tmp.data(), scan_bytes, row_counts.data(), Arow.row_start.data(), m, stream); + + RAFT_CUDA_TRY( + cudaMemcpyAsync(Arow.row_start.data() + m, &nz, sizeof(i_t), cudaMemcpyHostToDevice, stream)); + + // rows[]: CSC row indices (sort key). Arow.j / Arow.x hold (col, val) per flat CSC index, + // then sort_by_key permutes j and x in place into CSR (row, col) order. + rmm::device_uvector rows(nz, stream); + raft::copy(rows.data(), i.data(), nz, stream); + raft::copy(Arow.x.data(), x.data(), nz, stream); + + // Global CSC position p lies in column c iff col_start[c] <= p < col_start[c+1]. + thrust::tabulate(exec, + thrust::device_pointer_cast(Arow.j.data()), + thrust::device_pointer_cast(Arow.j.data() + nz), + [cs = col_start.data(), nn_c = n] __device__(i_t p) { + i_t lo = 0; + i_t hi = nn_c; + while (lo < hi) { + i_t mid = lo + (hi - lo) / 2; + if (cs[mid] <= p) { + lo = mid + 1; + } else { + hi = mid; + } + } + return lo - 1; + }); + + // CSR column order: sort (row, col) lexicographically; values follow the same permutation. + auto row_iter = thrust::device_pointer_cast(rows.data()); + auto col_iter = thrust::device_pointer_cast(Arow.j.data()); + thrust::sort_by_key(exec, + thrust::make_zip_iterator(thrust::make_tuple(row_iter, col_iter)), + thrust::make_zip_iterator(thrust::make_tuple(row_iter + nz, col_iter + nz)), + thrust::device_pointer_cast(Arow.x.data())); +} + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/barrier/iterative_refinement.hpp b/cpp/src/barrier/iterative_refinement.hpp index 69e72d66bc..807d055787 100644 --- a/cpp/src/barrier/iterative_refinement.hpp +++ b/cpp/src/barrier/iterative_refinement.hpp @@ -178,6 +178,7 @@ f_t iterative_refinement_gmres(T& op, bool show_info = false; + f_t stop_ratio = 5.0; f_t bnorm = std::max(1.0, vector_norm_inf(b)); f_t rel_res = 1.0; int outer_iter = 0; @@ -361,10 +362,21 @@ f_t iterative_refinement_gmres(T& op, l2_residual); } + f_t improvement_ratio = best_residual / residual; // Track best solution - if (residual < best_residual) { + if (improvement_ratio >= stop_ratio) { best_residual = residual; raft::copy(x_sav.data(), x.data(), x.size(), x.stream()); + } else if (improvement_ratio < stop_ratio && improvement_ratio > 1.0) { + best_residual = residual; + raft::copy(x_sav.data(), x.data(), x.size(), x.stream()); + // Residual decreased, but not enough, continue + if (show_info) { + CUOPT_LOG_INFO("GMRES IR: improvement ratio %e is less than %e, breaking early", + improvement_ratio, + stop_ratio); + } + break; } else { // Residual increased or stagnated, restore best and stop if (show_info) { diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index 52fea89502..d6112b73c4 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -243,9 +243,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { auto cudss_device_count = 1; CUDSS_CALL_AND_CHECK_EXIT( cudssCreateMg(&handle, cudss_device_count, &cudss_device_idx), status, "cudssCreateMg"); - CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate"); - mem_handler.ctx = reinterpret_cast(handle_ptr_->get_workspace_resource()); mem_handler.device_alloc = cudss_device_alloc; mem_handler.device_free = cudss_device_dealloc; diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index c5ef847106..20efbaf8ae 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -850,8 +851,7 @@ i_t presolve(const lp_problem_t& original, } i_t removed_free_variables = 0; - - if (constraints_to_check.size() > 0) { + if (!constraints_to_check.empty()) { // Check if the constraints are feasible csr_matrix_t Arow(0, 0, 0); problem.A.to_compressed_row(Arow); @@ -973,15 +973,14 @@ i_t presolve(const lp_problem_t& original, } } - i_t new_free_variables = 0; + free_variables = 0; for (i_t j = 0; j < problem.num_cols; j++) { - if (problem.lower[j] == -inf && problem.upper[j] == inf) { new_free_variables++; } + if (problem.lower[j] == -inf && problem.upper[j] == inf) { free_variables++; } } if (removed_free_variables != 0) { - settings.log.printf("Bounded %d free variables\n", removed_free_variables); + settings.log.printf("Bounded %d free variable row(s) in presolve\n", + static_cast(removed_free_variables)); } - assert(new_free_variables == free_variables - removed_free_variables); - free_variables = new_free_variables; } // The original problem may have a variable without a lower bound @@ -1139,7 +1138,17 @@ i_t presolve(const lp_problem_t& original, problem.Q.check_matrix("Before free variable expansion"); - if (settings.barrier_presolve && free_variables > 0) { + // For QPs, keep free variables as-is rather than + // splitting x = v - w. The barrier solver handles them natively with a + // static regularizer on the diagonal instead of z/x complementarity terms. + if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) { + presolve_info.free_variable_indices.clear(); + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + presolve_info.free_variable_indices.push_back(j); + } + } + } else if (settings.barrier_presolve && free_variables > 0) { // We have a variable x_j: with -inf < x_j < inf // we create new variables v and w with 0 <= v, w and x_j = v - w // Constraints diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index d0e2d52812..2caf5673e0 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -153,6 +153,9 @@ struct presolve_info_t { // Variables that were negated to handle -inf < x_j <= u_j std::vector negated_variables; + + // Free variable indices for QP augmented system (not split, handled natively) + std::vector free_variable_indices; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index cfc120e477..454d977a21 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -84,6 +84,8 @@ struct simplex_solver_settings_t { deterministic(false), barrier(false), eliminate_dense_columns(true), + barrier_iterative_refinement(true), + barrier_step_scale(0.9), num_gpus(1), folding(-1), augmented(0), @@ -164,7 +166,9 @@ struct simplex_solver_settings_t { bool cudss_deterministic; // true to use cuDSS deterministic mode, false for non-deterministic bool barrier; // true to use barrier method, false to use dual simplex method bool deterministic; // true to use B&B deterministic mode, false to use non-deterministic mode - bool eliminate_dense_columns; // true to eliminate dense columns from A*D*A^T + bool eliminate_dense_columns; // true to eliminate dense columns from A*D*A^T + bool barrier_iterative_refinement; // true to use iterative refinement for barrier method + f_t barrier_step_scale; // step scale for barrier method int num_gpus; // Number of GPUs to use (maximum of 2 gpus are supported at the moment) i_t folding; // -1 automatic, 0 don't fold, 1 fold i_t augmented; // -1 automatic, 0 to solve with ADAT, 1 to solve with augmented system diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 82d922eec3..a52381e374 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -373,27 +373,8 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Solve using barrier lp_solution_t barrier_solution(barrier_lp.num_rows, barrier_lp.num_cols); - // Clear variable pairs for QP - if (barrier_lp.Q.n > 0) { - const i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; - for (i_t k = 0; k < num_free_variables; k++) { - i_t u = presolve_info.free_variable_pairs[2 * k]; - i_t v = presolve_info.free_variable_pairs[2 * k + 1]; - - const i_t row_start_u = barrier_lp.Q.row_start[u]; - const i_t row_end_u = barrier_lp.Q.row_start[u + 1]; - const i_t row_start_v = barrier_lp.Q.row_start[v]; - const i_t row_end_v = barrier_lp.Q.row_start[v + 1]; - if (row_end_u - row_start_u == 0 && row_end_v - row_start_v == 0) { - settings.log.printf("Free variable pair %d-%d has no quadratic term\n", u, v); - } - } - } - barrier_solver_t barrier_solver(barrier_lp, presolve_info, barrier_settings); - barrier_solver_settings_t barrier_solver_settings; - lp_status_t barrier_status = - barrier_solver.solve(start_time, barrier_solver_settings, barrier_solution); + lp_status_t barrier_status = barrier_solver.solve(start_time, barrier_solution); if (barrier_status == lp_status_t::OPTIMAL) { #ifdef COMPUTE_SCALED_RESIDUALS std::vector scaled_residual = barrier_lp.rhs; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index b968ad18ea..b7626aee51 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -102,6 +102,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits::infinity(), f_t(-1.0)}, {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)}, + {CUOPT_BARRIER_STEP_SCALE, &pdlp_settings.barrier_step_scale, f_t(0.5), f_t(0.9999), f_t(0.9)}, // MIP heuristic hyper-parameters (hidden from default --help: name contains "hyper_") {CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_TIME_RATIO, &mip_settings.heuristic_params.presolve_time_ratio, f_t(0.0), f_t(1.0), f_t(0.1), "fraction of total time for presolve"}, {CUOPT_MIP_HYPER_HEURISTIC_PRESOLVE_MAX_TIME, &mip_settings.heuristic_params.presolve_max_time, f_t(0.0), std::numeric_limits::infinity(), f_t(60.0), "hard cap on presolve seconds"}, @@ -172,6 +173,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_ELIMINATE_DENSE_COLUMNS, &pdlp_settings.eliminate_dense_columns, true}, {CUOPT_CUDSS_DETERMINISTIC, &pdlp_settings.cudss_deterministic, false}, {CUOPT_DUAL_POSTSOLVE, &pdlp_settings.dual_postsolve, true}, + {CUOPT_BARRIER_ITERATIVE_REFINEMENT, &pdlp_settings.barrier_iterative_refinement, true}, }; // String parameters string_parameters = { diff --git a/cpp/src/mip_heuristics/presolve/semi_continuous.cu b/cpp/src/mip_heuristics/presolve/semi_continuous.cu index 15728d02bb..5f00507314 100644 --- a/cpp/src/mip_heuristics/presolve/semi_continuous.cu +++ b/cpp/src/mip_heuristics/presolve/semi_continuous.cu @@ -44,7 +44,7 @@ std::vector call_host_bounds_strengthening(const optimization_problem_t& sc_indices) { auto user_problem = - cuopt_problem_to_simplex_problem(op_problem.get_handle_ptr(), op_problem); + cuopt_problem_to_user_problem(op_problem.get_handle_ptr(), op_problem); dual_simplex::lp_problem_t lp_problem(op_problem.get_handle_ptr(), 1, 1, 1); std::vector new_slacks; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index d3d20a5c8f..ee9bf3e1b1 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -352,8 +352,12 @@ void adjust_dual_solution_and_reduced_cost(rmm::device_uvector& dual_soluti template optimization_problem_solution_t convert_dual_simplex_sol( - detail::problem_t& problem, const dual_simplex::lp_solution_t& solution, + raft::handle_t const* handle_ptr, + std::string const& objective_name, + std::vector const& var_names, + std::vector const& row_names, + bool maximize, dual_simplex::lp_status_t status, f_t duration, f_t norm_user_objective, @@ -378,18 +382,18 @@ optimization_problem_solution_t convert_dual_simplex_sol( }; rmm::device_uvector final_primal_solution = - cuopt::device_copy(solution.x, problem.handle_ptr->get_stream()); + cuopt::device_copy(solution.x, handle_ptr->get_stream()); rmm::device_uvector final_dual_solution = - cuopt::device_copy(solution.y, problem.handle_ptr->get_stream()); + cuopt::device_copy(solution.y, handle_ptr->get_stream()); rmm::device_uvector final_reduced_cost = - cuopt::device_copy(solution.z, problem.handle_ptr->get_stream()); - problem.handle_ptr->sync_stream(); + cuopt::device_copy(solution.z, handle_ptr->get_stream()); + handle_ptr->sync_stream(); // Negate dual variables and reduced costs for maximization problems - if (problem.maximize) { + if (maximize) { adjust_dual_solution_and_reduced_cost( - final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); - problem.handle_ptr->sync_stream(); + final_dual_solution, final_reduced_cost, handle_ptr->get_stream()); + handle_ptr->sync_stream(); } // Should be filled with more information from dual simplex @@ -417,9 +421,9 @@ optimization_problem_solution_t convert_dual_simplex_sol( auto sol = optimization_problem_solution_t(final_primal_solution, final_dual_solution, final_reduced_cost, - problem.objective_name, - problem.var_names, - problem.row_names, + objective_name, + var_names, + row_names, std::move(info), {termination_status}); @@ -431,10 +435,56 @@ optimization_problem_solution_t convert_dual_simplex_sol( sol.get_termination_status_string().c_str()); } - problem.handle_ptr->sync_stream(); + handle_ptr->sync_stream(); return sol; } +template +optimization_problem_solution_t convert_dual_simplex_sol( + detail::problem_t& problem, + const dual_simplex::lp_solution_t& solution, + dual_simplex::lp_status_t status, + f_t duration, + f_t norm_user_objective, + f_t norm_rhs, + method_t method) +{ + return convert_dual_simplex_sol(solution, + problem.handle_ptr, + problem.objective_name, + problem.var_names, + problem.row_names, + problem.maximize, + status, + duration, + norm_user_objective, + norm_rhs, + method); +} + +template +optimization_problem_solution_t convert_dual_simplex_sol( + optimization_problem_t& op_problem, + const dual_simplex::lp_solution_t& solution, + dual_simplex::lp_status_t status, + f_t duration, + f_t norm_user_objective, + f_t norm_rhs, + method_t method) +{ + return convert_dual_simplex_sol(solution, + op_problem.get_handle_ptr(), + op_problem.get_objective_name(), + op_problem.get_variable_names(), + op_problem.get_row_names(), + op_problem.get_sense(), + status, + duration, + norm_user_objective, + norm_rhs, + method); +} + template std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t> run_barrier(dual_simplex::user_problem_t& user_problem, @@ -457,6 +507,8 @@ run_barrier(dual_simplex::user_problem_t& user_problem, barrier_settings.barrier = true; barrier_settings.crossover = settings.crossover; barrier_settings.eliminate_dense_columns = settings.eliminate_dense_columns; + barrier_settings.barrier_iterative_refinement = settings.barrier_iterative_refinement; + barrier_settings.barrier_step_scale = settings.barrier_step_scale; barrier_settings.cudss_deterministic = settings.cudss_deterministic; barrier_settings.barrier_relaxed_feasibility_tol = settings.tolerances.relative_primal_tolerance; barrier_settings.barrier_relaxed_optimality_tol = settings.tolerances.relative_dual_tolerance; @@ -493,7 +545,7 @@ optimization_problem_solution_t run_barrier( { // Convert data structures to dual simplex format and back dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); + cuopt_problem_to_user_problem(problem.handle_ptr, problem); auto sol_dual_simplex = run_barrier(dual_simplex_problem, settings, timer); return convert_dual_simplex_sol(problem, std::get<0>(sol_dual_simplex), @@ -567,7 +619,7 @@ optimization_problem_solution_t run_dual_simplex( { // Convert data structures to dual simplex format and back dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); + cuopt_problem_to_user_problem(problem.handle_ptr, problem); auto sol_dual_simplex = run_dual_simplex(dual_simplex_problem, settings, timer); return convert_dual_simplex_sol(problem, std::get<0>(sol_dual_simplex), @@ -627,25 +679,27 @@ static optimization_problem_solution_t run_pdlp_solver_in_fp32( static_cast(settings.tolerances.primal_infeasible_tolerance); fs.tolerances.dual_infeasible_tolerance = static_cast(settings.tolerances.dual_infeasible_tolerance); - fs.detect_infeasibility = settings.detect_infeasibility; - fs.strict_infeasibility = settings.strict_infeasibility; - fs.iteration_limit = settings.iteration_limit; - fs.time_limit = static_cast(settings.time_limit); - fs.pdlp_solver_mode = settings.pdlp_solver_mode; - fs.log_to_console = settings.log_to_console; - fs.log_file = settings.log_file; - fs.per_constraint_residual = settings.per_constraint_residual; - fs.save_best_primal_so_far = settings.save_best_primal_so_far; - fs.first_primal_feasible = settings.first_primal_feasible; - fs.all_primal_feasible = settings.all_primal_feasible; - fs.eliminate_dense_columns = settings.eliminate_dense_columns; - fs.pdlp_precision = pdlp_precision_t::DefaultPrecision; - fs.method = method_t::PDLP; - fs.inside_mip = settings.inside_mip; - fs.hyper_params = settings.hyper_params; - fs.presolver = settings.presolver; - fs.num_gpus = settings.num_gpus; - fs.concurrent_halt = settings.concurrent_halt; + fs.detect_infeasibility = settings.detect_infeasibility; + fs.strict_infeasibility = settings.strict_infeasibility; + fs.iteration_limit = settings.iteration_limit; + fs.time_limit = static_cast(settings.time_limit); + fs.pdlp_solver_mode = settings.pdlp_solver_mode; + fs.log_to_console = settings.log_to_console; + fs.log_file = settings.log_file; + fs.per_constraint_residual = settings.per_constraint_residual; + fs.save_best_primal_so_far = settings.save_best_primal_so_far; + fs.first_primal_feasible = settings.first_primal_feasible; + fs.all_primal_feasible = settings.all_primal_feasible; + fs.eliminate_dense_columns = settings.eliminate_dense_columns; + fs.barrier_iterative_refinement = settings.barrier_iterative_refinement; + fs.barrier_step_scale = settings.barrier_step_scale; + fs.pdlp_precision = pdlp_precision_t::DefaultPrecision; + fs.method = method_t::PDLP; + fs.inside_mip = settings.inside_mip; + fs.hyper_params = settings.hyper_params; + fs.presolver = settings.presolver; + fs.num_gpus = settings.num_gpus; + fs.concurrent_halt = settings.concurrent_halt; detail::pdlp_solver_t solver(float_problem, fs, is_batch_mode); if (settings.inside_mip) { solver.set_inside_mip(true); } @@ -1484,7 +1538,7 @@ optimization_problem_solution_t run_concurrent( // Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph // capture off dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); + cuopt_problem_to_user_problem(problem.handle_ptr, problem); // Create a thread for dual simplex std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1679,6 +1733,53 @@ optimization_problem_solution_t solve_lp_with_method( } } +template +optimization_problem_solution_t solve_qp(optimization_problem_t& op_problem, + pdlp_solver_settings_t const& settings, + bool problem_checking) +{ + try { + print_version_info(); + // Create log stream for file logging and add it to default logger + init_logger_t log(settings.log_file, settings.log_to_console); + + // Init libraries before to not include it in solve time + init_handler(op_problem.get_handle_ptr()); + + auto qp_timer = cuopt::timer_t(settings.time_limit); + + raft::common::nvtx::range fun_scope("Running QP solver"); + if (settings.user_problem_file != "") { + CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); + op_problem.write_to_mps(settings.user_problem_file); + } + // Convert data structures to dual simplex format and back + dual_simplex::user_problem_t dual_simplex_problem = + cuopt_optimization_problem_to_user_problem(op_problem.get_handle_ptr(), op_problem); + auto sol_dual_simplex = run_barrier(dual_simplex_problem, settings, qp_timer); + auto solution = convert_dual_simplex_sol(op_problem, + std::get<0>(sol_dual_simplex), + std::get<1>(sol_dual_simplex), + std::get<2>(sol_dual_simplex), + std::get<3>(sol_dual_simplex), + std::get<4>(sol_dual_simplex), + method_t::Barrier); + if (settings.sol_file != "") { + CUOPT_LOG_INFO("Writing solution to file %s", settings.sol_file.c_str()); + solution.write_to_sol_file(settings.sol_file, op_problem.get_handle_ptr()->get_stream()); + } + return solution; + } catch (const cuopt::logic_error& e) { + CUOPT_LOG_ERROR("Error in solve_qp: %s", e.what()); + return optimization_problem_solution_t{e, op_problem.get_handle_ptr()->get_stream()}; + } catch (const std::bad_alloc& e) { + CUOPT_LOG_ERROR("Error in solve_qp: %s", e.what()); + return optimization_problem_solution_t{ + cuopt::logic_error("Memory allocation failed", cuopt::error_type_t::RuntimeError), + op_problem.get_handle_ptr()->get_stream()}; + } +} + template optimization_problem_solution_t solve_lp( optimization_problem_t& op_problem, @@ -1687,6 +1788,10 @@ optimization_problem_solution_t solve_lp( bool use_pdlp_solver_mode, bool is_batch_mode) { + if (op_problem.has_quadratic_objective()) { + return solve_qp(op_problem, settings_const, problem_checking); + } + try { if (!settings_const.inside_mip) print_version_info(); @@ -1694,22 +1799,10 @@ optimization_problem_solution_t solve_lp( // Create log stream for file logging and add it to default logger init_logger_t log(settings.log_file, settings.log_to_console); - // Init libraies before to not include it in solve time + // Init libraries before to not include it in solve time // This needs to be called before pdlp is initialized init_handler(op_problem.get_handle_ptr()); - if (op_problem.has_quadratic_objective()) { - CUOPT_LOG_INFO("Problem has a quadratic objective. Using Barrier."); - settings.method = method_t::Barrier; - settings.presolver = presolver_t::None; - // check for sense of the problem - if (op_problem.get_sense()) { - CUOPT_LOG_ERROR("Quadratic problems must be minimized"); - return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, - op_problem.get_handle_ptr()->get_stream()); - } - } - raft::common::nvtx::range fun_scope("Running solver"); if (problem_checking) { @@ -1742,7 +1835,6 @@ optimization_problem_solution_t solve_lp( auto lp_timer = cuopt::timer_t(settings.time_limit); detail::problem_t problem(op_problem); - // handle default presolve if (settings.presolver == presolver_t::Default) { settings.presolver = presolver_t::PSLP; diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index cb2bb3bbba..5c73113879 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -14,10 +15,12 @@ #include +#include + namespace cuopt::linear_programming { template -static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( +static dual_simplex::user_problem_t cuopt_problem_to_user_problem( raft::handle_t const* handle_ptr, const optimization_problem_interface_t& problem) { dual_simplex::user_problem_t user_problem(handle_ptr); @@ -91,7 +94,7 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( } template -static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( +static dual_simplex::user_problem_t cuopt_problem_to_user_problem( raft::handle_t const* handle_ptr, detail::problem_t& model) { dual_simplex::user_problem_t user_problem(handle_ptr); @@ -186,6 +189,100 @@ static dual_simplex::user_problem_t cuopt_problem_to_simplex_problem( return user_problem; } +template +static dual_simplex::user_problem_t cuopt_optimization_problem_to_user_problem( + raft::handle_t const* handle_ptr, optimization_problem_t& model) +{ + dual_simplex::user_problem_t user_problem(handle_ptr); + + i_t const m = model.get_n_constraints(); + i_t const n = model.get_n_variables(); + i_t const nz = model.get_nnz(); + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective = model.get_objective_coefficients_host(); + + dual_simplex::csr_matrix_t csr_A(m, n, nz); + csr_A.x = model.get_constraint_matrix_values_host(); + csr_A.j = model.get_constraint_matrix_indices_host(); + csr_A.row_start = model.get_constraint_matrix_offsets_host(); + if (m == 0) { + csr_A.row_start.resize(1); + csr_A.row_start[0] = 0; + } + csr_A.to_compressed_col(user_problem.A); + + user_problem.rhs.resize(m); + user_problem.row_sense.resize(m); + user_problem.range_rows.clear(); + user_problem.range_value.clear(); + + auto model_constraint_lower_bounds = model.get_constraint_lower_bounds_host(); + auto model_constraint_upper_bounds = model.get_constraint_upper_bounds_host(); + + for (i_t i = 0; i < m; ++i) { + const f_t constraint_lower_bound = model_constraint_lower_bounds[i]; + const f_t constraint_upper_bound = model_constraint_upper_bounds[i]; + if (constraint_lower_bound == constraint_upper_bound) { + user_problem.row_sense[i] = 'E'; + user_problem.rhs[i] = constraint_lower_bound; + } else if (constraint_upper_bound == std::numeric_limits::infinity()) { + user_problem.row_sense[i] = 'G'; + user_problem.rhs[i] = constraint_lower_bound; + } else if (constraint_lower_bound == -std::numeric_limits::infinity()) { + user_problem.row_sense[i] = 'L'; + user_problem.rhs[i] = constraint_upper_bound; + } else { + user_problem.row_sense[i] = 'E'; + user_problem.rhs[i] = constraint_lower_bound; + user_problem.range_rows.push_back(i); + const f_t bound_difference = constraint_upper_bound - constraint_lower_bound; + user_problem.range_value.push_back(bound_difference); + } + } + user_problem.num_range_rows = static_cast(user_problem.range_rows.size()); + + user_problem.lower = model.get_variable_lower_bounds_host(); + user_problem.upper = model.get_variable_upper_bounds_host(); + + user_problem.problem_name = model.get_problem_name(); + if (!model.get_row_names().empty()) { + user_problem.row_names.resize(m); + for (i_t ii = 0; ii < m; ++ii) { + user_problem.row_names[ii] = model.get_row_names()[static_cast(ii)]; + } + } + if (!model.get_variable_names().empty()) { + user_problem.col_names.resize(n); + for (i_t j = 0; j < n; ++j) { + if (static_cast(j) < model.get_variable_names().size()) { + user_problem.col_names[j] = model.get_variable_names()[static_cast(j)]; + } else { + user_problem.col_names[j] = "_CUOPT_x" + std::to_string(static_cast(j)); + } + } + } + + user_problem.obj_constant = model.get_objective_offset(); + user_problem.obj_scale = model.get_objective_scaling_factor(); + + user_problem.var_types.resize(n); + auto model_variable_types = model.get_variable_types_host(); + for (i_t j = 0; j < n; ++j) { + user_problem.var_types[j] = + model_variable_types[static_cast(j)] == var_t::CONTINUOUS + ? cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS + : cuopt::linear_programming::dual_simplex::variable_type_t::INTEGER; + } + + user_problem.Q_offsets = model.get_quadratic_objective_offsets(); + user_problem.Q_indices = model.get_quadratic_objective_indices(); + user_problem.Q_values = model.get_quadratic_objective_values(); + + return user_problem; +} + template void translate_to_crossover_problem(const detail::problem_t& problem, optimization_problem_solution_t& sol,