diff --git a/CMakeLists.txt b/CMakeLists.txt index 67e2337210..5b1d4ac419 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,6 +355,7 @@ set(BOUT_SOURCES ./src/solver/impls/snes/snes.hxx ./src/solver/impls/split-rk/split-rk.cxx ./src/solver/impls/split-rk/split-rk.hxx + ./src/solver/petsc_preconditioner.cxx ./src/solver/solver.cxx ./src/sys/adios_object.cxx ./src/sys/bout_types.cxx diff --git a/include/bout/petsc_preconditioner.hxx b/include/bout/petsc_preconditioner.hxx new file mode 100644 index 0000000000..be283413fc --- /dev/null +++ b/include/bout/petsc_preconditioner.hxx @@ -0,0 +1,94 @@ +/************************************************************************** + * Reusable PETSc coloring-based Jacobian preconditioner support + * + * This class factors out the duplicated code in PETSc-based solvers for: + * - building a Jacobian sparsity pattern (via stencil assumptions) + * - creating a MatFDColoring context for efficient finite-difference Jacobians + * + **************************************************************************/ + +#ifndef BOUT_PETSC_PRECONDITIONER_H +#define BOUT_PETSC_PRECONDITIONER_H + +#include "bout/build_defines.hxx" + +#if BOUT_HAS_PETSC + +#include "bout/petsc_interface.hxx" +#include "bout/petsclib.hxx" + +#include + +#include +#include + +class Options; +class Field3D; + +class PetscPreconditioner { +public: + PetscPreconditioner() = default; + ~PetscPreconditioner() { reset(); } + + PetscPreconditioner(const PetscPreconditioner&) = delete; + PetscPreconditioner& operator=(const PetscPreconditioner&) = delete; + PetscPreconditioner(PetscPreconditioner&&) = delete; + PetscPreconditioner& operator=(PetscPreconditioner&&) = delete; + + /// Create and assemble a finite-difference Jacobian matrix `Jfd` with a sparsity + /// pattern derived from the solver's variable layout and a user-specified stencil. + /// + /// `index` should be generated by the calling solver (e.g. `Solver::globalIndex(0)`). + /// Safe to call multiple times; any existing PETSc objects are destroyed first. + PetscErrorCode createJacobianPattern(Field3D& index, Options& options, PetscInt nlocal, + int n2d, int n3d, MPI_Comm comm); + + /// Recalculate the PETSc coloring and finite-difference coloring context + /// for the currently stored `Jfd` matrix. + template + PetscErrorCode updateColoring(ColoringFunction function, void* ctx) { + // Re-calculate the coloring + MatColoring coloring{nullptr}; + PetscCall(MatColoringCreate(Jfd, &coloring)); + PetscCall(MatColoringSetType(coloring, MATCOLORINGGREEDY)); + PetscCall(MatColoringSetFromOptions(coloring)); + + // Calculate new index sets + ISColoring iscoloring{nullptr}; + PetscCall(MatColoringApply(coloring, &iscoloring)); + PetscCall(MatColoringDestroy(&coloring)); + + // Replace the old coloring with the new one + PetscCall(MatFDColoringDestroy(&fdcoloring)); + PetscCall(MatFDColoringCreate(Jfd, iscoloring, &fdcoloring)); + PetscCall( + MatFDColoringSetFunction(fdcoloring, bout::cast_MatFDColoringFn(function), ctx)); + PetscCall(MatFDColoringSetFromOptions(fdcoloring)); + PetscCall(MatFDColoringSetUp(Jfd, iscoloring, fdcoloring)); + PetscCall(ISColoringDestroy(&iscoloring)); + + PetscFunctionReturn(PETSC_SUCCESS); + } + + Mat jacobian() const { return Jfd; } + MatFDColoring coloring() const { return fdcoloring; } + + void reset(); + +private: + Mat Jfd{nullptr}; + MatFDColoring fdcoloring{nullptr}; +}; + +#else + +// PETSc is not available, but keep the type defined so headers can be included +// unconditionally in PETSc-enabled compilation units. +class PetscPreconditioner { +public: + void reset() {} +}; + +#endif // BOUT_HAS_PETSC + +#endif // BOUT_PETSC_PRECONDITIONER_H diff --git a/manual/sphinx/user_docs/time_integration.rst b/manual/sphinx/user_docs/time_integration.rst index 37bf9a6abf..8dd16f9c7b 100644 --- a/manual/sphinx/user_docs/time_integration.rst +++ b/manual/sphinx/user_docs/time_integration.rst @@ -33,7 +33,7 @@ needed to make the solver available. .. _tab-solvers: .. table:: Available time integration solvers - + +---------------+-----------------------------------------+------------------------+ | Name | Description | Compile options | +===============+=========================================+========================+ @@ -68,7 +68,7 @@ given in table :numref:`tab-solveropts`. .. _tab-solveropts: .. table:: Time integration solver options - + +--------------------------+--------------------------------------------+-------------------------------------+ | Option | Description | Solvers used | +==========================+============================================+=====================================+ @@ -144,6 +144,31 @@ iterations becomes large, this may be an indication that the system is poorly conditioned, and a preconditioner might help improve performance. See :ref:`sec-preconditioning`. +CVODE preconditioning is controlled using ``solver:cvode_precon_method``: + +- ``none`` (default): Disable preconditioning. +- ``auto``: Prefer a user-supplied preconditioner if provided, then PETSc + coloring if PETSc is available, otherwise use BBD. +- ``user``: Require a user-supplied preconditioner. +- ``petsc``: Require PETSc and use PETSc coloring. +- ``bbd``: Force the built-in BBD preconditioner. + +For ``cvode_precon_method = petsc``, PETSc options for the internal KSP/PC can be +set with the prefix ``cvode_petscpre_`` (either on the command line, or by putting +prefixed keys into the ``[petsc]`` section). For example:: + + [petsc] + cvode_petscpre_ksp_type = preonly + cvode_petscpre_pc_type = hypre + +Two CVODE heuristics that control when the linear solver setup routine is called, +and when the Jacobian/preconditioner are recomputed, can be adjusted with: + +- ``cvode_lsetup_frequency`` (default ``0``): Passed to ``CVodeSetLSetupFrequency``. + ``0`` uses the SUNDIALS default. +- ``cvode_jac_eval_frequency`` (default ``0``): Passed to ``CVodeSetJacEvalFrequency``. + ``0`` uses the SUNDIALS default. + CVODE can set constraints to keep some quantities positive, non-negative, negative or non-positive. These constraints can be activated by setting the option ``solver:apply_positivity_constraints=true``, and then in the section diff --git a/src/solver/impls/cvode/cvode.cxx b/src/solver/impls/cvode/cvode.cxx index 4f72e65933..5cc5cae59a 100644 --- a/src/solver/impls/cvode/cvode.cxx +++ b/src/solver/impls/cvode/cvode.cxx @@ -3,7 +3,7 @@ * * ************************************************************************** - * Copyright 2010-2024 BOUT++ contributors + * Copyright 2010-2026 BOUT++ contributors * * Contact: Ben Dudson, dudson2@llnl.gov * @@ -25,6 +25,7 @@ **************************************************************************/ #include "bout/build_defines.hxx" +#include #include "cvode.hxx" @@ -42,6 +43,9 @@ #include "bout/msg_stack.hxx" #include "bout/options.hxx" #include "bout/output.hxx" +#include "bout/petsclib.hxx" +#include "bout/region.hxx" +#include "bout/solver.hxx" #include "bout/sundials_backports.hxx" #include "bout/unused.hxx" @@ -57,8 +61,10 @@ #include #include +#include #include #include +#include BOUT_ENUM_CLASS(positivity_constraint, none, positive, non_negative, negative, non_positive); @@ -116,6 +122,16 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Maximum number of nonlinear iterations allowed by CVODE before " "reducing timestep.") .withDefault(3)), + lsetup_frequency( + (*options)["cvode_lsetup_frequency"] + .doc("Linear solver setup frequency (CVodeSetLSetupFrequency). " + "0 uses the SUNDIALS default.") + .withDefault(0)), + jac_eval_frequency((*options)["cvode_jac_eval_frequency"] + .doc("Jacobian/preconditioner evaluation frequency " + "(CVodeSetJacEvalFrequency). " + "0 uses the SUNDIALS default.") + .withDefault(0)), apply_positivity_constraints( (*options)["apply_positivity_constraints"] .doc("Use CVODE function CVodeSetConstraints to constrain variables - the " @@ -125,11 +141,17 @@ CvodeSolver::CvodeSolver(Options* opts) "each variable") .withDefault(false)), maxl((*options)["maxl"].doc("Maximum number of linear iterations").withDefault(5)), - use_precon((*options)["use_precon"].doc("Use preconditioner?").withDefault(false)), rightprec((*options)["rightprec"] .doc("Use right preconditioner? Otherwise use left.") .withDefault(false)), use_jacobian((*options)["use_jacobian"].withDefault(false)), + precon_method( + (*options)["cvode_precon_method"] + .doc("Preconditioner to use with CVODE Newton iteration. " + "Choices: none (default), auto, user, petsc, bbd. " + "auto prefers user (if supplied), then petsc (if available), " + "then bbd.") + .withDefault(CvodePreconMethod::none)), cvode_nonlinear_convergence_coef( (*options)["cvode_nonlinear_convergence_coef"] .doc("Safety factor used in the nonlinear convergence test") @@ -143,6 +165,22 @@ CvodeSolver::CvodeSolver(Options* opts) has_constraints = false; // This solver doesn't have constraints canReset = true; + if ((*options)["use_precon"].isSet()) { + output_warn << "WARNING: solver:use_precon is deprecated for CVODE and is now " + "ignored. Use solver:cvode_precon_method=none to disable " + "preconditioning.\n"; + } + +#if BOUT_HAS_PETSC + // This is a temporary fix to initialise PetscLib early, working + // around a bug in Hermes-3 braginskii_conduction that creates a + // "petsc:type" subsection. + if (precon_method == CvodePreconMethod::petsc + || ((precon_method == CvodePreconMethod::Auto) && !this->hasPreconditioner())) { + petsc_lib = std::make_unique(); + } +#endif + // Add diagnostics to output // Needs to be in constructor not init() because init() is called after // Solver::outputVars() @@ -169,6 +207,23 @@ CvodeSolver::~CvodeSolver() { CVodeFree(&cvode_mem); SUNLinSolFree(sun_solver); SUNNonlinSolFree(nonlinear_solver); +#if BOUT_HAS_PETSC + if (petsc_ksp != nullptr) { + KSPDestroy(&petsc_ksp); + } + if (petsc_r != nullptr) { + VecDestroy(&petsc_r); + } + if (petsc_z != nullptr) { + VecDestroy(&petsc_z); + } + if (petsc_x != nullptr) { + VecDestroy(&petsc_x); + } + if (petsc_f != nullptr) { + VecDestroy(&petsc_f); + } +#endif } } @@ -186,9 +241,10 @@ int CvodeSolver::init() { const int local_N = getLocalN(); // Get total problem size - int neq; + int neq{0}; if (bout::globals::mpi->MPI_Allreduce(&local_N, &neq, 1, MPI_INT, MPI_SUM, - BoutComm::get())) { + BoutComm::get()) + != 0) { throw BoutException("Allreduce localN -> GlobalN failed!\n"); } @@ -312,6 +368,12 @@ int CvodeSolver::init() { throw BoutException("CVodeSetMaxNonlinIters failed\n"); } +#if SUNDIALS_VERSION_MAJOR >= 6 + if (CVodeSetLSetupFrequency(cvode_mem, lsetup_frequency) != CV_SUCCESS) { + throw BoutException("CVodeSetLSetupFrequency failed\n"); + } +#endif + if (apply_positivity_constraints) { auto f2d_constraints = create_constraints(f2d); auto f3d_constraints = create_constraints(f3d); @@ -346,8 +408,27 @@ int CvodeSolver::init() { } else { output_info.write("\tUsing Newton iteration\n"); - const auto prectype = - use_precon ? (rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE; + CvodePreconMethod selected_precon = precon_method; + if (selected_precon == CvodePreconMethod::Auto) { + if (hasPreconditioner()) { + selected_precon = CvodePreconMethod::user; + } else { +#if BOUT_HAS_PETSC + selected_precon = CvodePreconMethod::petsc; +#else + selected_precon = CvodePreconMethod::bbd; +#endif + } + } + + auto prectype = SUN_PREC_NONE; + if (selected_precon != CvodePreconMethod::none) { + if (rightprec) { + prectype = SUN_PREC_RIGHT; + } else { + prectype = SUN_PREC_LEFT; + } + } switch ((*options)["linear_solver"] .doc("Set linear solver type. Default is gmres.") @@ -373,42 +454,87 @@ int CvodeSolver::init() { throw BoutException("CVodeSetLinearSolver failed\n"); } - if (use_precon) { - if (hasPreconditioner()) { - output_info.write("\tUsing user-supplied preconditioner\n"); +#if SUNDIALS_VERSION_MAJOR >= 6 + if (CVodeSetJacEvalFrequency(cvode_mem, jac_eval_frequency) != CVLS_SUCCESS) { + throw BoutException("CVodeSetJacEvalFrequency failed\n"); + } +#endif - if (CVodeSetPreconditioner(cvode_mem, nullptr, cvode_pre) != CVLS_SUCCESS) { - throw BoutException("CVodeSetPreconditioner failed\n"); - } - } else { - output_info.write("\tUsing BBD preconditioner\n"); - - /// Get options - // Compute band_width_default from actually added fields, to allow for multiple - // Mesh objects - // - // Previous implementation was equivalent to: - // int MXSUB = mesh->xend - mesh->xstart + 1; - // int band_width_default = n3Dvars()*(MXSUB+2); - const int band_width_default = std::accumulate( - begin(f3d), end(f3d), 0, [](int a, const VarStr& fvar) { - Mesh* localmesh = fvar.var->getMesh(); - return a + localmesh->xend - localmesh->xstart + 3; - }); - - const auto mudq = (*options)["mudq"].withDefault(band_width_default); - const auto mldq = (*options)["mldq"].withDefault(band_width_default); - const auto mukeep = (*options)["mukeep"].withDefault(n3Dvars() + n2Dvars()); - const auto mlkeep = (*options)["mlkeep"].withDefault(n3Dvars() + n2Dvars()); - - if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, - cvode_bbd_rhs, nullptr) - != CVLS_SUCCESS) { - throw BoutException("CVBBDPrecInit failed\n"); - } - } - } else { + if (selected_precon == CvodePreconMethod::none) { output_info.write("\tNo preconditioning\n"); + + } else if (selected_precon == CvodePreconMethod::user) { + if (!hasPreconditioner()) { + throw BoutException( + "solver:cvode_precon_method=user requested, but no user preconditioner " + "has been supplied."); + } + + output_info.write("\tUsing user-supplied preconditioner\n"); + + if (CVodeSetPreconditioner(cvode_mem, nullptr, cvode_pre) != CVLS_SUCCESS) { + throw BoutException("CVodeSetPreconditioner failed\n"); + } + } else if (selected_precon == CvodePreconMethod::petsc) { +#if !BOUT_HAS_PETSC + throw BoutException( + "solver:cvode_precon_method=petsc requested, but BOUT++ was not " + "configured with PETSc."); +#else + output_info.write("\tUsing PETSc coloring preconditioner\n"); + + if (!petsc_lib) { + petsc_lib = std::make_unique(); + } + + petsc_global_N = neq; + petsc_rhs_tmp.resize(local_N); + + if (petsc_ksp == nullptr) { + PetscCall(KSPCreate(BoutComm::get(), &petsc_ksp)); + PetscCall(KSPSetType(petsc_ksp, KSPPREONLY)); + PetscCall(KSPSetOptionsPrefix(petsc_ksp, "cvode_petscpre_")); + PetscCall(KSPSetFromOptions(petsc_ksp)); + } + + if (petsc_x == nullptr) { + PetscCall(VecCreateMPI(BoutComm::get(), local_N, petsc_global_N, &petsc_x)); + PetscCall(VecDuplicate(petsc_x, &petsc_f)); + PetscCall(VecCreateMPI(BoutComm::get(), local_N, petsc_global_N, &petsc_r)); + PetscCall(VecCreateMPI(BoutComm::get(), local_N, petsc_global_N, &petsc_z)); + } + + Field3D index = globalIndex(0); + PetscCall(petsc_preconditioner.createJacobianPattern( + index, *options, local_N, n2Dvars(), n3Dvars(), BoutComm::get())); + PetscCall( + petsc_preconditioner.updateColoring(CvodeSolver::petscFormFunction, this)); + PetscCall(MatFDColoringSetF(petsc_preconditioner.coloring(), petsc_f)); + + if (CVodeSetPreconditioner(cvode_mem, petscPSetup, petscPSolve) != CVLS_SUCCESS) { + throw BoutException("CVodeSetPreconditioner (PETSc coloring) failed\n"); + } +#endif // BOUT_HAS_PETSC + + } else if (selected_precon == CvodePreconMethod::bbd) { + output_info.write("\tUsing BBD preconditioner\n"); + + const int band_width_default = std::accumulate( + begin(f3d), end(f3d), 0, [](int a, const VarStr& fvar) { + const Mesh* localmesh = fvar.var->getMesh(); + return a + localmesh->xend - localmesh->xstart + 3; + }); + + const auto mudq = (*options)["mudq"].withDefault(band_width_default); + const auto mldq = (*options)["mldq"].withDefault(band_width_default); + const auto mukeep = (*options)["mukeep"].withDefault(n3Dvars() + n2Dvars()); + const auto mlkeep = (*options)["mlkeep"].withDefault(n3Dvars() + n2Dvars()); + + if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, + cvode_bbd_rhs, nullptr) + != CVLS_SUCCESS) { + throw BoutException("CVBBDPrecInit failed\n"); + } } /// Set Jacobian-vector multiplication function @@ -554,7 +680,7 @@ int CvodeSolver::run() { /// Call the monitor function - if (call_monitors(simtime, i, getNumberOutputSteps())) { + if (call_monitors(simtime, i, getNumberOutputSteps()) != 0) { // User signalled to quit break; } @@ -581,7 +707,7 @@ BoutReal CvodeSolver::run(BoutReal tout) { CVodeGetCurrentTime(cvode_mem, &internal_time); while (internal_time < tout) { // Run another step - BoutReal last_time = internal_time; + const BoutReal last_time = internal_time; flag = CVode(cvode_mem, tout, uvec, &internal_time, CV_ONE_STEP); if (flag < 0) { @@ -640,7 +766,7 @@ void CvodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udat BoutReal* rvec, BoutReal* zvec) { TRACE("Running preconditioner: CvodeSolver::pre({})", t); - BoutReal tstart = bout::globals::mpi->MPI_Wtime(); + const BoutReal tstart = bout::globals::mpi->MPI_Wtime(); const auto length = N_VGetLocalLength_Parallel(uvec); @@ -766,6 +892,123 @@ int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(f } // namespace // NOLINTEND(readability-identifier-length) +#if BOUT_HAS_PETSC +PetscErrorCode CvodeSolver::petscFormFunction(void* UNUSED(dummy), Vec x, Vec f, + void* ctx) { + auto* s = static_cast(ctx); + + PetscInt length = 0; + PetscCall(VecGetLocalSize(x, &length)); + s->petsc_rhs_tmp.resize(static_cast(length)); + + const BoutReal* xdata = nullptr; + PetscCall(VecGetArrayRead(x, &xdata)); + + BoutReal* fdata = nullptr; + PetscCall(VecGetArray(f, &fdata)); + + try { + s->rhs(s->petsc_t, const_cast(xdata), s->petsc_rhs_tmp.data(), true); + } catch (BoutRhsFail&) { + PetscCall(VecRestoreArrayRead(x, &xdata)); + PetscCall(VecRestoreArray(f, &fdata)); + return 1; + } + + for (PetscInt i = 0; i < length; ++i) { + fdata[i] = xdata[i] - (s->petsc_gamma * s->petsc_rhs_tmp[i]); + } + + PetscCall(VecRestoreArrayRead(x, &xdata)); + PetscCall(VecRestoreArray(f, &fdata)); + + return PETSC_SUCCESS; +} + +int CvodeSolver::petscPSetup(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), + CvodeBool UNUSED(jok), CvodeBool* jcurPtr, BoutReal gamma, + void* user_data) { + auto* s = static_cast(user_data); + + s->petsc_t = t; + s->petsc_gamma = gamma; + + auto* ydata = N_VGetArrayPointer(yy); + + PetscErrorCode ierr = VecPlaceArray(s->petsc_x, ydata); + if (ierr != 0) { + return 1; + } + + ierr = petscFormFunction(nullptr, s->petsc_x, s->petsc_f, s); + if (ierr != 0) { + VecResetArray(s->petsc_x); + return 1; + } + + Mat J = s->petsc_preconditioner.jacobian(); + ierr = MatZeroEntries(J); + if (ierr != 0) { + VecResetArray(s->petsc_x); + return 1; + } + + ierr = MatFDColoringApply(J, s->petsc_preconditioner.coloring(), s->petsc_x, nullptr); + if (ierr != 0) { + VecResetArray(s->petsc_x); + return 1; + } + + ierr = MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY); + if (ierr == 0) { + ierr = MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY); + } + if (ierr != 0) { + VecResetArray(s->petsc_x); + return 1; + } + + ierr = KSPSetOperators(s->petsc_ksp, J, J); + if (ierr == 0) { + ierr = KSPSetUp(s->petsc_ksp); + } + + VecResetArray(s->petsc_x); + + if (ierr != 0) { + return 1; + } + + if (jcurPtr != nullptr) { + *jcurPtr = 1; + } + + return 0; +} + +int CvodeSolver::petscPSolve(BoutReal UNUSED(t), N_Vector UNUSED(yy), N_Vector UNUSED(yp), + N_Vector rvec, N_Vector zvec, BoutReal UNUSED(gamma), + BoutReal UNUSED(delta), int UNUSED(lr), void* user_data) { + auto* s = static_cast(user_data); + + auto* rdata = N_VGetArrayPointer(rvec); + auto* zdata = N_VGetArrayPointer(zvec); + + PetscErrorCode ierr = VecPlaceArray(s->petsc_r, rdata); + if (ierr == 0) { + ierr = VecPlaceArray(s->petsc_z, zdata); + } + if (ierr == 0) { + ierr = KSPSolve(s->petsc_ksp, s->petsc_r, s->petsc_z); + } + + VecResetArray(s->petsc_r); + VecResetArray(s->petsc_z); + + return ierr == 0 ? 0 : 1; +} +#endif + /************************************************************************** * CVODE vector option functions **************************************************************************/ @@ -818,5 +1061,4 @@ void CvodeSolver::resetInternalFields() { throw BoutException("CVodeReInit failed\n"); } } - #endif diff --git a/src/solver/impls/cvode/cvode.hxx b/src/solver/impls/cvode/cvode.hxx index f0cc2eedf8..d1409ad9a3 100644 --- a/src/solver/impls/cvode/cvode.hxx +++ b/src/solver/impls/cvode/cvode.hxx @@ -4,9 +4,9 @@ * NOTE: Only one solver can currently be compiled in * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -28,8 +28,10 @@ #ifndef BOUT_SUNDIAL_SOLVER_H #define BOUT_SUNDIAL_SOLVER_H +#include "bout/bout_enum_class.hxx" #include "bout/build_defines.hxx" #include "bout/solver.hxx" +#include #if not BOUT_HAS_CVODE @@ -44,6 +46,14 @@ RegisterUnavailableSolver #include "bout/region.hxx" #include "bout/sundials_backports.hxx" +#if BOUT_HAS_PETSC +#include "bout/petsc_preconditioner.hxx" +#include "bout/petsclib.hxx" + +#include +#include +#endif + #include #include @@ -54,6 +64,16 @@ namespace { RegisterSolver registersolvercvode("cvode"); } +// Preconditioner selection for CVODE. +// Note: String comparisons are case-insensitive so "Auto" avoids conflict with keyword +BOUT_ENUM_CLASS(CvodePreconMethod, none, Auto, user, petsc, bbd); + +#if SUNDIALS_VERSION_AT_LEAST(6, 0, 0) +using CvodeBool = sunbooleantype; +#else +using CvodeBool = booleantype; +#endif + class CvodeSolver : public Solver { public: explicit CvodeSolver(Options* opts = nullptr); @@ -74,6 +94,15 @@ public: void jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jvdata); private: +#if BOUT_HAS_PETSC + static PetscErrorCode petscFormFunction(void* dummy, Vec x, Vec f, void* ctx); + static int petscPSetup(BoutReal t, N_Vector yy, N_Vector yp, CvodeBool jok, + CvodeBool* jcurPtr, BoutReal gamma, void* user_data); + static int petscPSolve(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, + N_Vector zvec, BoutReal gamma, BoutReal delta, int lr, + void* user_data); +#endif + BoutReal hcur; //< Current internal timestep bool diagnose{false}; //< Output additional diagnostics @@ -111,17 +140,20 @@ private: /// reducing timestep. CVODE default (used if this option is /// negative) is 3 int max_nonlinear_iterations; + /// Max number of steps between calls to linear solver setup + long int lsetup_frequency; + /// Max number of steps between Jacobian/preconditioner evaluations + long int jac_eval_frequency; /// Use CVODE function CVodeSetConstraints to constrain variables - /// the constraint to be applied is set by the positivity_constraint /// option in the subsection for each variable bool apply_positivity_constraints; /// Maximum number of linear iterations int maxl; - /// Use preconditioner? - bool use_precon; /// Use right preconditioner? Otherwise use left. bool rightprec; bool use_jacobian; + CvodePreconMethod precon_method; BoutReal cvode_nonlinear_convergence_coef; BoutReal cvode_linear_convergence_coef; @@ -153,6 +185,21 @@ private: SUNNonlinearSolver nonlinear_solver{nullptr}; /// Context for SUNDIALS memory allocations sundials::Context suncontext; + +#if BOUT_HAS_PETSC + // PETSc-coloring-based preconditioning for CVODE + std::unique_ptr petsc_lib; + PetscPreconditioner petsc_preconditioner; + KSP petsc_ksp{nullptr}; + Vec petsc_r{nullptr}; + Vec petsc_z{nullptr}; + Vec petsc_x{nullptr}; + Vec petsc_f{nullptr}; + PetscInt petsc_global_N{0}; + BoutReal petsc_gamma{0.0}; + BoutReal petsc_t{0.0}; + std::vector petsc_rhs_tmp; +#endif }; #endif // BOUT_HAS_CVODE diff --git a/src/solver/impls/petsc/petsc.cxx b/src/solver/impls/petsc/petsc.cxx index 1e1e05596b..146e8e5361 100644 --- a/src/solver/impls/petsc/petsc.cxx +++ b/src/solver/impls/petsc/petsc.cxx @@ -24,16 +24,13 @@ **************************************************************************/ #include "bout/build_defines.hxx" +#include "bout/field3d.hxx" +#include "bout/petsclib.hxx" #if BOUT_HAS_PETSC #include "petsc.hxx" -#include -#include -#include -#include - #include #include #include @@ -62,45 +59,6 @@ #define PETSC_UNLIMITED (-3) #endif -class ColoringStencil { -private: - bool static isInSquare(int const i, int const j, int const n_square) { - return std::abs(i) <= n_square && std::abs(j) <= n_square; - } - bool static isInCross(int const i, int const j, int const n_cross) { - if (i == 0) { - return std::abs(j) <= n_cross; - } - if (j == 0) { - return std::abs(i) <= n_cross; - } - return false; - } - bool static isInTaxi(int const i, int const j, int const n_taxi) { - return std::abs(i) + std::abs(j) <= n_taxi; - } - -public: - auto static getOffsets(int n_square, int n_taxi, int n_cross) { - ASSERT2(n_square >= 0 && n_cross >= 0 && n_taxi >= 0 - && n_square + n_cross + n_taxi > 0); - auto inside = [&](int i, int j) { - return isInSquare(i, j, n_square) || isInTaxi(i, j, n_taxi) - || isInCross(i, j, n_cross); - }; - std::vector> xy_offsets; - auto loop_bound = std::max({n_square, n_taxi, n_cross}); - for (int i = -loop_bound; i <= loop_bound; ++i) { - for (int j = -loop_bound; j <= loop_bound; ++j) { - if (inside(i, j)) { - xy_offsets.emplace_back(i, j); - } - } - } - return xy_offsets; - } -}; - namespace { // PETSc callback function for matrix-free preconditioner PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) { @@ -315,7 +273,6 @@ PetscSolver::~PetscSolver() { VecDestroy(&u); MatDestroy(&Jfd); MatDestroy(&Jmf); - MatFDColoringDestroy(&fdcoloring); TSDestroy(&ts); } @@ -559,299 +516,11 @@ int PetscSolver::init() { // preconditioner. if (use_coloring) { - // Use matrix coloring. This greatly reduces the number of - // times the rhs() function needs to be evaluated when - // calculating the Jacobian, by identifying which quantities may - // be simultaneously perturbed. - - // Use global mesh for now - Mesh* mesh = bout::globals::mesh; - - ////////////////////////////////////////////////// - // Get the local indices by starting at 0 Field3D index = globalIndex(0); - - ////////////////////////////////////////////////// - // Pre-allocate PETSc storage - - output_progress.write("Setting Jacobian matrix sizes\n"); - - const int n2d = f2d.size(); - const int n3d = f3d.size(); - - // Set size of Matrix on each processor to nlocal x nlocal - MatCreate(BoutComm::get(), &Jfd); - MatSetOption(Jfd, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE); - MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE); - MatSetFromOptions(Jfd); - // Determine which row/columns of the matrix are locally owned - int Istart, Iend; - MatGetOwnershipRange(Jfd, &Istart, &Iend); - // Convert local into global indices - // Note: Not in the boundary cells, to keep -1 values - for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { - index[i] += Istart; - } - // Now communicate to fill guard cells - mesh->communicate(index); - - // Non-zero elements on this processor - std::vector d_nnz; - std::vector o_nnz; - auto n_square = (*options)["stencil:square"] - .doc("Extent of stencil (square)") - .withDefault(0); - auto n_taxi = (*options)["stencil:taxi"] - .doc("Extent of stencil (taxi-cab norm)") - .withDefault(0); - auto n_cross = (*options)["stencil:cross"] - .doc("Extent of stencil (cross)") - .withDefault(0); - // Set n_taxi 2 if nothing else is set - // Probably a better way to do this - if (n_square == 0 && n_taxi == 0 && n_cross == 0) { - output_info.write("Setting solver:stencil:taxi = 2\n"); - n_taxi = 2; - } - - auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); - { - // This is ugly but can't think of a better and robust way to - // count the non-zeros for some arbitrary stencil - // effectively the same loop as the one that sets the non-zeros below - std::vector> d_nnz_map2d(nlocal); - std::vector> o_nnz_map2d(nlocal); - std::vector> d_nnz_map3d(nlocal); - std::vector> o_nnz_map3d(nlocal); - // Loop over every element in 2D to count the *unique* non-zeros - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)) - Istart; - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - const int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - } - } - // 3D fields - for (int z = mesh->zstart; z <= mesh->zend; z++) { - const int ind = ROUND(index(x, y, z)) - Istart; - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind0 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map3d[row].insert(col); - } else { - o_nnz_map3d[row].insert(col); - } - } - } - } - } - } - } - - d_nnz.reserve(nlocal); - d_nnz.reserve(nlocal); - - for (int i = 0; i < nlocal; ++i) { - // Assume all elements in the z direction are potentially coupled - d_nnz.emplace_back((d_nnz_map3d[i].size() * mesh->LocalNz) - + d_nnz_map2d[i].size()); - o_nnz.emplace_back((o_nnz_map3d[i].size() * mesh->LocalNz) - + o_nnz_map2d[i].size()); - } - } - - output_progress.write("Pre-allocating Jacobian\n"); - // Pre-allocate - MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data()); - MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data()); - MatSetUp(Jfd); - MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); - - ////////////////////////////////////////////////// - // Mark non-zero entries - - output_progress.write("Marking non-zero Jacobian entries\n"); - PetscScalar const val = 1.0; - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)); - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - int const ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - PetscInt const col = ind2 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - } - } - // 3D fields - for (int z = mesh->zstart; z <= mesh->zend; z++) { - int const ind = ROUND(index(x, y, z)); - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - PetscInt const col = ind0 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - int xi = x + x_off; - int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - for (int zi = mesh->zstart; zi <= mesh->zend; ++zi) { - int ind2 = ROUND(index(xi, yi, zi)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - PetscInt const col = ind2 + j; - int ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - - if (ierr != 0) { - output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", - row, col, x, y, xi, yi, ind2, ind2 + n3d - 1); - } - CHKERRQ(ierr); - } - } - } - } - } - } - } - - // Finished marking non-zero entries - - output_progress.write("Assembling Jacobian matrix\n"); - - // Assemble Matrix - MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY); - - { - // Test if the matrix is symmetric - // Values are 0 or 1 so tolerance (1e-5) shouldn't matter - PetscBool symmetric; - PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); - if (!symmetric) { - output_warn.write("Jacobian pattern is not symmetric\n"); - } - } - - // The above can miss entries around the X-point branch cut: - // The diagonal terms are complicated because moving in X then Y - // is different from moving in Y then X at the X-point. - // Making sure the colouring matrix is symmetric does not - // necessarily give the correct stencil but may help. - if ((*options)["force_symmetric_coloring"] - .doc("Modifies coloring matrix to force it to be symmetric") - .withDefault(false)) { - Mat Jfd_T; - MatCreateTranspose(Jfd, &Jfd_T); - MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN); - } - + PetscCall(petsc_preconditioner.createJacobianPattern( + index, *options, nlocal, n2Dvars(), n3Dvars(), BoutComm::get())); output_progress.write("Creating Jacobian coloring\n"); updateColoring(); - } else { // Brute force calculation // There is usually no reason to use this, except as a check of @@ -996,55 +665,39 @@ PetscErrorCode PetscSolver::pre(Vec x, Vec y) { } void PetscSolver::updateColoring() { - // Re-calculate the coloring - MatColoring coloring{nullptr}; - MatColoringCreate(Jfd, &coloring); - MatColoringSetType(coloring, MATCOLORINGGREEDY); - MatColoringSetFromOptions(coloring); - - // Calculate new index sets - ISColoring iscoloring{nullptr}; - MatColoringApply(coloring, &iscoloring); - MatColoringDestroy(&coloring); - - // Replace the old coloring with the new one - MatFDColoringDestroy(&fdcoloring); - MatFDColoringCreate(Jfd, iscoloring, &fdcoloring); + Mat jac = petsc_preconditioner.jacobian(); + if (ts_type != TSSUNDIALS) { // Use the SNES function that is defined by the TS method // SNESTSFormFunction is defined in PETSc ts.c // The ctx pointer should be the TS object - // - // Note: The function type casting here is horrible but necessary - MatFDColoringSetFunction(fdcoloring, bout::cast_MatFDColoringFn(SNESTSFormFunction), - ts); + BOUT_DO_PETSC(petsc_preconditioner.updateColoring(SNESTSFormFunction, ts)); } else { // SNESTSFormFunction is not available for SUNDIALS. // This solver_form_function needs to know the shift // (SUNDIALS' gamma) that we capture in solver_ijacobian_color. - MatFDColoringSetFunction(fdcoloring, bout::cast_MatFDColoringFn(solver_form_function), - this); + BOUT_DO_PETSC(petsc_preconditioner.updateColoring(solver_form_function, this)); } - MatFDColoringSetFromOptions(fdcoloring); - MatFDColoringSetUp(Jfd, iscoloring, fdcoloring); - ISColoringDestroy(&iscoloring); - // Replace the CTX pointer in SNES Jacobian + // Replace the CTX pointer in SNES Jacobian / TS Jacobian callback if (ts_type == TSSUNDIALS) { #if PETSC_HAVE_SUNDIALS2 // The SUNDIALS interface calls TSGetIJacobian // https://www.mcs.anl.gov/petsc/petsc-3.14/src/ts/impls/implicit/sundials/sundials.c // This sets the "TSMatFDColoring" property on the Jacobian, that is used in TSComputeIJacobianDefaultColor - PetscObjectCompose((PetscObject)Jfd, "TSMatFDColoring", (PetscObject)fdcoloring); + PetscObjectCompose((PetscObject)jac, "TSMatFDColoring", + (PetscObject)petsc_preconditioner.coloring()); // Call a wrapper function that stores the shift in the PetscSolver - TSSetIJacobian(ts, Jfd, Jfd, solver_ijacobian_color, this); + TSSetIJacobian(ts, jac, jac, solver_ijacobian_color, this); #endif } else { if (matrix_free_operator) { // Use matrix-free calculation for operator, finite difference for preconditioner - SNESSetJacobian(snes, Jmf, Jfd, SNESComputeJacobianDefaultColor, fdcoloring); + SNESSetJacobian(snes, Jmf, jac, SNESComputeJacobianDefaultColor, + petsc_preconditioner.coloring()); } else { - SNESSetJacobian(snes, Jfd, Jfd, SNESComputeJacobianDefaultColor, fdcoloring); + SNESSetJacobian(snes, jac, jac, SNESComputeJacobianDefaultColor, + petsc_preconditioner.coloring()); } } } diff --git a/src/solver/impls/petsc/petsc.hxx b/src/solver/impls/petsc/petsc.hxx index 78f7d1a1e4..ab8e5bf9e0 100644 --- a/src/solver/impls/petsc/petsc.hxx +++ b/src/solver/impls/petsc/petsc.hxx @@ -42,6 +42,7 @@ class PetscSolver; #include #include +#include #include #include #include @@ -93,13 +94,14 @@ private: PetscLib lib; ///< Handles initialising, finalising PETSc - Vec u{nullptr}; ///< PETSc solution vector - TS ts{nullptr}; ///< PETSc timestepper object - SNES snes{nullptr}; ///< PETSc nonlinear solver object - KSP ksp{nullptr}; ///< PETSc linear solver - Mat Jmf{nullptr}; ///< Matrix Free Jacobian - Mat Jfd{nullptr}; ///< Finite Difference Jacobian - MatFDColoring fdcoloring{nullptr}; ///< Matrix coloring context + Vec u{nullptr}; ///< PETSc solution vector + TS ts{nullptr}; ///< PETSc timestepper object + SNES snes{nullptr}; ///< PETSc nonlinear solver object + KSP ksp{nullptr}; ///< PETSc linear solver + Mat Jmf{nullptr}; ///< Matrix Free Jacobian + Mat Jfd{nullptr}; ///< Finite Difference Jacobian (brute-force, when not using coloring) + PetscPreconditioner + petsc_preconditioner; ///< Coloring-based FD Jacobian + MatFDColoring BoutReal next_output; ///< When the monitor should be called next diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 5b28ff413c..9a53aabfcb 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -15,14 +15,12 @@ #include #include #include -#include #include +#include #include #include #include -#include -#include #include #include "petscerror.h" @@ -33,45 +31,6 @@ #include "petscsystypes.h" #include "petscvec.h" -class ColoringStencil { -private: - bool static isInSquare(int const i, int const j, int const n_square) { - return std::abs(i) <= n_square && std::abs(j) <= n_square; - } - bool static isInCross(int const i, int const j, int const n_cross) { - if (i == 0) { - return std::abs(j) <= n_cross; - } - if (j == 0) { - return std::abs(i) <= n_cross; - } - return false; - } - bool static isInTaxi(int const i, int const j, int const n_taxi) { - return std::abs(i) + std::abs(j) <= n_taxi; - } - -public: - auto static getOffsets(int n_square, int n_taxi, int n_cross) { - ASSERT2(n_square >= 0 && n_cross >= 0 && n_taxi >= 0 - && n_square + n_cross + n_taxi > 0); - auto inside = [&](int i, int j) { - return isInSquare(i, j, n_square) || isInTaxi(i, j, n_taxi) - || isInCross(i, j, n_cross); - }; - std::vector> xy_offsets; - auto loop_bound = std::max({n_square, n_taxi, n_cross}); - for (int i = -loop_bound; i <= loop_bound; ++i) { - for (int j = -loop_bound; j <= loop_bound; ++j) { - if (inside(i, j)) { - xy_offsets.emplace_back(i, j); - } - } - } - return xy_offsets; - } -}; - namespace { /* * PETSc callback function, which evaluates the nonlinear @@ -109,300 +68,34 @@ PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) { PetscFunctionReturn(s->precon(x, y)); } + +PetscErrorCode ComputeJacobianScaledColor(SNES snes, Vec x1, Mat Jac, Mat Jac_new, + void* ctx); } // namespace PetscErrorCode SNESSolver::FDJinitialise() { if (use_coloring) { - // Use matrix coloring. - // This greatly reduces the number of times the rhs() function - // needs to be evaluated when calculating the Jacobian. - - // Use global mesh for now - Mesh* mesh = bout::globals::mesh; - - ////////////////////////////////////////////////// - // Get the local indices by starting at 0 Field3D index = globalIndex(0); + PetscCall(petsc_preconditioner.createJacobianPattern( + index, *options, nlocal, n2Dvars(), n3Dvars(), BoutComm::get())); + output_progress.write("Creating Jacobian coloring\n"); + PetscCall(petsc_preconditioner.updateColoring(FormFunctionForColoring, this)); - ////////////////////////////////////////////////// - // Pre-allocate PETSc storage - - output_progress.write("Setting Jacobian matrix sizes\n"); - - const int n2d = f2d.size(); - const int n3d = f3d.size(); - - // Set size of Matrix on each processor to nlocal x nlocal - MatCreate(BoutComm::get(), &Jfd); - MatSetOption(Jfd, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE); - MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE); - MatSetFromOptions(Jfd); - // Determine which row/columns of the matrix are locally owned - int Istart, Iend; - MatGetOwnershipRange(Jfd, &Istart, &Iend); - // Convert local into global indices - // Note: Not in the boundary cells, to keep -1 values - for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { - index[i] += Istart; - } - // Now communicate to fill guard cells - mesh->communicate(index); - - // Non-zero elements on this processor - std::vector d_nnz; - std::vector o_nnz; - auto n_square = (*options)["stencil:square"] - .doc("Extent of stencil (square)") - .withDefault(0); - auto n_cross = - (*options)["stencil:cross"].doc("Extent of stencil (cross)").withDefault(0); - // Set n_taxi 2 if nothing else is set - auto n_taxi = (*options)["stencil:taxi"] - .doc("Extent of stencil (taxi-cab norm)") - .withDefault((n_square == 0 && n_cross == 0) ? 2 : 0); - - auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); - { - // This is ugly but can't think of a better and robust way to - // count the non-zeros for some arbitrary stencil - // effectively the same loop as the one that sets the non-zeros below - std::vector> d_nnz_map2d(nlocal); - std::vector> o_nnz_map2d(nlocal); - std::vector> d_nnz_map3d(nlocal); - std::vector> o_nnz_map3d(nlocal); - // Loop over every element in 2D to count the *unique* non-zeros - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)) - Istart; - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - const int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - } - } - // 3D fields - for (int z = mesh->zstart; z <= mesh->zend; z++) { - const int ind = ROUND(index(x, y, z)) - Istart; - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind0 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map3d[row].insert(col); - } else { - o_nnz_map3d[row].insert(col); - } - } - } - } - } - } - } - - d_nnz.reserve(nlocal); - d_nnz.reserve(nlocal); - - for (int i = 0; i < nlocal; ++i) { - // Assume all elements in the z direction are potentially coupled - d_nnz.emplace_back((d_nnz_map3d[i].size() * mesh->LocalNz) - + d_nnz_map2d[i].size()); - o_nnz.emplace_back((o_nnz_map3d[i].size() * mesh->LocalNz) - + o_nnz_map2d[i].size()); - } - } - - output_progress.write("Pre-allocating Jacobian\n"); - // Pre-allocate - MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data()); - MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data()); - MatSetUp(Jfd); - MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); - - ////////////////////////////////////////////////// - // Mark non-zero entries - - output_progress.write("Marking non-zero Jacobian entries\n"); - const PetscScalar val = 1.0; - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)); - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { - continue; - } - - const int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind2 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - } - } - // 3D fields - for (int z = mesh->zstart; z <= mesh->zend; z++) { - const int ind = ROUND(index(x, y, z)); - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind0 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - int xi = x + x_off; - int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - for (int zi = mesh->zstart; zi <= mesh->zend; ++zi) { - int ind2 = ROUND(index(xi, yi, zi)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - const PetscErrorCode ierr = - MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - - if (ierr != PETSC_SUCCESS) { - output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", row, - col, x, y, xi, yi, ind2, ind2 + n3d - 1); - } - CHKERRQ(ierr); - } - } - } - } - } - } - } - - // Finished marking non-zero entries - - output_progress.write("Assembling Jacobian matrix\n"); - - // Assemble Matrix - MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY); - - { - // Test if the matrix is symmetric - // Values are 0 or 1 so tolerance (1e-5) shouldn't matter - PetscBool symmetric; - PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); - if (!static_cast(symmetric)) { - output_warn.write("Jacobian pattern is not symmetric\n"); - } - } - - // The above can miss entries around the X-point branch cut: - // The diagonal terms are complicated because moving in X then Y - // is different from moving in Y then X at the X-point. - // Making sure the colouring matrix is symmetric does not - // necessarily give the correct stencil but may help. - if ((*options)["force_symmetric_coloring"] - .doc("Modifies coloring matrix to force it to be symmetric") - .withDefault(false)) { - Mat Jfd_T; - MatCreateTranspose(Jfd, &Jfd_T); - MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN); + if (matrix_free_operator) { + PetscCall(SNESSetJacobian(snes, Jmf, petsc_preconditioner.jacobian(), + ComputeJacobianScaledColor, + petsc_preconditioner.coloring())); + } else { + PetscCall(SNESSetJacobian( + snes, petsc_preconditioner.jacobian(), petsc_preconditioner.jacobian(), + ComputeJacobianScaledColor, petsc_preconditioner.coloring())); } - output_progress.write("Creating Jacobian coloring\n"); - updateColoring(); - if (prune_jacobian) { // Will remove small elements from the Jacobian. // Save a copy to recover from over-pruning - PetscCall(MatDuplicate(Jfd, MAT_SHARE_NONZERO_PATTERN, &Jfd_original)); + PetscCall(MatDuplicate(petsc_preconditioner.jacobian(), MAT_SHARE_NONZERO_PATTERN, + &Jfd_original)); } } else { // Brute force calculation @@ -430,6 +123,11 @@ PetscErrorCode SNESSolver::FDJinitialise() { PetscErrorCode SNESSolver::FDJpruneJacobian() { #if PETSC_VERSION_GE(3, 20, 0) + if (!use_coloring) { + throw BoutException("Jacobian pruning requires solver:use_coloring=true"); + } + + Mat Jfd = petsc_preconditioner.jacobian(); // Remove small elements from the Jacobian and recompute the coloring // Only do this if there are a significant number of small elements. @@ -463,7 +161,16 @@ PetscErrorCode SNESSolver::FDJpruneJacobian() { PetscCall(MatFilter(Jfd, prune_abstol, PETSC_TRUE, PETSC_TRUE)); // Update the coloring from Jfd matrix - updateColoring(); + PetscCall(petsc_preconditioner.updateColoring(FormFunctionForColoring, this)); + if (matrix_free_operator) { + PetscCall(SNESSetJacobian(snes, Jmf, petsc_preconditioner.jacobian(), + ComputeJacobianScaledColor, + petsc_preconditioner.coloring())); + } else { + PetscCall(SNESSetJacobian( + snes, petsc_preconditioner.jacobian(), petsc_preconditioner.jacobian(), + ComputeJacobianScaledColor, petsc_preconditioner.coloring())); + } // Mark the Jacobian as pruned. This is so that it is only restored if pruned. jacobian_pruned = true; @@ -473,10 +180,25 @@ PetscErrorCode SNESSolver::FDJpruneJacobian() { } PetscErrorCode SNESSolver::FDJrestoreFromPruning() { + if (!use_coloring) { + throw BoutException("Jacobian pruning requires solver:use_coloring=true"); + } + + Mat Jfd = petsc_preconditioner.jacobian(); + // Restore pruned non-zero elements PetscCall(MatCopy(Jfd_original, Jfd, DIFFERENT_NONZERO_PATTERN)); // The non-zero pattern has changed, so update coloring - updateColoring(); + PetscCall(petsc_preconditioner.updateColoring(FormFunctionForColoring, this)); + if (matrix_free_operator) { + PetscCall(SNESSetJacobian(snes, Jmf, petsc_preconditioner.jacobian(), + ComputeJacobianScaledColor, + petsc_preconditioner.coloring())); + } else { + PetscCall(SNESSetJacobian(snes, petsc_preconditioner.jacobian(), + petsc_preconditioner.jacobian(), ComputeJacobianScaledColor, + petsc_preconditioner.coloring())); + } jacobian_pruned = false; // Reset flag. Will be set after pruning. return PETSC_SUCCESS; } @@ -543,9 +265,9 @@ SNESSolver::SNESSolver(Options* opts) pseudo_alpha((*options)["pseudo_alpha"] .doc("Sets timestep using dt = alpha / residual") .withDefault(100. * atol * timestep)), - pseudo_alpha_minimum( - (*options)["pseudo_alpha_minimum"].doc("Minimum value of pseudo_alpha") - .withDefault(0.1 * pseudo_alpha)), + pseudo_alpha_minimum((*options)["pseudo_alpha_minimum"] + .doc("Minimum value of pseudo_alpha") + .withDefault(0.1 * pseudo_alpha)), pseudo_growth_factor((*options)["pseudo_growth_factor"] .doc("PTC growth factor on success") .withDefault(1.1)), @@ -1191,10 +913,10 @@ int SNESSolver::run() { if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Adjust pseudo_alpha to globally scale timesteps - pseudo_alpha = std::max({ - updateGlobalTimestep(pseudo_alpha, nl_its, recent_failure_rate, - max_timestep * atol * 100), - pseudo_alpha_minimum}); + pseudo_alpha = + std::max({updateGlobalTimestep(pseudo_alpha, nl_its, recent_failure_rate, + max_timestep * atol * 100), + pseudo_alpha_minimum}); // Adjust local timesteps PetscCall(updatePseudoTimestepping()); @@ -1541,16 +1263,16 @@ PetscErrorCode SNESSolver::updatePseudoTimestepping() { /// rapid changes in timestep. BoutReal SNESSolver::updatePseudoTimestep_inverse_residual(BoutReal previous_timestep, BoutReal current_residual) { - return std::max({ - std::min({std::max({pseudo_alpha / current_residual, previous_timestep / 1.5}), - 1.5 * previous_timestep, max_timestep}), - dt_min_reset}); + return std::max( + {std::min({std::max({pseudo_alpha / current_residual, previous_timestep / 1.5}), + 1.5 * previous_timestep, max_timestep}), + dt_min_reset}); } // Strategy based on history of residuals BoutReal SNESSolver::updatePseudoTimestep_history_based(BoutReal previous_timestep, BoutReal previous_residual, - BoutReal current_residual) { + BoutReal current_residual) const { const BoutReal converged_threshold = 10 * atol; const BoutReal transition_threshold = 100 * atol; @@ -1826,6 +1548,7 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { return PETSC_SUCCESS; } +namespace { /// /// Input Parameters: /// snes - nonlinear solver object @@ -1836,8 +1559,8 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { /// Jac - Jacobian matrix (not altered in this routine) /// Jac_new - newly computed Jacobian matrix to use with preconditioner (generally the same as /// Jac) -static PetscErrorCode ComputeJacobianScaledColor(SNES snes, Vec x1, Mat Jac, Mat Jac_new, - void* ctx) { +PetscErrorCode ComputeJacobianScaledColor(SNES snes, Vec x1, Mat Jac, Mat Jac_new, + void* ctx) { PetscErrorCode err = SNESComputeJacobianDefaultColor(snes, x1, Jac, Jac_new, ctx); CHKERRQ(err); @@ -1854,39 +1577,7 @@ static PetscErrorCode ComputeJacobianScaledColor(SNES snes, Vec x1, Mat Jac, Mat // Call the SNESSolver function return fctx->scaleJacobian(Jac_new); } - -void SNESSolver::updateColoring() { - // Re-calculate the coloring - MatColoring coloring = NULL; - MatColoringCreate(Jfd, &coloring); - // MatColoringSetType(coloring, MATCOLORINGSL); // Serial algorithm. Better for smale-to-medium size problems. - MatColoringSetType( - coloring, MATCOLORINGGREEDY); // Parallel algorith. Better for large parallel runs - // MatColoringSetType(coloring, MATCOLORINGJP); // This didn't work - MatColoringSetFromOptions(coloring); - - // Calculate new index sets - ISColoring iscoloring = NULL; - MatColoringApply(coloring, &iscoloring); - MatColoringDestroy(&coloring); - - // Replace the old coloring with the new one - MatFDColoringDestroy(&fdcoloring); - MatFDColoringCreate(Jfd, iscoloring, &fdcoloring); - MatFDColoringSetFunction(fdcoloring, - bout::cast_MatFDColoringFn(FormFunctionForColoring), this); - MatFDColoringSetFromOptions(fdcoloring); - MatFDColoringSetUp(Jfd, iscoloring, fdcoloring); - ISColoringDestroy(&iscoloring); - - // Replace the CTX pointer in SNES Jacobian - if (matrix_free_operator) { - // Use matrix-free calculation for operator, finite difference for preconditioner - SNESSetJacobian(snes, Jmf, Jfd, ComputeJacobianScaledColor, fdcoloring); - } else { - SNESSetJacobian(snes, Jfd, Jfd, ComputeJacobianScaledColor, fdcoloring); - } -} +} // namespace BoutReal SNESSolver::pid(BoutReal timestep, int nl_its, BoutReal max_dt) { diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index fbcf3baddf..ddb447e865 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -41,6 +41,7 @@ class SNESSolver; #include #include #include +#include #include #include @@ -105,7 +106,7 @@ public: /// and update the internal RHS scaling factors /// This is called by SNESComputeJacobianScaledColor with the /// finite difference approximated Jacobian. - PetscErrorCode scaleJacobian(Mat B); + PetscErrorCode scaleJacobian(Mat Jac_new); /// Save diagnostics to output void outputVars(Options& output_options, bool save_repeat = true) override; @@ -184,7 +185,7 @@ private: BoutReal current_residual); BoutReal updatePseudoTimestep_history_based(BoutReal previous_timestep, BoutReal previous_residual, - BoutReal current_residual); + BoutReal current_residual) const; Field3D pseudo_timestep; @@ -202,8 +203,8 @@ private: BoutReal kI; ///< (0.2 - 0.4) Integral parameter (smooths history of changes) BoutReal kD; ///< (0.1 - 0.3) Derivative (dampens oscillation - optional) bool pid_consider_failures; ///< Reduce timestep increases if recent solves have failed - BoutReal recent_failure_rate; ///< Rolling average of recent failure rate - BoutReal last_failure_weight; ///< 1 / number of recent solves + BoutReal recent_failure_rate; ///< Rolling average of recent failure rate + BoutReal last_failure_weight; ///< 1 / number of recent solves BoutReal nl_its_prev; BoutReal nl_its_prev2; @@ -227,11 +228,11 @@ private: Vec x1; ///< Previous solution BoutReal time1{-1.0}; ///< Time of previous solution - SNES snes; ///< SNES context - Mat Jmf; ///< Matrix Free Jacobian - Mat Jfd; ///< Finite Difference Jacobian - MatFDColoring fdcoloring{nullptr}; ///< Matrix coloring context - ///< Jacobian evaluation + SNES snes; ///< SNES context + Mat Jmf; ///< Matrix Free Jacobian + Mat Jfd; ///< Finite Difference Jacobian (brute-force, when not using coloring) + PetscPreconditioner + petsc_preconditioner; ///< Coloring-based FD Jacobian + MatFDColoring bool use_precon; ///< Use preconditioner std::string ksp_type; ///< Linear solver type @@ -245,7 +246,7 @@ private: bool matrix_free_operator; ///< Use matrix free Jacobian in the operator? int lag_jacobian; ///< Re-use Jacobian bool jacobian_persists; ///< Re-use Jacobian and preconditioner across nonlinear solves - bool use_coloring; ///< Use matrix coloring + bool use_coloring; ///< Use matrix coloring bool jacobian_recalculated; ///< Flag set when Jacobian is recalculated bool prune_jacobian; ///< Remove small elements in the Jacobian? @@ -253,7 +254,6 @@ private: BoutReal prune_fraction; ///< Prune if fraction of small elements is larger than this bool jacobian_pruned{false}; ///< Has the Jacobian been pruned? Mat Jfd_original; ///< Used to reset the Jacobian if over-pruned - void updateColoring(); ///< Updates the coloring using Jfd bool scale_rhs; ///< Scale time derivatives? Vec rhs_scaling_factors; ///< Factors to multiply RHS function diff --git a/src/solver/petsc_preconditioner.cxx b/src/solver/petsc_preconditioner.cxx new file mode 100644 index 0000000000..508d001fd3 --- /dev/null +++ b/src/solver/petsc_preconditioner.cxx @@ -0,0 +1,326 @@ +#include "bout/build_defines.hxx" + +#if BOUT_HAS_PETSC + +#include "bout/petsc_preconditioner.hxx" + +#include "bout/assert.hxx" +#include "bout/boutcomm.hxx" +#include "bout/field3d.hxx" +#include "bout/globals.hxx" +#include "bout/mesh.hxx" +#include "bout/options.hxx" +#include "bout/output.hxx" +#include "bout/utils.hxx" + +#include +#include +#include +#include +#include + +namespace { +class ColoringStencil { +private: + static bool isInSquare(int i, int j, int n_square) { + return std::abs(i) <= n_square && std::abs(j) <= n_square; + } + static bool isInCross(int i, int j, int n_cross) { + if (i == 0) { + return std::abs(j) <= n_cross; + } + if (j == 0) { + return std::abs(i) <= n_cross; + } + return false; + } + static bool isInTaxi(int i, int j, int n_taxi) { + return std::abs(i) + std::abs(j) <= n_taxi; + } + +public: + static auto getOffsets(int n_square, int n_taxi, int n_cross) { + ASSERT2(n_square >= 0 && n_cross >= 0 && n_taxi >= 0 + && n_square + n_cross + n_taxi > 0); + auto inside = [&](int i, int j) { + return isInSquare(i, j, n_square) || isInTaxi(i, j, n_taxi) + || isInCross(i, j, n_cross); + }; + std::vector> xy_offsets; + auto loop_bound = std::max({n_square, n_taxi, n_cross}); + for (int i = -loop_bound; i <= loop_bound; ++i) { + for (int j = -loop_bound; j <= loop_bound; ++j) { + if (inside(i, j)) { + xy_offsets.emplace_back(i, j); + } + } + } + return xy_offsets; + } +}; +} // namespace + +void PetscPreconditioner::reset() { + if (fdcoloring != nullptr) { + MatFDColoringDestroy(&fdcoloring); + fdcoloring = nullptr; + } + if (Jfd != nullptr) { + MatDestroy(&Jfd); + Jfd = nullptr; + } +} + +PetscErrorCode PetscPreconditioner::createJacobianPattern(Field3D& index, + Options& options, + PetscInt nlocal, int n2d, + int n3d, MPI_Comm comm) { + reset(); + + // Use global mesh for now + Mesh* mesh = bout::globals::mesh; + + ////////////////////////////////////////////////// + // Pre-allocate PETSc storage + + output_progress.write("Setting Jacobian matrix sizes\n"); + + // Set size of Matrix on each processor to nlocal x nlocal + PetscCall(MatCreate(comm, &Jfd)); + PetscCall(MatSetOption(Jfd, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); + PetscCall(MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE)); + PetscCall(MatSetFromOptions(Jfd)); + + // Determine which row/columns of the matrix are locally owned + int Istart = 0; + int Iend = 0; + PetscCall(MatGetOwnershipRange(Jfd, &Istart, &Iend)); + + // Convert local into global indices + // Note: Not in the boundary cells, to keep -1 values + for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { + index[i] += Istart; + } + // Now communicate to fill guard cells + mesh->communicate(index); + + // Non-zero elements on this processor + std::vector d_nnz; + std::vector o_nnz; + + auto n_square = + options["stencil:square"].doc("Extent of stencil (square)").withDefault(0); + auto n_cross = + options["stencil:cross"].doc("Extent of stencil (cross)").withDefault(0); + // Default n_taxi=2 if nothing else is set + auto n_taxi = options["stencil:taxi"] + .doc("Extent of stencil (taxi-cab norm)") + .withDefault((n_square == 0 && n_cross == 0) ? 2 : 0); + + const auto xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); + { + // Count the *unique* non-zeros for preallocation + std::vector> d_nnz_map2d(nlocal); + std::vector> o_nnz_map2d(nlocal); + std::vector> d_nnz_map3d(nlocal); + std::vector> o_nnz_map3d(nlocal); + + for (int x = mesh->xstart; x <= mesh->xend; x++) { + for (int y = mesh->ystart; y <= mesh->yend; y++) { + const int ind0 = ROUND(index(x, y, 0)) - Istart; + + // 2D fields + for (int i = 0; i < n2d; i++) { + const PetscInt row = ind0 + i; + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { + continue; + } + + const int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // A boundary point + } + + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } + } + } + + // 3D fields + for (int z = mesh->zstart; z <= mesh->zend; z++) { + const int ind = ROUND(index(x, y, z)) - Istart; + + for (int i = 0; i < n3d; i++) { + PetscInt row = ind + i; + if (z == 0) { + row += n2d; + } + + // Depends on 2D fields + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind0 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } + + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } + + int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // Boundary point + } + + if (z == 0) { + ind2 += n2d; + } + + for (int j = 0; j < n3d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map3d[row].insert(col); + } else { + o_nnz_map3d[row].insert(col); + } + } + } + } + } + } + } + + d_nnz.reserve(nlocal); + o_nnz.reserve(nlocal); + for (int i = 0; i < nlocal; ++i) { + // Assume all elements in the z direction are potentially coupled + d_nnz.emplace_back((d_nnz_map3d[i].size() * mesh->LocalNz) + d_nnz_map2d[i].size()); + o_nnz.emplace_back((o_nnz_map3d[i].size() * mesh->LocalNz) + o_nnz_map2d[i].size()); + } + } + + output_progress.write("Pre-allocating Jacobian\n"); + PetscCall(MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data())); + PetscCall(MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data())); + PetscCall(MatSetUp(Jfd)); + PetscCall(MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); + + ////////////////////////////////////////////////// + // Mark non-zero entries + + output_progress.write("Marking non-zero Jacobian entries\n"); + const PetscScalar val = 1.0; + for (int x = mesh->xstart; x <= mesh->xend; x++) { + for (int y = mesh->ystart; y <= mesh->yend; y++) { + const int ind0 = ROUND(index(x, y, 0)); + + // 2D fields + for (int i = 0; i < n2d; i++) { + const PetscInt row = ind0 + i; + + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { + continue; + } + + const int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // A boundary point + } + + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind2 + j; + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); + } + } + } + + // 3D fields + for (int z = mesh->zstart; z <= mesh->zend; z++) { + const int ind = ROUND(index(x, y, z)); + + for (int i = 0; i < n3d; i++) { + PetscInt row = ind + i; + if (z == 0) { + row += n2d; + } + + // Depends on 2D fields + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind0 + j; + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); + } + + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { + continue; + } + + for (int zi = mesh->zstart; zi <= mesh->zend; ++zi) { + int ind2 = ROUND(index(xi, yi, zi)); + if (ind2 < 0) { + continue; // Boundary point + } + + if (z == 0) { + ind2 += n2d; + } + + for (int j = 0; j < n3d; j++) { + const PetscInt col = ind2 + j; + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); + } + } + } + } + } + } + } + + output_progress.write("Assembling Jacobian matrix\n"); + PetscCall(MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY)); + + { + PetscBool symmetric = PETSC_FALSE; + PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); + if (!static_cast(symmetric)) { + output_warn.write("Jacobian pattern is not symmetric\n"); + } + } + + if (options["force_symmetric_coloring"] + .doc("Modifies coloring matrix to force it to be symmetric") + .withDefault(false)) { + Mat Jfd_T{nullptr}; + PetscCall(MatCreateTranspose(Jfd, &Jfd_T)); + PetscCall(MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN)); + PetscCall(MatDestroy(&Jfd_T)); + } + + PetscFunctionReturn(PETSC_SUCCESS); +} + +#endif // BOUT_HAS_PETSC