From 14cafb45fc6bf62b25592d8cbd66602b6f2f4e58 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Tue, 7 Apr 2026 15:45:58 -0700 Subject: [PATCH 1/7] checkpoint --- Common/include/linear_algebra/CSysVector.hpp | 27 +++++++++++++++++++ .../src/drivers/CDiscAdjMultizoneDriver.cpp | 4 +-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index f300c01e091..fdcde50c113 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -29,6 +29,7 @@ #pragma once #include +#include #include "../parallelization/mpi_structure.hpp" #include "../parallelization/omp_structure.hpp" @@ -371,6 +372,32 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> return dot_scratch[0]; } + /*! + * \brief Computes the product of V^T W efficiencly, where V and W are tall matrices stored as vectors of CSysVector. + * \param[in] V - Tall matrix. + * \param[in] n - Number of columns to consider from V (if 0, the size of V is used). + * \param[in] W - Tall matrix. + * \param[in] m - Number of columns to consider from W (if 0, the size of W is used). + * \param[out] VTW - Matrix to store the product, must be n by m or larger. + */ + template + static void multiDot(const std::vector& V, size_t n, const std::vector& W, size_t m, + Mat& VTW) { + static constexpr size_t BLOCK_SIZE = 1024; + + if (n == 0) n = V.size(); + if (m == 0) m = W.size(); + if (n == 0 || m == 0) return; + + su2matrix local; + local.resize(n, m) = ScalarType{}; + + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < m; ++j) { + } + } + } + /*! * \brief Squared L2 norm of the vector (via dot with self). * \return Squared L2 norm. 
diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index 392abeebe9f..315ae662e7c 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -380,8 +380,8 @@ void CDiscAdjMultizoneDriver::KrylovInnerIters(unsigned short iZone) { Scalar eps_l = 0.0; Scalar tol_l = KrylovTol / eps; auto iter = min(totalIter-2ul, config_container[iZone]->GetnQuasiNewtonSamples()-2ul); - iter = LinSolver[iZone].FGMRES_LinSolver(AdjRHS[iZone], AdjSol[iZone], product, Identity(), - tol_l, iter, eps_l, monitor, config_container[iZone]); + iter = LinSolver[iZone].FGCRODR_LinSolver(AdjRHS[iZone], AdjSol[iZone], product, Identity(), + tol_l, iter, eps_l, monitor, config_container[iZone]); totalIter -= iter+1; eps *= eps_l; } From 4cdf922e273b74d88be05c92fb6973013d46c558 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 8 Apr 2026 00:34:12 -0700 Subject: [PATCH 2/7] optimize product --- Common/include/linear_algebra/CSysVector.hpp | 25 ++------ Common/src/linear_algebra/CSysSolve.cpp | 4 +- Common/src/linear_algebra/CSysVector.cpp | 67 +++++++++++++++++++- 3 files changed, 73 insertions(+), 23 deletions(-) diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index fdcde50c113..316b4d8e993 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -375,28 +375,13 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> /*! * \brief Computes the product of V^T W efficiencly, where V and W are tall matrices stored as vectors of CSysVector. * \param[in] V - Tall matrix. - * \param[in] n - Number of columns to consider from V (if 0, the size of V is used). + * \param[in] n - Number of columns to consider from V. * \param[in] W - Tall matrix. - * \param[in] m - Number of columns to consider from W (if 0, the size of W is used). 
- * \param[out] VTW - Matrix to store the product, must be n by m or larger. + * \param[in] m - Number of columns to consider from W. + * \return n by m matrix with the result of the product. */ - template - static void multiDot(const std::vector& V, size_t n, const std::vector& W, size_t m, - Mat& VTW) { - static constexpr size_t BLOCK_SIZE = 1024; - - if (n == 0) n = V.size(); - if (m == 0) m = W.size(); - if (n == 0 || m == 0) return; - - su2matrix local; - local.resize(n, m) = ScalarType{}; - - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < m; ++j) { - } - } - } + static const su2matrix& multiDot(const std::vector& V, size_t n, + const std::vector& W, size_t m); /*! * \brief Squared L2 norm of the vector (via dot with self). diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index ce454326546..af1be89e403 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -852,10 +852,10 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::multiDot(V, m + 1, W, k); for (auto i = 0ul; i <= m; ++i) { for (auto j = 0ul; j < k; ++j) { - // TODO(pedro): There are clever ways to avoid this multiplication, or at least use BLAS. 
- VW(i, j) = V[i].dot(W[j]); + VW(i, j) = tmp(i, j); } } const auto Hm = Heigen.topLeftCorner(m + 1, m); diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index d14a63b52bf..5d0009f39fa 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -67,10 +67,75 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB } } +template +const su2matrix& CSysVector::multiDot(const std::vector>& V, + const size_t n, + const std::vector>& W, + const size_t m) { + static constexpr size_t BLOCK_SIZE = 1024; + static su2matrix shared; + + if (n == 0 || m == 0) return shared; + + SU2_OMP_BARRIER + const auto size = V[0].nElmDomain; + + su2matrix local(n, m); + local.setConstant(0); + + SU2_OMP_FOR_(schedule(static) SU2_NOWAIT) + for (size_t offset = 0; offset < size; offset += BLOCK_SIZE) { + const auto limit = std::min(offset + BLOCK_SIZE, size); + for (size_t i = 0; i < n; ++i) { + const auto& vi = V[i]; + for (size_t j = 0; j < m; ++j) { + const auto& wj = W[j]; + ScalarType sum = 0.0; + SU2_OMP_SIMD + for (auto k = offset; k < limit; ++k) { + sum += vi[k] * wj[k]; + } + local(i, j) += sum; + } + } + } + END_SU2_OMP_FOR + + /*--- Reduce over all threads in an ordered way to ensure a deterministic result. ---*/ + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < m; ++j) { + W[j].dot_scratch[omp_get_thread_num()] = local(i, j); + } + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + for (size_t j = 0; j < m; ++j) { + for (int t = 1; t < omp_get_num_threads(); ++t) { + local(i, j) += W[j].dot_scratch[t]; + } + } + END_SU2_OMP_SAFE_GLOBAL_ACCESS + } + + /*--- Single AllReduce of the result, only the master thread communicates. ---*/ + SU2_OMP_MASTER { + shared.resize(n, m); + + const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? 
MPI_FLOAT : MPI_DOUBLE; + SelectMPIWrapper::W::Allreduce(local.data(), shared.data(), n * m, mpi_type, MPI_SUM, + SU2_MPI::GetComm()); + } + END_SU2_OMP_MASTER + + /*--- All threads have the same view of the result. ---*/ + SU2_OMP_BARRIER + + return shared; +} + template CSysVector::~CSysVector() { - if (!std::is_trivial::value) + if constexpr (!std::is_trivial_v) { for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType(); + } MemoryAllocation::aligned_free(vec_val); GPUMemoryAllocation::gpu_free(d_vec_val); From d8df0b8cdd326d9ef44af1029a30bf2fcb08924d Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 8 Apr 2026 10:54:03 -0700 Subject: [PATCH 3/7] nested parallel improvements --- Common/src/linear_algebra/CSysSolve.cpp | 64 ++++++++++++------- .../src/drivers/CDiscAdjMultizoneDriver.cpp | 3 +- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index af1be89e403..af178fcd49e 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -62,8 +62,8 @@ constexpr float linSolEpsilon() { /*--- Computes v = vs * ws or v += vs * ws with unrolling of up to 4 iterations. ---*/ template -void LinearCombination(const unsigned long n, const Vectors& vs, const Weights& ws, CSysVector& v, - bool inc = false) { +void LinearCombinationImpl(const unsigned long n, const Vectors& vs, const Weights& ws, CSysVector& v, + bool inc = false) { if (n == 0) { if (!inc) v = ScalarType{}; return; @@ -105,19 +105,32 @@ void LinearCombination(const unsigned long n, const Vectors& vs, const Weights& /*--- Overload to handle a vector of CSysVector directly. 
---*/ template -void LinearCombination(const unsigned long n, const std::vector>& vs, const Weights& ws, - CSysVector& v, bool inc = false) { - LinearCombination( +void LinearCombinationImpl(const unsigned long n, const std::vector>& vs, const Weights& ws, + CSysVector& v, bool inc = false) { + LinearCombinationImpl( n, [&vs](auto i) -> auto& { return vs[i]; }, ws, v, inc); } /*--- Overload to handle a std::vector of weights directly. ---*/ template -void LinearCombination(const unsigned long n, const Vectors& vs, const std::vector& ws, - CSysVector& v, bool inc = false) { - LinearCombination( +void LinearCombinationImpl(const unsigned long n, const Vectors& vs, const std::vector& ws, + CSysVector& v, bool inc = false) { + LinearCombinationImpl( n, vs, [&ws](auto i) { return ws[i]; }, v, inc); } + +/*--- Wrapper around LinearCombinationImpl. ---*/ +template +void LinearCombination(bool parallel, Ts&&... args) { + if (parallel) { + SU2_OMP_PARALLEL + LinearCombinationImpl(std::forward(args)...); + END_SU2_OMP_PARALLEL + } else { + LinearCombinationImpl(std::forward(args)...); + } +} + } // namespace template @@ -578,13 +591,7 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector const auto& { return (*basis)[j - i]; }; LinearCombination( - j + 1, reversed, [&](auto i) { return invR(j - i, j); }, (*basis)[j]); + nestedParallel, j + 1, reversed, [&](auto i) { return invR(j - i, j); }, (*basis)[j]); } if (j == 0) break; // j is unsigned, avoid underflow. 
} } - LinearCombination(k, Z, vr, x, true); + LinearCombination(nestedParallel, k, Z, vr, x, true); } ScalarType rNorm = r.norm(); auto iter = k; @@ -839,8 +846,8 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector::multiDot(V, m + 1, W, k); + const su2matrix* VWk = nullptr; + if (nestedParallel) { + SU2_OMP_PARALLEL { + const auto& tmp = CSysVector::multiDot(V, m + 1, W, k); + SU2_OMP_MASTER + VWk = &tmp; + END_SU2_OMP_MASTER + } + END_SU2_OMP_PARALLEL + } else { + VWk = &CSysVector::multiDot(V, m + 1, W, k); + } for (auto i = 0ul; i <= m; ++i) { for (auto j = 0ul; j < k; ++j) { - VW(i, j) = tmp(i, j); + VW(i, j) = (*VWk)(i, j); } } const auto Hm = Heigen.topLeftCorner(m + 1, m); @@ -917,7 +935,7 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVectorGetnQuasiNewtonSamples()-2ul); iter = LinSolver[iZone].FGCRODR_LinSolver(AdjRHS[iZone], AdjSol[iZone], product, Identity(), - tol_l, iter, eps_l, monitor, config_container[iZone]); + tol_l, iter, eps_l, monitor, config_container[iZone], + FgcrodrMode::SAME_MAT); totalIter -= iter+1; eps *= eps_l; } From 972b9b212468da185341db2b3afb15bf996593b3 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 10 Apr 2026 04:36:55 +0100 Subject: [PATCH 4/7] add test, non flexible mode for FGCRODR --- Common/include/linear_algebra/CSysSolve.hpp | 6 ++ Common/src/linear_algebra/CSysSolve.cpp | 91 ++++++++++------- TestCases/vandv.py | 14 ++- TestCases/vandv/rans/30p30n/config_ad.cfg | 104 ++++++++++++++++++++ 4 files changed, 178 insertions(+), 37 deletions(-) create mode 100644 TestCases/vandv/rans/30p30n/config_ad.cfg diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index 611223b657b..ec083535eef 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -41,6 +41,11 @@ #include "CSysVector.hpp" #include "../option_structure.hpp" 
+SU2_IGNORE_WARNING("-Wmaybe-uninitialized") +#include "Eigen/Core" +#include "Eigen/Dense" +SU2_RESTORE_WARNING + class CConfig; class CGeometry; template @@ -110,6 +115,7 @@ class CSysSolve { mutable unsigned long k = 0; mutable std::vector Z, V; /*!< \brief Large matrices used by FGMRES, v^i+1 = A * z^i. */ mutable std::vector W, T; /*!< \brief Large matrices used by FGCRODR for deflation vectors. */ + mutable Eigen::Matrix VWk; /*!< \brief Temporary used when it is necessary to interface between active and passive types. */ VectorType LinSysSol_tmp; diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index af178fcd49e..2141c263344 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -35,8 +35,6 @@ #include "../../include/linear_algebra/CPreconditioner.hpp" SU2_IGNORE_WARNING("-Wmaybe-uninitialized") -#include "Eigen/Core" -#include "Eigen/Dense" #include "Eigen/Eigenvalues" SU2_RESTORE_WARNING @@ -661,6 +659,7 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVectorGetLinear_Solver_Restart_Deflation(), m - 1); const bool masterRank = (SU2_MPI::GetRank() == MASTER_NODE); + const bool flexible = !precond.IsIdentity(); /*--- If we call the solver outside of a parallel region, but the number of threads allows, * we still want to parallelize some of the expensive operations. ---*/ const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1; @@ -685,8 +684,10 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector auto& { return i < k ? W[i] : V[i]; }; + + /*--- With an identity preconditioner Z = W. ---*/ + auto GetZ = [&](auto i) -> auto& { + if (flexible) return Z[i]; + return GetW(i); + }; + /*--- Rebuild Z, V, and W for the new matrix if necessary. 
* Q * R = A * Z * V = Q = A * (Z * R^-1) @@ -725,7 +735,7 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector const auto& { return (*basis)[j - i]; }; LinearCombination( nestedParallel, j + 1, reversed, [&](auto i) { return invR(j - i, j); }, (*basis)[j]); + if (!flexible) break; // skip Z. } if (j == 0) break; // j is unsigned, avoid underflow. } } - LinearCombination(nestedParallel, k, Z, vr, x, true); + LinearCombination(nestedParallel, k, GetZ, vr, x, true); } ScalarType rNorm = r.norm(); auto iter = k; @@ -800,8 +811,12 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector* VWk = nullptr; - if (nestedParallel) { - SU2_OMP_PARALLEL { - const auto& tmp = CSysVector::multiDot(V, m + 1, W, k); - SU2_OMP_MASTER - VWk = &tmp; - END_SU2_OMP_MASTER + if (mode != FgcrodrMode::SAME_MAT) { + const su2matrix* VWk = nullptr; + if (nestedParallel) { + SU2_OMP_PARALLEL + VWk = &CSysVector::multiDot(V, m + 1, W, k); + END_SU2_OMP_PARALLEL + } else { + VWk = &CSysVector::multiDot(V, m + 1, W, k); } - END_SU2_OMP_PARALLEL - } else { - VWk = &CSysVector::multiDot(V, m + 1, W, k); - } - for (auto i = 0ul; i <= m; ++i) { - for (auto j = 0ul; j < k; ++j) { - VW(i, j) = (*VWk)(i, j); + for (auto i = 0ul; i <= m; ++i) { + for (auto j = 0ul; j < k; ++j) { + VW(i, j) = (*VWk)(i, j); + } } + } else if (k > 0) { + /*--- See notes near the end of the outer loop. ---*/ + VW.topLeftCorner(k, k) = VWk.topRows(k); } const auto Hm = Heigen.topLeftCorner(m + 1, m); EigenMatrix HTVW = Hm.transpose() * VW; @@ -938,10 +951,16 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector auto& { return i < k ? W[i] : V[i]; }); + modify(PinvR, GetW); BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { + /*--- Initialize VWk, then apply the V and W modifications of the left and right, respectively. 
---*/ + if (mode == FgcrodrMode::SAME_MAT) { + if (k == 0) { + VWk = EigenMatrix::Identity(m + 1, k_new); + } + VWk.topRows(k) = Q.transpose() * (VWk * PinvR); + } /*--- T and W are the same size, so we can swap them. ---*/ std::swap(T, W); k = k_new; @@ -956,8 +975,10 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector tol * 10) { if (masterRank) { diff --git a/TestCases/vandv.py b/TestCases/vandv.py index d207912bbfa..1eac30f0499 100644 --- a/TestCases/vandv.py +++ b/TestCases/vandv.py @@ -28,7 +28,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with SU2. If not, see . -import sys +import sys, os from TestCase import TestCase def main(): @@ -48,6 +48,15 @@ def main(): p30n30.test_vals = [-11.267106, -11.168215, -11.182822, -10.949673, -14.233489, 0.052235, 2.830394, 1.318894, -1.210648, 1, 1.2763e+01] test_list.append(p30n30) + os.symlink("vandv/rans/30p30n/solution.dat", "vandv/rans/30p30n/solution_0.dat") + p30n30_ad = TestCase('30P30N_ad') + p30n30_ad.cfg_dir = "vandv/rans/30p30n" + p30n30_ad.cfg_file = "config_ad.cfg" + p30n30_ad.test_iter = 5 + p30n30_ad.test_vals = [-8.167332, -8.738471, -8.762033, -8.500107, -7.433292, -0.808628, -2.091805, 1.1791e-01, 3.4123e+01] + p30n30_ad.command = TestCase.Command("mpirun -n 2", "SU2_CFD_AD") + test_list.append(p30n30_ad) + # flat plate - sst-v1994m flatplate_sst1994m = TestCase('flatplate_sst1994m') flatplate_sst1994m.cfg_dir = "vandv/rans/flatplate" @@ -120,7 +129,8 @@ def main(): ################# for test in test_list: - test.command = TestCase.Command("mpirun -n 2", "SU2_CFD") + if test.command.empty(): + test.command = TestCase.Command("mpirun -n 2", "SU2_CFD") test.timeout = 300 test.tol = 1e-5 #end diff --git a/TestCases/vandv/rans/30p30n/config_ad.cfg b/TestCases/vandv/rans/30p30n/config_ad.cfg new file mode 100644 index 00000000000..263be10bab4 --- /dev/null +++ b/TestCases/vandv/rans/30p30n/config_ad.cfg @@ -0,0 
+1,104 @@ +% ------------------------------- SOLVER -------------------------------- % +% +SOLVER= RANS +KIND_TURB_MODEL= SA +REF_DIMENSIONALIZATION= FREESTREAM_VEL_EQ_MACH +% +OBJECTIVE_FUNCTION= LIFT +% +% ----------------------------- FREESTREAM ------------------------------ % +% +MACH_NUMBER= 0.17 +AOA= 5.5 +INIT_OPTION= REYNOLDS +FREESTREAM_OPTION= TEMPERATURE_FS +FREESTREAM_TEMPERATURE= 295.56 +REYNOLDS_NUMBER= 1.71E6 +REYNOLDS_LENGTH= 0.4572 +% +% -------------------------- REFERENCE VALUES --------------------------- % +% +REF_AREA= 0.4572 +REF_LENGTH= 0.4572 +REF_ORIGIN_MOMENT_X= 0.0 +REF_ORIGIN_MOMENT_Y= 0.0 +REF_ORIGIN_MOMENT_Z= 0.0 +% +% ------------------------- BOUNDARY CONDITIONS ------------------------- % +% +MARKER_HEATFLUX= ( wall, 0.0 ) +MARKER_FAR= ( farfield ) +MARKER_PLOTTING= ( wall ) +MARKER_MONITORING= ( wall ) +% +% ---------------------------- FLUID MODELS ----------------------------- % +% +FLUID_MODEL= STANDARD_AIR +GAMMA_VALUE= 1.4 +GAS_CONSTANT= 287.058 +% +VISCOSITY_MODEL= SUTHERLAND +MU_REF= 1.716E-5 +MU_T_REF= 273.15 +SUTHERLAND_CONSTANT= 110.4 +% +CONDUCTIVITY_MODEL= CONSTANT_PRANDTL +PRANDTL_LAM= 0.72 +PRANDTL_TURB= 0.90 +% +% ----------------------- SPATIAL DISCRETIZATION ------------------------ % +% +NUM_METHOD_GRAD= GREEN_GAUSS +CONV_NUM_METHOD_FLOW= ROE +ENTROPY_FIX_COEFF= 1e-5 +MUSCL_FLOW= YES +SLOPE_LIMITER_FLOW= VAN_ALBADA_EDGE +% +CONV_NUM_METHOD_TURB= SCALAR_UPWIND +MUSCL_TURB= NO +% +% ---------- PSEUDOTIME INTEGRATION / CONVERGENCE ACCELERATION ---------- % +% +TIME_DISCRE_FLOW= EULER_IMPLICIT +TIME_DISCRE_TURB= EULER_IMPLICIT +% +CFL_NUMBER= 2500 +CFL_REDUCTION_TURB= 1 +CFL_ADAPT= NO +% +DISCADJ_LIN_SOLVER= SMOOTHER +DISCADJ_LIN_PREC= ILU +LINEAR_SOLVER_ERROR= 1e-30 +LINEAR_SOLVER_ITER= 20 +LINEAR_SOLVER_SMOOTHER_RELAXATION= 0.6 +% +MGLEVEL= 0 +% +% Adjoint GMRES settings. +MULTIZONE= YES +NEWTON_KRYLOV= YES +% These 3 numbers should usually be the same. 
+INNER_ITER= 60 +QUASI_NEWTON_NUM_SAMPLES= 60 +LINEAR_SOLVER_RESTART_FREQUENCY= 60 +% This should be ~1/4 of the restart frequency. After the first outer +% iteration, the solver does "frequency - deflation" inner iterations. +LINEAR_SOLVER_RESTART_DEFLATION= 15 +% +% ------------------------ CONVERGENCE CRITERIA ------------------------- % +% +OUTER_ITER= 10 +CONV_RESIDUAL_MINVAL= -8 +% +% --------------------------- INPUT / OUTPUT ---------------------------- % +% +MESH_FILENAME= 2D_L1_coarse_r1.su2 +MESH_FORMAT= SU2 +RESTART_SOL= NO +OUTPUT_WRT_FREQ= 1000 +SCREEN_WRT_FREQ_INNER= 1 +WRT_AD_STATISTICS= NO +WRT_ZONE_CONV= NO +WRT_ZONE_HIST= YES +HISTORY_OUTPUT= ( ITER, RMS_RES, LINSOL, SENSITIVITY ) +SCREEN_OUTPUT= ( OUTER_ITER, ITER_TIME, RMS_RES[0], LINSOL_RESIDUAL[0], LINSOL_RESIDUAL_TURB[0], SENS_AOA[0], SENS_MACH[0] ) From 322d4d42e2b5278b86b60b7ffdd07e3852d06c3d Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 10 Apr 2026 12:38:31 +0100 Subject: [PATCH 5/7] fix the VkWk optimization --- Common/include/linear_algebra/CSysSolve.hpp | 2 +- Common/include/linear_algebra/CSysVector.hpp | 5 ++- Common/src/linear_algebra/CSysSolve.cpp | 41 +++++++++++--------- Common/src/linear_algebra/CSysVector.cpp | 4 +- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index ec083535eef..86d89279ba3 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -115,7 +115,7 @@ class CSysSolve { mutable unsigned long k = 0; mutable std::vector Z, V; /*!< \brief Large matrices used by FGMRES, v^i+1 = A * z^i. */ mutable std::vector W, T; /*!< \brief Large matrices used by FGCRODR for deflation vectors. */ - mutable Eigen::Matrix VWk; + mutable Eigen::Matrix VkWk; /*!< \brief Temporary used when it is necessary to interface between active and passive types. 
*/ VectorType LinSysSol_tmp; diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index 316b4d8e993..c0ab789d4ff 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -375,12 +375,13 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> /*! * \brief Computes the product of V^T W efficiencly, where V and W are tall matrices stored as vectors of CSysVector. * \param[in] V - Tall matrix. - * \param[in] n - Number of columns to consider from V. + * \param[in] i0 - First column of V to consider. + * \param[in] n - Number of columns to consider from V starting at i0. * \param[in] W - Tall matrix. * \param[in] m - Number of columns to consider from W. * \return n by m matrix with the result of the product. */ - static const su2matrix& multiDot(const std::vector& V, size_t n, + static const su2matrix& multiDot(const std::vector& V, size_t i0, size_t n, const std::vector& W, size_t m); /*! diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 2141c263344..218479ee0ee 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -658,8 +658,9 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVectorGetLinear_Solver_Restart_Frequency(), max_iter); const auto deflation = min(config->GetLinear_Solver_Restart_Deflation(), m - 1); - const bool masterRank = (SU2_MPI::GetRank() == MASTER_NODE); const bool flexible = !precond.IsIdentity(); + const bool same_mat = mode == FgcrodrMode::SAME_MAT; + const bool masterRank = SU2_MPI::GetRank() == MASTER_NODE; /*--- If we call the solver outside of a parallel region, but the number of threads allows, * we still want to parallelize some of the expensive operations. 
---*/ const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1; @@ -732,7 +733,7 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector().solve(EigenMatrix::Identity(k, k)); for (auto j = k - 1;; --j) { for (auto* basis : {&W, &Z}) { @@ -871,23 +872,29 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector 0) VW.topLeftCorner(k, k) = VkWk; + + /*--- Rest of VW. Either V[k] * Wk or the entire V * Wk depending on the mode. + * When the matrix stays constant, V[k+1:m+1] are orthogonal to Wk, but when it changes, + * we need to compute that part of the product. Since m >> k, there is less benefit in + * avoiding the cost of V[0:k] * Wk and we opt to make the code a little simpler. ---*/ const su2matrix* VWk = nullptr; + const auto i0 = same_mat ? k : 0; + const auto n = same_mat ? 1 : m + 1; if (nestedParallel) { SU2_OMP_PARALLEL - VWk = &CSysVector::multiDot(V, m + 1, W, k); + VWk = &CSysVector::multiDot(V, i0, n, W, k); END_SU2_OMP_PARALLEL } else { - VWk = &CSysVector::multiDot(V, m + 1, W, k); + VWk = &CSysVector::multiDot(V, i0, n, W, k); } - for (auto i = 0ul; i <= m; ++i) { + for (auto i = 0ul; i < n; ++i) { for (auto j = 0ul; j < k; ++j) { - VW(i, j) = (*VWk)(i, j); + VW(i0 + i, j) = (*VWk)(i, j); } } - } else if (k > 0) { - /*--- See notes near the end of the outer loop. 
---*/ - VW.topLeftCorner(k, k) = VWk.topRows(k); } const auto Hm = Heigen.topLeftCorner(m + 1, m); EigenMatrix HTVW = Hm.transpose() * VW; @@ -954,13 +961,9 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector::FGCRODR_LinSolverImpl(const CSysVector::Initialize(unsigned long numBlk, unsigned long numB template const su2matrix& CSysVector::multiDot(const std::vector>& V, - const size_t n, + const size_t i0, const size_t n, const std::vector>& W, const size_t m) { static constexpr size_t BLOCK_SIZE = 1024; @@ -87,7 +87,7 @@ const su2matrix& CSysVector::multiDot(const std::vector< for (size_t offset = 0; offset < size; offset += BLOCK_SIZE) { const auto limit = std::min(offset + BLOCK_SIZE, size); for (size_t i = 0; i < n; ++i) { - const auto& vi = V[i]; + const auto& vi = V[i0 + i]; for (size_t j = 0; j < m; ++j) { const auto& wj = W[j]; ScalarType sum = 0.0; From 823742a084217b2250a769d04e8f4e5ae2ad137d Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Sat, 11 Apr 2026 08:05:40 +0100 Subject: [PATCH 6/7] simplify settings --- Common/include/linear_algebra/CSysSolve.hpp | 17 +++++++++-------- Common/src/linear_algebra/CSysSolve.cpp | 11 ++++++----- SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp | 5 +++-- .../disc_adj_fsi/Airfoil_2d/configFlow.cfg | 10 +++++----- TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg | 5 ++--- TestCases/parallel_regression.py | 1 + TestCases/vandv.py | 4 ++-- TestCases/vandv/rans/30p30n/config_ad.cfg | 9 ++++----- 8 files changed, 32 insertions(+), 30 deletions(-) diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index 86d89279ba3..84b63bcfb4f 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -298,8 +298,8 @@ class CSysSolve { template unsigned long FGCRODR_LinSolverImpl(const VectorType& b, VectorType& x, const ProductType& mat_vec, const PrecondType& precond, ScalarType tol, unsigned long max_iter, - ScalarType& 
residual, bool monitoring, const CConfig* config, - FgcrodrMode mode) const; + ScalarType& residual, bool monitoring, const CConfig* config, FgcrodrMode mode, + unsigned long custom_m) const; /*! * \brief Creates the inner solver for nested preconditioning if the settings allow it. @@ -322,7 +322,7 @@ class CSysSolve { * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace * \param[out] residual - final normalized residual - * \param[in] monitoring - turn on priting residuals from solver to screen. + * \param[in] monitoring - turn on priting residuals from solver to screen * \param[in] config - Definition of the particular problem. */ unsigned long CG_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec, const PrecondType& precond, @@ -338,7 +338,7 @@ class CSysSolve { * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace * \param[out] residual - final normalized residual - * \param[in] monitoring - turn on priting residuals from solver to screen. + * \param[in] monitoring - turn on priting residuals from solver to screen * \param[in] config - Definition of the particular problem. */ unsigned long FGMRES_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec, @@ -361,14 +361,15 @@ class CSysSolve { * \param[in] tol - tolerance with which to solve the system * \param[in] max_iter - maximum number of iterations * \param[out] residual - final normalized residual - * \param[in] monitoring - turn on priting residuals from solver to screen. + * \param[in] monitoring - turn on priting residuals from solver to screen * \param[in] config - Definition of the particular problem. * \param[in] mode - See FgcrodrMode. + * \param[in] custom_m - alternative maximum size of the search subspace, overrides the config value if != 0. 
*/ unsigned long FGCRODR_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec, const PrecondType& precond, ScalarType tol, unsigned long max_iter, ScalarType& residual, bool monitoring, const CConfig* config, - FgcrodrMode mode = FgcrodrMode::NORMAL) const; + FgcrodrMode mode = FgcrodrMode::NORMAL, unsigned long custom_m = 0) const; /*! * \brief Biconjugate Gradient Stabilized Method (BCGSTAB) @@ -379,7 +380,7 @@ class CSysSolve { * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace * \param[out] residual - final normalized residual - * \param[in] monitoring - turn on priting residuals from solver to screen. + * \param[in] monitoring - turn on priting residuals from solver to screen * \param[in] config - Definition of the particular problem. */ unsigned long BCGSTAB_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec, @@ -395,7 +396,7 @@ class CSysSolve { * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum number of iterations * \param[out] residual - final normalized residual - * \param[in] monitoring - turn on priting residuals from solver to screen. + * \param[in] monitoring - turn on priting residuals from solver to screen * \param[in] config - Definition of the particular problem. 
*/ unsigned long Smoother_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec, diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 218479ee0ee..e093452cfb9 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -650,12 +650,12 @@ unsigned long CSysSolve::FGCRODR_LinSolverImpl(const CSysVector& mat_vec, const CPreconditioner& precond, ScalarType tol, unsigned long max_iter, ScalarType& residual, - bool monitoring, const CConfig* config, - FgcrodrMode mode) const { + bool monitoring, const CConfig* config, FgcrodrMode mode, + unsigned long custom_m) const { using EigenMatrix = Eigen::Matrix; using EigenVector = Eigen::Matrix; - auto m = min(config->GetLinear_Solver_Restart_Frequency(), max_iter); + auto m = min(custom_m != 0 ? custom_m : config->GetLinear_Solver_Restart_Frequency(), max_iter); const auto deflation = min(config->GetLinear_Solver_Restart_Deflation(), m - 1); const bool flexible = !precond.IsIdentity(); @@ -1021,9 +1021,10 @@ unsigned long CSysSolve::FGCRODR_LinSolver(const CSysVector& mat_vec, const CPreconditioner& precond, ScalarType tol, unsigned long max_iter, ScalarType& residual, bool monitoring, - const CConfig* config, [[maybe_unused]] FgcrodrMode mode) const { + const CConfig* config, [[maybe_unused]] FgcrodrMode mode, + [[maybe_unused]] unsigned long custom_m) const { if constexpr (std::is_same_v || std::is_same_v) { - return FGCRODR_LinSolverImpl<>(b, x, mat_vec, precond, tol, max_iter, residual, monitoring, config, mode); + return FGCRODR_LinSolverImpl<>(b, x, mat_vec, precond, tol, max_iter, residual, monitoring, config, mode, custom_m); } else { return RFGMRES_LinSolver(b, x, mat_vec, precond, tol, max_iter, residual, monitoring, config); } diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index b31926271cf..c41544a8f3a 100644 --- 
a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -382,7 +382,7 @@ void CDiscAdjMultizoneDriver::KrylovInnerIters(unsigned short iZone) { auto iter = min(totalIter-2ul, config_container[iZone]->GetnQuasiNewtonSamples()-2ul); iter = LinSolver[iZone].FGCRODR_LinSolver(AdjRHS[iZone], AdjSol[iZone], product, Identity(), tol_l, iter, eps_l, monitor, config_container[iZone], - FgcrodrMode::SAME_MAT); + FgcrodrMode::SAME_MAT, iter); totalIter -= iter+1; eps *= eps_l; } @@ -416,7 +416,7 @@ void CDiscAdjMultizoneDriver::Run() { /*--- Temporary warning because we need to test writing intermediate output to file (requires re-recording). ---*/ for(iZone = 0; iZone < nZone; iZone++) { for (auto iVolumeFreq = 0; iVolumeFreq < config_container[iZone]->GetnVolumeOutputFrequencies(); iVolumeFreq++){ - if (config_container[iZone]->GetVolumeOutputFrequency(iVolumeFreq) < nOuterIter) { + if (!time_domain && config_container[iZone]->GetVolumeOutputFrequency(iVolumeFreq) < nOuterIter) { if (rank == MASTER_NODE) { cout << "\nWARNING (iZone = " << iZone << "): " @@ -574,6 +574,7 @@ void CDiscAdjMultizoneDriver::Run() { } if (time_domain) { + for (const auto& ls : LinSolver) ls.ResetDeflation(); EvaluateSensitivities(TimeIter, (TimeIter+1) == driver_config->GetnTime_Iter()); } diff --git a/TestCases/disc_adj_fsi/Airfoil_2d/configFlow.cfg b/TestCases/disc_adj_fsi/Airfoil_2d/configFlow.cfg index 1c62889d0b6..96fe4fb0419 100755 --- a/TestCases/disc_adj_fsi/Airfoil_2d/configFlow.cfg +++ b/TestCases/disc_adj_fsi/Airfoil_2d/configFlow.cfg @@ -49,13 +49,13 @@ JST_SENSOR_COEFF= ( 0.5, 0.02 ) TIME_DISCRE_FLOW= EULER_IMPLICIT % Linear solvers ------------------------------------------------------- % -LINEAR_SOLVER= FGMRES -LINEAR_SOLVER_PREC= ILU -LINEAR_SOLVER_ERROR= 1E-4 -LINEAR_SOLVER_ITER= 50 DISCADJ_LIN_SOLVER= FGMRES DISCADJ_LIN_PREC= ILU +LINEAR_SOLVER_ERROR= 1e-4 +LINEAR_SOLVER_ITER= 50 +% NEWTON_KRYLOV= YES 
+LINEAR_SOLVER_RESTART_DEFLATION= 8 QUASI_NEWTON_NUM_SAMPLES= 999 % DEFORM_LINEAR_SOLVER= CONJUGATE_GRADIENT @@ -71,7 +71,7 @@ TIME_ITER= 1 BGS_RELAXATION= FIXED_PARAMETER STAT_RELAX_PARAMETER= 1.0 % fluid -INNER_ITER= 51 +INNER_ITER= 41 CONV_STARTITER= 0 CONV_RESIDUAL_MINVAL= -9 diff --git a/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg b/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg index f5c233ec9cc..a59a5b0ce8f 100644 --- a/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg +++ b/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg @@ -54,9 +54,7 @@ ENTROPY_FIX_COEFF= 0.01 TIME_DISCRE_FLOW= EULER_IMPLICIT % Linear solvers ------------------------------------------------------- % -LINEAR_SOLVER= FGMRES -LINEAR_SOLVER_PREC= ILU -LINEAR_SOLVER_ERROR= 1E-6 +LINEAR_SOLVER_ERROR= 1E-30 LINEAR_SOLVER_ITER= 25 DISCADJ_LIN_SOLVER= SMOOTHER DISCADJ_LIN_PREC= ILU @@ -64,6 +62,7 @@ LINEAR_SOLVER_SMOOTHER_RELAXATION= 0.7 MGLEVEL= 0 NEWTON_KRYLOV= YES QUASI_NEWTON_NUM_SAMPLES= 999 +LINEAR_SOLVER_RESTART_DEFLATION= 0 % DEFORM_LINEAR_SOLVER= CONJUGATE_GRADIENT DEFORM_LINEAR_SOLVER_PREC= ILU diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index 2925725d1eb..c619d8b5ea4 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -1323,6 +1323,7 @@ def main(): nonlinear_plane_stress.cfg_file = "nonlinear_plane_stress_2d.cfg" nonlinear_plane_stress.test_iter = 19 nonlinear_plane_stress.test_vals = [-7.433449, -3.355607, -13.983863, 162480, 43, -4.070373] + nonlinear_plane_stress.tol = [2e-4, 2e-4, 2e-4, 1e-5, 1e-5, 4e-4] test_list.append(nonlinear_plane_stress) # Dynamic beam, 2d diff --git a/TestCases/vandv.py b/TestCases/vandv.py index 1eac30f0499..4c54ff4228b 100644 --- a/TestCases/vandv.py +++ b/TestCases/vandv.py @@ -28,7 +28,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with SU2. If not, see <http://www.gnu.org/licenses/>. 
-import sys, os +import sys, shutil from TestCase import TestCase def main(): @@ -48,7 +48,7 @@ def main(): p30n30.test_vals = [-11.267106, -11.168215, -11.182822, -10.949673, -14.233489, 0.052235, 2.830394, 1.318894, -1.210648, 1, 1.2763e+01] test_list.append(p30n30) - os.symlink("vandv/rans/30p30n/solution.dat", "vandv/rans/30p30n/solution_0.dat") + shutil.copy("vandv/rans/30p30n/solution.dat", "vandv/rans/30p30n/solution_0.dat") p30n30_ad = TestCase('30P30N_ad') p30n30_ad.cfg_dir = "vandv/rans/30p30n" p30n30_ad.cfg_file = "config_ad.cfg" diff --git a/TestCases/vandv/rans/30p30n/config_ad.cfg b/TestCases/vandv/rans/30p30n/config_ad.cfg index 263be10bab4..5892f8d5520 100644 --- a/TestCases/vandv/rans/30p30n/config_ad.cfg +++ b/TestCases/vandv/rans/30p30n/config_ad.cfg @@ -77,13 +77,12 @@ MGLEVEL= 0 % Adjoint GMRES settings. MULTIZONE= YES NEWTON_KRYLOV= YES -% These 3 numbers should usually be the same. -INNER_ITER= 60 -QUASI_NEWTON_NUM_SAMPLES= 60 -LINEAR_SOLVER_RESTART_FREQUENCY= 60 +QUASI_NEWTON_NUM_SAMPLES= 999 +% Acts as the restart frequency. +INNER_ITER= 30 % This should be ~1/4 of the restart frequency. After the first outer % iteration, the solver does "frequency - deflation" inner iterations. 
-LINEAR_SOLVER_RESTART_DEFLATION= 15 +LINEAR_SOLVER_RESTART_DEFLATION= 5 % % ------------------------ CONVERGENCE CRITERIA ------------------------- % % From 2c55e8e01fecb733435005193bcab2c0708e29fa Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Sat, 11 Apr 2026 11:50:50 +0100 Subject: [PATCH 7/7] update --- TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg | 2 +- TestCases/parallel_regression_AD.py | 6 ++++-- TestCases/vandv.py | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg b/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg index a59a5b0ce8f..edcd5c2dc31 100644 --- a/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg +++ b/TestCases/disc_adj_fsi/dyn_fsi/configFlow.cfg @@ -62,7 +62,7 @@ LINEAR_SOLVER_SMOOTHER_RELAXATION= 0.7 MGLEVEL= 0 NEWTON_KRYLOV= YES QUASI_NEWTON_NUM_SAMPLES= 999 -LINEAR_SOLVER_RESTART_DEFLATION= 0 +LINEAR_SOLVER_RESTART_DEFLATION= 5 % DEFORM_LINEAR_SOLVER= CONJUGATE_GRADIENT DEFORM_LINEAR_SOLVER_PREC= ILU diff --git a/TestCases/parallel_regression_AD.py b/TestCases/parallel_regression_AD.py index 9e4863cc42b..2c2369c6e9e 100644 --- a/TestCases/parallel_regression_AD.py +++ b/TestCases/parallel_regression_AD.py @@ -286,8 +286,8 @@ def main(): discadj_fsi2.cfg_dir = "disc_adj_fsi/Airfoil_2d" discadj_fsi2.cfg_file = "config.cfg" discadj_fsi2.test_iter = 8 - discadj_fsi2.test_vals = [-4.773024, 0.915849, -3.863369, 0.295450, 3.839800] - discadj_fsi2.test_vals_aarch64 = [-4.772641, 0.917601, -3.863369, 0.295450, 3.839800] + discadj_fsi2.test_vals = [-3.824870, 1.979160, -3.863368, 0.295450, 3.839800] + discadj_fsi2.test_vals_aarch64 = [-3.824870, 1.979160, -3.863368, 0.295450, 3.839800] discadj_fsi2.tol = 0.00001 test_list.append(discadj_fsi2) @@ -559,6 +559,8 @@ def main(): dyn_discadj_fsi.reference_file = "grad_dv.opt.ref" dyn_discadj_fsi.reference_file_aarch64 = "grad_dv_aarch64.opt.ref" dyn_discadj_fsi.test_file = "grad_young.opt" + dyn_discadj_fsi.comp_threshold = 1e-6 + 
dyn_discadj_fsi.tol_file_percent = 0.1 dyn_discadj_fsi.unsteady = True pass_list.append(dyn_discadj_fsi.run_filediff()) test_list.append(dyn_discadj_fsi) diff --git a/TestCases/vandv.py b/TestCases/vandv.py index 4c54ff4228b..ea6855b661d 100644 --- a/TestCases/vandv.py +++ b/TestCases/vandv.py @@ -48,12 +48,13 @@ def main(): p30n30.test_vals = [-11.267106, -11.168215, -11.182822, -10.949673, -14.233489, 0.052235, 2.830394, 1.318894, -1.210648, 1, 1.2763e+01] test_list.append(p30n30) + # This is not part of the V&V cases yet, it's tested in this script because it is a relatively long test (~1 min). shutil.copy("vandv/rans/30p30n/solution.dat", "vandv/rans/30p30n/solution_0.dat") p30n30_ad = TestCase('30P30N_ad') p30n30_ad.cfg_dir = "vandv/rans/30p30n" p30n30_ad.cfg_file = "config_ad.cfg" - p30n30_ad.test_iter = 5 - p30n30_ad.test_vals = [-8.167332, -8.738471, -8.762033, -8.500107, -7.433292, -0.808628, -2.091805, 1.1791e-01, 3.4123e+01] + p30n30_ad.test_iter = 9 + p30n30_ad.test_vals = [-7.283709, -6.072615, -5.995304, -7.197048, -4.568373, -1.167146, -2.316777, 1.1791e-01, 3.4123e+01] p30n30_ad.command = TestCase.Command("mpirun -n 2", "SU2_CFD_AD") test_list.append(p30n30_ad)