su2code · eorbay-metu · Sep 8, 2025 · Sep 8, 2025 · Oct 6, 2025 · Oct 8, 2025
diff --git a/Common/include/CConfig.hpp b/Common/include/CConfig.hpp
@@ -53,6 +53,12 @@
 
 using namespace std;
 
+/*Inner solver for nested linear solver:*/
+enum ENUM_LINEAR_SOLVER_INNER {
+  INNER_SOLVER_NONE = 0,
+  INNER_SOLVER_BCGSTAB
+};
+
 /*!
  * \class CConfig
  * \brief Main class for defining the problem; basically this class reads the configuration file, and
@@ -430,6 +436,7 @@ class CConfig {
   unsigned short nQuasiNewtonSamples;  /*!< \brief Number of samples used in quasi-Newton solution methods. */
   bool UseVectorization;       /*!< \brief Whether to use vectorized numerics schemes. */
   bool NewtonKrylov;           /*!< \brief Use a coupled Newton method to solve the flow equations. */
+  bool Nested_Linear_Solver;   /*!< \brief Enable nested Krylov linear solver (e.g. FGMRES + inner solver). */
   array<unsigned short,3> NK_IntParam{{20, 3, 2}}; /*!< \brief Integer parameters for NK method. */
   array<su2double,5> NK_DblParam{{-2.0, 0.1, -3.0, 1e-4, 1.0}}; /*!< \brief Floating-point parameters for NK method. */
   su2double NK_Relaxation = 1.0;
@@ -525,6 +532,7 @@ class CConfig {
   Kind_Deform_Linear_Solver,             /*!< Numerical method to deform the grid */
   Kind_Deform_Linear_Solver_Prec,        /*!< \brief Preconditioner of the linear solver. */
   Kind_Linear_Solver,                    /*!< \brief Numerical solver for the implicit scheme. */
+  Kind_Linear_Solver_Inner,              /*!< \brief Inner solver used in nested Krylov schemes. */
   Kind_Linear_Solver_Prec,               /*!< \brief Preconditioner of the linear solver. */
   Kind_DiscAdj_Linear_Solver,            /*!< \brief Linear solver for the discrete adjoint system. */
   Kind_DiscAdj_Linear_Prec,              /*!< \brief Preconditioner of the discrete adjoint linear solver. */
@@ -4281,6 +4289,16 @@ class CConfig {
    */
   unsigned short GetKind_Linear_Solver(void) const { return Kind_Linear_Solver; }
 
+  /*!
+   * \brief Check whether nested Krylov linear solver is enabled.
+   */
+  bool GetNested_Linear_Solver(void) const { return Nested_Linear_Solver; }
+
+  /*!
+   * \brief Get the inner linear solver used in nested Krylov schemes.
+   */
+  unsigned short GetKind_Linear_Solver_Inner(void) const { return Kind_Linear_Solver_Inner; }
+
 
   /*!
    * \brief Get the kind of preconditioner for the implicit solver.

diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
@@ -62,6 +62,12 @@ vector<double> GEMM_Profile_MaxTime;      /*!< \brief Maximum time spent for thi
 //#pragma omp threadprivate(Profile_Function_tp, Profile_Time_tp, Profile_ID_tp, Profile_Map_tp)
 
 
+/* --- Map from config string to inner solver enum --- */
+std::map<std::string, ENUM_LINEAR_SOLVER_INNER> Inner_Linear_Solver_Map = {
+  {"NONE",    INNER_SOLVER_NONE},
+  {"BCGSTAB", INNER_SOLVER_BCGSTAB}
+};
+
 CConfig::CConfig(char case_filename[MAX_STRING_SIZE], SU2_COMPONENT val_software, bool verb_high) {
 
   /*--- Set the case name to the base config file name without extension ---*/
@@ -1906,6 +1912,20 @@ void CConfig::SetConfig_Options() {
   addEnumOption("DISCADJ_LIN_PREC", Kind_DiscAdj_Linear_Prec, Linear_Solver_Prec_Map, ILU);
   /* DESCRIPTION: Linear solver for the discete adjoint systems */
 
+  /*!\brief LINEAR_SOLVER_NESTED
+   *  \n DESCRIPTION: Enable nested Krylov linear solver (e.g. FGMRES with inner solver).
+   *  \n OPTIONS: YES, NO \ingroup Config */
+  addBoolOption("LINEAR_SOLVER_NESTED", Nested_Linear_Solver, false);
+
+  /*!\brief LINEAR_SOLVER_INNER
+   *  \n DESCRIPTION: Inner linear solver used when LINEAR_SOLVER_NESTED = YES.
+   *  \n OPTIONS: see \link Inner_Linear_Solver_Map \endlink
+   *  \n DEFAULT: BCGSTAB \ingroup Config */
+  addEnumOption("LINEAR_SOLVER_INNER",
+                Kind_Linear_Solver_Inner,
+                Inner_Linear_Solver_Map,
+                INNER_SOLVER_BCGSTAB);
+
   /* DESCRIPTION: Maximum update ratio value for flow density and energy variables */
   addDoubleOption("MAX_UPDATE_FLOW", MaxUpdateFlow, 0.2);
   /* DESCRIPTION: Maximum update ratio value for SA turbulence variable nu_tilde */
@@ -7257,10 +7277,17 @@ void CConfig::SetOutput(SU2_COMPONENT val_software, unsigned short val_izone) {
             case BCGSTAB:
             case FGMRES:
             case RESTARTED_FGMRES:
-              if (Kind_Linear_Solver == BCGSTAB)
+              if (Kind_Linear_Solver == BCGSTAB){
                 cout << "BCGSTAB is used for solving the linear system." << endl;
-              else
-                cout << "FGMRES is used for solving the linear system." << endl;
+              }
+              else {
+                    if (GetNested_Linear_Solver()){
+                        cout << "Nested FGMRES (FGMRES with inner BiCGSTAB) is used for solving the linear system." << endl;
+                    }
+                    else {
+                       cout << "FGMRES is used for solving the linear system." << endl;
+                    }
+               }
               switch (Kind_Linear_Solver_Prec) {
                 case ILU: cout << "Using a ILU("<< Linear_Solver_ILU_n <<") preconditioning."<< endl; break;
                 case LINELET: cout << "Using a linelet preconditioning."<< endl; break;

diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
@@ -172,6 +172,112 @@ void CSysSolve<ScalarType>::ModGramSchmidt(bool shared_hsbg, int i, su2matrix<Sc
   w[i + 1] /= nrm;
 }
 
+/*--- Inner Krylov solver for nested FGMRES ---*/
+
+template <class ScalarType>
+void BCGSTABpre_parallel(const CSysVector<ScalarType>& a, CSysVector<ScalarType>& b_in,
+                         const CMatrixVectorProduct<ScalarType>& mat_vec, const CPreconditioner<ScalarType>& precond,
+                         const CConfig* config) {
+  // NOTE: norm0_in is currently unused. To avoid -Werror unused-variable warnings,
+  // we mark it [[maybe_unused]] so that future changes can reuse it without
+  // triggering a build failure.
+  ScalarType norm_r_in = 0.0;
+  [[maybe_unused]] ScalarType norm0_in = 0.0;
+  unsigned long ii = 0;
+
+  CSysVector<ScalarType> A_z_i;
+  CSysVector<ScalarType> r_0_in;
+  CSysVector<ScalarType> r_in;
+  CSysVector<ScalarType> p;
+  CSysVector<ScalarType> v;
+  CSysVector<ScalarType> z_i;
+
+  /*--- Allocate if not allocated yet ---*/
+  BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
+    auto nVar = a.GetNVar();
+    auto nBlk = a.GetNBlk();
+    auto nBlkDomain = a.GetNBlkDomain();
+
+    A_z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    r_0_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    r_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    p.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    v.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+  }
+  END_SU2_OMP_SAFE_GLOBAL_ACCESS
+
+  /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/
+  mat_vec(b_in, A_z_i);
+  r_in = a - A_z_i;
+
+  /*--- Only compute the residuals in full communication mode. ---*/
+  if (config->GetComm_Level() == COMM_FULL) {
+    norm_r_in = r_in.norm();
+    norm0_in = a.norm();
+    /*--- Set the norm to the initial residual value ---*/
+    /*--- if (tol_type == LinearToleranceType::RELATIVE) norm0_in = norm_r_in; ---*/
+  }
+
+  /*--- Initialization ---*/
+  ScalarType alpha = 1.0, omega = 1.0, rho = 1.0, rho_prime = 1.0;
+  p = ScalarType(0.0);
+  v = ScalarType(0.0);
+  r_0_in = r_in;
+
+  ScalarType tolerance = 1e-5;  // Tolerance for the residual norm
+
+  /*--- Loop over all search directions ---*/
+  while (ii < 1000) {  // Arbitrary high iteration limit for safety
+    /*--- Compute rho_prime ---*/
+    rho_prime = rho;
+
+    /*--- Compute rho_i ---*/
+    rho = r_in.dot(r_0_in);
+
+    /*--- Compute beta ---*/
+    ScalarType beta_in = (rho / rho_prime) * (alpha / omega);
+
+    /*--- Update p ---*/
+    p = beta_in * (p - omega * v) + r_in;
+
+    /*--- Preconditioning step ---*/
+    precond(p, z_i);
+    mat_vec(z_i, v);
+
+    /*--- Calculate step-length alpha ---*/
+    ScalarType r_0_v = r_0_in.dot(v);
+    alpha = rho / r_0_v;
+
+    /*--- Update solution and residual ---*/
+    b_in += alpha * z_i;
+    r_in -= alpha * v;
+
+    /*--- Preconditioning step ---*/
+    precond(r_in, z_i);
+    mat_vec(z_i, A_z_i);
+
+    /*--- Calculate step-length omega, avoid division by 0. ---*/
+    omega = A_z_i.squaredNorm();
+    if (omega == ScalarType(0)) break;
+    omega = A_z_i.dot(r_in) / omega;
+
+    /*--- Update solution and residual ---*/
+    b_in += omega * z_i;
+    r_in -= omega * A_z_i;
+
+    /*--- Update the residual norm ---*/
+    norm_r_in = r_in.norm();
+
+    /*--- Check if residual norm is below tolerance ---*/
+    if (norm_r_in < tolerance) {
+      break;  // Stop if the residual norm is below the desired tolerance
+    }
+
+    ii++;  // Increment iteration counter
+  }
+}
+
 template <class ScalarType>
 void CSysSolve<ScalarType>::WriteHeader(const string& solver, ScalarType restol, ScalarType resinit) const {
   cout << "\n# " << solver << " residual history\n";
@@ -359,6 +465,11 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
    * we still want to parallelize some of the expensive operations. ---*/
   const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1;
 
+  /*--- Nested inner solver (BCGSTAB) option ---*/
+  const bool useNestedBiCGSTAB =
+      config->GetNested_Linear_Solver() &&
+      (config->GetKind_Linear_Solver_Inner() == static_cast<unsigned short>(INNER_SOLVER_BCGSTAB));
+
   /*---  Check the subspace size ---*/
 
   if (m < 1) {
@@ -454,13 +565,19 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
     if (beta < tol * norm0) break;
 
     if (flexible) {
-      /*---  Precondition the CSysVector w[i] and store result in z[i] ---*/
+      if (useNestedBiCGSTAB) {
+        /*--- Nested mod: W[i] by inner BCGSTAB ---*/
+        Z[i] = ScalarType(0.0);
+        BCGSTABpre_parallel(W[i], Z[i], mat_vec, precond, config);
 
-      precond(W[i], Z[i]);
+        /*---  Add to Krylov subspace ---*/
+        mat_vec(Z[i], W[i + 1]);
 
-      /*---  Add to Krylov subspace ---*/
+      } else {
+        precond(W[i], Z[i]);
+        mat_vec(Z[i], W[i + 1]);
+      }
 
-      mat_vec(Z[i], W[i + 1]);
     } else {
       mat_vec(W[i], W[i + 1]);
     }

diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
diff --git a/TestCases/rans/naca0012/turb_NACA0012_sa_nested.cfg b/TestCases/rans/naca0012/turb_NACA0012_sa_nested.cfg
@@ -0,0 +1,117 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                              %
+% SU2 configuration file                                                       %
+% Case description: 2D NACA 0012 Airfoil Validation Case (compressible)        %
+%                   http://turbmodels.larc.nasa.gov/naca0012_val_sa.html       %
+% Author: Francisco Palacios                                                   %
+% Institution: Stanford University                                             %
+% Date: Feb 18th, 2013                                                         %
+% File Version 8.3.0 "Harrier"                                                 %
+%                                                                              %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% ------------- DIRECT, ADJOINT, AND LINEARIZED PROBLEM DEFINITION ------------%
+%
+SOLVER= RANS
+KIND_TURB_MODEL= SA
+SA_OPTIONS= NEGATIVE, EXPERIMENTAL
+MATH_PROBLEM= DIRECT
+RESTART_SOL= NO
+
+% -------------------- COMPRESSIBLE FREE-STREAM DEFINITION --------------------%
+%
+MACH_NUMBER= 0.15
+AOA= 0.0
+FREESTREAM_TEMPERATURE= 300.0
+REYNOLDS_NUMBER= 6.0E6
+REYNOLDS_LENGTH= 1.0
+
+% ---------------------- REFERENCE VALUE DEFINITION ---------------------------%
+%
+REF_ORIGIN_MOMENT_X = 0.25
+REF_ORIGIN_MOMENT_Y = 0.00
+REF_ORIGIN_MOMENT_Z = 0.00
+REF_LENGTH= 1.0
+REF_AREA= 1.0
+REF_DIMENSIONALIZATION= FREESTREAM_PRESS_EQ_ONE
+
+% -------------------- BOUNDARY CONDITION DEFINITION --------------------------%
+%
+MARKER_HEATFLUX= ( airfoil, 0.0 )
+MARKER_FAR= ( farfield )
+MARKER_PLOTTING= ( airfoil )
+MARKER_MONITORING= ( airfoil )
+
+% ------------- COMMON PARAMETERS DEFINING THE NUMERICAL METHOD ---------------%
+%
+NUM_METHOD_GRAD= WEIGHTED_LEAST_SQUARES
+NUM_METHOD_GRAD_RECON= LEAST_SQUARES
+CFL_NUMBER= 1000.0
+MAX_DELTA_TIME= 1E10
+CFL_ADAPT= NO
+CFL_ADAPT_PARAM= ( 1.5, 0.5, 1.0, 100.0 )
+ITER= 99999
+
+% ----------------------- SLOPE LIMITER DEFINITION ----------------------------%
+VENKAT_LIMITER_COEFF= 0.03
+LIMITER_ITER= 99999
+
+% ------------------------ LINEAR SOLVER DEFINITION ---------------------------%
+%
+LINEAR_SOLVER= FGMRES
+%
+% Use nested Krylov solver (YES, NO).
+% When YES: FGMRES uses an inner Krylov solver (e.g. BCGSTAB).
+LINEAR_SOLVER_NESTED= YES
+
+% Inner solver used only when LINEAR_SOLVER_NESTED = YES.
+% Options: BCGSTAB
+LINEAR_SOLVER_INNER= BCGSTAB
+
+LINEAR_SOLVER_PREC= LU_SGS
+LINEAR_SOLVER_ERROR= 1E-5
+LINEAR_SOLVER_ITER= 5
+
+% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
+%
+CONV_NUM_METHOD_FLOW= ROE
+MUSCL_FLOW= YES
+JST_SENSOR_COEFF= ( 0.5, 0.02 )
+TIME_DISCRE_FLOW= EULER_IMPLICIT
+
+% -------------------- TURBULENT NUMERICAL METHOD DEFINITION ------------------%
+%
+CONV_NUM_METHOD_TURB= SCALAR_UPWIND
+MUSCL_TURB= YES
+SLOPE_LIMITER_TURB= NONE
+TIME_DISCRE_TURB= EULER_IMPLICIT
+CFL_REDUCTION_TURB= 1.0
+
+% --------------------------- CONVERGENCE PARAMETERS --------------------------%
+CONV_RESIDUAL_MINVAL= -12
+CONV_STARTITER= 10
+CONV_CAUCHY_ELEMS= 100
+CONV_CAUCHY_EPS= 1E-6
+
+% Output the performance summary to the console at the end of SU2_CFD
+WRT_PERFORMANCE= YES
+
+
+% ------------------------- INPUT/OUTPUT INFORMATION --------------------------%
+%
+MESH_FILENAME= n0012_897-257.su2
+MESH_FORMAT= SU2
+MESH_OUT_FILENAME= mesh_out
+SOLUTION_FILENAME= solution_flow_sa
+SOLUTION_ADJ_FILENAME= solution_adj
+TABULAR_FORMAT= CSV
+CONV_FILENAME= history
+RESTART_FILENAME= restart_flow
+RESTART_ADJ_FILENAME= restart_adj
+VOLUME_FILENAME= flow
+VOLUME_ADJ_FILENAME= adjoint
+GRAD_OBJFUNC_FILENAME= of_grad
+SURFACE_FILENAME= surface_flow
+SURFACE_ADJ_FILENAME= surface_adjoint
+OUTPUT_WRT_FREQ= 10000
+SCREEN_OUTPUT=(WALL_TIME, INNER_ITER, RMS_DENSITY, RMS_NU_TILDE, LIFT, DRAG, LINSOL_ITER, LINSOL_RESIDUAL, LINSOL_ITER_TURB, LINSOL_RESIDUAL_TURB, TOTAL_HEATFLUX)