Add nested preconditioner: FGMRES with BiCGSTAB

Ezgi Orbay Akcengiz · Ezgi Orbay Akcengiz · commit d11078ea976e · 2025-09-08T12:08:03.000+03:00
diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
@@ -348,6 +348,21 @@ class CSysSolve {
   unsigned long BCGSTAB_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec,
                                   const PrecondType& precond, ScalarType tol, unsigned long m, ScalarType& residual,
                                   bool monitoring, const CConfig* config) const;
+  /*!
+   * \brief Nested solver: FGMRES whose preconditioning step is an inner BCGSTAB solve.
+   * \param[in] b - the right hand side vector
+   * \param[in,out] x - on entry the initial guess, on exit the solution
+   * \param[in] mat_vec - object that defines matrix-vector product
+   * \param[in] precond - object that defines preconditioner
+   * \param[in] tol - tolerance with which to solve the system
+   * \param[in] m - maximum size of the search subspace
+   * \param[out] residual - final normalized residual
+   * \param[in] monitoring - turn on printing residuals from solver to screen.
+   * \param[in] config - Definition of the particular problem.
+   */
+  unsigned long FGMRESandBCGSTAB2_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec,
+                                  const PrecondType& precond, ScalarType tol, unsigned long m, ScalarType& residual,
+                                  bool monitoring, const CConfig* config) const;
 
   /*!
    * \brief Generic smoother (modified Richardson iteration with preconditioner)
diff --git a/Common/include/option_structure.hpp b/Common/include/option_structure.hpp
@@ -2354,6 +2354,7 @@ enum ENUM_LINEAR_SOLVER {
   CONJUGATE_GRADIENT,   /*!< \brief Preconditionated conjugate gradient method for grid deformation. */
   FGMRES,               /*!< \brief Flexible Generalized Minimal Residual method. */
   BCGSTAB,              /*!< \brief BCGSTAB - Biconjugate Gradient Stabilized Method (main solver). */
+  FGMRESandBCGSTAB2,    /*!< \brief FGMRESandBCGSTAB (main solver). */
   RESTARTED_FGMRES,     /*!< \brief Flexible Generalized Minimal Residual method with restart. */
   SMOOTHER,             /*!< \brief Iterative smoother. */
   PASTIX_LDLT,          /*!< \brief PaStiX LDLT (complete) factorization. */
@@ -2362,6 +2363,7 @@ enum ENUM_LINEAR_SOLVER {
 static const MapType<std::string, ENUM_LINEAR_SOLVER> Linear_Solver_Map = {
   MakePair("CONJUGATE_GRADIENT", CONJUGATE_GRADIENT)
   MakePair("BCGSTAB", BCGSTAB)
+  MakePair("FGMRESandBCGSTAB2", FGMRESandBCGSTAB2)
   MakePair("FGMRES", FGMRES)
   MakePair("RESTARTED_FGMRES", RESTARTED_FGMRES)
   MakePair("SMOOTHER", SMOOTHER)
diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
@@ -7182,10 +7182,13 @@ void CConfig::SetOutput(SU2_COMPONENT val_software, unsigned short val_izone) {
             SU2_MPI::Error("Implicit time scheme is not yet implemented with Mutation++. Use EULER_EXPLICIT.", CURRENT_FUNCTION);
           switch (Kind_Linear_Solver) {
             case BCGSTAB:
+            case FGMRESandBCGSTAB2:
             case FGMRES:
             case RESTARTED_FGMRES:
               if (Kind_Linear_Solver == BCGSTAB)
                 cout << "BCGSTAB is used for solving the linear system." << endl;
+              else if (Kind_Linear_Solver == FGMRESandBCGSTAB2)
+                cout << "FGMRES and BCGSTAB is used for solving the linear system." << endl;
               else
                 cout << "FGMRES is used for solving the linear system." << endl;
               switch (Kind_Linear_Solver_Prec) {
@@ -7240,6 +7243,12 @@ void CConfig::SetOutput(SU2_COMPONENT val_software, unsigned short val_izone) {
               cout << "Convergence criteria of the linear solver: "<< Linear_Solver_Error <<"."<< endl;
               cout << "Max number of iterations: "<< Linear_Solver_Iter <<"."<< endl;
               break;
+            case FGMRESandBCGSTAB2:
+              cout << "FGMRES and BCGSTAB is used for solving the linear system." << endl;
+              cout << "Convergence criteria of the linear solver: "<< Linear_Solver_Error <<"."<< endl;
+              cout << "Max number of iterations: "<< Linear_Solver_Iter <<"."<< endl;
+              break;
+
           }
           break;
       }
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
@@ -737,6 +737,273 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
   return i;
 }
 
+/*!
+ * \brief Inner BCGSTAB solve used as the preconditioning step of the nested FGMRES solver.
+ *
+ * Approximately solves A * b_in = a with preconditioned BCGSTAB. The iteration stops when the
+ * absolute residual norm drops below a fixed tolerance, when the iteration cap is reached, or
+ * when one of the BCGSTAB denominators vanishes (breakdown).
+ * \param[in] a - right hand side of the inner system.
+ * \param[in,out] b_in - on entry the initial guess (used as-is), on exit the approximate solution.
+ * \param[in] mat_vec - object that defines matrix-vector product
+ * \param[in] precond - object that defines preconditioner
+ * \param[in] config - definition of the particular problem (not read by this routine).
+ */
+template <class ScalarType>
+void BCGSTABpre_parallel(const CSysVector<ScalarType>& a, CSysVector<ScalarType>& b_in,
+             const CMatrixVectorProduct<ScalarType>& mat_vec, const CPreconditioner<ScalarType>& precond, const CConfig* config) {
+
+    /*--- Fixed controls of the inner solve: absolute residual tolerance and iteration cap. ---*/
+    const ScalarType tolerance = 1e-5;
+    const unsigned long maxInnerIter = 1000;
+
+    CSysVector<ScalarType> A_z_i;
+    CSysVector<ScalarType> r_0_in;
+    CSysVector<ScalarType> r_in;
+    CSysVector<ScalarType> p;
+    CSysVector<ScalarType> v;
+    CSysVector<ScalarType> z_i;
+
+    /*--- Allocate the work vectors with the same layout as the right hand side. ---*/
+    /*--- NOTE(review): these are locals, yet they are sized inside the "safe global access" region;
+     *    confirm that every calling thread ends up with initialized vectors in hybrid-parallel runs. ---*/
+    BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
+        auto nVar = a.GetNVar();
+        auto nBlk = a.GetNBlk();
+        auto nBlkDomain = a.GetNBlkDomain();
+
+        A_z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+        r_0_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+        r_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+        p.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+        v.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+        z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
+    }
+    END_SU2_OMP_SAFE_GLOBAL_ACCESS
+
+    /*--- Calculate the initial residual and return if the system is already solved; continuing
+     *    with a zero residual would make the first step length 0/0 and fill b_in with NaN. ---*/
+    mat_vec(b_in, A_z_i);
+    r_in = a - A_z_i;
+    if (r_in.norm() < tolerance) return;
+
+    /*--- Initialization ---*/
+    ScalarType alpha = 1.0, omega = 1.0, rho = 1.0, rho_prime = 1.0;
+    p = ScalarType(0.0);
+    v = ScalarType(0.0);
+    r_0_in = r_in;
+
+    /*--- Loop over all search directions ---*/
+    for (unsigned long ii = 0; ii < maxInnerIter; ++ii) {
+        /*--- Compute rho_i, keeping the previous value in rho_prime ---*/
+        rho_prime = rho;
+        rho = r_in.dot(r_0_in);
+
+        /*--- Compute beta, stop on breakdown instead of dividing by 0. ---*/
+        if (rho_prime == ScalarType(0) || omega == ScalarType(0)) break;
+        ScalarType beta_in = (rho / rho_prime) * (alpha / omega);
+
+        /*--- Update p ---*/
+        p = beta_in * (p - omega * v) + r_in;
+
+        /*--- Preconditioning step ---*/
+        precond(p, z_i);
+        mat_vec(z_i, v);
+
+        /*--- Calculate step-length alpha, stop on breakdown instead of dividing by 0. ---*/
+        ScalarType r_0_v = r_0_in.dot(v);
+        if (r_0_v == ScalarType(0)) break;
+        alpha = rho / r_0_v;
+
+        /*--- Update solution and residual ---*/
+        b_in += alpha * z_i;
+        r_in -= alpha * v;
+
+        /*--- Preconditioning step ---*/
+        precond(r_in, z_i);
+        mat_vec(z_i, A_z_i);
+
+        /*--- Calculate step-length omega, avoid division by 0. ---*/
+        omega = A_z_i.squaredNorm();
+        if (omega == ScalarType(0)) break;
+        omega = A_z_i.dot(r_in) / omega;
+
+        /*--- Update solution and residual ---*/
+        b_in += omega * z_i;
+        r_in -= omega * A_z_i;
+
+        /*--- Stop once the (absolute) residual norm is below the tolerance. ---*/
+        if (r_in.norm() < tolerance) break;
+    }
+}
+
+
+
+
+template <class ScalarType>
+unsigned long CSysSolve<ScalarType>::FGMRESandBCGSTAB2_LinSolver(const CSysVector<ScalarType>& b, CSysVector<ScalarType>& x,
+                                                      const CMatrixVectorProduct<ScalarType>& mat_vec,
+                                                      const CPreconditioner<ScalarType>& precond, ScalarType tol,
+                                                      unsigned long m, ScalarType& residual, bool monitoring,
+                                                      const CConfig* config) const {
+  /*--- FGMRES in which, for a non-identity preconditioner, each preconditioning step is an inner
+   * BCGSTAB solve (BCGSTABpre_parallel). Returns the number of outer iterations performed. ---*/
+  const bool masterRank = (SU2_MPI::GetRank() == MASTER_NODE);
+  const bool flexible = !precond.IsIdentity();
+  /*--- If we call the solver outside of a parallel region, but the number of threads allows,
+   * we still want to parallelize some of the expensive operations. ---*/
+  const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1;
+
+  /*---  Check the subspace size ---*/
+
+  if (m < 1) {
+    SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
+  }
+
+  if (m > 5000) {
+    SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION);
+  }
+
+  /*--- Allocate if not allocated yet ---*/
+
+  if (W.size() <= m || (flexible && Z.size() <= m)) {
+    BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
+      W.resize(m + 1);
+      for (auto& w : W) w.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
+      if (flexible) {
+        Z.resize(m + 1);
+        for (auto& z : Z) z.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
+      }
+    }
+    END_SU2_OMP_SAFE_GLOBAL_ACCESS
+  }
+
+  /*--- Define various arrays. In parallel, each thread of each rank has and works
+   on its own copy, since calculations on these arrays are based on dot products
+   (reduced across all threads and ranks) all threads do the same computations. ---*/
+
+  su2vector<ScalarType> g(m + 1), sn(m + 1), cs(m + 1), y(m);
+  g = ScalarType(0);
+  sn = ScalarType(0);
+  cs = ScalarType(0);
+  y = ScalarType(0);
+  su2matrix<ScalarType> H(m + 1, m);
+  H = ScalarType(0);
+
+  /*--- Calculate the norm of the rhs vector. ---*/
+
+  ScalarType norm0 = b.norm();
+
+  /*--- Calculate the initial residual (actually the negative residual) and compute its norm. ---*/
+
+  if (!xIsZero) {
+    mat_vec(x, W[0]);
+    W[0] -= b;
+  } else {
+    W[0] = -b;
+  }
+
+  ScalarType beta = W[0].norm();
+
+  if (tol_type == LinearToleranceType::RELATIVE) norm0 = beta;
+  /*--- Early exit if the initial residual already meets the tolerance; residual is set to beta. ---*/
+  if ((beta < tol * norm0) || (beta < eps)) {
+    if (masterRank) {
+      SU2_OMP_MASTER
+      cout << "CSysSolve::FGMRES(): system solved by initial guess." << endl;
+      END_SU2_OMP_MASTER
+    }
+    residual = beta;
+    return 0;
+  }
+  /*--- Normalize the first basis vector; dividing by -beta undoes the sign of the negative residual. ---*/
+  W[0] /= -beta;
+
+  g[0] = beta;
+
+  unsigned long i = 0;
+  if ((monitoring) && (masterRank)) {
+    SU2_OMP_MASTER {
+      WriteHeader("FGMRES", tol, beta);
+      WriteHistory(i, beta / norm0);
+    }
+    END_SU2_OMP_MASTER
+  }
+  /*--- Loop over all search directions, at most m of them. ---*/
+  for (i = 0; i < m; i++) {
+    if (beta < tol * norm0) break;
+
+    if (flexible) {
+      /*--- Use BCGSTAB as inner iteration; Z[i] is passed as-is and acts as its initial guess. ---*/
+     BCGSTABpre_parallel(W[i], Z[i], mat_vec, precond, config); 
+      /*--- NOTE(review): Z[i] is not reset before the call above - confirm that this is intended. ---*/
+      mat_vec(Z[i], W[i + 1]);
+    } else {
+      mat_vec(W[i], W[i + 1]);
+    }
+    /*--- ModGramSchmidt step, run in its own parallel region when called outside of one. ---*/
+    if (nestedParallel) {
+      SU2_OMP_PARALLEL
+      ModGramSchmidt(true, i, H, W);
+      END_SU2_OMP_PARALLEL
+    } else {
+      ModGramSchmidt(false, i, H, W);
+    }
+    /*--- Givens rotations on column i of H and on g; |g[i+1]| becomes the new residual estimate. ---*/
+    for (unsigned long k = 0; k < i; k++) ApplyGivens(sn[k], cs[k], H[k][i], H[k + 1][i]);
+    GenerateGivens(H[i][i], H[i + 1][i], sn[i], cs[i]);
+    ApplyGivens(sn[i], cs[i], g[i], g[i + 1]);
+
+    beta = fabs(g[i + 1]);
+
+    if ((((monitoring) && (masterRank)) && ((i + 1) % monitorFreq == 0))) {
+      SU2_OMP_MASTER
+      WriteHistory(i + 1, beta / norm0);
+      END_SU2_OMP_MASTER
+    }
+  }
+  /*--- Get the coefficients y (SolveReduced) and add the combination of basis vectors to x. ---*/
+  SolveReduced(i, H, g, y);
+
+  const auto& basis = flexible ? Z : W;
+
+  if (nestedParallel) {
+    SU2_OMP_PARALLEL
+    for (unsigned long k = 0; k < i; k++) x += y[k] * basis[k];
+    END_SU2_OMP_PARALLEL
+  } else {
+    for (unsigned long k = 0; k < i; k++) x += y[k] * basis[k];
+  }
+  /*--- Final reporting and, if requested, comparison of beta with the recomputed residual norm. ---*/
+  if ((monitoring) && (config->GetComm_Level() == COMM_FULL)) {
+    if (masterRank) {
+      SU2_OMP_MASTER
+      WriteFinalResidual("FGMRES", i, beta / norm0);
+      END_SU2_OMP_MASTER
+    }
+
+    if (recomputeRes) {
+      mat_vec(x, W[0]);
+      W[0] -= b;
+      ScalarType res = W[0].norm();
+
+      if (fabs(res - beta) > tol * 10) {
+        if (masterRank) {
+          SU2_OMP_MASTER
+          WriteWarning(beta, res, tol);
+          END_SU2_OMP_MASTER
+        }
+      }
+    }
+  }
+  /*--- Report the residual estimate normalized by norm0 and the number of iterations done. ---*/
+  residual = beta / norm0;
+  return i;
+}
+
+
+
+
 template <class ScalarType>
 unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarType>& b, CSysVector<ScalarType>& x,
                                                         const CMatrixVectorProduct<ScalarType>& mat_vec,
@@ -973,6 +1240,10 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType>& Jacobian, con
         IterLinSol = RFGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
                                        ScreenOutput, config);
         break;
+      case FGMRESandBCGSTAB2:
+        IterLinSol = FGMRESandBCGSTAB2_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
+                                      ScreenOutput, config);
+        break;
       case CONJUGATE_GRADIENT:
         IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
                                   ScreenOutput, config);
@@ -1132,6 +1403,10 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType>& Jacobian, c
       IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
                                      ScreenOutput, config);
       break;
+    case FGMRESandBCGSTAB2:
+      IterLinSol = FGMRESandBCGSTAB2_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
+                                      ScreenOutput, config);
+      break;
     case CONJUGATE_GRADIENT:
       IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
                                 ScreenOutput, config);