Skip to content

Commit d11078e

Browse files
author
Ezgi Orbay Akcengiz
committed
Add nested preconditioner: FGMRES with BiCGSTAB
1 parent 8f0e9b9 commit d11078e

File tree

4 files changed

+301
-0
lines changed

4 files changed

+301
-0
lines changed

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,21 @@ class CSysSolve {
348348
unsigned long BCGSTAB_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec,
349349
const PrecondType& precond, ScalarType tol, unsigned long m, ScalarType& residual,
350350
bool monitoring, const CConfig* config) const;
351+
/*!
352+
* \brief The nested FGMRES and BCGSTAB solver
353+
* \param[in] b - the right hand size vector
354+
* \param[in,out] x - on entry the intial guess, on exit the solution
355+
* \param[in] mat_vec - object that defines matrix-vector product
356+
* \param[in] precond - object that defines preconditioner
357+
* \param[in] tol - tolerance with which to solve the system
358+
* \param[in] m - maximum size of the search subspace
359+
* \param[out] residual - final normalized residual
360+
* \param[in] monitoring - turn on priting residuals from solver to screen.
361+
* \param[in] config - Definition of the particular problem.
362+
*/
363+
unsigned long FGMRESandBCGSTAB2_LinSolver(const VectorType& b, VectorType& x, const ProductType& mat_vec,
364+
const PrecondType& precond, ScalarType tol, unsigned long m, ScalarType& residual,
365+
bool monitoring, const CConfig* config) const;
351366

352367
/*!
353368
* \brief Generic smoother (modified Richardson iteration with preconditioner)

Common/include/option_structure.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,6 +2354,7 @@ enum ENUM_LINEAR_SOLVER {
23542354
CONJUGATE_GRADIENT, /*!< \brief Preconditionated conjugate gradient method for grid deformation. */
23552355
FGMRES, /*!< \brief Flexible Generalized Minimal Residual method. */
23562356
BCGSTAB, /*!< \brief BCGSTAB - Biconjugate Gradient Stabilized Method (main solver). */
2357+
FGMRESandBCGSTAB2, /*!< \brief FGMRESandBCGSTAB (main solver). */
23572358
RESTARTED_FGMRES, /*!< \brief Flexible Generalized Minimal Residual method with restart. */
23582359
SMOOTHER, /*!< \brief Iterative smoother. */
23592360
PASTIX_LDLT, /*!< \brief PaStiX LDLT (complete) factorization. */
@@ -2362,6 +2363,7 @@ enum ENUM_LINEAR_SOLVER {
23622363
static const MapType<std::string, ENUM_LINEAR_SOLVER> Linear_Solver_Map = {
23632364
MakePair("CONJUGATE_GRADIENT", CONJUGATE_GRADIENT)
23642365
MakePair("BCGSTAB", BCGSTAB)
2366+
MakePair("FGMRESandBCGSTAB2", FGMRESandBCGSTAB2)
23652367
MakePair("FGMRES", FGMRES)
23662368
MakePair("RESTARTED_FGMRES", RESTARTED_FGMRES)
23672369
MakePair("SMOOTHER", SMOOTHER)

Common/src/CConfig.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7182,10 +7182,13 @@ void CConfig::SetOutput(SU2_COMPONENT val_software, unsigned short val_izone) {
71827182
SU2_MPI::Error("Implicit time scheme is not yet implemented with Mutation++. Use EULER_EXPLICIT.", CURRENT_FUNCTION);
71837183
switch (Kind_Linear_Solver) {
71847184
case BCGSTAB:
7185+
case FGMRESandBCGSTAB2:
71857186
case FGMRES:
71867187
case RESTARTED_FGMRES:
71877188
if (Kind_Linear_Solver == BCGSTAB)
71887189
cout << "BCGSTAB is used for solving the linear system." << endl;
7190+
else if (Kind_Linear_Solver == FGMRESandBCGSTAB2)
7191+
cout << "FGMRES and BCGSTAB is used for solving the linear system." << endl
71897192
else
71907193
cout << "FGMRES is used for solving the linear system." << endl;
71917194
switch (Kind_Linear_Solver_Prec) {
@@ -7240,6 +7243,12 @@ void CConfig::SetOutput(SU2_COMPONENT val_software, unsigned short val_izone) {
72407243
cout << "Convergence criteria of the linear solver: "<< Linear_Solver_Error <<"."<< endl;
72417244
cout << "Max number of iterations: "<< Linear_Solver_Iter <<"."<< endl;
72427245
break;
7246+
case FGMRESandBCGSTAB2:
7247+
cout << "FGMRES and BCGSTAB is used for solving the linear system." << endl;
7248+
cout << "Convergence criteria of the linear solver: "<< Linear_Solver_Error <<"."<< endl;
7249+
cout << "Max number of iterations: "<< Linear_Solver_Iter <<"."<< endl;
7250+
break;
7251+
72437252
}
72447253
break;
72457254
}

Common/src/linear_algebra/CSysSolve.cpp

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,273 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
737737
return i;
738738
}
739739

740+
template <class ScalarType>
741+
void BCGSTABpre_parallel(const CSysVector<ScalarType>& a, CSysVector<ScalarType>& b_in,
742+
const CMatrixVectorProduct<ScalarType>& mat_vec, const CPreconditioner<ScalarType>& precond, const CConfig* config) {
743+
744+
ScalarType norm_r_in = 0.0, norm0_in = 0.0;
745+
unsigned long ii = 0;
746+
747+
CSysVector<ScalarType> A_z_i;
748+
CSysVector<ScalarType> r_0_in;
749+
CSysVector<ScalarType> r_in;
750+
CSysVector<ScalarType> p;
751+
CSysVector<ScalarType> v;
752+
CSysVector<ScalarType> z_i;
753+
754+
/*--- Allocate if not allocated yet ---*/
755+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
756+
auto nVar = a.GetNVar();
757+
auto nBlk = a.GetNBlk();
758+
auto nBlkDomain = a.GetNBlkDomain();
759+
760+
A_z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
761+
r_0_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
762+
r_in.Initialize(nBlk, nBlkDomain, nVar, nullptr);
763+
p.Initialize(nBlk, nBlkDomain, nVar, nullptr);
764+
v.Initialize(nBlk, nBlkDomain, nVar, nullptr);
765+
z_i.Initialize(nBlk, nBlkDomain, nVar, nullptr);
766+
}
767+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
768+
769+
/*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/
770+
mat_vec(b_in, A_z_i);
771+
r_in = a - A_z_i;
772+
773+
/*--- Only compute the residuals in full communication mode. ---*/
774+
if (config->GetComm_Level() == COMM_FULL) {
775+
norm_r_in = r_in.norm();
776+
norm0_in = a.norm();
777+
/*--- Set the norm to the initial residual value ---*/
778+
/*--- if (tol_type == LinearToleranceType::RELATIVE) norm0_in = norm_r_in; ---*/
779+
}
780+
781+
/*--- Initialization ---*/
782+
ScalarType alpha = 1.0, omega = 1.0, rho = 1.0, rho_prime = 1.0;
783+
p = ScalarType(0.0);
784+
v = ScalarType(0.0);
785+
r_0_in = r_in;
786+
787+
ScalarType tolerance = 1e-5; // Tolerance for the residual norm
788+
789+
/*--- Loop over all search directions ---*/
790+
while (ii < 1000) { // Arbitrary high iteration limit for safety
791+
/*--- Compute rho_prime ---*/
792+
rho_prime = rho;
793+
794+
/*--- Compute rho_i ---*/
795+
rho = r_in.dot(r_0_in);
796+
797+
/*--- Compute beta ---*/
798+
ScalarType beta_in = (rho / rho_prime) * (alpha / omega);
799+
800+
/*--- Update p ---*/
801+
p = beta_in * (p - omega * v) + r_in;
802+
803+
/*--- Preconditioning step ---*/
804+
precond(p, z_i);
805+
mat_vec(z_i, v);
806+
807+
/*--- Calculate step-length alpha ---*/
808+
ScalarType r_0_v = r_0_in.dot(v);
809+
alpha = rho / r_0_v;
810+
811+
/*--- Update solution and residual ---*/
812+
b_in += alpha * z_i;
813+
r_in -= alpha * v;
814+
815+
/*--- Preconditioning step ---*/
816+
precond(r_in, z_i);
817+
mat_vec(z_i, A_z_i);
818+
819+
/*--- Calculate step-length omega, avoid division by 0. ---*/
820+
omega = A_z_i.squaredNorm();
821+
if (omega == ScalarType(0)) break;
822+
omega = A_z_i.dot(r_in) / omega;
823+
824+
/*--- Update solution and residual ---*/
825+
b_in += omega * z_i;
826+
r_in -= omega * A_z_i;
827+
828+
/*--- Update the residual norm ---*/
829+
norm_r_in = r_in.norm();
830+
831+
/*--- Check if residual norm is below tolerance ---*/
832+
if (norm_r_in < tolerance) {
833+
break; // Stop if the residual norm is below the desired tolerance
834+
}
835+
836+
ii++; // Increment iteration counter
837+
}
838+
}
839+
840+
841+
842+
843+
template <class ScalarType>
844+
unsigned long CSysSolve<ScalarType>::FGMRESandBCGSTAB2_LinSolver(const CSysVector<ScalarType>& b, CSysVector<ScalarType>& x,
845+
const CMatrixVectorProduct<ScalarType>& mat_vec,
846+
const CPreconditioner<ScalarType>& precond, ScalarType tol,
847+
unsigned long m, ScalarType& residual, bool monitoring,
848+
const CConfig* config) const {
849+
850+
851+
const bool masterRank = (SU2_MPI::GetRank() == MASTER_NODE);
852+
const bool flexible = !precond.IsIdentity();
853+
/*--- If we call the solver outside of a parallel region, but the number of threads allows,
854+
* we still want to parallelize some of the expensive operations. ---*/
855+
const bool nestedParallel = !omp_in_parallel() && omp_get_max_threads() > 1;
856+
857+
/*--- Check the subspace size ---*/
858+
859+
if (m < 1) {
860+
SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
861+
}
862+
863+
if (m > 5000) {
864+
SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION);
865+
}
866+
867+
/*--- Allocate if not allocated yet ---*/
868+
869+
if (W.size() <= m || (flexible && Z.size() <= m)) {
870+
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
871+
W.resize(m + 1);
872+
for (auto& w : W) w.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
873+
if (flexible) {
874+
Z.resize(m + 1);
875+
for (auto& z : Z) z.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
876+
}
877+
}
878+
END_SU2_OMP_SAFE_GLOBAL_ACCESS
879+
}
880+
881+
/*--- Define various arrays. In parallel, each thread of each rank has and works
882+
on its own thread, since calculations on these arrays are based on dot products
883+
(reduced across all threads and ranks) all threads do the same computations. ---*/
884+
885+
su2vector<ScalarType> g(m + 1), sn(m + 1), cs(m + 1), y(m);
886+
g = ScalarType(0);
887+
sn = ScalarType(0);
888+
cs = ScalarType(0);
889+
y = ScalarType(0);
890+
su2matrix<ScalarType> H(m + 1, m);
891+
H = ScalarType(0);
892+
893+
/*--- Calculate the norm of the rhs vector. ---*/
894+
895+
ScalarType norm0 = b.norm();
896+
897+
/*--- Calculate the initial residual (actually the negative residual) and compute its norm. ---*/
898+
899+
if (!xIsZero) {
900+
mat_vec(x, W[0]);
901+
W[0] -= b;
902+
} else {
903+
W[0] = -b;
904+
}
905+
906+
ScalarType beta = W[0].norm();
907+
908+
if (tol_type == LinearToleranceType::RELATIVE) norm0 = beta;
909+
910+
if ((beta < tol * norm0) || (beta < eps)) {
911+
if (masterRank) {
912+
SU2_OMP_MASTER
913+
cout << "CSysSolve::FGMRES(): system solved by initial guess." << endl;
914+
END_SU2_OMP_MASTER
915+
}
916+
residual = beta;
917+
return 0;
918+
}
919+
920+
W[0] /= -beta;
921+
922+
g[0] = beta;
923+
924+
unsigned long i = 0;
925+
if ((monitoring) && (masterRank)) {
926+
SU2_OMP_MASTER {
927+
WriteHeader("FGMRES", tol, beta);
928+
WriteHistory(i, beta / norm0);
929+
}
930+
END_SU2_OMP_MASTER
931+
}
932+
933+
for (i = 0; i < m; i++) {
934+
if (beta < tol * norm0) break;
935+
936+
if (flexible) {
937+
/*--- Use BCGSTAB as inner iteration ---*/
938+
BCGSTABpre_parallel(W[i], Z[i], mat_vec, precond, config);
939+
940+
mat_vec(Z[i], W[i + 1]);
941+
} else {
942+
mat_vec(W[i], W[i + 1]);
943+
}
944+
945+
if (nestedParallel) {
946+
SU2_OMP_PARALLEL
947+
ModGramSchmidt(true, i, H, W);
948+
END_SU2_OMP_PARALLEL
949+
} else {
950+
ModGramSchmidt(false, i, H, W);
951+
}
952+
953+
for (unsigned long k = 0; k < i; k++) ApplyGivens(sn[k], cs[k], H[k][i], H[k + 1][i]);
954+
GenerateGivens(H[i][i], H[i + 1][i], sn[i], cs[i]);
955+
ApplyGivens(sn[i], cs[i], g[i], g[i + 1]);
956+
957+
beta = fabs(g[i + 1]);
958+
959+
if ((((monitoring) && (masterRank)) && ((i + 1) % monitorFreq == 0))) {
960+
SU2_OMP_MASTER
961+
WriteHistory(i + 1, beta / norm0);
962+
END_SU2_OMP_MASTER
963+
}
964+
}
965+
966+
SolveReduced(i, H, g, y);
967+
968+
const auto& basis = flexible ? Z : W;
969+
970+
if (nestedParallel) {
971+
SU2_OMP_PARALLEL
972+
for (unsigned long k = 0; k < i; k++) x += y[k] * basis[k];
973+
END_SU2_OMP_PARALLEL
974+
} else {
975+
for (unsigned long k = 0; k < i; k++) x += y[k] * basis[k];
976+
}
977+
978+
if ((monitoring) && (config->GetComm_Level() == COMM_FULL)) {
979+
if (masterRank) {
980+
SU2_OMP_MASTER
981+
WriteFinalResidual("FGMRES", i, beta / norm0);
982+
END_SU2_OMP_MASTER
983+
}
984+
985+
if (recomputeRes) {
986+
mat_vec(x, W[0]);
987+
W[0] -= b;
988+
ScalarType res = W[0].norm();
989+
990+
if (fabs(res - beta) > tol * 10) {
991+
if (masterRank) {
992+
SU2_OMP_MASTER
993+
WriteWarning(beta, res, tol);
994+
END_SU2_OMP_MASTER
995+
}
996+
}
997+
}
998+
}
999+
1000+
residual = beta / norm0;
1001+
return i;
1002+
}
1003+
1004+
1005+
1006+
7401007
template <class ScalarType>
7411008
unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarType>& b, CSysVector<ScalarType>& x,
7421009
const CMatrixVectorProduct<ScalarType>& mat_vec,
@@ -973,6 +1240,10 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType>& Jacobian, con
9731240
IterLinSol = RFGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
9741241
ScreenOutput, config);
9751242
break;
1243+
case FGMRESandBCGSTAB2:
1244+
IterLinSol = FGMRESandBCGSTAB2_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
1245+
ScreenOutput, config);
1246+
break;
9761247
case CONJUGATE_GRADIENT:
9771248
IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
9781249
ScreenOutput, config);
@@ -1132,6 +1403,10 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType>& Jacobian, c
11321403
IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
11331404
ScreenOutput, config);
11341405
break;
1406+
case FGMRESandBCGSTAB2:
1407+
IterLinSol = FGMRESandBCGSTAB2_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
1408+
ScreenOutput, config);
1409+
break;
11351410
case CONJUGATE_GRADIENT:
11361411
IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual,
11371412
ScreenOutput, config);

0 commit comments

Comments
 (0)